1 #ifndef _C4_YML_PARSE_ENGINE_DEF_HPP_
2 #define _C4_YML_PARSE_ENGINE_DEF_HPP_
5 #include "c4/error.hpp"
11 #include "c4/yml/detail/dbgprint.hpp"
14 #include <c4/dump.hpp>
15 #include "c4/yml/detail/print.hpp"
17 do { RYML_DEBUG_BREAK(); this->_err(RYML_LOC_HERE(), __VA_ARGS__); } while(0)
20 this->_err(RYML_LOC_HERE(), __VA_ARGS__)
// Helper macros for code that depends on tab tokens. When
// RYML_WITH_TAB_TOKENS is defined: the WITH macro expands to its arguments,
// the WITHOUT macro expands to nothing, and the selector picks `with`.
24 #if defined(RYML_WITH_TAB_TOKENS)
25 #define _RYML_WITH_TAB_TOKENS(...) __VA_ARGS__
26 #define _RYML_WITHOUT_TAB_TOKENS(...)
27 #define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) with
// Mirror-image definitions: WITH expands to nothing, WITHOUT expands to its
// arguments, and the selector picks `without`.
// NOTE(review): the #else/#endif separating the two sets is not visible in
// this excerpt.
29 #define _RYML_WITH_TAB_TOKENS(...)
30 #define _RYML_WITHOUT_TAB_TOKENS(...) __VA_ARGS__
31 #define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) without
// Debug helper macro: emits a separator line and then the line number and
// byte offset of the current parser state.
// NOTE(review): only part of the macro body is visible in this excerpt.
36 #define _c4dbgnextline() \
38 _c4dbgq("\n-----------"); \
39 _c4dbgt("handling line={}, offset={}B", \
40 m_evt_handler->m_curr->pos.line, \
41 m_evt_handler->m_curr->pos.offset); \
46 # pragma warning(push)
47 # pragma warning(disable: 4296)
48 # pragma warning(disable: 4702)
49 #elif defined(__clang__)
50 # pragma clang diagnostic push
51 # pragma clang diagnostic ignored "-Wtype-limits"
52 # pragma clang diagnostic ignored "-Wformat-nonliteral"
53 # pragma clang diagnostic ignored "-Wold-style-cast"
54 #elif defined(__GNUC__)
55 # pragma GCC diagnostic push
56 # pragma GCC diagnostic ignored "-Wtype-limits"
57 # pragma GCC diagnostic ignored "-Wformat-nonliteral"
58 # pragma GCC diagnostic ignored "-Wold-style-cast"
60 # pragma GCC diagnostic ignored "-Wduplicated-branches"
// Block-token test on `s`.
// Preconditions (asserted): `s` is non-empty and its first character is
// one of '-', ':' or '?'.
// NOTE(review): the return statement is not visible in this excerpt.
71 C4_HOT C4_ALWAYS_INLINE
bool _is_blck_token(csubstr s) noexcept
73 _RYML_ASSERT_BASIC(s.len > 0);
74 _RYML_ASSERT_BASIC(s.str[0] ==
'-' || s.str[0] ==
':' || s.str[0] ==
'?');
// Tests whether `s` starts with the three-dash marker "---".
// Preconditions (asserted): `s` begins with '-' and does not end with
// "\n" or "\r".
// NOTE(review): the return expression continues on lines that are not
// visible in this excerpt.
78 inline bool _is_doc_begin_token(csubstr s)
80 _RYML_ASSERT_BASIC(s.begins_with(
'-'));
81 _RYML_ASSERT_BASIC(!s.ends_with(
"\n"));
82 _RYML_ASSERT_BASIC(!s.ends_with(
"\r"));
// at least three characters, with the 2nd and 3rd also being '-'
83 return (s.len >= 3 && s.str[1] ==
'-' && s.str[2] ==
'-')
// Tests whether `s` starts with the three-dot marker "...".
// Preconditions (asserted): `s` begins with '.' and does not end with
// "\n" or "\r".
// NOTE(review): the return expression continues on lines that are not
// visible in this excerpt.
87 inline bool _is_doc_end_token(csubstr s)
89 _RYML_ASSERT_BASIC(s.begins_with(
'.'));
90 _RYML_ASSERT_BASIC(!s.ends_with(
"\n"));
91 _RYML_ASSERT_BASIC(!s.ends_with(
"\r"));
// at least three characters, with the 2nd and 3rd also being '.'
92 return (s.len >= 3 && s.str[1] ==
'.' && s.str[2] ==
'.')
// Tests `s` for a document marker. The two visible return expressions
// check the 2nd and 3rd characters for "--" and ".." respectively.
// NOTE(review): most of the body (including whatever selects between the
// two returns and any length checks) is not visible in this excerpt.
96 inline bool _is_doc_token(csubstr s) noexcept
124 return (s.str[1] ==
'-' && s.str[2] ==
'-')
128 return (s.str[1] ==
'.' && s.str[2] ==
'.')
// Checks whether `s` starts with one of the JSON literals "false", "true"
// or "null". Precondition (asserted): `s` is non-empty.
// NOTE(review): the values returned from each branch are not visible here;
// callers in this file pass the result to s.first(), so presumably it is
// the length of the matched literal (0 when none) -- confirm.
135 inline size_t _is_special_json_scalar(csubstr s)
137 _RYML_ASSERT_BASIC(s.len);
141 if(s.len >= 5 && s.begins_with(
"false"))
145 if(s.len >= 4 && s.begins_with(
"true"))
149 if(s.len >= 4 && s.begins_with(
"null"))
159 C4_ALWAYS_INLINE
size_t _extend_from_combined_newline(
char nl,
char following)
161 return (nl ==
'\n' && following ==
'\r') || (nl ==
'\r' && following ==
'\n');
// Advances `rem` past its first line break.
// NOTE(review): the early returns and the final return are not visible in
// this excerpt.
165 inline substr from_next_line(substr rem)
// position of the first '\r' or '\n'
167 size_t nlpos = rem.first_of(
"\r\n");
170 const char nl = rem[nlpos];
// keep only what comes after the newline character
171 rem = rem.right_of(nlpos);
// a "\r\n" or "\n\r" pair counts as a single line break
174 if(_extend_from_combined_newline(nl, rem.front()))
// Scans `r` forward from position *i (which must be at a '\n', asserted),
// advancing *i in place, and returns the counter `numnl_following`.
// The loop distinguishes '\n' from the whitespace characters ' ', '\t'
// and '\r'.
// NOTE(review): the statements executed in each branch (and the loop exit)
// are not visible in this excerpt.
182 inline size_t _count_following_newlines(csubstr r,
size_t *C4_RESTRICT i)
184 _RYML_ASSERT_BASIC(r[*i] ==
'\n');
185 size_t numnl_following = 0;
187 for( ; *i < r.len; ++(*i))
189 if(r.str[*i] ==
'\n')
192 else if(r.str[*i] ==
' ' || r.str[*i] ==
'\t' || r.str[*i] ==
'\r')
197 return numnl_following;
// Indentation-aware overload: scans `r` forward from position *i (which
// must be at a '\n', asserted), advancing *i in place, and returns the
// counter `numnl_following`.
// NOTE(review): the branch bodies and the condition selecting between the
// two top-level loops are not visible in this excerpt.
202 inline size_t _count_following_newlines(csubstr r,
size_t *C4_RESTRICT i,
size_t indentation)
204 _RYML_ASSERT_BASIC(r[*i] ==
'\n');
205 size_t numnl_following = 0;
// first loop: same character classes as the overload without indentation
209 for( ; *i < r.len; ++(*i))
211 if(r.str[*i] ==
'\n')
214 else if(r.str[*i] ==
' ' || r.str[*i] ==
'\t' || r.str[*i] ==
'\r')
// second loop: after a newline, an inner loop walks over ' ' and '\r'
// characters; `stop` marks `indentation` positions past the current one
222 for( ; *i < r.len; ++(*i))
224 if(r.str[*i] ==
'\n')
228 size_t stop = *i + indentation;
229 for( ; *i < r.len; ++(*i))
231 if(r.str[*i] !=
' ' && r.str[*i] !=
'\r')
// the first other character must be found before reaching `stop`
233 _RYML_ASSERT_BASIC(*i < stop);
238 else if(r.str[*i] ==
' ' || r.str[*i] ==
'\t' || r.str[*i] ==
'\r')
244 return numnl_following;
// NOTE(review): the declaration following this template header is not
// visible in this excerpt.
254 template<
class EventHandler>
// Member-initializer fragment of a constructor receiving `evt_handler`:
// stores the handler, starts with empty pending anchors, a cleared
// question-mark flag and empty newline-offset bookkeeping.
// NOTE(review): the signature and some initializers are not visible here.
261 template<
class EventHandler>
266 , m_evt_handler(evt_handler)
267 , m_pending_anchors()
269 , m_was_inside_qmrk(false)
273 , m_newline_offsets()
274 , m_newline_offsets_size(0)
275 , m_newline_offsets_capacity(0)
276 , m_newline_offsets_buf()
// the handler argument is checked in the constructor body
278 _RYML_CHECK_BASIC(evt_handler);
// Member-initializer list of a constructor taking another engine `that`.
// Every visible member is taken directly from `that`, including the
// newline-offsets pointer, size, capacity and buffer (no new allocation is
// made here); m_was_inside_qmrk is set to false rather than copied.
// NOTE(review): the signature is not visible; presumably this is the move
// constructor -- confirm, and check what happens to `that` afterwards.
281 template<
class EventHandler>
283 : m_options(that.m_options)
284 , m_file(that.m_file)
286 , m_evt_handler(that.m_evt_handler)
287 , m_pending_anchors(that.m_pending_anchors)
288 , m_pending_tags(that.m_pending_tags)
289 , m_was_inside_qmrk(
false)
293 , m_newline_offsets(that.m_newline_offsets)
294 , m_newline_offsets_size(that.m_newline_offsets_size)
295 , m_newline_offsets_capacity(that.m_newline_offsets_capacity)
296 , m_newline_offsets_buf(that.m_newline_offsets_buf)
// Member-initializer list and body of a constructor taking another engine
// `that`. Options, file, handler and pending anchors/tags are copied; the
// newline-offset members start empty and, when `that` has capacity, a
// separate array of the same capacity is obtained and filled by memcpy.
// NOTE(review): the signature is not visible; presumably this is the copy
// constructor -- confirm.
// NOTE(review): m_newline_offsets_buf is left value-initialized here while
// the deep-copying assignment in this file copies it -- confirm intended.
301 template<
class EventHandler>
303 : m_options(that.m_options)
304 , m_file(that.m_file)
306 , m_evt_handler(that.m_evt_handler)
307 , m_pending_anchors(that.m_pending_anchors)
308 , m_pending_tags(that.m_pending_tags)
309 , m_was_inside_qmrk(false)
313 , m_newline_offsets()
314 , m_newline_offsets_size()
315 , m_newline_offsets_capacity()
316 , m_newline_offsets_buf()
318 if(that.m_newline_offsets_capacity)
320 _resize_locations(that.m_newline_offsets_capacity);
// the resize is expected to yield exactly the source capacity
321 _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity == that.m_newline_offsets_capacity);
// only the used entries (size, not capacity) are copied
322 memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size *
sizeof(
size_t));
323 m_newline_offsets_size = that.m_newline_offsets_size;
// Assignment body that takes every member directly from `that`, including
// the newline-offsets pointer, size, capacity and buffer (no allocation or
// memcpy is performed in the visible lines).
// NOTE(review): the signature and any surrounding guard/cleanup/return are
// not visible; presumably this is the move assignment -- confirm.
327 template<
class EventHandler>
331 m_options = (that.m_options);
332 m_file = (that.m_file);
333 m_buf = (that.m_buf);
334 m_evt_handler = that.m_evt_handler;
335 m_pending_anchors = that.m_pending_anchors;
336 m_pending_tags = that.m_pending_tags;
337 m_was_inside_qmrk = that.m_was_inside_qmrk;
338 m_doc_empty = that.m_doc_empty;
339 m_prev_colon = that.m_prev_colon;
340 m_encoding = that.m_encoding;
341 m_newline_offsets = (that.m_newline_offsets);
342 m_newline_offsets_size = (that.m_newline_offsets_size);
343 m_newline_offsets_capacity = (that.m_newline_offsets_capacity);
344 m_newline_offsets_buf = (that.m_newline_offsets_buf);
// Assignment body that copies the scalar members from `that` and then
// deep-copies the newline offsets: the local array is grown only when the
// source capacity is larger, and the used entries are copied with memcpy.
// NOTE(review): the signature and any surrounding guard/return are not
// visible; presumably this is the copy assignment -- confirm.
349 template<
class EventHandler>
355 m_options = (that.m_options);
356 m_file = (that.m_file);
357 m_buf = (that.m_buf);
358 m_evt_handler = that.m_evt_handler;
359 m_pending_anchors = that.m_pending_anchors;
360 m_pending_tags = that.m_pending_tags;
361 m_was_inside_qmrk = that.m_was_inside_qmrk;
362 m_doc_empty = that.m_doc_empty;
363 m_prev_colon = that.m_prev_colon;
364 m_encoding = that.m_encoding;
// grow only; an already-large-enough array is reused
365 if(that.m_newline_offsets_capacity > m_newline_offsets_capacity)
366 _resize_locations(that.m_newline_offsets_capacity);
367 _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_capacity);
368 _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_size);
// only the used entries (size, not capacity) are copied
369 memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size *
sizeof(
size_t));
370 m_newline_offsets_size = that.m_newline_offsets_size;
371 m_newline_offsets_buf = that.m_newline_offsets_buf;
// Resets members to their empty values: pending anchors, the question-mark
// flag and all newline-offset bookkeeping. The offsets pointer is only
// overwritten in the visible lines; nothing is released here.
// NOTE(review): the function signature and some statements are not
// visible in this excerpt.
376 template<
class EventHandler>
383 m_pending_anchors = {};
385 m_was_inside_qmrk =
false;
389 m_newline_offsets = {};
390 m_newline_offsets_size = {};
391 m_newline_offsets_capacity = {};
392 m_newline_offsets_buf = {};
// Releases the newline-offsets array, when there is one, through the
// callbacks stored in the event handler's stack, and then zeroes the
// related bookkeeping members.
395 template<
class EventHandler>
396 void ParseEngine<EventHandler>::_free()
398 if(m_newline_offsets)
// the array was sized by capacity, so capacity is what is handed back
400 _RYML_CB_FREE(m_evt_handler->m_stack.m_callbacks, m_newline_offsets,
size_t, m_newline_offsets_capacity);
401 m_newline_offsets =
nullptr;
402 m_newline_offsets_size = 0u;
403 m_newline_offsets_capacity = 0u;
404 m_newline_offsets_buf =
nullptr;
// Resets parsing state: clears pending anchors and the question-mark flag
// and, when the options request locations, prepares the location data.
// NOTE(review): several statements of this function are not visible in
// this excerpt.
411 template<
class EventHandler>
412 void ParseEngine<EventHandler>::_reset()
414 m_pending_anchors = {};
417 m_was_inside_qmrk =
false;
422 if(m_options.locations())
424 _prepare_locations();
// Re-points stored strings from `prev_arena` into `next_arena`: the local
// helper macro rebases a string's pointer by its offset inside the previous
// arena when the string is a sub-range of it. Two loops walk the pending
// tags and the pending anchors.
// NOTE(review): the rest of the macro and the loop bodies are not visible
// in this excerpt.
431 template<
class EventHandler>
432 void ParseEngine<EventHandler>::_relocate_arena(csubstr prev_arena, substr next_arena)
434 #define _ryml_relocate(s) \
435 if((s).is_sub(prev_arena)) \
437 (s).str = next_arena.str + ((s).str - prev_arena.str); \
441 for(
size_t i = 0; i < m_pending_tags.num_entries; ++i)
443 for(
size_t i = 0; i < m_pending_anchors.num_entries; ++i)
445 #undef _ryml_relocate
448 template<
class EventHandler>
449 void ParseEngine<EventHandler>::_s_relocate_arena(
void* data, csubstr prev_arena, substr next_arena)
451 ((ParseEngine*)data)->_relocate_arena(prev_arena, next_arena);
// Writes a description of the current parse position through `dumpfn`:
// an optional "file:" prefix, "line:col: ", the current line's contents
// (cut to 80 characters, with "..." when cut), a marker line with '^' and
// '~' under the remaining part of the line, and the top state's flags.
// NOTE(review): the conditions guarding some of these outputs and the
// declaration of `flagbuf_` are not visible in this excerpt.
458 template<
class EventHandler>
459 template<
class DumpFn>
460 C4_NO_INLINE
void ParseEngine<EventHandler>::_fmt_msg(DumpFn &&dumpfn)
const
462 auto const *
const C4_RESTRICT st = m_evt_handler->m_curr;
463 auto const& lc = st->line_contents;
// the line without whatever lies beyond its counted columns
464 csubstr contents = lc.full.first(lc.num_cols);
// width of the prefix printed before the contents; the 3 matches the
// literal characters in "{}:{}: ". Presumably to_chars() on an empty
// buffer returns the number of characters needed -- confirm.
468 size_t offs = 3u +
to_chars(substr{}, st->pos.line) +
to_chars(substr{}, st->pos.col);
471 _dbg_dump(std::forward<DumpFn>(dumpfn),
"{}:", m_file);
// file name plus the ':' after it
472 offs += m_file.len + 1;
474 _dbg_dump(std::forward<DumpFn>(dumpfn),
"{}:{}: ", st->pos.line, st->pos.col);
475 csubstr maybe_full_content = (contents.len < 80u ? contents : contents.first(80u));
476 csubstr maybe_ellipsis = (contents.len < 80u ? csubstr{} : csubstr(
"..."));
477 _dbg_dump(std::forward<DumpFn>(dumpfn),
"{}{} (size={})\n", maybe_full_content, maybe_ellipsis, contents.len);
// zero-based column range of the remaining part within the full line
479 size_t firstcol = (size_t)(lc.rem.begin() - lc.full.begin());
480 size_t lastcol = firstcol + lc.rem.len;
// pad up to the first remaining column, then draw "^~~~"
481 for(
size_t i = 0; i < offs + firstcol; ++i)
482 std::forward<DumpFn>(dumpfn)(
" ");
483 std::forward<DumpFn>(dumpfn)(
"^");
484 for(
size_t i = 1, e = (lc.rem.len < 80u ? lc.rem.len : 80u); i < e; ++i)
485 std::forward<DumpFn>(dumpfn)(
"~");
// columns are reported one-based
486 _dbg_dump(std::forward<DumpFn>(dumpfn),
"{} (cols {}-{})\n", maybe_ellipsis, firstcol+1, lastcol+1);
490 std::forward<DumpFn>(dumpfn)(
"\n");
495 _dbg_dump(std::forward<DumpFn>(dumpfn),
"top state: {}\n", detail::_parser_flags_to_str(flagbuf_, m_evt_handler->m_curr->flags));
// Prints one line per state in the handler's stack with its level,
// reference indentation, node id and flags; `buf` is the scratch buffer
// handed to the flags-to-string conversion.
// NOTE(review): some lines of the body are not visible in this excerpt.
499 template<
class EventHandler>
500 void ParseEngine<EventHandler>::_print_state_stack(substr buf)
const
504 for(
typename EventHandler::state
const& s : m_evt_handler->m_stack)
505 _dbg_printf(
"state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(buf, s.flags));
// Convenience overload: forwards to the overload taking a buffer.
// NOTE(review): the declaration of `buf` is not visible in this excerpt.
509 template<
class EventHandler>
510 void ParseEngine<EventHandler>::_print_state_stack()
const
513 _print_state_stack(buf);
520 template<
class EventHandler>
521 template<
class ...Args>
522 C4_NORETURN C4_NO_INLINE
void ParseEngine<EventHandler>::_err(Location
const& cpploc, Location
const& ymlloc,
const char* fmt, Args
const& ...args)
const
524 m_evt_handler->cancel_parse();
525 err_parse(m_evt_handler->m_stack.m_callbacks, ErrorDataParse{cpploc, ymlloc}, fmt, args...);
528 template<
class EventHandler>
529 template<
class ...Args>
530 C4_NORETURN C4_NO_INLINE
void ParseEngine<EventHandler>::_err(Location
const& cpploc,
const char *fmt, Args
const& ...args)
const
532 m_evt_handler->cancel_parse();
533 err_parse(m_evt_handler->m_stack.m_callbacks, ErrorDataParse{cpploc, m_evt_handler->m_curr->pos}, fmt, args...);
// Debug output: prints the formatted message and then the description of
// the current parse position produced by _fmt_msg().
// NOTE(review): some lines of the body are not visible in this excerpt.
539 template<
class EventHandler>
540 template<
class ...Args>
541 void ParseEngine<EventHandler>::_dbg(csubstr fmt, Args
const& ...args)
const
545 _dbg_printf(fmt, args...);
547 _fmt_msg(_dbg_dumper);
// Computes whether the current offset has reached the end of the buffer.
// NOTE(review): the condition guarding the debug message and the return
// statement are not visible in this excerpt.
554 template<
class EventHandler>
555 bool ParseEngine<EventHandler>::_finished_file()
const
557 bool ret = m_evt_handler->m_curr->pos.offset >= m_buf.len;
560 _c4dbgp(
"finished file!!!");
565 template<
class EventHandler>
566 C4_HOT C4_ALWAYS_INLINE
bool ParseEngine<EventHandler>::_finished_line()
const
568 return m_evt_handler->m_curr->line_contents.rem.empty();
// Advances the current line by `pos` whitespace characters.
// NOTE(review): the computation of `pos` and the condition under which the
// skip happens are not visible in this excerpt.
574 template<
class EventHandler>
575 void ParseEngine<EventHandler>::_maybe_skip_whitespace_tokens()
577 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
583 _c4dbgpf(
"skip {} whitespace characters", pos);
584 _line_progressed(pos);
// When the remainder of the current line starts with `c`, advances the
// line past the leading run of `c` characters.
// NOTE(review): the handling of the case where first_not_of() finds no
// other character is not visible in this excerpt.
588 template<
class EventHandler>
589 void ParseEngine<EventHandler>::_maybe_skipchars(
char c)
591 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
592 if(rem.len && rem.str[0] == c)
// index of the first character that differs from `c`
594 size_t pos = rem.first_not_of(c);
597 _c4dbgpf(
"skip {}x'{}'", pos, c);
598 _line_progressed(pos);
// Only compiled when RYML_NO_COVERAGE__TO_BE_DELETED is defined.
// Like _maybe_skipchars(), with a check of the run length against
// `max_to_skip`.
// NOTE(review): the statement executed when pos > max_to_skip and the
// closing #endif are not visible in this excerpt.
602 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
603 template<
class EventHandler>
604 void ParseEngine<EventHandler>::_maybe_skipchars_up_to(
char c,
size_t max_to_skip)
606 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
607 if(rem.len && rem.str[0] == c)
609 size_t pos = rem.first_not_of(c);
612 if(pos > max_to_skip)
614 _c4dbgpf(
"skip {}x'{}'", pos, c);
615 _line_progressed(pos);
// Advances the current line past the leading run of characters that are
// in `chars`. Precondition (asserted): the remainder begins with one of
// them.
// NOTE(review): the template parameter declaring N and the condition
// guarding the fallback assignment of `pos` are not visible here.
620 template<
class EventHandler>
622 void ParseEngine<EventHandler>::_skipchars(
const char (&chars)[N])
624 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.begins_with_any(chars));
625 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(chars);
// fallback: the whole remainder is skipped
627 pos = m_evt_handler->m_curr->line_contents.rem.len;
628 _c4dbgpf(
"skip {} characters", pos);
629 _line_progressed(pos);
// Consumes the comment that starts at the current position, up to the end
// of the line. Preconditions (asserted): the remainder begins with '#' and
// is a sub-range of the full line.
// Raises an error when the comment does not start the line and the
// character right before the '#' is neither a space nor a tab.
632 template<
class EventHandler>
633 void ParseEngine<EventHandler>::_skip_comment()
635 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.begins_with(
'#'));
636 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.is_sub(m_evt_handler->m_curr->line_contents.full));
637 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
638 csubstr line = m_evt_handler->m_curr->line_contents.full;
// the line has something before the comment
640 if(!line.begins_with(
'#'))
642 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, rem.str > line.str);
// character immediately preceding the '#'
643 const char c = line[(size_t)(rem.str - line.str - 1)];
644 if(C4_UNLIKELY(c !=
' ' && c !=
'\t'))
645 _c4err(
"comment not preceded by whitespace");
// NOTE(review): presumably the branch for a line starting with '#'
649 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, rem.str == line.str);
651 _c4dbgpf(
"comment was '{}'", rem);
// the rest of the line belongs to the comment
652 _line_progressed(rem.len);
// Looks past leading spaces in the remainder of the line; when a '#'
// follows, the line is advanced over those spaces.
// NOTE(review): what happens after that advance is not visible in this
// excerpt.
655 template<
class EventHandler>
656 void ParseEngine<EventHandler>::_maybe_skip_comment()
658 csubstr s = m_evt_handler->m_curr->line_contents.rem.triml(
' ');
659 if(s.begins_with(
'#'))
// number of spaces trimmed from the left
661 _line_progressed((
size_t)(s.str - m_evt_handler->m_curr->line_contents.rem.str));
// Skips leading spaces/tabs in the remainder of the line and then looks
// for a ':' that ends the line or is followed by a space.
// NOTE(review): further alternatives of the last condition, the action
// taken on a match and the return statements are not visible here.
666 template<
class EventHandler>
667 bool ParseEngine<EventHandler>::_maybe_scan_following_colon() noexcept
669 if(m_evt_handler->m_curr->line_contents.rem.len)
671 if(m_evt_handler->m_curr->line_contents.rem.str[0] ==
' ' || m_evt_handler->m_curr->line_contents.rem.str[0] ==
'\t')
673 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(
" \t");
// fallback: the whole remainder is whitespace (guard not visible)
675 pos = m_evt_handler->m_curr->line_contents.rem.len;
676 _c4dbgpf(
"skip {}x'{}'", pos,
' ');
677 _line_progressed(pos);
679 if(m_evt_handler->m_curr->line_contents.rem.len && (m_evt_handler->m_curr->line_contents.rem.str[0] ==
':'))
681 if(m_evt_handler->m_curr->line_contents.rem.len == 1
682 || m_evt_handler->m_curr->line_contents.rem.str[1] ==
' '
686 _c4dbgp(
"found ':' colon next");
// Skips leading spaces/tabs in the remainder of the line and then looks
// for a ','.
// NOTE(review): the action taken on a match and the return statements are
// not visible in this excerpt.
695 template<
class EventHandler>
696 bool ParseEngine<EventHandler>::_maybe_scan_following_comma() noexcept
698 if(m_evt_handler->m_curr->line_contents.rem.len)
700 if(m_evt_handler->m_curr->line_contents.rem.str[0] ==
' ' || m_evt_handler->m_curr->line_contents.rem.str[0] ==
'\t')
702 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(
" \t");
// fallback: the whole remainder is whitespace (guard not visible)
704 pos = m_evt_handler->m_curr->line_contents.rem.len;
705 _c4dbgpf(
"skip {}x'{}'", pos,
' ');
706 _line_progressed(pos);
708 if(m_evt_handler->m_curr->line_contents.rem.len && (m_evt_handler->m_curr->line_contents.rem.str[0] ==
','))
710 _c4dbgp(
"found ',' comma next");
// Reads an anchor at the current position. Precondition (asserted): the
// remainder begins with '&'. The anchor is the text between the '&' and
// the first space; the line is advanced past both, then past any
// following spaces.
// NOTE(review): the return statement is not visible in this excerpt.
721 template<
class EventHandler>
722 csubstr ParseEngine<EventHandler>::_scan_anchor()
724 csubstr s = m_evt_handler->m_curr->line_contents.rem;
725 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'&'));
// starts at index 1, so the '&' itself is not part of the anchor
726 csubstr anchor = s.range(1, s.first_of(
' '));
// 1u accounts for the '&'
727 _line_progressed(1u + anchor.len);
728 _maybe_skipchars(
' ');
// Reads a reference at the current position. Precondition (asserted): the
// remainder begins with '*'. The reference runs up to the first of ',',
// ']', ' ' or ':' and includes the leading '*'.
// NOTE(review): the return statement is not visible in this excerpt.
732 template<
class EventHandler>
733 csubstr ParseEngine<EventHandler>::_scan_ref_seq()
735 csubstr s = m_evt_handler->m_curr->line_contents.rem;
736 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'*'));
737 csubstr ref = s.first(s.first_of(
",] :"));
738 _line_progressed(ref.len);
// Reads a reference at the current position. Precondition (asserted): the
// remainder begins with '*'. The reference runs up to the first of ',',
// '}' or ' ' and includes the leading '*'.
// NOTE(review): the return statement is not visible in this excerpt.
742 template<
class EventHandler>
743 csubstr ParseEngine<EventHandler>::_scan_ref_map()
745 csubstr s = m_evt_handler->m_curr->line_contents.rem;
746 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'*'));
747 csubstr ref = s.first(s.first_of(
",} "));
748 _line_progressed(ref.len);
// Reads a tag from the remainder of the line (leading spaces trimmed).
// Precondition (asserted): the trimmed remainder begins with '!'.
// The tag's extent depends on its prefix: "!!" and plain "!" tags end at
// a space (or at " ," in one variant), "!<" tags end at the closing '>'
// which is included. The line is then advanced past the tag and past
// following whitespace.
// NOTE(review): the declaration of `t`, the conditions choosing between
// the " ," and ' ' terminators, and the return are not visible here.
752 template<
class EventHandler>
753 csubstr ParseEngine<EventHandler>::_scan_tag()
755 csubstr rem = m_evt_handler->m_curr->line_contents.rem.triml(
' ');
756 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, rem.begins_with(
'!'));
758 if(rem.begins_with(
"!!"))
760 _c4dbgp(
"begins with '!!'");
762 t = rem.left_of(rem.first_of(
" ,"));
764 t = rem.left_of(rem.first_of(
' '));
766 else if(rem.begins_with(
"!<"))
768 _c4dbgp(
"begins with '!<'");
// `true` makes left_of() include the '>' itself
769 t = rem.left_of(rem.first_of(
'>'),
true);
// branch only compiled when RYML_NO_COVERAGE__TO_BE_DELETED is defined
771 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
772 else if(rem.begins_with(
"!h!"))
774 _c4dbgp(
"begins with '!h!'");
775 t = rem.left_of(rem.first_of(
' '));
780 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, rem.begins_with(
'!'));
781 _c4dbgp(
"begins with '!'");
783 t = rem.left_of(rem.first_of(
" ,"));
785 t = rem.left_of(rem.first_of(
' '));
787 _line_progressed(t.len);
788 _maybe_skip_whitespace_tokens();
// Decides whether `s` (non-empty, asserted) can start a plain scalar in
// flow context. The visible messages show two kinds of outcome: "not a
// scalar" debug reports for non-scalar tokens, and hard errors for
// invalid tokens starting with ':' or '?'.
// NOTE(review): the character tests and return statements are not visible
// in this excerpt.
795 template<
class EventHandler>
796 bool ParseEngine<EventHandler>::_is_valid_start_scalar_plain_flow(csubstr s)
798 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, !s.empty());
815 _c4dbgpf(
"not a scalar: found non-scalar token '{}'", _c4prc(s.str[0]));
832 _c4dbgpf(
"not a scalar: found non-scalar token '{}{}'", s.str[0], s.str[1]);
839 _c4err(
"invalid token \":{}\"", _c4prc(s.str[1]));
859 _c4dbgpf(
"not a scalar: found non-scalar token '?{}'", _c4prc(s.str[1]));
865 _c4err(
"invalid token \"?{}\"", _c4prc(s.str[1]));
// Scans a plain scalar that is a value inside a flow sequence, writing the
// result into `sc` (including whether it needs filtering).
// Asserted state: not in a map, not in block mode, in a sequence (or
// sequence-implied map), in flow mode, expecting a value.
// The scan works on the buffer from the current offset, so it can run
// across lines; the visible handling covers terminating characters,
// newlines, ':' (possible map start) and '#' (possible comment).
// NOTE(review): the character dispatch, loop-variable declarations, the
// assignment of sc->scalar and the returns are not visible here.
884 template<
class EventHandler>
885 bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_flow(ScannedScalar *C4_RESTRICT sc)
887 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RMAP));
888 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RBLCK));
889 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RSEQ|
RSEQIMAP));
890 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RFLOW));
891 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL));
// everything from the current offset to the end of the buffer
893 substr s = m_buf.sub(m_evt_handler->m_curr->pos.offset);
894 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, !s.begins_with(
' '));
895 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, !s.begins_with(
'\n'));
897 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.begins_with(m_evt_handler->m_curr->line_contents.rem));
899 if(!s.len || !_is_valid_start_scalar_plain_flow(s))
902 _c4dbgp(
"scanning seqflow scalar...");
904 bool needs_filter =
false;
// offs indexes into s, col tracks progress within the current line
908 for( ; offs < s.len; ++offs, ++col)
910 const char c = s.str[offs];
915 _c4dbgpf(
"found terminating character at {}: '{}'", offs, c);
916 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, offs > 0);
919 _c4dbgpf(
"found newline. offs={} col={}", offs, col);
// a terminator at the start of the next line also ends the scalar
924 if(next_line.begins_with_any(
",]#"))
926 _c4dbgpf(
"found terminating character beginning next line: '{}'", next_line.str[0]);
932 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
941 _c4dbgp(
"found suspicious ':'");
945 char next = s.str[offsp1];
946 _c4dbgpf(
"next char is '{}'", _c4prc(next));
949 csubstr after = s.sub(offsp1).triml(
'\r');
953 _c4dbgpf(
"skip \\r to '{}'", _c4prc(next));
959 _c4dbgp(
"map starting!");
964 _c4dbgp(
"':' nothing to see here");
// ':' was the last character of the buffer
969 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.len == offsp1);
970 _line_progressed(col);
971 _c4err(
"missing termination: '{}'", c);
976 _c4dbgp(
"found suspicious '#'");
977 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, offs > 0);
978 char prev = s.str[offs - 1];
981 _c4dbgpf(
"found terminating character at {}: '{}'", offs, c);
989 _line_progressed(col);
990 _c4err(
"invalid character: '{}'", c);
998 _line_progressed(col);
1001 sc->needs_filter = needs_filter;
1003 _c4prscalar(
"scanned plain scalar", sc->scalar,
true);
// Scans a plain scalar (key or value) inside a flow map, writing the
// result into `sc`; returns true when the scanned scalar is non-empty.
// Asserted state: not in a sequence unless it is a sequence-implied map,
// not in block mode, in a map (or sequence-implied map), in flow mode,
// and expecting a key, a value or a '?' key.
// The scan goes line by line; when a line is exhausted and the file is not
// finished, it continues on the next line and marks the scalar as needing
// filtering.
// NOTE(review): the character dispatch, the line-advancing statements and
// the assignment of sc->scalar are not visible in this excerpt.
1008 template<
class EventHandler>
1009 bool ParseEngine<EventHandler>::_scan_scalar_plain_map_flow(ScannedScalar *C4_RESTRICT sc)
1011 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQ) || has_any(
RSEQIMAP));
1012 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RBLCK));
1013 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RMAP|
RSEQIMAP));
1014 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RFLOW));
1015 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RVAL|
QMRK));
1017 substr s = m_evt_handler->m_curr->line_contents.rem;
1018 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, !s.begins_with(
' '));
1023 if(!_is_valid_start_scalar_plain_flow(s))
1026 _c4dbgp(
"scanning scalar...");
// remembered so that a multi-line scalar can be taken from the buffer
1028 const size_t start_offset = m_evt_handler->m_curr->pos.offset;
1029 bool needs_filter =
false;
1032 for(
size_t i = 0; i < s.len; ++i)
1034 const char c = s.str[i];
1039 _line_progressed(i);
1040 _c4dbgpf(
"found terminating character: '{}'", c);
// terminates only at end of line or before ' ', ',' or '}' (and before a
// tab when tab tokens are enabled)
1043 if(s.len == i+1 || s.str[i+1] ==
' ' || s.str[i+1] ==
',' || s.str[i+1] ==
'}' _RYML_WITH_TAB_TOKENS(|| s.str[i+1] ==
'\t'))
1045 _line_progressed(i);
1046 _c4dbgpf(
"found terminating character: '{}'", c);
1052 _line_progressed(i);
1053 _c4err(
"invalid character: '{}'", c);
1056 _line_progressed(i);
1060 _c4err(
"invalid character: '{}'", c);
1065 _line_progressed(i);
1066 _c4dbgpf(
"found terminating character: '{}'", c);
// no terminator on this line: consume it and continue on the next one
1074 _c4dbgp(
"next line!");
1075 _line_progressed(s.len);
1076 if(!_finished_file())
1078 _c4dbgp(
"next line!");
1084 _c4dbgp(
"file finished!");
1087 s = m_evt_handler->m_curr->line_contents.rem;
// a scalar spanning several lines needs filtering
1088 needs_filter =
true;
1094 sc->needs_filter = needs_filter;
1096 _c4dbgpf(
"scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
1098 return sc->scalar.len > 0u;
// Scans an unquoted JSON scalar inside a flow sequence, writing it to
// `sc`; scalars found this way never need filtering.
// Asserted state: not in a map, not in block mode, in a sequence, in flow
// mode.
// The special literals recognized by _is_special_json_scalar() are taken
// first; otherwise the line is scanned up to a terminating character.
// NOTE(review): the character dispatch, the declaration of `i`, the guard
// on `len` and the returns are not visible in this excerpt.
1101 template<
class EventHandler>
1102 bool ParseEngine<EventHandler>::_scan_scalar_seq_json(ScannedScalar *C4_RESTRICT sc)
1104 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RMAP));
1105 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RBLCK));
1106 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RSEQ));
1107 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RFLOW));
1109 substr s = m_evt_handler->m_curr->line_contents.rem;
1110 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, !s.begins_with(
' '));
1115 _c4dbgp(
"scanning scalar...");
1122 _c4dbgp(
"not a scalar.");
1127 const size_t len = _is_special_json_scalar(s);
// the special literal itself is the scalar
1130 sc->scalar = s.first(len);
1131 sc->needs_filter =
false;
1132 _c4dbgpf(
"special json scalar: '{}'", sc->scalar);
1133 _line_progressed(len);
1140 for( ; i < s.len; ++i)
1142 const char c = s.str[i];
1149 _c4dbgpf(
"found terminating character: '{}'", c);
// at the very start, or right after a space
1152 if(!i || s.str[i-1] ==
' ')
1154 _c4dbgpf(
"found terminating character: '{}'", c);
// something was scanned before the terminator
1165 if(C4_LIKELY(i > 0))
1167 _line_progressed(i);
1168 sc->scalar = s.first(i);
1169 sc->needs_filter =
false;
1170 _c4dbgpf(
"scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
// Scans an unquoted JSON scalar (key or value) inside a flow map, writing
// it to `sc`; scalars found this way never need filtering.
// Asserted state: not in a sequence, not in block mode, in a map, in flow
// mode, expecting a key or a value.
// The special literals recognized by _is_special_json_scalar() are taken
// first; otherwise the line is scanned up to a terminating character.
// NOTE(review): the character dispatch, the declaration of `i`, the guard
// on `len` and the returns are not visible in this excerpt.
1177 template<
class EventHandler>
1178 bool ParseEngine<EventHandler>::_scan_scalar_map_json(ScannedScalar *C4_RESTRICT sc)
1180 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQ));
1181 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RBLCK));
1182 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RMAP));
1183 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RFLOW));
1184 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RVAL));
1186 substr s = m_evt_handler->m_curr->line_contents.rem;
1187 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, !s.begins_with(
' '));
1192 _c4dbgp(
"scanning scalar...");
1195 const size_t len = _is_special_json_scalar(s);
// the special literal itself is the scalar
1198 sc->scalar = s.first(len);
1199 sc->needs_filter =
false;
1200 _c4dbgpf(
"special json scalar: '{}'", sc->scalar);
1201 _line_progressed(len);
1208 for( ; i < s.len; ++i)
1210 const char c = s.str[i];
1217 _c4dbgpf(
"found terminating character: '{}'", c);
// at the very start, or right after a space
1220 if(!i || s.str[i-1] ==
' ')
1222 _c4dbgpf(
"found terminating character: '{}'", c);
// something was scanned before the terminator
1233 if(C4_LIKELY(i > 0))
1235 _line_progressed(i);
1236 sc->scalar = s.first(i);
1237 sc->needs_filter =
false;
1238 _c4dbgpf(
"scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
1245 template<
class EventHandler>
1246 bool ParseEngine<EventHandler>::_is_doc_begin(csubstr s)
1248 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s[0] ==
'-');
1249 return (m_evt_handler->m_curr->line_contents.indentation == 0u && _at_line_begin() && _is_doc_begin_token(s));
1252 template<
class EventHandler>
1253 bool ParseEngine<EventHandler>::_is_doc_end(csubstr s)
1255 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s[0] ==
'.');
1256 return (m_evt_handler->m_curr->line_contents.indentation == 0u && _at_line_begin() && _is_doc_end_token(s));
// Scans a plain scalar in block context, possibly spanning several lines,
// and writes it to `sc`. `indentation` is the minimum indentation a
// following line must have to continue the scalar.
// Asserted state: not in flow mode, not in a sequence-implied map, and in
// block / unknown / USTY mode.
// Visible stages:
//  - the start is checked against block tokens and document markers;
//  - each line is scanned for ':' (possible key end) and '#' (comment when
//    at line start or preceded by space/tab);
//  - at end of line, the next line is peeked: smaller indentation or a
//    document begin/end marker at column 0 ends the scalar, otherwise the
//    scan continues there and the scalar is marked as needing filtering;
//  - the scalar is the buffer range from the starting offset to the
//    current offset, with trailing whitespace and newlines trimmed.
// NOTE(review): many conditions, loop headers, the line-advancing calls
// and the returns are not visible in this excerpt.
1259 template<
class EventHandler>
1260 bool ParseEngine<EventHandler>::_scan_scalar_plain_blck(ScannedScalar *C4_RESTRICT sc,
size_t indentation)
1262 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RFLOW));
1263 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQIMAP));
1264 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RBLCK|
RUNK|
USTY));
1266 substr s = m_evt_handler->m_curr->line_contents.rem;
1267 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, !s.begins_with(
' '));
1275 if(_is_blck_token(s))
1279 else if(_is_doc_begin(s))
1281 _c4dbgp(
"token is doc start");
1287 if(_is_blck_token(s))
1300 _c4dbgp(
"token is doc end");
1306 _c4dbgpf(
"plain scalar! indentation={}", indentation);
// remembered so the scalar can be taken from the buffer afterwards, and
// so a ':' on the first line can be told apart from one on a later line
1308 const size_t start_offset = m_evt_handler->m_curr->pos.offset;
1309 const size_t start_line = m_evt_handler->m_curr->pos.line;
1311 bool needs_filter =
false;
1314 _c4dbgpf(
"plain scalar line: [{}]~~~{}~~~", s.len, s);
1315 for(
size_t i = 0; i < s.len; ++i)
1317 const char curr = s.str[i];
1322 _c4dbgpf(
"[{}]: got suspicious ':'", i);
1326 _c4dbgpf(
"followed by '{}'", i+1 == s.len ? csubstr(
"\\n") : _c4prc(s.str[i+1]));
1327 _line_progressed(i);
1329 if(C4_LIKELY(m_evt_handler->m_curr->pos.line == start_line))
1331 _c4dbgp(
"start line. scalar ends here");
// step over a run of consecutive ':' characters
1342 while(j + 1 < s.len && s.str[j+1] ==
':')
1344 _c4dbgp(
"skip colon");
1347 i = j > i ? j-1 : i;
1348 _c4dbgp(
"nothing to see here");
1352 _c4dbgp(
"got suspicious '#'");
// '#' starts a comment only at line start or after a space/tab
1353 if(!i || (s.str[i-1] ==
' ' || s.str[i-1] ==
'\t'))
1355 _c4dbgp(
"comment! scalar ends here");
1356 _line_progressed(i);
1361 _c4dbgp(
"nothing to see here");
// end of the current line: look at the next one without consuming it
1366 _line_progressed(s.len);
1367 csubstr next_peeked = _peek_next_line(m_evt_handler->m_curr->pos.offset);
1368 next_peeked = next_peeked.trimr(
"\n\r");
1369 const size_t next_indentation = next_peeked.first_not_of(
' ');
1370 _c4dbgpf(
"indentation curr={} next={}", indentation, next_indentation);
1371 if(next_indentation < indentation)
1373 _c4dbgp(
"smaller indentation! scalar ended");
// document markers are only looked for at column 0
1376 else if(next_indentation == 0 && next_peeked.len > 0)
1378 const char first = next_peeked.str[0];
1382 next_peeked = next_peeked.trimr(
"\n\r");
1383 _c4dbgpf(
"doc begin? peeked=[{}]~~~{}{}~~~", next_peeked.len, next_peeked.len >= 3 ? next_peeked.first(3) : next_peeked, next_peeked.len > 3 ?
"..." :
"");
1384 if(_is_doc_begin_token(next_peeked))
1386 _c4dbgp(
"doc begin! scalar ended");
1391 next_peeked = next_peeked.trimr(
"\n\r");
1392 _c4dbgpf(
"doc end? peeked=[{}]~~~{}{}~~~", next_peeked.len, next_peeked.len >= 3 ? next_peeked.first(3) : next_peeked, next_peeked.len > 3 ?
"..." :
"");
1393 if(_is_doc_end_token(next_peeked))
1395 _c4dbgp(
"doc end! scalar ended");
1402 _c4dbgp(
"next line!");
1403 if(!_finished_file())
1405 _c4dbgp(
"next line!");
1411 _c4dbgp(
"file finished!");
1414 s = m_evt_handler->m_curr->line_contents.rem;
// a scalar spanning several lines needs filtering
1415 needs_filter =
true;
// the scalar is what was consumed since the start, minus trailing blanks
1420 sc->scalar = m_buf.range(start_offset, m_evt_handler->m_curr->pos.offset).trimr(
" \n\r\t");
1421 sc->needs_filter = needs_filter;
1423 _c4dbgpf(
"scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
1428 template<
class EventHandler>
1429 bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_blck(ScannedScalar *C4_RESTRICT sc)
1431 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RMAP));
1432 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RFLOW));
1433 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQIMAP));
1434 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RSEQ));
1435 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RBLCK));
1436 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL));
1437 return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref + 1u);
1440 template<
class EventHandler>
1441 bool ParseEngine<EventHandler>::_scan_scalar_plain_map_blck(ScannedScalar *C4_RESTRICT sc)
1443 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQ));
1444 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RFLOW));
1445 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RMAP));
1446 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RBLCK));
1447 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RVAL|
QMRK));
1448 return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref + 1u);
1451 template<
class EventHandler>
1452 bool ParseEngine<EventHandler>::_scan_scalar_plain_unk(ScannedScalar *C4_RESTRICT sc)
1454 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RUNK|
USTY));
1455 return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref);
// Looks at the line following position `pos` in the buffer without
// changing the parser state (the function is const). When `pos` is npos,
// the current offset is used instead.
// The next line is cut at its first line break, which is kept in the
// result (a "\r\n" / "\n\r" pair counts as one break).
// NOTE(review): the local declarations, the guards around the newline
// lookup and the returns are not visible in this excerpt.
1461 template<
class EventHandler>
1462 substr ParseEngine<EventHandler>::_peek_next_line(
size_t pos)
const
1466 pos = pos ==
npos ? m_evt_handler->m_curr->pos.offset : pos;
1467 if(pos >= m_buf.len)
// what follows the line that contains `pos`
1471 rem = from_next_line(m_buf.sub(pos));
1476 nlpos = rem.first_of(
"\r\n");
// extend by one when the break is a two-character pair
1478 nlpos += _extend_from_combined_newline(rem[nlpos], rem[nlpos+1]);
// `true` makes left_of() include the character at nlpos
1479 rem = rem.left_of(nlpos,
true);
1481 _c4dbgpf(
"peek next line @ {}: (len={})'{}'", pos, rem.len, rem.trimr(
"\r\n"));
1485 _c4dbgpf(
"peek next line @ {}: (len=0)''", pos);
// Loads the line at the current offset into the state's line contents.
// While the offset is inside the buffer, the line is read from there; the
// second call resets the line contents with an empty range taken from the
// end of the buffer.
// NOTE(review): the line between the two calls is not visible in this
// excerpt; presumably an `else` -- confirm.
1491 template<
class EventHandler>
1492 void ParseEngine<EventHandler>::_scan_line()
1494 if(C4_LIKELY(m_evt_handler->m_curr->pos.offset < m_buf.len))
1495 m_evt_handler->m_curr->line_contents.reset_with_next_line(m_buf, m_evt_handler->m_curr->pos.offset);
1497 m_evt_handler->m_curr->line_contents.reset_with_next_line(m_buf.last(0), 0);
1500 template<
class EventHandler>
1501 void ParseEngine<EventHandler>::_line_progressed(
size_t ahead)
1503 _c4dbgpf(
"line[{}] ({} cols) progressed by {}: col {}-->{} offset {}-->{}",
1504 m_evt_handler->m_curr->pos.line,
1505 m_evt_handler->m_curr->line_contents.full.len,
1506 ahead, m_evt_handler->m_curr->pos.col,
1507 m_evt_handler->m_curr->pos.col+ahead,
1508 m_evt_handler->m_curr->pos.offset,
1509 m_evt_handler->m_curr->pos.offset+ahead);
1510 m_evt_handler->m_curr->pos.offset += ahead;
1511 m_evt_handler->m_curr->pos.col += ahead;
1512 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col <= m_evt_handler->m_curr->line_contents.num_cols+1);
1513 m_evt_handler->m_curr->line_contents.rem = m_evt_handler->m_curr->line_contents.rem.sub(ahead);
1516 template<
class EventHandler>
1517 void ParseEngine<EventHandler>::_line_ended()
1519 _c4dbgpf(
"line[{}] ({} cols) ended! offset {}-->{} / col {}-->{}",
1520 m_evt_handler->m_curr->pos.line,
1521 m_evt_handler->m_curr->line_contents.full.len,
1522 m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset + m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols,
1523 m_evt_handler->m_curr->pos.col, 1);
1524 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col == m_evt_handler->m_curr->line_contents.num_cols + 1);
1525 m_evt_handler->m_curr->pos.offset += m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols;
1526 ++m_evt_handler->m_curr->pos.line;
1527 m_evt_handler->m_curr->pos.col = 1;
// Reverts the position changes made by _line_ended(): the offset moves
// back by the part of the full line not counted in its columns, the line
// counter is decremented, the column is set to one past the last column,
// and the remainder becomes an empty range at the restored offset.
// Preconditions (asserted): column is 1, line is above 0, and the offset
// is at least as large as the amount being subtracted.
// NOTE(review): two lines before the last statement are not visible in
// this excerpt.
1530 template<
class EventHandler>
1531 void ParseEngine<EventHandler>::_line_ended_undo()
1533 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col == 1u);
1534 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.line > 0u);
1535 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.offset >= m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols);
// the same amount that _line_ended() adds to the offset
1536 const size_t delta = m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols;
1537 _c4dbgpf(
"line[{}] undo ended! line {}-->{}, offset {}-->{}", m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->pos.line - 1, m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset - delta);
1538 m_evt_handler->m_curr->pos.offset -= delta;
1539 --m_evt_handler->m_curr->pos.line;
1540 m_evt_handler->m_curr->pos.col = m_evt_handler->m_curr->line_contents.num_cols + 1u;
// zero-length remainder positioned at the restored offset
1543 m_evt_handler->m_curr->line_contents.rem = m_buf.sub(m_evt_handler->m_curr->pos.offset, 0);
1548 template<
class EventHandler>
1549 void ParseEngine<EventHandler>::_set_indentation(
size_t indentation)
1551 m_evt_handler->m_curr->indref = indentation;
1552 _c4dbgpf(
"state[{}]: saving indentation: {}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
1555 template<
class EventHandler>
1556 void ParseEngine<EventHandler>::_save_indentation()
1558 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.is_sub(m_evt_handler->m_curr->line_contents.full));
1559 m_evt_handler->m_curr->indref = m_evt_handler->m_curr->line_contents.current_col();
1560 _c4dbgpf(
"state[{}]: saving indentation: {}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
// Handles a flow container that turns out to be a key: when
// _maybe_scan_following_colon() succeeds, the handler is told that the value
// is actually the first key of a new block map.
// @param orig_indent indentation forwarded to _set_indentation()
// NOTE(review): closing braces are not visible in this excerpt, so it is not
// shown whether the last two calls run only when a colon was found - confirm.
1566 template<
class EventHandler>
1567 void ParseEngine<EventHandler>::_flow_container_was_a_key(
size_t orig_indent)
1569 if(_maybe_scan_following_colon())
1571 _c4dbgpf(
"flow container is followed by colon! orig_indent={}", orig_indent);
1572 m_evt_handler->actually_val_is_first_key_of_new_map_block();
1574 _set_indentation(orig_indent);
1575 _maybe_skip_whitespace_tokens();
1579 template<
class EventHandler>
1580 void ParseEngine<EventHandler>::_end_map_flow()
1582 bool multiline = m_options.detect_flow_ml() && m_evt_handler->m_parent->pos.line < m_evt_handler->m_curr->pos.line;
1583 size_t orig_indent = m_evt_handler->m_curr->indref;
1584 _c4dbgpf(
"mapflow: end, multiline={}", multiline);
1585 m_evt_handler->end_map_flow(multiline);
1586 if(has_none(
RFLOW) && (has_any(
RUNK|
RSEQ) || m_was_inside_qmrk))
1587 _flow_container_was_a_key(orig_indent);
1590 template<
class EventHandler>
1591 void ParseEngine<EventHandler>::_end_seq_flow()
1593 bool multiline = m_options.detect_flow_ml() && m_evt_handler->m_parent->pos.line < m_evt_handler->m_curr->pos.line;
1594 size_t orig_indent = m_evt_handler->m_curr->indref;
1595 _c4dbgpf(
"seqflow: end, multiline={}", multiline);
1596 m_evt_handler->end_seq_flow(multiline);
1597 if(has_none(
RFLOW) && (has_any(
RUNK|
RSEQ) || m_was_inside_qmrk))
1598 _flow_container_was_a_key(orig_indent);
// Ends a block map. Before the handler receives end_map_block(), the visible
// branches supply empty plain scalars: either a missing val, or (when QMRK is
// set) a missing key followed by a missing val. Pending annotations are
// handled before each empty scalar.
// NOTE(review): the condition guarding the first branch is not visible here.
1601 template<
class EventHandler>
1602 void ParseEngine<EventHandler>::_end_map_blck()
1604 _c4dbgp(
"mapblck: end");
1607 _c4dbgp(
"mapblck: set missing val");
1608 _handle_annotations_before_blck_val_scalar();
1609 m_evt_handler->set_val_scalar_plain_empty();
1611 else if(has_any(
QMRK))
1613 _c4dbgp(
"mapblck: set missing keyval");
1614 _handle_annotations_before_blck_key_scalar();
1615 m_evt_handler->set_key_scalar_plain_empty();
1616 _handle_annotations_before_blck_val_scalar();
1617 m_evt_handler->set_val_scalar_plain_empty();
1619 m_evt_handler->end_map_block();
// Ends a block sequence. The visible branch supplies an empty plain val
// scalar (after handling pending annotations) before the handler receives
// end_seq_block().
// NOTE(review): the condition guarding that branch is not visible here.
1622 template<
class EventHandler>
1623 void ParseEngine<EventHandler>::_end_seq_blck()
1627 _c4dbgp(
"seqblck: set missing val");
1628 _handle_annotations_before_blck_val_scalar();
1629 m_evt_handler->set_val_scalar_plain_empty();
1631 m_evt_handler->end_seq_block();
// Ends the current map (the state must have RMAP set) and finally pops the
// handler state.
// NOTE(review): several lines between the first assertion and the last two
// are not visible in this excerpt, so the context in which has_none(RFLOW) and
// has_any(USTY) are asserted cannot be told from here.
1634 template<
class EventHandler>
1635 void ParseEngine<EventHandler>::_end2_map()
1637 _c4dbgp(
"map: end");
1638 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RMAP));
1645 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RFLOW));
1646 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
USTY));
1647 m_evt_handler->_pop();
// Ends the current sequence (the state must have RSEQ set) and finally pops
// the handler state.
// NOTE(review): several lines between the first assertion and the last two
// are not visible in this excerpt, so the context in which has_none(RFLOW) and
// has_any(USTY) are asserted cannot be told from here.
1651 template<
class EventHandler>
1652 void ParseEngine<EventHandler>::_end2_seq()
1654 _c4dbgp(
"seq: end");
1655 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RSEQ));
1662 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RFLOW));
1663 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
USTY));
1664 m_evt_handler->_pop();
// Begins a document: notifies the handler with begin_doc() and resets the
// reference indentation of the (then) current state to 0.
// NOTE(review): two lines before the begin_doc() call are not visible here.
1668 template<
class EventHandler>
1669 void ParseEngine<EventHandler>::_begin2_doc()
1671 _c4dbgp(
"begin_doc");
1674 m_evt_handler->begin_doc();
1675 m_evt_handler->m_curr->indref = 0;
// Begins an explicit document: notifies the handler with begin_doc_expl() and
// resets the reference indentation of the (then) current state to 0.
// NOTE(review): two lines before the begin_doc_expl() call are not visible here.
1678 template<
class EventHandler>
1679 void ParseEngine<EventHandler>::_begin2_doc_expl()
1681 _c4dbgp(
"begin_doc_expl");
1684 m_evt_handler->begin_doc_expl();
1685 m_evt_handler->m_curr->indref = 0;
1688 template<
class EventHandler>
1689 void ParseEngine<EventHandler>::_end2_doc()
1691 _c4dbgp(
"doc: end");
1692 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RDOC));
1693 if(m_doc_empty || (m_pending_tags.num_entries || m_pending_anchors.num_entries))
1695 _c4dbgp(
"doc was empty; add empty val");
1696 _handle_annotations_before_blck_val_scalar();
1697 m_evt_handler->set_val_scalar_plain_empty();
1699 m_evt_handler->end_doc();
1703 template<
class EventHandler>
1704 void ParseEngine<EventHandler>::_end2_doc_expl()
1706 _c4dbgp(
"doc: end");
1707 if(m_doc_empty || (m_pending_tags.num_entries || m_pending_anchors.num_entries))
1709 _c4dbgp(
"doc: no children; add empty val");
1710 _handle_annotations_before_blck_val_scalar();
1711 m_evt_handler->set_val_scalar_plain_empty();
1713 m_evt_handler->end_doc_expl();
// Starts a document when one is required.
// NOTE(review): only the debug message is visible in this excerpt; the
// condition and the call that starts the document are not shown.
1717 template<
class EventHandler>
1718 void ParseEngine<EventHandler>::_maybe_begin_doc()
1722 _c4dbgp(
"doc must be started");
// Finishes the current document when one must be finished. Otherwise, when the
// document is empty but tags/anchors are pending, it emits a whole document
// containing a single empty plain val scalar so that the annotations are not
// lost.
// NOTE(review): the first branch's condition and body (other than its debug
// message) are not visible in this excerpt.
1726 template<
class EventHandler>
1727 void ParseEngine<EventHandler>::_maybe_end_doc()
1731 _c4dbgp(
"doc must be finished");
1734 else if(m_doc_empty && (m_pending_tags.num_entries || m_pending_anchors.num_entries))
1736 _c4dbgp(
"no doc to finish, but pending annotations");
1737 m_evt_handler->begin_doc();
1738 _handle_annotations_before_blck_val_scalar();
1739 m_evt_handler->set_val_scalar_plain_empty();
1740 m_evt_handler->end_doc();
1744 template<
class EventHandler>
1745 void ParseEngine<EventHandler>::_end_doc_suddenly__pop()
1747 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
1748 if(m_evt_handler->m_stack[0].flags &
RDOC)
1750 _c4dbgp(
"root is RDOC");
1751 if(m_evt_handler->m_curr->level != 0)
1752 _handle_indentation_pop(&m_evt_handler->m_stack[0]);
1754 else if((m_evt_handler->m_stack.size() > 1) && (m_evt_handler->m_stack[1].flags &
RDOC))
1756 _c4dbgp(
"root is STREAM");
1757 if(m_evt_handler->m_curr->level != 1)
1758 _handle_indentation_pop(&m_evt_handler->m_stack[1]);
1762 _c4err(
"internal error");
1764 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RDOC));
// Ends a document abruptly: first pops the state stack back to the document
// level via _end_doc_suddenly__pop().
// NOTE(review): the statements that follow the pop are not visible in this
// excerpt.
1767 template<
class EventHandler>
1768 void ParseEngine<EventHandler>::_end_doc_suddenly()
1770 _c4dbgp(
"end doc suddenly");
1771 _end_doc_suddenly__pop();
// Starts a new document abruptly: first pops the state stack back to the
// document level via _end_doc_suddenly__pop().
// NOTE(review): the statements that follow the pop are not visible in this
// excerpt.
1776 template<
class EventHandler>
1777 void ParseEngine<EventHandler>::_start_doc_suddenly()
1779 _c4dbgp(
"start doc suddenly");
1780 _end_doc_suddenly__pop();
// Ends the stream. Reports an error for an unterminated flow sequence or flow
// map, pops back to the first stack entry when the stack holds more than one
// state, emits a document with an empty val when anchors/tags are still
// pending, and finally notifies the handler with end_stream().
// NOTE(review): the conditions guarding the two errors, and several lines
// between the pop and end_stream(), are not visible in this excerpt.
1785 template<
class EventHandler>
1786 void ParseEngine<EventHandler>::_end_stream()
1788 _c4dbgpf(
"end_stream, level={} node_id={}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->node_id);
1790 _c4err(
"missing terminating ]");
1792 _c4err(
"missing terminating }");
1793 if(m_evt_handler->m_stack.size() > 1)
1794 _handle_indentation_pop(m_evt_handler->m_stack.begin());
// pending annotations need a node: visible here is a doc with one empty val
1801 if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
1805 m_evt_handler->begin_doc();
1806 _handle_annotations_before_blck_val_scalar();
1807 m_evt_handler->set_val_scalar_plain_empty();
1808 m_evt_handler->end_doc();
1812 m_evt_handler->end_stream();
// Pops states until the current state is `popto`, handling seq and map states
// in separate branches.
// @param popto the state to stop at; the loop runs while m_curr != popto
// NOTE(review): the calls that actually end each seq/map are not visible in
// this excerpt; only the branch structure and debug messages are shown.
1816 template<
class EventHandler>
1817 void ParseEngine<EventHandler>::_handle_indentation_pop(ParserState
const* popto)
1819 _c4dbgpf(
"popping {} level{}: from level {}(@ind={}) to level {}(@ind={})", m_evt_handler->m_curr->level - popto->level, (((m_evt_handler->m_curr->level - popto->level) > 1) ?
"s" :
""), m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, popto->level, popto->indref);
1820 while(m_evt_handler->m_curr != popto)
1824 _c4dbgpf(
"popping seq at level {} (indentation={},addr={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, m_evt_handler->m_curr);
1827 else if(has_any(
RMAP))
1829 _c4dbgpf(
"popping map at level {} (indentation={},addr={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, m_evt_handler->m_curr);
1837 _c4dbgpf(
"current level is {} (indentation={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
// Called from a block sequence when indentation decreased: searches the state
// stack downwards from the state below the current one for a state whose
// reference indentation equals the current line's indentation, then pops to
// it. Reports a parse error when no valid target is found.
// NOTE(review): the lines that record the match in `popto` are not visible in
// this excerpt.
1840 template<
class EventHandler>
1841 void ParseEngine<EventHandler>::_handle_indentation_pop_from_block_seq()
1844 using state_type =
typename EventHandler::state;
1845 state_type
const* popto =
nullptr;
1846 auto &stack = m_evt_handler->m_stack;
// the pointer walk below requires contiguous storage
1847 _RYML_ASSERT_BASIC_(stack.m_callbacks, stack.is_contiguous());
1848 _RYML_ASSERT_BASIC_(stack.m_callbacks, m_evt_handler->m_curr >= stack.begin() && m_evt_handler->m_curr < stack.end());
1849 const size_t ind = m_evt_handler->m_curr->line_contents.indentation;
1851 _print_state_stack();
// NOTE(review): with `s >= stack.begin()` as the bound, a search without a
// match decrements `s` to one before the first element; the block_map variant
// uses `s > stack.begin()` - confirm the difference is intended.
1853 for(state_type
const* s = m_evt_handler->m_curr-1; s >= stack.begin(); --s)
1855 _c4dbgpf(
"searching for state with indentation {}. curr={} (level={},node={})", ind, s->indref, s->level, s->node_id);
1856 if(s->indref == ind)
1858 _c4dbgpf(
"gotit!!! level={} node={}", s->level, s->node_id);
// the target must exist, lie below the current state, and have a lower level
1863 if(!popto || popto >= m_evt_handler->m_curr || popto->level >= m_evt_handler->m_curr->level)
1865 _c4err(
"parse error: incorrect indentation?");
1867 _handle_indentation_pop(popto);
// Called from a block map when indentation decreased: searches the state stack
// downwards (stopping before the first entry) for the state to pop to, based
// on the current line's indentation, then pops to it. Reports a parse error
// when no valid target is found.
// NOTE(review): the first branch of the search, and the assignments to `popto`,
// are not visible in this excerpt.
1870 template<
class EventHandler>
1871 void ParseEngine<EventHandler>::_handle_indentation_pop_from_block_map()
1874 using state_type =
typename EventHandler::state;
1875 auto &stack = m_evt_handler->m_stack;
// the pointer walk below requires contiguous storage
1876 _RYML_ASSERT_BASIC_(stack.m_callbacks, stack.is_contiguous());
1877 _RYML_ASSERT_BASIC_(stack.m_callbacks, m_evt_handler->m_curr >= stack.begin() && m_evt_handler->m_curr < stack.end());
1878 const size_t ind = m_evt_handler->m_curr->line_contents.indentation;
1879 state_type
const* popto =
nullptr;
1882 _print_state_stack(flagbuf_);
1884 for(state_type
const* s = m_evt_handler->m_curr-1; s > stack.begin(); --s)
1886 _c4dbgpf(
"searching for state with indentation {}. current: ind={},level={},node={},flags={}", ind, s->indref, s->level, s->node_id, detail::_parser_flags_to_str(flagbuf_, s->flags));
1891 else if(s->indref == ind)
1893 _c4dbgpf(
"same indentation!!! level={} node={}", s->level, s->node_id);
1894 if(popto && has_any(
RTOP, s) && has_none(
RMAP|
RSEQ, s))
// look at the text after the leading spaces, to detect a '-' block token
1901 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
1902 const size_t first = rem.first_not_of(
' ');
1903 _RYML_ASSERT_BASIC_(stack.m_callbacks, first == ind || first ==
npos);
1904 rem = rem.right_of(first,
true);
1905 _c4dbgpf(
"indentless? rem='{}' first={}", rem, first);
1906 if(rem.begins_with(
'-') && _is_blck_token(rem))
1908 _c4dbgp(
"parent was indentless seq");
// the target must exist, lie below the current state, and have a lower level
1914 if(!popto || popto >= m_evt_handler->m_curr || popto->level >= m_evt_handler->m_curr->level)
1916 _c4err(
"parse error: incorrect indentation?");
1918 _handle_indentation_pop(popto);
// Scans a single-quoted scalar starting at the current offset, possibly
// spanning several lines, and advances the parser position past it.
// @return a ScannedScalar holding the scanned range and a flag telling whether
//         the scalar still needs filtering
// Reports an error when the end of the file is reached before the closing
// quote.
// NOTE(review): several short lines of the scanning loop (branch heads, loop
// exits, counters) are not visible in this excerpt.
1923 template<
class EventHandler>
1924 typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_scan_scalar_squot()
1930 size_t b = m_evt_handler->m_curr->pos.offset;
1931 substr s = m_buf.sub(b);
1932 if(s.begins_with(
' '))
// advance by the distance between the start of s and the original offset
1935 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_buf.sub(b).is_super(s));
1936 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.begin() >= m_buf.sub(b).begin());
1937 _line_progressed((
size_t)(s.begin() - m_buf.sub(b).begin()));
1939 b = m_evt_handler->m_curr->pos.offset;
1940 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'\''));
// step over the opening quote
1943 _line_progressed(1);
1946 bool needs_filter =
false;
1948 size_t numlines = 1;
1950 while( ! _finished_file())
1952 const csubstr line = m_evt_handler->m_curr->line_contents.rem;
1953 bool line_is_blank =
true;
1954 _c4dbgpf(
"scanning single quoted scalar @ line[{}]: ~~~{}~~~", m_evt_handler->m_curr->pos.line, line);
1955 for(
size_t i = 0; i < line.len; ++i)
1957 const char curr = line.str[i];
// '~' stands in for the next char when curr is the last one on the line
1960 const char next = i+1 < line.len ? line.str[i+1] :
'~';
1968 needs_filter =
true;
1972 else if(curr !=
' ')
1974 line_is_blank =
false;
1979 needs_filter = needs_filter
1982 || (_at_line_begin() && line.begins_with(
' '));
1986 _line_progressed(line.len);
1991 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, pos >= 0 && pos < m_buf.len);
1992 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_buf[m_evt_handler->m_curr->pos.offset + pos] ==
'\'');
// consume up to and including the quote found at pos
1993 _line_progressed(pos + 1);
// make pos relative to b, the offset where the scalar began
1994 pos = m_evt_handler->m_curr->pos.offset - b - 1;
2004 _c4err(
"reached end of file while looking for closing quote");
2008 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, pos > 0);
2009 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.end() >= m_buf.begin() && s.end() <= m_buf.end());
2010 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.end() == m_buf.end() || *s.end() ==
'\'');
2011 s = s.sub(0, pos-1);
2014 _c4prscalar(
"scanned squoted scalar", s,
true);
2016 return ScannedScalar { s, needs_filter };
// Scans a double-quoted scalar starting at the current offset, possibly
// spanning several lines, and advances the parser position past it.
// @return a ScannedScalar holding the scanned range and a flag telling whether
//         the scalar still needs filtering
// Reports an error when the end of the file is reached before the closing
// quote.
// NOTE(review): several short lines of the scanning loop (branch heads, loop
// exits, counters) are not visible in this excerpt.
2021 template<
class EventHandler>
2022 typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_scan_scalar_dquot()
2028 size_t b = m_evt_handler->m_curr->pos.offset;
2029 substr s = m_buf.sub(b);
2030 if(s.begins_with(
' '))
// advance by the distance between the start of s and the original offset
2033 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_buf.sub(b).is_super(s));
2034 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.begin() >= m_buf.sub(b).begin());
2035 _line_progressed((
size_t)(s.begin() - m_buf.sub(b).begin()));
2037 b = m_evt_handler->m_curr->pos.offset;
2038 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'"'));
// step over the opening quote
2041 _line_progressed(1);
2044 bool needs_filter =
false;
2046 size_t numlines = 1;
2048 auto *st = m_evt_handler->m_curr;
2049 while( ! _finished_file())
2051 const csubstr line = st->line_contents.rem;
// compiler-specific workaround: `line` is kept from being optimized on GCC 8 and 11
2052 #if defined(__GNUC__) && (__GNUC__ == 11 || __GNUC__ == 8)
2053 C4_DONT_OPTIMIZE(line);
2055 bool line_is_blank =
true;
2056 _c4dbgpf(
"scanning double quoted scalar @ line[{}]: line='{}'", st->pos.line, line);
2057 for(
size_t i = 0; i < line.len; ++i)
2059 const char curr = line.str[i];
2061 line_is_blank =
false;
// '~' stands in for the next char when curr is the last one on the line
2065 const char next = i+1 < line.len ? line.str[i+1] :
'~';
2066 needs_filter =
true;
2067 if(next ==
'"' || next ==
'\\')
2070 else if(curr ==
'"')
2078 needs_filter = needs_filter
2081 || (_at_line_begin() && line.begins_with(
' '));
2085 _line_progressed(line.len);
2090 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, pos >= 0 && pos < m_buf.len);
2091 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_buf[st->pos.offset + pos] ==
'"');
// consume up to and including the quote found at pos
2092 _line_progressed(pos + 1);
// make pos relative to b, the offset where the scalar began
2093 pos = st->pos.offset - b - 1;
2103 _c4err(
"reached end of file looking for closing quote");
2107 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, pos > 0);
// NOTE(review): *s.end() is read here before the next assertion checks that
// s.end() lies within m_buf; _scan_scalar_squot checks in the opposite order.
2108 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.end() == m_buf.end() || *s.end() ==
'"');
2109 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.end() >= m_buf.begin() && s.end() <= m_buf.end());
2110 s = s.sub(0, pos-1);
2113 _c4prscalar(
"scanned dquoted scalar", s,
true);
2115 return ScannedScalar{s, needs_filter};
// Scans a block scalar, introduced by '|' or '>', starting on the current
// line. It parses the header (chomp indicator and optional explicit
// indentation digit), then collects the following lines that belong to the
// block, advancing the parser position past them.
// @param sb     output: the raw block range and the resolved indentation are
//               written to sb->scalar and sb->indentation
// @param indref reference indentation of the enclosing context; must not be npos
// NOTE(review): many short lines (branch heads, loop exits, chomp assignments)
// are not visible in this excerpt; comments describe only what is shown.
2120 template<
class EventHandler>
2121 void ParseEngine<EventHandler>::_scan_block(ScannedBlock *C4_RESTRICT sb,
size_t indref)
2123 _c4dbgpf(
"blck: indref={}", indref);
2124 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, indref !=
npos);
// skip any spaces in front of the block indicator
2127 csubstr s = m_evt_handler->m_curr->line_contents.rem;
2128 csubstr trimmed = s.triml(
' ');
2129 if(trimmed.str > s.str)
2131 _c4dbgp(
"skipping whitespace");
2132 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, trimmed.str >= s.str);
2133 _line_progressed(
static_cast<size_t>(trimmed.str - s.str));
2136 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'|') || s.begins_with(
'>'));
2138 _c4dbgpf(
"blck: specs=[{}]~~~{}~~~", s.len, s);
// npos means that no indentation has been determined yet
2142 size_t indentation =
npos;
// header with more than the indicator char: read the chomp char and digits
2146 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.begins_with_any(
"|>"));
2147 csubstr t = s.sub(1);
2148 _c4dbgpf(
"blck: spec is multichar: '{}'", t);
2149 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, t.len >= 1);
2150 size_t pos = t.first_of(
"-+");
2151 _c4dbgpf(
"blck: spec chomp char at {}", pos);
2156 else if(t[pos] ==
'+')
2164 digits = t.left_of(t.first_not_of(
"0123456789"));
2165 if( ! digits.empty())
// only a single, non-zero digit is accepted as explicit indentation
2167 if(C4_UNLIKELY(digits.len > 1))
2168 _c4err(
"parse error: invalid indentation");
2169 _c4dbgpf(
"blck: parse indentation digits: [{}]~~~{}~~~", digits.len, digits);
2170 if(C4_UNLIKELY( !
c4::atou(digits, &indentation)))
2171 _c4err(
"parse error: could not read indentation as decimal");
2172 if(C4_UNLIKELY( ! indentation))
2173 _c4err(
"parse error: null indentation");
// NOTE(review): the message prints indentation+indref (the parameter) while
// the statement after it adds m_curr->indref - confirm these always agree.
2174 _c4dbgpf(
"blck: indentation specified: {}. add {} from curr state -> {}", indentation, m_evt_handler->m_curr->indref, indentation+indref);
2175 indentation += m_evt_handler->m_curr->indref;
2179 _c4dbgpf(
"blck: style={} chomp={} indentation={}", s.begins_with(
'>') ?
"fold" :
"literal", chomp==
CHOMP_CLIP ?
"clip" : (chomp==
CHOMP_STRIP ?
"strip" :
"keep"), indentation);
// the header is done: consume the rest of the header line
2182 _line_progressed(s.len);
// the raw block starts empty at the current offset, and grows by whole lines
2187 substr raw_block(m_buf.data() + m_evt_handler->m_curr->pos.offset,
size_t(0));
2188 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, raw_block.begin() == m_evt_handler->m_curr->line_contents.full.str);
2196 size_t num_lines = 0;
2197 size_t first = m_evt_handler->m_curr->pos.line;
2198 size_t provisional_indentation =
npos;
// peek at each following line (in lc) before accepting it into the block
2200 while(( ! _finished_file()))
2203 lc.reset_with_next_line(m_buf, m_evt_handler->m_curr->pos.offset);
// compiler-specific workaround: lc.rem is kept from being optimized on GCC 12 and 13
2204 #if defined(__GNUC__) && (__GNUC__ == 12 || __GNUC__ == 13)
2205 C4_DONT_OPTIMIZE(lc.rem);
2207 _c4dbgpf(
"blck: peeking at [{}]~~~{}~~~", lc.rem.trimr(
"\r\n").len, lc.rem.trimr(
"\r\n"));
// case 1: the indentation is already known
2209 if(indentation !=
npos)
2211 _c4dbgpf(
"blck: indentation={}", indentation);
// a less indented line that is not blank ends the block
2213 if(lc.indentation < indentation && ( ! lc.rem.trim(
" \t").empty()))
2217 _c4dbgpf(
"blck: indentation decreased ref={} thisline={}", indentation, lc.indentation);
2221 _c4err(
"indentation decreased without any scalar");
2225 else if(indentation == 0)
2227 _c4dbgpf(
"blck: noindent. lc.rem=[{}]~~~{}~~~", lc.rem.len, lc.rem);
2228 if(_is_doc_token(lc.rem))
2230 _c4dbgp(
"blck: stop. indentation=0 and doc ended");
// case 2: the indentation is not known yet; try to settle it from this line
2237 const size_t fns = lc.rem.first_not_of(
' ');
2238 _c4dbgpf(
"blck: indentation ref not set. firstnonws={}", fns);
2242 if(C4_UNLIKELY(lc.full.begins_with(
'\t')))
2245 _c4dbgpf(
"blck: line not empty. indref={} indprov={} indentation={}", indref, provisional_indentation, lc.indentation);
2246 if(provisional_indentation ==
npos)
2248 if(lc.indentation < indref)
2250 _c4dbgpf(
"blck: block terminated indentation={} < indref={}", lc.indentation, indref);
2251 if(raw_block.len == 0)
2253 _c4dbgp(
"blck: was empty, undo next line");
2258 else if(lc.indentation == m_evt_handler->m_curr->indref)
2262 _c4dbgpf(
"blck: block terminated. reading container and indentation={}==indref={}", lc.indentation, m_evt_handler->m_curr->indref);
2266 _c4dbgpf(
"blck: set indentation ref from this line: ref={}", lc.indentation);
2267 indentation = lc.indentation;
2271 if(lc.indentation >= provisional_indentation)
2273 _c4dbgpf(
"blck: set indentation ref from provisional indentation: provisional_ref={}, thisline={}", provisional_indentation, lc.indentation);
2275 indentation = lc.indentation;
// blank line, or line with only spaces: it can only move the provisional value
2286 _c4dbgpf(
"blck: line empty or {} spaces. line_indentation={} prov_indentation={}", lc.rem.len, lc.indentation, provisional_indentation);
2287 if(provisional_indentation !=
npos)
2289 if(lc.rem.len >= provisional_indentation)
2291 _c4dbgpf(
"blck: increase provisional_ref {} -> {}", provisional_indentation, lc.rem.len);
2292 provisional_indentation = lc.rem.len;
2294 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
2295 else if(lc.indentation >= provisional_indentation && lc.indentation !=
npos)
2297 _c4dbgpf(
"blck: increase provisional_ref {} -> {}", provisional_indentation, lc.indentation);
2298 provisional_indentation = lc.indentation;
// when the line has no indentation, the has_any() result is used as the value
// (presumably a bool, i.e. 0 or 1 - TODO confirm)
2304 provisional_indentation = lc.indentation ? lc.indentation : has_any(
RSEQ|
RVAL);
2305 _c4dbgpf(
"blck: initialize provisional_ref={}", provisional_indentation);
2306 if(provisional_indentation ==
npos)
2308 provisional_indentation = lc.rem.len ? lc.rem.len : has_any(
RSEQ|
RVAL);
2309 _c4dbgpf(
"blck: initialize provisional_ref={}", provisional_indentation);
// the provisional value is never left below the reference indentation
2311 if(provisional_indentation < indref)
2313 provisional_indentation = indref;
2314 _c4dbgpf(
"blck: initialize provisional_ref={}", provisional_indentation);
// the peeked line is accepted: commit it as the current line, grow the block
2320 m_evt_handler->m_curr->line_contents = lc;
2321 _c4dbgpf(
"blck: append '{}'", m_evt_handler->m_curr->line_contents.rem);
2322 raw_block.len += m_evt_handler->m_curr->line_contents.full.len;
2323 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
2327 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.line == (first + num_lines) || (raw_block.len == 0));
2328 C4_UNUSED(num_lines);
// no line settled the indentation: fall back to the provisional value
2331 if(indentation ==
npos)
2333 _c4dbgpf(
"blck: set indentation from provisional: {}", provisional_indentation);
2334 indentation = provisional_indentation;
2340 _c4prscalar(
"scanned block", raw_block,
true);
2342 sb->scalar = raw_block;
2343 sb->indentation = indentation;
// Debug print helper for the whitespace filter: the first form prefixes the
// message with the read and write positions of a variable named `proc`, which
// must be in scope at the point of use; the second form expands to nothing.
// NOTE(review): the preprocessor conditional that selects between the two
// definitions is not visible in this excerpt.
2355 #define _c4dbgfws(fmt, ...) _c4dbgpf("filt_ws[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2357 #define _c4dbgfws(...)
// With proc positioned at a space or tab, finds the first following character
// that is not whitespace and decides whether the whitespace run is trailing
// (followed by '\n' or '\r', in which case it is skipped) or legitimate.
// @param proc the filter processor; its current char must be ' ' or '\t'
// @return a bool consumed by _filter_ws_copy_trailing()/_filter_ws_skip_trailing()
// NOTE(review): the return statements, and the handling of the legitimate
// whitespace case, are not visible in this excerpt.
2360 template<
class EventHandler>
2361 template<
class FilterProcessor>
2362 bool ParseEngine<EventHandler>::_filter_ws_handle_to_first_non_space(FilterProcessor &proc)
2364 _c4dbgfws(
"found whitespace '{}'", _c4prc(proc.curr()));
2365 _RYML_ASSERT_BASIC_(this->callbacks(), proc.curr() ==
' ' || proc.curr() ==
'\t');
// at the very start of the source (rpos == 0) only spaces are skipped over;
// elsewhere both spaces and tabs are
2367 const size_t first_pos = proc.rpos > 0 ? proc.src.first_not_of(
" \t", proc.rpos) : proc.src.first_not_of(
' ', proc.rpos);
2368 if(first_pos !=
npos)
2370 const char first_char = proc.src[first_pos];
2371 _c4dbgfws(
"firstnonws='{}'@{}", _c4prc(first_char), first_pos);
2372 if(first_char ==
'\n' || first_char ==
'\r')
2374 _c4dbgfws(
"whitespace is trailing on line",
"");
// drop the whitespace run up to the line terminator
2375 proc.skip(first_pos - proc.rpos);
2380 _c4dbgfws(
"legit whitespace. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
2384 _c4dbgfws(
"whitespace is trailing on line",
"");
2388 template<
class EventHandler>
2389 template<
class FilterProcessor>
2390 void ParseEngine<EventHandler>::_filter_ws_copy_trailing(FilterProcessor &proc)
2392 if(!_filter_ws_handle_to_first_non_space(proc))
2394 _c4dbgfws(
"... everything else is trailing whitespace - copy {} chars", proc.src.len - proc.rpos);
2395 proc.copy(proc.src.len - proc.rpos);
2399 template<
class EventHandler>
2400 template<
class FilterProcessor>
2401 void ParseEngine<EventHandler>::_filter_ws_skip_trailing(FilterProcessor &proc)
2403 if(!_filter_ws_handle_to_first_non_space(proc))
2405 _c4dbgfws(
"... everything else is trailing whitespace - skip {} chars", proc.src.len - proc.rpos);
2406 proc.skip(proc.src.len - proc.rpos);
// Debug print helper for the plain-scalar filter: the first form prefixes the
// message with the read and write positions of a variable named `proc`, which
// must be in scope at the point of use; the second form expands to nothing.
// NOTE(review): the preprocessor conditional that selects between the two
// definitions is not visible in this excerpt.
2420 #define _c4dbgfps(fmt, ...) _c4dbgpf("filt_plain[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2422 #define _c4dbgfps(fmt, ...)
// Filters a newline found in a plain scalar. Following newlines are counted;
// when there are any, that many '\n' are written. Otherwise the debug messages
// indicate that a single newline becomes a space, unless only whitespace
// follows it.
// @param proc        the filter processor; its current char must be '\n'
// @param indentation passed on to _count_following_newlines()
// NOTE(review): branch heads and the statements that emit the space or advance
// the read position are not visible in this excerpt.
2425 template<
class EventHandler>
2426 template<
class FilterProcessor>
2427 void ParseEngine<EventHandler>::_filter_nl_plain(FilterProcessor &C4_RESTRICT proc,
size_t indentation)
2429 _RYML_ASSERT_BASIC_(this->callbacks(), proc.curr() ==
'\n');
2431 _c4dbgfps(
"found newline. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
// ii is handed to the counting helper by address, and is read again afterwards
2432 size_t ii = proc.rpos;
2433 const size_t numnl_following = _count_following_newlines(proc.src, &ii, indentation);
2436 proc.set(
'\n', numnl_following);
2437 _c4dbgfps(
"{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ?
"in the middle" :
"at the end", proc.rpos-ii);
// position of the first char after the newline that is not a space or tab
2441 const size_t ret = proc.src.first_not_of(
" \t", proc.rpos+1);
2445 _c4dbgfps(
"single newline. convert to space. ret={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2449 _c4dbgfps(
"last newline, everything else is whitespace. ii={}/{}", ii, proc.src.len);
// Filters a plain scalar, one char at a time: whitespace goes to
// _filter_ws_skip_trailing(), newlines go to _filter_nl_plain(), and carriage
// returns are ignored (per the debug message).
// @param proc        the filter processor holding the source and the output
// @param indentation indentation of the scalar; must not be npos
// @return proc.result()
// NOTE(review): the dispatch on the current char (and the handling of all
// other chars) is not visible in this excerpt.
2456 template<
class EventHandler>
2457 template<
class FilterProcessor>
2458 auto ParseEngine<EventHandler>::_filter_plain(FilterProcessor &C4_RESTRICT proc,
size_t indentation) -> decltype(proc.result())
2460 _RYML_ASSERT_BASIC_(this->callbacks(), indentation !=
npos);
2461 _c4dbgfps(
"before=[{}]~~~{}~~~", proc.src.len, proc.src);
2463 while(proc.has_more_chars())
2465 const char curr = proc.curr();
2466 _c4dbgfps(
"'{}', sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
2471 _c4dbgfps(
"whitespace", curr);
2472 _filter_ws_skip_trailing(proc);
2475 _c4dbgfps(
"newline", curr);
2476 _filter_nl_plain(proc, indentation);
2479 _c4dbgfps(
"carriage return, ignore", curr);
2488 _c4dbgfps(
"after[{}]=~~~{}~~~", proc.wpos, proc.sofar());
2490 return proc.result();
// Thin wrapper (its signature line is not visible in this excerpt): builds a
// FilterProcessorSrcDst from (scalar, dst) and runs the plain-scalar filter
// with the given indentation.
2496 template<
class EventHandler>
2499 FilterProcessorSrcDst proc(scalar, dst);
2500 return _filter_plain(proc, indentation);
// Thin wrapper (its signature line is not visible in this excerpt): builds a
// FilterProcessorInplaceEndExtending from (dst, cap) and runs the plain-scalar
// filter with the given indentation.
2503 template<
class EventHandler>
2506 FilterProcessorInplaceEndExtending proc(dst, cap);
2507 return _filter_plain(proc, indentation);
// Debug print helper for the single-quoted filter: the first form prefixes the
// message with the read and write positions of a variable named `proc`, which
// must be in scope at the point of use; the second form expands to nothing.
// NOTE(review): the preprocessor conditional that selects between the two
// definitions is not visible in this excerpt.
2518 #define _c4dbgfsq(fmt, ...) _c4dbgpf("filt_squo[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2520 #define _c4dbgfsq(fmt, ...)
// Filters a newline found in a single-quoted scalar. Following newlines are
// counted; when there are any, that many '\n' are written. Otherwise the debug
// messages indicate that a single newline becomes a space.
// @param proc the filter processor; its current char must be '\n'
// NOTE(review): branch heads and the statements that emit the space or advance
// the read position are not visible in this excerpt.
2523 template<
class EventHandler>
2524 template<
class FilterProcessor>
2525 void ParseEngine<EventHandler>::_filter_nl_squoted(FilterProcessor &C4_RESTRICT proc)
2527 _RYML_ASSERT_BASIC_(this->callbacks(), proc.curr() ==
'\n');
2529 _c4dbgfsq(
"found newline. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
// ii is handed to the counting helper by address, and is read again afterwards
2530 size_t ii = proc.rpos;
2531 const size_t numnl_following = _count_following_newlines(proc.src, &ii);
2534 proc.set(
'\n', numnl_following);
2535 _c4dbgfsq(
"{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ?
"in the middle" :
"at the end", proc.rpos-ii);
// position of the first char after the newline that is not a space or tab
2539 const size_t ret = proc.src.first_not_of(
" \t", proc.rpos+1);
2543 _c4dbgfsq(
"single newline. convert to space. ret={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2548 _c4dbgfsq(
"single newline. convert to space. ii={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
// Filters a single-quoted scalar, one char at a time: whitespace goes to
// _filter_ws_copy_trailing(), newlines go to _filter_nl_squoted(), carriage
// returns are skipped (per the debug message), and a quote followed by another
// quote is recognized as a pair of consecutive quotes.
// @param proc the filter processor holding the source and the output
// @return proc.result()
// NOTE(review): the dispatch on the current char, and what is written for a
// pair of quotes or for any other char, is not visible in this excerpt.
2554 template<
class EventHandler>
2555 template<
class FilterProcessor>
2556 auto ParseEngine<EventHandler>::_filter_squoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result())
2558 _c4dbgfsq(
"before=[{}]~~~{}~~~", proc.src.len, proc.src);
2562 while(proc.has_more_chars())
2564 const char curr = proc.curr();
2565 _c4dbgfsq(
"'{}', sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
2570 _c4dbgfsq(
"whitespace", curr);
2571 _filter_ws_copy_trailing(proc);
2574 _c4dbgfsq(
"newline", curr);
2575 _filter_nl_squoted(proc);
2578 _c4dbgfsq(
"skip cr", curr);
2582 _c4dbgfsq(
"squote", curr);
2583 if(proc.next() ==
'\'')
2585 _c4dbgfsq(
"two consecutive squotes", curr);
2600 _c4dbgfsq(
": #filteredchars={} after=~~~[{}]{}~~~", proc.src.len-proc.sofar().len, proc.sofar().len, proc.sofar());
2602 return proc.result();
// Thin wrapper (its signature line is not visible in this excerpt): builds a
// FilterProcessorSrcDst from (scalar, dst) and runs the single-quoted filter.
2607 template<
class EventHandler>
2610 FilterProcessorSrcDst proc(scalar, dst);
2611 return _filter_squoted(proc);
// Thin wrapper (its signature line is not visible in this excerpt): builds a
// FilterProcessorInplaceEndExtending from (dst, cap) and runs the single-quoted
// filter.
2614 template<
class EventHandler>
2617 FilterProcessorInplaceEndExtending proc(dst, cap);
2618 return _filter_squoted(proc);
// Debug print helper for the double-quoted filter: the first form prefixes the
// message with the read and write positions of a variable named `proc`, which
// must be in scope at the point of use; the second form expands to nothing.
// NOTE(review): the preprocessor conditional that selects between the two
// definitions is not visible in this excerpt.
2629 #define _c4dbgfdq(fmt, ...) _c4dbgpf("filt_dquo[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2631 #define _c4dbgfdq(...)
// Filters a newline found in a double-quoted scalar. Following newlines are
// counted; when there are any, that many '\n' are written. Otherwise the debug
// messages indicate that a single newline becomes a space. Afterwards, a
// backslash at position ii that is followed by a space or tab gets special
// treatment ("extend skip to backslash").
// @param proc the filter processor; its current char must be '\n'
// NOTE(review): branch heads and the statements that emit the space, advance
// the read position or extend the skip are not visible in this excerpt.
2634 template<
class EventHandler>
2635 template<
class FilterProcessor>
2636 void ParseEngine<EventHandler>::_filter_nl_dquoted(FilterProcessor &C4_RESTRICT proc)
2638 _RYML_ASSERT_BASIC_(this->callbacks(), proc.curr() ==
'\n');
2640 _c4dbgfdq(
"found newline. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
// ii is handed to the counting helper by address, and is read again afterwards
2641 size_t ii = proc.rpos;
2642 const size_t numnl_following = _count_following_newlines(proc.src, &ii);
2645 proc.set(
'\n', numnl_following);
2646 _c4dbgfdq(
"{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ?
"in the middle" :
"at the end", proc.rpos-ii);
// position of the first char after the newline that is not a space or tab
2650 const size_t ret = proc.src.first_not_of(
" \t", proc.rpos+1);
2654 _c4dbgfdq(
"single newline. convert to space. ret={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2659 _c4dbgfdq(
"single newline. convert to space. ii={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2661 if(ii < proc.src.len && proc.src.str[ii] ==
'\\')
2663 _c4dbgfdq(
"backslash at [{}]", ii);
// '\0' stands in for the next char when the backslash is the last char
2664 const char next = ii+1 < proc.src.len ? proc.src.str[ii+1] :
'\0';
2665 if(next ==
' ' || next ==
'\t')
2667 _c4dbgfdq(
"extend skip to backslash",
"");
// Handles one backslash escape in a double-quoted scalar. `next` is the char
// returned by proc.next(); the hex digits of \x, \u and \U are read starting at
// proc.rpos+2, which suggests that proc.rpos is at the backslash itself.
// Each recognized escape is replaced in the output by the char (or the UTF-8
// byte sequence) that it stands for; an unknown escape char is an error.
// @param proc the filter processor
// NOTE(review): some short lines (branch heads, the declaration of `readbuf`,
// closing braces of the payload arrays) are not visible in this excerpt.
2675 template<
class EventHandler>
2676 template<
class FilterProcessor>
2677 void ParseEngine<EventHandler>::_filter_dquoted_backslash(FilterProcessor &C4_RESTRICT proc)
2679 char next = proc.next();
2680 _c4dbgfdq(
"backslash, next='{}'", _c4prc(next));
// a "\r\n" after the backslash is treated like a '\n' (per the debug message)
2683 if(proc.rpos+2 < proc.src.len && proc.src.str[proc.rpos+2] ==
'\n')
2687 _c4dbgfdq(
"[{}]: was \\r\\n, now next='\\n'", proc.rpos);
// scan over the spaces/tabs that follow, then skip from rpos up to ii
2693 size_t ii = proc.rpos + 2;
2694 for( ; ii < proc.src.len; ++ii)
2697 if(proc.src.str[ii] ==
' ' || proc.src.str[ii] ==
'\t')
2702 proc.skip(ii - proc.rpos);
// escapes that stand for the escaped char itself
2704 else if(next ==
'"' || next ==
'/' || next ==
' ' || next ==
'\t')
2707 proc.translate_esc(next);
2708 _c4dbgfdq(
"here, used '{}'", _c4prc(next));
2710 else if(next ==
'\r')
2714 else if(next ==
'n')
2716 proc.translate_esc(
'\n');
2718 else if(next ==
'r')
2720 proc.translate_esc(
'\r');
2722 else if(next ==
't')
2724 proc.translate_esc(
'\t');
2726 else if(next ==
'\\')
2728 proc.translate_esc(
'\\');
// \xHH: both hex digits (at rpos+2 and rpos+3) must lie inside the source
2730 else if(next ==
'x')
2732 if(C4_UNLIKELY(proc.rpos + 1u + 2u >= proc.src.len))
2733 _c4err(
"\\x requires 2 hex digits. scalar pos={}", proc.rpos);
2735 csubstr codepoint = proc.src.sub(proc.rpos + 2u, 2u);
2736 _c4dbgfdq(
"utf8 ~~~{}~~~ rpos={} rem=~~~{}~~~", codepoint, proc.rpos, proc.src.sub(proc.rpos));
2737 uint32_t codepoint_val = {};
2738 if(C4_UNLIKELY(!
read_hex(codepoint, &codepoint_val)))
2739 _c4err(
"failed to read \\x codepoint. scalar pos={}", proc.rpos);
2740 const size_t numbytes =
decode_code_point((uint8_t*)readbuf,
sizeof(readbuf), codepoint_val);
// NOTE(review): the message says "code point=" but prints proc.rpos
2741 if(C4_UNLIKELY(numbytes == 0))
2742 _c4err(
"failed to decode code point={}", proc.rpos);
2743 _RYML_ASSERT_BASIC_(callbacks(), numbytes <= 4);
// the last argument is presumably the count of source chars after the
// backslash ('x' + 2 digits) - TODO confirm
2744 proc.translate_esc_bulk(readbuf, numbytes, 3u);
2745 _c4dbgfdq(
"utf8 after rpos={} rem=~~~{}~~~", proc.rpos, proc.src.sub(proc.rpos));
// \uHHHH: all four hex digits must lie inside the source
2747 else if(next ==
'u')
2749 if(C4_UNLIKELY(proc.rpos + 1u + 4u >= proc.src.len))
2750 _c4err(
"\\u requires 4 hex digits. scalar pos={}", proc.rpos);
2752 csubstr codepoint = proc.src.sub(proc.rpos + 2u, 4u);
2753 uint32_t codepoint_val = {};
2754 if(C4_UNLIKELY(!
read_hex(codepoint, &codepoint_val)))
2755 _c4err(
"failed to parse \\u codepoint. scalar pos={}", proc.rpos);
2756 const size_t numbytes =
decode_code_point((uint8_t*)readbuf,
sizeof(readbuf), codepoint_val);
2757 if(C4_UNLIKELY(numbytes == 0))
2758 _c4err(
"failed to decode code point={}", proc.rpos);
2759 _RYML_ASSERT_BASIC_(callbacks(), numbytes <= 4);
2760 proc.translate_esc_bulk(readbuf, numbytes, 5u);
// \UHHHHHHHH: all eight hex digits must lie inside the source
2762 else if(next ==
'U')
2764 if(C4_UNLIKELY(proc.rpos + 1u + 8u >= proc.src.len))
2765 _c4err(
"\\U requires 8 hex digits. scalar pos={}", proc.rpos);
2767 csubstr codepoint = proc.src.sub(proc.rpos + 2u, 8u);
2768 uint32_t codepoint_val = {};
2769 if(C4_UNLIKELY(!
read_hex(codepoint, &codepoint_val)))
2770 _c4err(
"failed to parse \\U codepoint. scalar pos={}", proc.rpos);
2771 const size_t numbytes =
decode_code_point((uint8_t*)readbuf,
sizeof(readbuf), codepoint_val);
2772 if(C4_UNLIKELY(numbytes == 0))
2773 _c4err(
"failed to decode code point={}", proc.rpos);
2774 _RYML_ASSERT_BASIC_(callbacks(), numbytes <= 4);
2775 proc.translate_esc_bulk(readbuf, numbytes, 9u);
// single-char escapes for control characters
2778 else if(next ==
'0')
2780 proc.translate_esc(
'\0');
2782 else if(next ==
'b')
2784 proc.translate_esc(
'\b');
2786 else if(next ==
'f')
2788 proc.translate_esc(
'\f');
2790 else if(next ==
'a')
2792 proc.translate_esc(
'\a');
2794 else if(next ==
'v')
2796 proc.translate_esc(
'\v');
2798 else if(next ==
'e')
2800 proc.translate_esc(
'\x1b');
// \_ : the two bytes C2 A0, i.e. U+00A0 encoded as UTF-8
// (_RYML_CHCONST presumably picks the signed or unsigned spelling of each byte)
2802 else if(next ==
'_')
2805 const char payload[] = {
2806 _RYML_CHCONST(-0x3e, 0xc2),
2807 _RYML_CHCONST(-0x60, 0xa0),
2809 proc.translate_esc_bulk(payload, 2, 1);
// \N : the two bytes C2 85, i.e. U+0085 encoded as UTF-8
2811 else if(next ==
'N')
2814 const char payload[] = {
2815 _RYML_CHCONST(-0x3e, 0xc2),
2816 _RYML_CHCONST(-0x7b, 0x85),
2818 proc.translate_esc_bulk(payload, 2, 1);
// \L : the three bytes E2 80 A8, i.e. U+2028 encoded as UTF-8; the output is
// longer than the two-char escape, hence the *_extending call
2820 else if(next ==
'L')
2823 const char payload[] = {
2824 _RYML_CHCONST(-0x1e, 0xe2),
2825 _RYML_CHCONST(-0x80, 0x80),
2826 _RYML_CHCONST(-0x58, 0xa8),
2828 proc.translate_esc_extending(payload, 3, 1);
// \P : the three bytes E2 80 A9, i.e. U+2029 encoded as UTF-8
2830 else if(next ==
'P')
2833 const char payload[] = {
2834 _RYML_CHCONST(-0x1e, 0xe2),
2835 _RYML_CHCONST(-0x80, 0x80),
2836 _RYML_CHCONST(-0x57, 0xa9),
2838 proc.translate_esc_extending(payload, 3, 1);
2840 else if(next ==
'\0')
2846 _c4err(
"unknown character '{}' after '\\' pos={}", _c4prc(next), proc.rpos);
2848 _c4dbgfdq(
"backslash...sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
// Filters a double-quoted scalar, one char at a time: whitespace goes to
// _filter_ws_copy_trailing(), newlines go to _filter_nl_dquoted(), carriage
// returns are ignored (per the debug message), and backslash escapes go to
// _filter_dquoted_backslash().
// @param proc the filter processor holding the source and the output
// @return proc.result()
// NOTE(review): the dispatch on the current char (and the handling of all
// other chars) is not visible in this excerpt.
2852 template<
class EventHandler>
2853 template<
class FilterProcessor>
2854 auto ParseEngine<EventHandler>::_filter_dquoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result())
2856 _c4dbgfdq(
"before=[{}]~~~{}~~~", proc.src.len, proc.src);
2859 while(proc.has_more_chars())
2861 const char curr = proc.curr();
2862 _c4dbgfdq(
"'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
2868 _c4dbgfdq(
"whitespace", curr);
2869 _filter_ws_copy_trailing(proc);
2874 _c4dbgfdq(
"newline", curr);
2875 _filter_nl_dquoted(proc);
2880 _c4dbgfdq(
"carriage return, ignore", curr);
2886 _filter_dquoted_backslash(proc);
2896 _c4dbgfdq(
"after[{}]=~~~{}~~~", proc.wpos, proc.sofar());
2897 return proc.result();
// Double-quoted filtering from `scalar` into the separate buffer `dst`:
// wraps both in a FilterProcessorSrcDst and delegates to _filter_dquoted().
// NOTE(review): presumably this is the body of filter_scalar_dquoted(),
// which _filter_scalar_dquot() calls with (s, dst) -- confirm.
2903 template<
class EventHandler>
2906 FilterProcessorSrcDst proc(scalar, dst);
2907 return _filter_dquoted(proc);
// Double-quoted filtering done in place on `dst` (with capacity `cap`):
// uses a FilterProcessorInplaceMidExtending and delegates to
// _filter_dquoted().
// NOTE(review): presumably this is the body of
// filter_scalar_dquoted_in_place(), which _filter_scalar_dquot() calls with
// (s, s.len) -- confirm.
2910 template<
class EventHandler>
2913 FilterProcessorInplaceMidExtending proc(dst, cap);
2914 return _filter_dquoted(proc);
// Scan `s` backwards for a newline whose following line starts with more
// than `indentation` spaces.
//
// The scan starts at index s.len-indentation-1 and stops once `i` wraps
// around to size_t(-1), i.e. after index 0 was examined. For each '\n'
// found, the number of leading spaces of the remainder is computed; a
// remainder made only of spaces counts with its full length.
//
// NOTE(review): presumably returns the index of the matching newline, and
// npos when there is none or `s` is too short -- confirm against the return
// statements.
2923 C4_NO_INLINE
inline size_t _find_last_newline_and_larger_indentation(csubstr s,
size_t indentation) noexcept
2925 if(indentation + 1 > s.len)
2927 for(
size_t i = s.len-indentation-1; i !=
size_t(-1); --i)
2929 if(s.str[i] ==
'\n')
2931 csubstr rem = s.sub(i + 1);
2932 size_t first = rem.first_not_of(
' ');
2933 first = (first !=
npos) ? first : rem.len;
2934 if(first > indentation)
// Apply the chomping of a block scalar to the trailing part of `proc`.
//
// On entry only ' ', '\n' and '\r' may remain to be read (asserted below).
// The work is done in two stages:
//  1. Trailing lines indented by more than `indentation` are processed up
//     to `last`: after a newline, at most `indentation` spaces are skipped
//     and any spaces beyond that are copied.
//  2. The remaining characters are processed according to `chomp`; the
//     debug traces name the CLIP, KEEP and STRIP cases.
//
// @param proc the filter processor
// @param chomp the chomping mode of the block scalar
// @param indentation the indentation of the block scalar
2941 template<
class EventHandler>
2942 template<
class FilterProcessor>
2943 void ParseEngine<EventHandler>::_filter_chomp(FilterProcessor &C4_RESTRICT proc,
BlockChomp_e chomp,
size_t indentation)
2946 _RYML_ASSERT_BASIC_(this->callbacks(), proc.rem().first_not_of(
" \n\r") ==
npos);
// debug trace helper local to this function; the message is prefixed with
// the current read and write positions. The second definition expands to
// nothing.
2950 #define _c4dbgchomp(fmt, ...) _c4dbgpf("chomp[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2952 #define _c4dbgchomp(...)
// stage 1: look for trailing lines with extra indentation
2957 size_t last = _find_last_newline_and_larger_indentation(proc.rem(), indentation);
2960 _c4dbgchomp(
"found newline and larger indentation. last={}", last);
// turn `last` (relative to the remainder) into a position in proc.src,
// placed past the newline and its indentation
2961 last = proc.rpos + last + size_t(1) + indentation;
2962 _RYML_ASSERT_BASIC_(this->callbacks(), last <= proc.src.len);
2964 while((proc.rpos < last) && proc.has_more_chars())
2966 const char curr = proc.curr();
2967 _c4dbgchomp(
"curr='{}'", _c4prc(curr));
2972 _c4dbgchomp(
"newline! remlen={}", proc.rem().len);
2975 csubstr at_next_line = proc.rem();
2976 if(at_next_line.begins_with(
' '))
2978 _c4dbgchomp(
"next line begins with spaces. indentation={}", indentation);
2980 size_t first_non_space = at_next_line.first_not_of(
' ');
2981 _c4dbgchomp(
"first_non_space={}", first_non_space);
2982 if(first_non_space ==
npos)
2984 _c4dbgchomp(
"{} spaces, to the end", at_next_line.len);
2985 first_non_space = at_next_line.len;
// spaces up to the indentation are dropped; spaces beyond it are content
2987 if(first_non_space <= indentation)
2989 _c4dbgchomp(
"skip spaces={}<=indentation={}", first_non_space, indentation);
2990 proc.skip(first_non_space);
2994 _c4dbgchomp(
"skip indentation={}<spaces={}", indentation, first_non_space);
2995 proc.skip(indentation);
2997 _c4dbgchomp(
"copy {}={}-{} spaces", first_non_space - indentation, first_non_space, indentation);
2998 proc.copy(first_non_space - indentation);
// stage 2: chomp what remains
3019 bool had_one =
false;
3020 while(proc.has_more_chars())
3022 const char curr = proc.curr();
3023 _c4dbgchomp(
"CLIP: '{}'", _c4prc(curr));
3028 _c4dbgchomp(
"copy newline!", curr);
3036 _c4dbgchomp(
"skip!", curr);
3043 _c4dbgchomp(
"chomp=CLIP: add missing newline @{}", proc.wpos);
3050 _c4dbgchomp(
"chomp=KEEP: copy all remaining new lines of {} characters", proc.rem().len);
3051 while(proc.has_more_chars())
3053 const char curr = proc.curr();
3054 _c4dbgchomp(
"KEEP: '{}'", _c4prc(curr));
3058 _c4dbgchomp(
"copy newline!", curr);
3063 _c4dbgchomp(
"skip!", curr);
3072 _c4dbgchomp(
"chomp=STRIP: strip {} characters", proc.rem().len);
// Debug trace helper for the block-scalar filters. The first definition
// forwards to _c4dbgpf, prefixing the message with the read and write
// positions of a variable named `proc` in the calling scope; the second
// definition expands to nothing.
3084 #define _c4dbgfb(fmt, ...) _c4dbgpf("filt_block[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3086 #define _c4dbgfb(...)
// Skip the indentation at the start of a block scalar line.
//
// Counts the spaces found at the current read position. When there are
// fewer than `indentation`, the trace reports that only those are skipped;
// otherwise exactly `indentation` spaces are skipped, so that any further
// spaces remain as content.
//
// The section under RYML_NO_COVERAGE__TO_BE_DELETED deals with a remainder
// made only of spaces: with fewer than `indentation` spaces everything up
// to the end of the source is skipped, otherwise `indentation` spaces.
//
// @param proc the filter processor
// @param indentation the indentation of the block scalar
3089 template<
class EventHandler>
3090 template<
class FilterProcessor>
3091 void ParseEngine<EventHandler>::_filter_block_indentation(FilterProcessor &C4_RESTRICT proc,
size_t indentation)
3093 csubstr rem = proc.rem();
3096 size_t first = rem.first_not_of(
' ');
3099 _c4dbgfb(
"{} spaces follow before next nonws character", first);
3100 if(first < indentation)
3102 _c4dbgfb(
"skip {}<{} spaces from indentation", first, indentation);
3107 _c4dbgfb(
"skip {} spaces from indentation", indentation);
3108 proc.skip(indentation);
3111 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
3114 _c4dbgfb(
"all spaces to the end: {} spaces", first);
3118 if(first < indentation)
3120 _c4dbgfb(
"skip everything", first);
3121 proc.skip(proc.src.len - proc.rpos);
3125 _c4dbgfb(
"skip {} spaces from indentation", indentation);
3126 proc.skip(indentation);
// Measure the contents of a block scalar and deal with the case where the
// scalar is all whitespace.
//
// The contents are the source with trailing ' ', '\n' and '\r' trimmed.
// As named by the traces, an all-whitespace source is consumed here, and
// with chomp=KEEP its newlines are kept.
//
// @param proc the filter processor
// @param chomp the chomping mode of the block scalar
// @return the length of the trimmed contents
3134 template<
class EventHandler>
3135 template<
class FilterProcessor>
3136 size_t ParseEngine<EventHandler>::_handle_all_whitespace(FilterProcessor &C4_RESTRICT proc,
BlockChomp_e chomp)
3138 csubstr contents = proc.src.trimr(
" \n\r");
3139 _c4dbgfb(
"ws: contents_len={} wslen={}", contents.len, proc.src.len-contents.len);
3142 _c4dbgfb(
"ws: all whitespace: len={}", proc.src.len);
3145 _c4dbgfb(
"ws: chomp=KEEP all {} newlines", proc.src.count(
'\n'));
3146 while(proc.has_more_chars())
3148 const char curr = proc.curr();
3160 return contents.len;
// Extend the contents length of a block scalar up to the first newline
// that follows the contents.
//
// Searches proc.src for '\n' starting at `contents_len`. When one is found
// the contents end at that newline; otherwise they extend to the end of the
// source.
//
// @param proc the filter processor
// @param contents_len length of the trimmed contents; must be > 0
// @return the extended contents length
3163 template<
class EventHandler>
3164 template<
class FilterProcessor>
3165 size_t ParseEngine<EventHandler>::_extend_to_chomp(FilterProcessor &C4_RESTRICT proc,
size_t contents_len)
3167 _c4dbgfb(
"contents_len={}", contents_len);
3169 _RYML_ASSERT_BASIC_(this->callbacks(), contents_len > 0u);
3173 size_t firstnewl = proc.src.first_of(
'\n', contents_len);
3174 if(firstnewl !=
npos)
3176 contents_len = firstnewl;
3177 _c4dbgfb(
"contents_len={} <--- firstnewl={}", contents_len, firstnewl);
3181 contents_len = proc.src.len;
3182 _c4dbgfb(
"contents_len={} <--- src.len={}", contents_len, proc.src.len);
3185 return contents_len;
// Debug trace helper for the block literal filter. The first definition
// forwards to _c4dbgpf, prefixing the message with the read and write
// positions of a variable named `proc` in the calling scope; the second
// definition expands to nothing.
3197 #define _c4dbgfbl(fmt, ...) _c4dbgpf("filt_block_lit[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3199 #define _c4dbgfbl(...)
// Filter a literal block scalar through `proc`.
//
// Steps:
//  - _handle_all_whitespace() measures the contents; there is an early
//    return of proc.result() right after it;
//  - _extend_to_chomp() extends the contents to the next newline;
//  - the indentation of the first line is skipped, and again after every
//    newline found inside the contents;
//  - _filter_chomp() processes what follows the contents.
//
// @param proc the filter processor
// @param indentation the indentation of the block scalar
// @param chomp the chomping mode of the block scalar
// @return proc.result()
3202 template<
class EventHandler>
3203 template<
class FilterProcessor>
3204 auto ParseEngine<EventHandler>::_filter_block_literal(FilterProcessor &C4_RESTRICT proc,
size_t indentation,
BlockChomp_e chomp) -> decltype(proc.result())
3206 _c4dbgfbl(
"indentation={} before=[{}]~~~{}~~~", indentation, proc.src.len, proc.src);
3208 size_t contents_len = _handle_all_whitespace(proc, chomp);
3210 return proc.result();
3212 contents_len = _extend_to_chomp(proc, contents_len);
3214 _c4dbgfbl(
"to filter=[{}]~~~{}~~~", contents_len, proc.src.first(contents_len));
3216 _filter_block_indentation(proc, indentation);
// filter the contents; reading is bounded by contents_len
3219 while(proc.has_more_chars(contents_len))
3221 const char curr = proc.curr();
3222 _c4dbgfbl(
"'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3227 _c4dbgfbl(
"found newline. skip indentation on the next line", curr);
3229 _filter_block_indentation(proc, indentation);
3241 _c4dbgfbl(
"before chomp: #tochomp={} sofar=[{}]~~~{}~~~", proc.rem().len, proc.sofar().len, proc.sofar());
3243 _filter_chomp(proc, chomp, indentation);
3245 _c4dbgfbl(
"final=[{}]~~~{}~~~", proc.sofar().len, proc.sofar());
3247 return proc.result();
// Literal block filtering from `scalar` into the separate buffer `dst`:
// wraps both in a FilterProcessorSrcDst and delegates to
// _filter_block_literal().
// NOTE(review): presumably the body of filter_scalar_block_literal() --
// confirm.
3252 template<
class EventHandler>
3255 FilterProcessorSrcDst proc(scalar, dst);
3256 return _filter_block_literal(proc, indentation, chomp);
// Literal block filtering done in place on `scalar` (with capacity `cap`):
// uses a FilterProcessorInplaceEndExtending and delegates to
// _filter_block_literal().
// NOTE(review): presumably the body of
// filter_scalar_block_literal_in_place(), which _filter_scalar_literal()
// calls -- confirm.
3259 template<
class EventHandler>
3262 FilterProcessorInplaceEndExtending proc(scalar, cap);
3263 return _filter_block_literal(proc, indentation, chomp);
// Debug trace helper for the block folded filter. The first definition
// forwards to _c4dbgpf, prefixing the message with the read and write
// positions of a variable named `proc` in the calling scope; the second
// definition expands to nothing.
3273 #define _c4dbgfbf(fmt, ...) _c4dbgpf("filt_block_folded[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3275 #define _c4dbgfbf(...)
// Process the leading newlines of a folded block scalar.
//
// Skips the indentation of the first line, then loops over the characters
// up to `len`. As named by the traces: after a newline, the indentation of
// the next line is skipped; at a space, the line is more indented and is
// handed to _filter_block_folded_indented_block(); any other character
// stops the loop.
//
// @param proc the filter processor
// @param indentation the indentation of the block scalar
// @param len the length of the contents to be filtered
3279 template<
class EventHandler>
3280 template<
class FilterProcessor>
3281 void ParseEngine<EventHandler>::_filter_block_folded_newlines_leading(FilterProcessor &C4_RESTRICT proc,
size_t indentation,
size_t len)
3283 _filter_block_indentation(proc, indentation);
3284 while(proc.has_more_chars(len))
3286 const char curr = proc.curr();
3287 _c4dbgfbf(
"'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3291 _c4dbgfbf(
"newline.", curr);
3293 _filter_block_indentation(proc, indentation);
// count the extra leading whitespace (spaces or tabs) of this line
3301 size_t first = proc.rem().first_not_of(
" \t");
3302 _c4dbgfbf(
"space. first={}", first);
3304 first = proc.rem().len;
3305 _c4dbgfbf(
"... indentation increased to {}", first);
3306 _filter_block_folded_indented_block(proc, indentation, len, first);
3310 _c4dbgfbf(
"newl leading: not space, not newline. stop.", 0);
// Fold one newline of a run of consecutive newlines in a folded block
// scalar.
//
// As named by the traces:
//  - the first newline of a run is turned into a space, and the write
//    position where that happened is remembered;
//  - at the second newline, that earlier space is overwritten with '\n'
//    (the assertions check that a space sits at the remembered position,
//    directly before the current write position);
//  - subsequent newlines are copied.
//
// @param proc the filter processor
// @param num_newl the number of newlines of the current run, including
//        this one
// @param wpos_at_first_newl write position of the first newline of the run
// @return the (possibly updated) write position of the first newline
3316 template<
class EventHandler>
3317 template<
class FilterProcessor>
3318 size_t ParseEngine<EventHandler>::_filter_block_folded_newlines_compress(FilterProcessor &C4_RESTRICT proc,
size_t num_newl,
size_t wpos_at_first_newl)
3323 _c4dbgfbf(
"... this is the first newline. turn into space. wpos={}", proc.wpos);
3324 wpos_at_first_newl = proc.wpos;
3329 _c4dbgfbf(
"... this is the second newline. prev space (at wpos={}) must be newline", wpos_at_first_newl);
3330 _RYML_ASSERT_BASIC_(this->callbacks(), wpos_at_first_newl !=
npos);
3331 _RYML_ASSERT_BASIC_(this->callbacks(), proc.sofar()[wpos_at_first_newl] ==
' ');
3332 _RYML_ASSERT_BASIC_(this->callbacks(), wpos_at_first_newl + 1u == proc.wpos);
3334 proc.set_at(wpos_at_first_newl,
'\n');
3335 _RYML_ASSERT_BASIC_(this->callbacks(), proc.sofar()[wpos_at_first_newl] ==
'\n');
3338 _c4dbgfbf(
"... subsequent newline (num_newl={}). copy", num_newl);
3342 return wpos_at_first_newl;
// Process a run of newlines inside a folded block scalar. Must be called
// with the read position at a '\n' (asserted below).
//
// Loops over the characters up to `len`. As named by the traces:
//  - each newline is folded by _filter_block_folded_newlines_compress(),
//    and the indentation of the following line is skipped;
//  - at a space, the following line is more indented: the space written
//    for the first newline is turned back into '\n' (or a missing newline
//    is added), the indented block is handed to
//    _filter_block_folded_indented_block(), and the remembered position is
//    reset to npos;
//  - any other character stops the loop.
//
// @param proc the filter processor
// @param indentation the indentation of the block scalar
// @param len the length of the contents to be filtered
3345 template<
class EventHandler>
3346 template<
class FilterProcessor>
3347 void ParseEngine<EventHandler>::_filter_block_folded_newlines(FilterProcessor &C4_RESTRICT proc,
size_t indentation,
size_t len)
3349 _RYML_ASSERT_BASIC_(this->callbacks(), proc.curr() ==
'\n');
3350 size_t num_newl = 0;
3351 size_t wpos_at_first_newl =
npos;
3352 while(proc.has_more_chars(len))
3354 const char curr = proc.curr();
3355 _c4dbgfbf(
"'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3360 _c4dbgfbf(
"newline. sofar={}", num_newl);
3396 wpos_at_first_newl = _filter_block_folded_newlines_compress(proc, ++num_newl, wpos_at_first_newl);
3397 _filter_block_indentation(proc, indentation);
// count the extra leading whitespace (spaces or tabs) of this line
3403 size_t first = proc.rem().first_not_of(
" \t");
3404 _c4dbgfbf(
"space. first={}", first);
3406 first = proc.rem().len;
3407 _c4dbgfbf(
"... indentation increased to {}", first);
3410 _c4dbgfbf(
"... prev space (at wpos={}) must be newline", wpos_at_first_newl);
3411 proc.set_at(wpos_at_first_newl,
'\n');
3415 _c4dbgfbf(
"... add missing newline", wpos_at_first_newl);
3418 _filter_block_folded_indented_block(proc, indentation, len, first);
3420 wpos_at_first_newl =
npos;
3427 _c4dbgfbf(
"not space, not newline. stop.", 0);
// Process a more-indented block of lines inside a folded block scalar.
//
// On entry, the remainder must start with `curr_indentation` spaces or
// tabs, or consist only of them (asserted below). Those leading characters
// are copied, and then the characters up to `len` are processed: after a
// newline, the block indentation is skipped and the leading spaces of the
// next line are inspected; the traces name a "done with indented block"
// case and a case where all the spaces are copied.
//
// @param proc the filter processor
// @param indentation the indentation of the block scalar
// @param len the length of the contents to be filtered
// @param curr_indentation the number of extra leading whitespace
//        characters of the first line of the indented block
3434 template<
class EventHandler>
3435 template<
class FilterProcessor>
3436 void ParseEngine<EventHandler>::_filter_block_folded_indented_block(FilterProcessor &C4_RESTRICT proc,
size_t indentation,
size_t len,
size_t curr_indentation) noexcept
3438 _RYML_ASSERT_BASIC_(this->callbacks(), (proc.rem().first_not_of(
" \t") == curr_indentation) || (proc.rem().first_not_of(
" \t") ==
npos));
3439 if(curr_indentation)
3440 proc.copy(curr_indentation);
3441 while(proc.has_more_chars(len))
3443 const char curr = proc.curr();
3444 _c4dbgfbf(
"'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3450 _filter_block_indentation(proc, indentation);
3451 csubstr rem = proc.rem();
3452 const size_t first = rem.first_not_of(
' ');
3453 _c4dbgfbf(
"newline. firstns={}", first);
3456 const char c = rem[first];
3457 _c4dbgfbf(
"firstns={}='{}'", first, _c4prc(c));
3458 if(c ==
'\n' || c ==
'\r')
3464 _c4dbgfbf(
"done with indented block", first);
3468 else if(first !=
npos)
3471 _c4dbgfbf(
"copy all {} spaces", first);
// Filter a folded block scalar through `proc`.
//
// Steps:
//  - _handle_all_whitespace() measures the contents; there is an early
//    return of proc.result() right after it;
//  - _extend_to_chomp() extends the contents to the next newline;
//  - _filter_block_folded_newlines_leading() processes the leading lines;
//  - the contents are then processed, with every newline found handed to
//    _filter_block_folded_newlines();
//  - _filter_chomp() processes what follows the contents.
//
// @param proc the filter processor
// @param indentation the indentation of the block scalar
// @param chomp the chomping mode of the block scalar
// @return proc.result()
3489 template<
class EventHandler>
3490 template<
class FilterProcessor>
3491 auto ParseEngine<EventHandler>::_filter_block_folded(FilterProcessor &C4_RESTRICT proc,
size_t indentation,
BlockChomp_e chomp) -> decltype(proc.result())
3493 _c4dbgfbf(
"indentation={} before=[{}]~~~{}~~~", indentation, proc.src.len, proc.src);
3495 size_t contents_len = _handle_all_whitespace(proc, chomp);
3497 return proc.result();
3499 contents_len = _extend_to_chomp(proc, contents_len);
3501 _c4dbgfbf(
"to filter=[{}]~~~{}~~~", contents_len, proc.src.first(contents_len));
3503 _filter_block_folded_newlines_leading(proc, indentation, contents_len);
// filter the contents; reading is bounded by contents_len
3506 while(proc.has_more_chars(contents_len))
3508 const char curr = proc.curr();
3509 _c4dbgfbf(
"'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3514 _c4dbgfbf(
"found newline", curr);
3515 _filter_block_folded_newlines(proc, indentation, contents_len);
3527 _c4dbgfbf(
"before chomp: #tochomp={} sofar=[{}]~~~{}~~~", proc.rem().len, proc.sofar().len, proc.sofar());
3529 _filter_chomp(proc, chomp, indentation);
3531 _c4dbgfbf(
"final=[{}]~~~{}~~~", proc.sofar().len, proc.sofar());
3533 return proc.result();
// Folded block filtering from `scalar` into the separate buffer `dst`:
// wraps both in a FilterProcessorSrcDst and delegates to
// _filter_block_folded().
// NOTE(review): presumably the body of filter_scalar_block_folded() --
// confirm.
3538 template<
class EventHandler>
3541 FilterProcessorSrcDst proc(scalar, dst);
3542 return _filter_block_folded(proc, indentation, chomp);
// Folded block filtering done in place on `scalar` (with capacity `cap`):
// uses a FilterProcessorInplaceEndExtending and delegates to
// _filter_block_folded().
// NOTE(review): presumably the body of
// filter_scalar_block_folded_in_place(), which _filter_scalar_folded()
// calls -- confirm.
3545 template<
class EventHandler>
3548 FilterProcessorInplaceEndExtending proc(scalar, cap);
3549 return _filter_block_folded(proc, indentation, chomp);
// Filter a plain scalar in place, using the scalar's own length as the
// capacity. The in-place result is asserted to be valid.
//
// @param s the scalar to filter
// @param indentation the indentation passed on to the filter
3557 template<
class EventHandler>
3558 csubstr ParseEngine<EventHandler>::_filter_scalar_plain(substr s,
size_t indentation)
3560 _c4dbgpf(
"filtering plain scalar: s=[{}]~~~{}~~~", s.len, s);
3561 FilterResult r = this->filter_scalar_plain_in_place(s, s.len, indentation);
3562 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, r.valid());
3563 _c4dbgpf(
"filtering plain scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
// Filter a single-quoted scalar in place, using the scalar's own length as
// the capacity. The in-place result is asserted to be valid.
//
// @param s the scalar to filter
3569 template<
class EventHandler>
3570 csubstr ParseEngine<EventHandler>::_filter_scalar_squot(substr s)
3572 _c4dbgpf(
"filtering squo scalar: s=[{}]~~~{}~~~", s.len, s);
3573 FilterResult r = this->filter_scalar_squoted_in_place(s, s.len);
3574 _RYML_ASSERT_BASIC_(this->callbacks(), r.valid());
3575 _c4dbgpf(
"filtering squo scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
// Filter a double-quoted scalar.
//
// Filtering is first attempted in place, with the scalar's own length as
// the capacity. When that result is not valid -- the trace reports that the
// filtered scalar needs more space than is available -- the required length
// is allocated from the event handler's arena, and the scalar is filtered
// again from `s` into that allocation. The second result is checked to be
// valid and to need no more than the allocated length.
//
// @param s the scalar to filter
3582 template<
class EventHandler>
3583 csubstr ParseEngine<EventHandler>::_filter_scalar_dquot(substr s)
3585 _c4dbgpf(
"filtering dquo scalar: s=[{}]~~~{}~~~", s.len, s);
3586 FilterResultExtending r = this->filter_scalar_dquoted_in_place(s, s.len);
3587 if(C4_LIKELY(r.valid()))
3589 _c4dbgpf(
"filtering dquo scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
3594 const size_t len = r.required_len();
3595 _c4dbgpf(
"filtering dquo scalar: not enough space: needs {}, have {}", len, s.len);
// NOTE(review): &s is handed to alloc_arena -- presumably so that `s` can
// be adjusted if the allocation relocates the arena; confirm.
3596 substr dst = m_evt_handler->alloc_arena(len, &s);
3597 _c4dbgpf(
"filtering dquo scalar: dst.len={}", dst.len);
3600 _RYML_ASSERT_BASIC_(this->callbacks(), dst.len == len);
3601 FilterResult rsd = this->filter_scalar_dquoted(s, dst);
3602 _c4dbgpf(
"filtering dquo scalar: ... result now needs {} was {}", rsd.required_len(), len);
3603 _RYML_ASSERT_BASIC_(this->callbacks(), rsd.required_len() <= len);
3604 _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, rsd.valid());
3605 _c4dbgpf(
"filtering dquo scalar: success! s=[{}]~~~{}~~~", rsd.get().len, rsd.get());
// Produce a copy of scalar `s` with a trailing '\n' added. Called by the
// block filters when the in-place result needs exactly one more byte than
// the scalar has.
//
// Two strategies appear below:
//  - move the scalar one byte to the left with memmove() and store the
//    '\n' after it; this requires that the scalar does not start at the
//    beginning of m_buf (asserted);
//  - copy the scalar into a fresh arena allocation of s.len + 1 bytes.
3615 template<
class EventHandler>
3616 csubstr ParseEngine<EventHandler>::_move_scalar_left_and_add_newline(substr s)
3620 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.str > m_buf.str);
3621 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.str-1 >= m_buf.str);
// memmove, because source and destination overlap
3623 memmove(s.str - 1, s.str, s.len);
3625 s.str[s.len] =
'\n';
3631 substr dst = m_evt_handler->alloc_arena(s.len + 1);
3633 memcpy(dst.str, s.str, s.len);
// Filter a literal block scalar.
//
// Filtering is first attempted in place, with the scalar's own length as
// the capacity. When that result is not valid, the required length is
// asserted to be exactly s.len + 1, and the result is obtained from
// _move_scalar_left_and_add_newline().
//
// @param s the scalar to filter
// @param indentation the indentation of the block scalar
// @param chomp the chomping mode of the block scalar
3639 template<
class EventHandler>
3640 csubstr ParseEngine<EventHandler>::_filter_scalar_literal(substr s,
size_t indentation,
BlockChomp_e chomp)
3642 _c4dbgpf(
"filtering block literal scalar: s=[{}]~~~{}~~~", s.len, s);
3643 FilterResult r = this->filter_scalar_block_literal_in_place(s, s.len, indentation, chomp);
3645 if(C4_LIKELY(r.valid()))
3651 _c4dbgpf(
"filtering block literal scalar: not enough space: needs {}, have {}", r.required_len(), s.len);
3652 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, r.required_len() == s.len + 1);
3655 result = _move_scalar_left_and_add_newline(s);
3657 _c4dbgpf(
"filtering block literal scalar: success! s=[{}]~~~{}~~~", result.len, result);
// Filter a folded block scalar.
//
// Filtering is first attempted in place, with the scalar's own length as
// the capacity. When that result is not valid, the required length is
// asserted to be exactly s.len + 1, and the result is obtained from
// _move_scalar_left_and_add_newline().
//
// @param s the scalar to filter
// @param indentation the indentation of the block scalar
// @param chomp the chomping mode of the block scalar
3663 template<
class EventHandler>
3664 csubstr ParseEngine<EventHandler>::_filter_scalar_folded(substr s,
size_t indentation,
BlockChomp_e chomp)
3666 _c4dbgpf(
"filtering block folded scalar: s=[{}]~~~{}~~~", s.len, s);
3667 FilterResult r = this->filter_scalar_block_folded_in_place(s, s.len, indentation, chomp);
3669 if(C4_LIKELY(r.valid()))
3675 _c4dbgpf(
"filtering block folded scalar: not enough space: needs {}, have {}", r.required_len(), s.len);
3676 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, r.required_len() == s.len + 1);
3679 result = _move_scalar_left_and_add_newline(s);
3681 _c4dbgpf(
"filtering block folded scalar: success! s=[{}]~~~{}~~~", result.len, result);
// Filter a scanned plain key scalar when needed and enabled.
//
// With scalar filtering enabled in the parser options, the scalar goes
// through _filter_scalar_plain(). Otherwise it is left as is, and the event
// handler is told that the key scalar is unfiltered. As named by the last
// trace, a scalar may also need no filtering at all.
//
// @param sc the scanned scalar
// @param indentation the indentation passed on to the filter
3688 template<
class EventHandler>
3689 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_plain(ScannedScalar
const& C4_RESTRICT sc,
size_t indentation)
3693 if(m_options.scalar_filtering())
3695 return _filter_scalar_plain(sc.scalar, indentation);
3699 _c4dbgp(
"plain scalar left unfiltered");
3700 m_evt_handler->mark_key_scalar_unfiltered();
3705 _c4dbgp(
"plain scalar doesn't need filtering");
// Filter a scanned plain val scalar when needed and enabled.
//
// With scalar filtering enabled in the parser options, the scalar goes
// through _filter_scalar_plain(). Otherwise it is left as is, and the event
// handler is told that the val scalar is unfiltered. As named by the last
// trace, a scalar may also need no filtering at all.
//
// @param sc the scanned scalar
// @param indentation the indentation passed on to the filter
3710 template<
class EventHandler>
3711 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_plain(ScannedScalar
const& C4_RESTRICT sc,
size_t indentation)
3715 if(m_options.scalar_filtering())
3717 return _filter_scalar_plain(sc.scalar, indentation);
3721 _c4dbgp(
"plain scalar left unfiltered");
3722 m_evt_handler->mark_val_scalar_unfiltered();
3727 _c4dbgp(
"plain scalar doesn't need filtering");
// Filter a scanned single-quoted key scalar when needed and enabled.
//
// With scalar filtering enabled in the parser options, the scalar goes
// through _filter_scalar_squot(). Otherwise it is left as is, and the event
// handler is told that the key scalar is unfiltered. As named by the last
// trace, a scalar may also need no filtering at all.
//
// @param sc the scanned scalar
3735 template<
class EventHandler>
3736 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_squot(ScannedScalar
const& C4_RESTRICT sc)
3740 if(m_options.scalar_filtering())
3742 return _filter_scalar_squot(sc.scalar);
3746 _c4dbgp(
"squo key scalar left unfiltered");
3747 m_evt_handler->mark_key_scalar_unfiltered();
3752 _c4dbgp(
"squo key scalar doesn't need filtering");
// Filter a scanned single-quoted val scalar when needed and enabled.
//
// With scalar filtering enabled in the parser options, the scalar goes
// through _filter_scalar_squot(). Otherwise it is left as is, and the event
// handler is told that the val scalar is unfiltered. As named by the last
// trace, a scalar may also need no filtering at all.
//
// @param sc the scanned scalar
3757 template<
class EventHandler>
3758 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_squot(ScannedScalar
const& C4_RESTRICT sc)
3762 if(m_options.scalar_filtering())
3764 return _filter_scalar_squot(sc.scalar);
3768 _c4dbgp(
"squo val scalar left unfiltered");
3769 m_evt_handler->mark_val_scalar_unfiltered();
3774 _c4dbgp(
"squo val scalar doesn't need filtering");
// Filter a scanned double-quoted key scalar when needed and enabled.
//
// With scalar filtering enabled in the parser options, the scalar goes
// through _filter_scalar_dquot(). Otherwise it is left as is, and the event
// handler is told that the key scalar is unfiltered. As named by the last
// trace, a scalar may also need no filtering at all.
//
// @param sc the scanned scalar
3782 template<
class EventHandler>
3783 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_dquot(ScannedScalar
const& C4_RESTRICT sc)
3787 if(m_options.scalar_filtering())
3789 return _filter_scalar_dquot(sc.scalar);
3793 _c4dbgp(
"dquo scalar left unfiltered");
3794 m_evt_handler->mark_key_scalar_unfiltered();
3799 _c4dbgp(
"dquo scalar doesn't need filtering");
// Filter a scanned double-quoted val scalar when needed and enabled.
//
// With scalar filtering enabled in the parser options, the scalar goes
// through _filter_scalar_dquot(). Otherwise it is left as is, and the event
// handler is told that the val scalar is unfiltered. As named by the last
// trace, a scalar may also need no filtering at all.
//
// @param sc the scanned scalar
3804 template<
class EventHandler>
3805 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_dquot(ScannedScalar
const& C4_RESTRICT sc)
3809 if(m_options.scalar_filtering())
3811 return _filter_scalar_dquot(sc.scalar);
3815 _c4dbgp(
"dquo scalar left unfiltered");
3816 m_evt_handler->mark_val_scalar_unfiltered();
3821 _c4dbgp(
"dquo scalar doesn't need filtering");
// Filter a scanned literal block key scalar when filtering is enabled.
//
// With scalar filtering enabled in the parser options, the scalar goes
// through _filter_scalar_literal() with the scanned indentation and chomp.
// Otherwise it is left as is, and the event handler is told that the key
// scalar is unfiltered.
//
// @param sb the scanned block scalar
3829 template<
class EventHandler>
3830 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_literal(ScannedBlock
const& C4_RESTRICT sb)
3832 if(m_options.scalar_filtering())
3834 return _filter_scalar_literal(sb.scalar, sb.indentation, sb.chomp);
3838 _c4dbgp(
"literal scalar left unfiltered");
3839 m_evt_handler->mark_key_scalar_unfiltered();
// Filter a scanned literal block val scalar when filtering is enabled.
//
// With scalar filtering enabled in the parser options, the scalar goes
// through _filter_scalar_literal() with the scanned indentation and chomp.
// Otherwise it is left as is, and the event handler is told that the val
// scalar is unfiltered.
//
// @param sb the scanned block scalar
3844 template<
class EventHandler>
3845 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_literal(ScannedBlock
const& C4_RESTRICT sb)
3847 if(m_options.scalar_filtering())
3849 return _filter_scalar_literal(sb.scalar, sb.indentation, sb.chomp);
3853 _c4dbgp(
"literal scalar left unfiltered");
3854 m_evt_handler->mark_val_scalar_unfiltered();
// Filter a scanned folded block key scalar when filtering is enabled.
//
// With scalar filtering enabled in the parser options, the scalar goes
// through _filter_scalar_folded() with the scanned indentation and chomp.
// Otherwise it is left as is, and the event handler is told that the key
// scalar is unfiltered.
//
// @param sb the scanned block scalar
3862 template<
class EventHandler>
3863 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_folded(ScannedBlock
const& C4_RESTRICT sb)
3865 if(m_options.scalar_filtering())
3867 return _filter_scalar_folded(sb.scalar, sb.indentation, sb.chomp);
3871 _c4dbgp(
"folded scalar left unfiltered");
3872 m_evt_handler->mark_key_scalar_unfiltered();
// Filter a scanned folded block val scalar when filtering is enabled.
//
// With scalar filtering enabled in the parser options, the scalar goes
// through _filter_scalar_folded() with the scanned indentation and chomp.
// Otherwise it is left as is, and the event handler is told that the val
// scalar is unfiltered.
//
// @param sb the scanned block scalar
3877 template<
class EventHandler>
3878 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_folded(ScannedBlock
const& C4_RESTRICT sb)
3880 if(m_options.scalar_filtering())
3882 return _filter_scalar_folded(sb.scalar, sb.indentation, sb.chomp);
3886 _c4dbgp(
"folded scalar left unfiltered");
3887 m_evt_handler->mark_val_scalar_unfiltered();
// Add the flags `on` to the parser state `s`.
//
// The statements below build the trace of the operation: the flags being
// added, the flags of the state before, and the resulting flags
// (s->flags|on), each converted to text in its own 64-byte buffer.
//
// @param on the flags to add
// @param s the parser state to modify
3899 template<
class EventHandler>
3900 void ParseEngine<EventHandler>::add_flags(
ParserFlag_t on, ParserState * s)
3902 char buf1_[64], buf2_[64], buf3_[64];
3903 csubstr buf1 = detail::_parser_flags_to_str(buf1_, on);
3904 csubstr buf2 = detail::_parser_flags_to_str(buf2_, s->flags);
3905 csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags|on);
3906 _c4dbgpf(
"state[{}]: add {}: before={} after={}", s->level, buf1, buf2, buf3);
// Add the flags `on` to, and remove the flags `off` from, the parser state
// `s`.
//
// The statements below build the trace of the operation: the flags being
// added, the flags being removed, the flags of the state before, and the
// resulting flags ((s->flags|on)&(~off)), each converted to text in its own
// 64-byte buffer.
// NOTE(review): presumably the body of addrem_flags(on, off, s) -- confirm.
3910 template<
class EventHandler>
3913 char buf1_[64], buf2_[64], buf3_[64], buf4_[64];
3914 csubstr buf1 = detail::_parser_flags_to_str(buf1_, on);
3915 csubstr buf2 = detail::_parser_flags_to_str(buf2_, off);
3916 csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags);
3917 csubstr buf4 = detail::_parser_flags_to_str(buf4_, ((s->flags|on)&(~off)));
3918 _c4dbgpf(
"state[{}]: add {} / rem {}: before={} after={}", s->level, buf1, buf2, buf3, buf4);
// Remove the flags `off` from the parser state `s`.
//
// The statements below build the trace of the operation: the flags being
// removed, the flags of the state before, and the resulting flags
// (s->flags&(~off)), each converted to text in its own 64-byte buffer.
//
// @param off the flags to remove
// @param s the parser state to modify
3923 template<
class EventHandler>
3924 void ParseEngine<EventHandler>::rem_flags(
ParserFlag_t off, ParserState * s)
3926 char buf1_[64], buf2_[64], buf3_[64];
3927 csubstr buf1 = detail::_parser_flags_to_str(buf1_, off);
3928 csubstr buf2 = detail::_parser_flags_to_str(buf2_, s->flags);
3929 csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags&(~off));
3930 _c4dbgpf(
"state[{}]: rem {}: before={} after={}", s->level, buf1, buf2, buf3);
// Write a textual form of `flags` into `buf`.
//
// The local macro _prflag handles one flag: when all bits of the flag are
// set in `flags`, the stringized name of the flag is appended at the
// current position. Each write is guarded by a comparison against buf.len,
// so a name that does not fit is not copied.
//
// @param buf the destination buffer
// @param flags the flags to print
// @return the first `pos` characters of buf, with `pos` checked to be
//         <= buf.len
3934 inline C4_NO_INLINE csubstr detail::_parser_flags_to_str(substr buf,
ParserFlag_t flags)
3937 bool gotone =
false;
3939 #define _prflag(fl) \
3940 if((flags & fl) == (fl)) \
3944 if(pos + 1 < buf.len) \
3948 csubstr fltxt = #fl; \
3949 if(pos + fltxt.len <= buf.len) \
3950 memcpy(buf.str + pos, fltxt.str, fltxt.len); \
3980 _RYML_CHECK_BASIC(pos <= buf.len);
3982 return buf.first(pos);
// Return the contents of the source buffer starting at the offset of the
// location `loc`, through the end of the buffer. The offset is asserted to
// be within the buffer.
// NOTE(review): presumably the body of location_contents(loc) -- confirm.
3992 template<
class EventHandler>
3995 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, loc.offset < m_buf.len);
3996 return m_buf.sub(loc.offset);
// Compute the location (offset, line, column) of the pointer `val`, which
// must point into the source buffer m_buf.
//
//  - A null `val` yields the location {m_file, 0, 0, 0}.
//  - The locations option must be enabled (checked), and the table of
//    newline offsets must be ready and refer to the current buffer
//    (asserted).
//  - The offset is the distance of `val` from the start of the buffer.
//  - The line is the index of the entry in m_newline_offsets selected by
//    the search; that entry is asserted to be greater than the offset.
//  - For lines other than the first, the column is the distance from the
//    preceding newline.
// NOTE(review): two searches over the table appear below, a linear scan
// and a count/step one (presumably a binary search chosen for larger
// tables) -- confirm how they are selected.
// NOTE(review): presumably the body of val_location(val) -- confirm.
3999 template<
class EventHandler>
4002 if(C4_UNLIKELY(val ==
nullptr))
4003 return {m_file, 0, 0, 0};
4004 _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_options.locations());
4007 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_buf.str == m_newline_offsets_buf.str);
4008 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_buf.len == m_newline_offsets_buf.len);
4009 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_options.locations());
4010 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, !_locations_dirty());
4011 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets !=
nullptr);
4012 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_size > 0);
4014 csubstr src = m_buf;
4015 _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, val !=
nullptr || src.str ==
nullptr);
4016 _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, (val >= src.begin() && val <= src.end()) || (src.str ==
nullptr && val ==
nullptr));
4018 using lineptr_type =
size_t const* C4_RESTRICT;
4019 lineptr_type lineptr =
nullptr;
4020 size_t offset = (size_t)(val - src.begin());
4024 for(lineptr_type curr = m_newline_offsets, last = m_newline_offsets + m_newline_offsets_size; curr < last; ++curr)
4039 size_t count = m_newline_offsets_size;
4042 lineptr = m_newline_offsets;
4046 it = lineptr + step;
4058 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, lineptr >= m_newline_offsets);
4059 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, lineptr <= m_newline_offsets + m_newline_offsets_size);
4060 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, *lineptr > offset);
4063 loc.offset = offset;
4064 loc.line = (size_t)(lineptr - m_newline_offsets);
4065 if(lineptr > m_newline_offsets)
4066 loc.col = (offset - *(lineptr-1) - 1u);
// Build the table of newline offsets for the current source buffer.
//
// The table gets one entry with the offset of every '\n' in m_buf, in
// increasing order, followed by a final entry equal to m_buf.len. It thus
// holds 1 + (number of newlines) entries, which is asserted at the end.
// The buffer for which the table was built is remembered in
// m_newline_offsets_buf.
4072 template<
class EventHandler>
4073 void ParseEngine<EventHandler>::_prepare_locations()
4075 m_newline_offsets_buf = m_buf;
4076 size_t numnewlines = 1u + m_buf.count(
'\n');
4077 _resize_locations(numnewlines);
4078 m_newline_offsets_size = 0;
4079 for(
size_t i = 0; i < m_buf.len; i++)
4080 if(m_buf[i] ==
'\n')
4081 m_newline_offsets[m_newline_offsets_size++] = i;
// the final entry marks the end of the buffer
4082 m_newline_offsets[m_newline_offsets_size++] = m_buf.len;
4083 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_size == numnewlines);
// Ensure that the table of newline offsets has capacity for at least
// `numnewlines` entries.
//
// Nothing is done when the capacity already suffices. Otherwise the
// existing table (if any) is freed and a new one is allocated through the
// callbacks; the previous entries are not copied over.
//
// @param numnewlines the required number of entries
4086 template<
class EventHandler>
4087 void ParseEngine<EventHandler>::_resize_locations(
size_t numnewlines)
4089 if(numnewlines > m_newline_offsets_capacity)
4091 if(m_newline_offsets)
4092 _RYML_CB_FREE(m_evt_handler->m_stack.m_callbacks, m_newline_offsets,
size_t, m_newline_offsets_capacity);
4093 m_newline_offsets = _RYML_CB_ALLOC_HINT(m_evt_handler->m_stack.m_callbacks,
size_t, numnewlines, m_newline_offsets);
4094 m_newline_offsets_capacity = numnewlines;
// Tell whether the table of newline offsets is empty.
//
// @return true when m_newline_offsets_size is zero
4098 template<
class EventHandler>
4099 bool ParseEngine<EventHandler>::_locations_dirty()
const
4101 return !m_newline_offsets_size;
// Skip whitespace and comments in the remainder of the current line.
//
// Applies only when the remainder is not empty. A remainder starting with
// ' ' or '\t' is reported as starting with whitespace. When the remainder
// then begins with '#', it is a comment, and the line is progressed by the
// full length of the remainder.
4109 template<
class EventHandler>
4110 void ParseEngine<EventHandler>::_handle_flow_skip_whitespace()
4113 if(m_evt_handler->m_curr->line_contents.rem.len > 0)
4115 if(m_evt_handler->m_curr->line_contents.rem.str[0] ==
' ' || m_evt_handler->m_curr->line_contents.rem.str[0] ==
'\t')
4117 _c4dbgpf(
"starts with whitespace: '{}'", _c4prc(m_evt_handler->m_curr->line_contents.rem.str[0]));
4121 if(m_evt_handler->m_curr->line_contents.rem.begins_with(
'#'))
4123 _c4dbgpf(
"it's a comment: {}", m_evt_handler->m_curr->line_contents.rem);
4124 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
// Register a colon found on the current line.
//
// The line of the previous colon is kept in m_prev_colon (npos when there
// is none). Finding a colon on the same line as the previous one raises
// the error "two colons on same line". The current line is then stored as
// the line of the previous colon.
4133 template<
class EventHandler>
4134 void ParseEngine<EventHandler>::_handle_colon()
4136 size_t curr = m_evt_handler->m_curr->pos.line;
4137 if(m_prev_colon !=
npos)
4139 if(curr == m_prev_colon)
4140 _c4err(
"two colons on same line");
4142 m_prev_colon = curr;
// Store a pending annotation in `dst`.
//
// The string, indentation and line are written to the entry at index
// dst->num_entries. An error is raised when that index is beyond the
// capacity of the annotations array.
//
// @param dst the annotation storage to add to
// @param str the annotation string
// @param indentation the indentation at which the annotation was found
// @param line the line at which the annotation was found
4145 template<
class EventHandler>
4146 void ParseEngine<EventHandler>::_add_annotation(Annotation *C4_RESTRICT dst, csubstr str,
size_t indentation,
size_t line)
4148 _c4dbgpf(
"store annotation[{}]: '{}' indentation={} line={}", dst->num_entries, str, indentation, line);
4149 if(C4_UNLIKELY(dst->num_entries >= C4_COUNTOF(dst->annotations)))
4150 _c4err(
"too many annotations");
4151 dst->annotations[dst->num_entries].str = str;
4152 dst->annotations[dst->num_entries].indentation = indentation;
4153 dst->annotations[dst->num_entries].line = line;
// Clear the pending annotations in `dst` by resetting its number of
// entries to zero. The stored entries themselves are not modified.
//
// @param dst the annotation storage to clear
4157 template<
class EventHandler>
4158 void ParseEngine<EventHandler>::_clear_annotations(Annotation *C4_RESTRICT dst)
4160 dst->num_entries = 0;
4163 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
// Skip the indentation of the current line when a single anchor or a
// single tag is pending.
//
// The number of spaces to skip is the largest of the current reference
// indentation (indref) and the indentations of the pending anchor and tag.
// The spaces are skipped with _maybe_skipchars_up_to().
4164 template<
class EventHandler>
4165 bool ParseEngine<EventHandler>::_handle_indentation_from_annotations()
4167 if(m_pending_anchors.num_entries == 1u || m_pending_tags.num_entries == 1u)
4169 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.num_entries < 2u && m_pending_tags.num_entries < 2u);
4170 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.annotations[0].line < m_evt_handler->m_curr->pos.line);
// NOTE(review): this reads entry [1] of the pending tags, while the
// assertion above requires fewer than 2 entries and the anchors assertion
// reads entry [0] -- looks like it should be [0]; confirm.
4171 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_pending_tags.annotations[1].line < m_evt_handler->m_curr->pos.line);
4172 size_t to_skip = m_evt_handler->m_curr->indref;
4173 if(m_pending_anchors.num_entries)
4174 to_skip = m_pending_anchors.annotations[0].indentation > to_skip ? m_pending_anchors.annotations[0].indentation : to_skip;
4175 if(m_pending_tags.num_entries)
4176 to_skip = m_pending_tags.annotations[0].indentation > to_skip ? m_pending_tags.annotations[0].indentation : to_skip;
4177 _c4dbgpf(
"annotations pending, skip indentation up to {}!", to_skip);
4178 _maybe_skipchars_up_to(
' ', to_skip);
// Tell whether more than one tag, or more than one anchor, is pending.
//
// @return true when either m_pending_tags or m_pending_anchors holds more
//         than one entry
4185 template<
class EventHandler>
4186 bool ParseEngine<EventHandler>::_annotations_require_key_container()
const
4188 return m_pending_tags.num_entries > 1 || m_pending_anchors.num_entries > 1;
// Validate a tag, raising an error when it is malformed.
//
//  - A tag not beginning with "!<" must not contain any of the characters
//    '[', ']', '{', '}' or ','.
//  - There is also a check that the tag ends with '>'.
//    NOTE(review): presumably this check applies to tags beginning with
//    "!<" -- confirm.
//
// @param tag the tag to check
4191 template<
class EventHandler>
4192 void ParseEngine<EventHandler>::_check_tag(csubstr tag)
4194 if(!tag.begins_with(
"!<"))
4196 if(C4_UNLIKELY(tag.first_of(
"[]{},") !=
npos))
4197 _c4err(
"tags must not contain any of '[]{},'");
4201 if(C4_UNLIKELY(!tag.ends_with(
'>')))
// Apply the pending tag and anchor to a block key scalar that is about to
// be handled.
//
// When exactly one tag is pending (the likely case), it is validated with
// _check_tag(), set as the key tag in the event handler, and the pending
// tags are cleared. Likewise, when exactly one anchor is pending it is set
// as the key anchor and the pending anchors are cleared; otherwise the
// error "too many anchors" is raised.
4206 template<
class EventHandler>
4207 void ParseEngine<EventHandler>::_handle_annotations_before_blck_key_scalar()
4209 _c4dbgpf(
"annotations_before_blck_key_scalar, node={}", m_evt_handler->m_curr->node_id);
4210 if(m_pending_tags.num_entries)
4212 _c4dbgpf(
"annotations_before_blck_key_scalar, #tags={}", m_pending_tags.num_entries);
4213 if(C4_LIKELY(m_pending_tags.num_entries == 1))
4215 _check_tag(m_pending_tags.annotations[0].str);
4216 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4217 _clear_annotations(&m_pending_tags);
4224 if(m_pending_anchors.num_entries)
4226 _c4dbgpf(
"annotations_before_blck_key_scalar, #anchors={}", m_pending_anchors.num_entries);
4227 if(C4_LIKELY(m_pending_anchors.num_entries == 1))
4229 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4230 _clear_annotations(&m_pending_anchors);
4234 _c4err(
"too many anchors");
// Apply the pending tag and anchor to a block val scalar that is about to
// be handled.
//
// When exactly one tag is pending (the likely case), it is validated with
// _check_tag(), set as the val tag in the event handler, and the pending
// tags are cleared. Likewise, when exactly one anchor is pending it is set
// as the val anchor and the pending anchors are cleared; otherwise the
// error "too many anchors" is raised.
4239 template<
class EventHandler>
4240 void ParseEngine<EventHandler>::_handle_annotations_before_blck_val_scalar()
4242 _c4dbgpf(
"annotations_before_blck_val_scalar, node={}", m_evt_handler->m_curr->node_id);
4243 if(m_pending_tags.num_entries)
4245 _c4dbgpf(
"annotations_before_blck_val_scalar, #tags={}", m_pending_tags.num_entries);
4246 if(C4_LIKELY(m_pending_tags.num_entries == 1))
4248 _check_tag(m_pending_tags.annotations[0].str);
4249 m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4250 _clear_annotations(&m_pending_tags);
4257 if(m_pending_anchors.num_entries)
4259 _c4dbgpf(
"annotations_before_blck_val_scalar, #anchors={}", m_pending_anchors.num_entries);
4260 if(C4_LIKELY(m_pending_anchors.num_entries == 1))
4262 m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4263 _clear_annotations(&m_pending_anchors);
4267 _c4err(
"too many anchors");
// Apply the pending tags and anchors that belong to a block map which is
// about to be started.
//
// For the tags, and then likewise for the anchors:
//  - with two pending entries, entry 0 is set on the map as val tag/anchor
//    (tags are validated with _check_tag() first); the pending entries are
//    not cleared here;
//  - with a single pending entry, it is set on the map only when it was
//    found on a line before `current_line`, and in that case the pending
//    entries are cleared.
//
// @param current_line the line against which the line of a single pending
//        annotation is compared
4272 template<
class EventHandler>
4273 void ParseEngine<EventHandler>::_handle_annotations_before_start_mapblck(
size_t current_line)
4275 _c4dbgpf(
"annotations_before_start_mapblck, current_line={}", current_line);
4276 if(m_pending_tags.num_entries == 2)
4278 _c4dbgp(
"2 tags, setting entry 0");
4279 _check_tag(m_pending_tags.annotations[0].str);
4280 m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4282 else if(m_pending_tags.num_entries == 1)
// the format has two placeholders, so both the line of the tag and the
// current line must be passed
4284 _c4dbgpf(
"1 tag. line={}, curr={}", m_pending_tags.annotations[0].line, current_line);
4285 if(m_pending_tags.annotations[0].line < current_line)
4287 _c4dbgp(
"...tag is for the map. setting it.");
4288 _check_tag(m_pending_tags.annotations[0].str);
4289 m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4290 _clear_annotations(&m_pending_tags);
4294 if(m_pending_anchors.num_entries == 2)
4296 _c4dbgp(
"2 anchors, setting entry 0");
4297 m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4299 else if(m_pending_anchors.num_entries == 1)
// the format has two placeholders, so both the line of the anchor and the
// current line must be passed
4301 _c4dbgpf(
"1 anchor. line={}, curr={}", m_pending_anchors.annotations[0].line, current_line);
4302 if(m_pending_anchors.annotations[0].line < current_line)
4304 _c4dbgp(
"...anchor is for the map. setting it.");
4305 m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4306 _clear_annotations(&m_pending_anchors);
// Apply the pending tags and anchors that belong to a block map which is
// about to be started as a key.
//
// Only acts when two entries are pending: entry 0 of the tags is validated
// with _check_tag() and set as the key tag, and entry 0 of the anchors is
// set as the key anchor. The pending entries are not cleared here.
4311 template<
class EventHandler>
4312 void ParseEngine<EventHandler>::_handle_annotations_before_start_mapblck_as_key()
4314 _c4dbgp(
"annotations_before_start_mapblck_as_key");
4315 if(m_pending_tags.num_entries == 2)
4317 _check_tag(m_pending_tags.annotations[0].str);
4318 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4320 if(m_pending_anchors.num_entries == 2)
4322 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
/** Assign pending annotations to a key and set the current indentation.
 *
 * @param key_indentation indentation to use; when annotations are pending
 *        it is replaced by the result of
 *        _select_indentation_from_annotations().
 * @param key_line line forwarded to _select_indentation_from_annotations().
 *
 * Asserts that at most two tags and at most two anchors are pending.
 */
4326 template<
class EventHandler>
4327 void ParseEngine<EventHandler>::_handle_annotations_and_indentation_after_start_mapblck(
size_t key_indentation,
size_t key_line)
4329 _c4dbgp(
"annotations_after_start_mapblck");
4330 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries <= 2);
4331 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.num_entries <= 2);
4332 if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
// pending annotations may change which indentation is used
4334 key_indentation = _select_indentation_from_annotations(key_indentation, key_line);
// tags: one arm uses entry 0 and the other entry 1 as the key tag; both clear the list.
// NOTE(review): presumably the 1-entry and 2-entry cases respectively - confirm.
4335 switch(m_pending_tags.num_entries)
4338 _check_tag(m_pending_tags.annotations[0].str);
4339 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4340 _clear_annotations(&m_pending_tags);
4343 _check_tag(m_pending_tags.annotations[1].str);
4344 m_evt_handler->set_key_tag(m_pending_tags.annotations[1].str);
4345 _clear_annotations(&m_pending_tags);
// anchors: same shape as for tags, without the tag check
4348 switch(m_pending_anchors.num_entries)
4351 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4352 _clear_annotations(&m_pending_anchors);
4355 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[1].str);
4356 _clear_annotations(&m_pending_anchors);
// finally store the (possibly adjusted) indentation
4360 _set_indentation(key_indentation);
/** Choose an indentation from the pending annotations.
 *
 * Requires at least one pending tag or anchor (asserted).
 *
 * @param val_indentation indentation returned when the selected annotation
 *        sits on a line before val_line.
 * @param val_line line compared against the selected annotation's line.
 * @return val_indentation if the selected annotation's line is smaller than
 *         val_line, otherwise the selected annotation's indentation.
 */
4363 template<
class EventHandler>
4364 size_t ParseEngine<EventHandler>::_select_indentation_from_annotations(
size_t val_indentation,
size_t val_line)
4366 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries || m_pending_anchors.num_entries);
// initial candidate: first pending anchor if there is one, else first pending tag
4368 auto const *C4_RESTRICT curr = m_pending_anchors.num_entries ? &m_pending_anchors.annotations[0] : &m_pending_tags.annotations[0];
// compare every pending anchor with the candidate: first by later line, then by smaller indentation.
// NOTE(review): presumably curr is re-pointed to ann when either test holds - confirm.
4369 for(
size_t i = 0; i < m_pending_anchors.num_entries; ++i)
4371 auto const& C4_RESTRICT ann = m_pending_anchors.annotations[i];
4372 if(ann.line > curr->line)
4374 else if(ann.indentation < curr->indentation)
// same comparison over every pending tag
4377 for(
size_t j = 0; j < m_pending_tags.num_entries; ++j)
4379 auto const& C4_RESTRICT ann = m_pending_tags.annotations[j];
4380 if(ann.line > curr->line)
4382 else if(ann.indentation < curr->indentation)
4385 return curr->line < val_line ? val_indentation : curr->indentation;
/** Forward a directive found in the current line to the event handler.
 *
 * @param rem the directive text; asserted to be a sub-range of the current
 *        line's remaining contents.
 *
 * Two paths are visible: the whole of rem is added as the directive and the
 * line advances by rem.len; or only the part before the first '#', with
 * trailing spaces/tabs trimmed, is added and the line advances by pos.
 * NOTE(review): presumably the first path is taken when no '#' is found - confirm.
 */
4388 template<
class EventHandler>
4389 void ParseEngine<EventHandler>::_handle_directive(csubstr rem)
4391 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, rem.is_sub(m_evt_handler->m_curr->line_contents.rem));
// offset of the first '#' in rem
4392 const size_t pos = rem.find(
'#');
4393 _c4dbgpf(
"handle_directive: pos={} rem={}", pos, rem);
// path 1: the directive is all of rem
4396 m_evt_handler->add_directive(rem);
4397 _line_progressed(rem.len);
// path 2: the directive is the text before '#', right-trimmed of spaces and tabs
4401 csubstr to_comment = rem.first(pos);
4402 csubstr trimmed = to_comment.trimr(
" \t");
4403 m_evt_handler->add_directive(trimmed);
4404 _line_progressed(pos);
/** Look for a byte order mark at the start of the remaining line.
 *
 * Checks, in this order: UTF32BE, UTF32LE, UTF16BE, UTF16LE, UTF8. Each
 * UTF32/UTF16 check accepts either the explicit mark bytes or a pattern of
 * zero bytes next to one ASCII byte. The visible code advances the line by
 * 4, 4, 2, 2 and 3 bytes respectively.
 */
4409 template<
class EventHandler>
4410 bool ParseEngine<EventHandler>::_handle_bom()
4412 const csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// rest is rem without its first byte; used for the little-endian zero-byte patterns
4415 const csubstr rest = rem.sub(1);
// true for a non-NUL char in the 7-bit ASCII range
4417 #define _rymlisascii(c) ((c) > '\0' && (c) <= '\x7f')
// UTF32BE: mark 00 00 FE FF, or three zero bytes followed by an ASCII byte
4418 if(rem.begins_with(csubstr{
"\x00\x00\xfe\xff", 4}) || (rem.begins_with(csubstr{
"\x00\x00\x00", 3}) && rem.len >= 4u && _rymlisascii(rem.str[3])))
4420 _c4dbgp(
"byte order mark: UTF32BE");
4422 _line_progressed(4);
// UTF32LE: mark FF FE 00 00, or an ASCII byte followed by three zero bytes
4426 else if(rem.begins_with(csubstr{
"\xff\xfe\x00\x00", 4}) || (rest.begins_with(csubstr{
"\x00\x00\x00", 3}) && rem.len >= 4u && _rymlisascii(rem.str[0])))
4428 _c4dbgp(
"byte order mark: UTF32LE");
4430 _line_progressed(4);
// UTF16BE: mark FE FF, or a zero byte followed by an ASCII byte
4434 else if(rem.begins_with(
"\xfe\xff") || (rem.begins_with(
'\x00') && rem.len >= 2u && _rymlisascii(rem.str[1])))
4436 _c4dbgp(
"byte order mark: UTF16BE");
4438 _line_progressed(2);
// UTF16LE: mark FF FE, or an ASCII byte followed by a zero byte
4442 else if(rem.begins_with(
"\xff\xfe") || (rest.begins_with(
'\x00') && rem.len >= 2u && _rymlisascii(rem.str[0])))
4444 _c4dbgp(
"byte order mark: UTF16LE");
4446 _line_progressed(2);
// UTF8: mark EF BB BF
4450 else if(rem.begins_with(
"\xef\xbb\xbf"))
4452 _c4dbgp(
"byte order mark: UTF8");
4454 _line_progressed(3);
/** Validate a detected byte order mark against the encoding seen so far.
 *
 * @param enc the encoding implied by the mark that was just found.
 *
 * While m_encoding is still NOBOM, the mark is checked for being UTF8 or for
 * sitting at the very start of the buffer; the "non-UTF8 ..." error belongs
 * to that check. Once an encoding is set, a different one raises
 * "byte order mark can only be set once".
 */
4463 template<
class EventHandler>
4464 void ParseEngine<EventHandler>::_handle_bom(
Encoding_e enc)
// no encoding recorded yet
4466 if(m_encoding ==
NOBOM)
// accepted when UTF8, or when the current line starts exactly at the buffer start
4468 if(enc ==
UTF8 || (m_evt_handler->m_curr->line_contents.rem.str == m_buf.str))
4471 _c4err(
"non-UTF8 byte order mark can appear only at the beginning of the file");
// an encoding was already recorded: it must not change
4473 else if(enc != m_encoding)
4475 _c4err(
"byte order mark can only be set once");
/** Handle a flow sequence while parsing JSON.
 *
 * Preconditions (asserted): RSEQ and RFLOW are set, RKEY is not, and exactly
 * one of RVAL / RNXT is set. Flow whitespace is skipped before the first
 * character of the remaining line is examined.
 *
 * In RVAL the visible arms handle a double-quoted scalar, a child sequence,
 * a child map, the end of the sequence and a plain scalar. In RNXT they
 * handle the separator before the next value and the end of the sequence.
 */
4482 template<
class EventHandler>
4483 void ParseEngine<EventHandler>::_handle_seq_json()
4486 _c4dbgpf(
"handle2_seq_json: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
4488 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
4489 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(
RSEQ));
4490 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(
RFLOW));
4491 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL|
RNXT));
4492 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(
RVAL) != has_all(
RNXT));
4494 _handle_flow_skip_whitespace();
4495 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// ---- RVAL: a value is expected ----
4501 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
4502 const char first = rem.str[0];
// trace label names this handler (seqjson), consistent with the other messages here
4503 _c4dbgpf(
"seqjson[RVAL]: '{}'", first);
// double-quoted scalar: scan, pass through the dquot filter, emit as val
4508 _c4dbgp(
"seqjson[RVAL]: scanning double-quoted scalar");
4509 ScannedScalar sc = _scan_scalar_dquot();
4510 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
4511 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
// nested sequence
4517 _c4dbgp(
"seqjson[RVAL]: start child seqjson");
4519 m_evt_handler->begin_seq_val_flow();
4521 _line_progressed(1);
// nested map
4526 _c4dbgp(
"seqjson[RVAL]: start child mapjson");
4528 m_evt_handler->begin_map_val_flow();
4530 _line_progressed(1);
4531 goto seqjson_finish;
// end of this sequence
4535 _c4dbgp(
"seqjson[RVAL]: end!");
4538 _line_progressed(1);
4540 goto seqjson_finish;
// plain scalar, if the scan succeeds
4546 if(_scan_scalar_seq_json(&sc))
4548 _c4dbgp(
"seqjson[RVAL]: it's a plain scalar.");
4549 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
4550 m_evt_handler->set_val_scalar_plain(maybe_filtered);
// ---- RNXT: a separator or the end is expected ----
4562 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT));
4563 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
4564 const char first = rem.str[0];
// trace label names this handler (seqjson), consistent with the other messages here
4565 _c4dbgpf(
"seqjson[RNXT]: '{}'", first);
// separator: open a sibling for the next value
4570 _c4dbgp(
"seqjson[RNXT]: expect next val");
4572 m_evt_handler->add_sibling();
4573 _line_progressed(1);
// end of this sequence
4578 _c4dbgp(
"seqjson[RNXT]: end!");
4580 _line_progressed(1);
4581 goto seqjson_finish;
// ---- continue with the rest of the line / next line ----
4589 _c4dbgt(
"seqjson: go again", 0);
4590 if(_finished_line())
4592 if(C4_LIKELY(!_finished_file()))
// NOTE(review): presumably reached when the file ends with the sequence still open - confirm
4600 _c4err(
"missing terminating ]");
4606 _c4dbgp(
"seqjson: finish");
/** Handle a flow map while parsing JSON.
 *
 * Preconditions (asserted): RMAP and RFLOW are set, QMRK is not, and exactly
 * one of RKEY / RKCL / RVAL / RNXT is set. Flow whitespace is skipped before
 * the first character of the remaining line is examined.
 *
 * Each state is asserted to exclude the other three. The visible arms are:
 * RKEY (double-quoted key, or end of map), RVAL (double-quoted scalar, child
 * sequence, child map, plain scalar), RKCL (the colon) and RNXT (',' for the
 * next key/value pair, '}' for the end).
 */
4612 template<
class EventHandler>
4613 void ParseEngine<EventHandler>::_handle_map_json()
4616 _c4dbgpf(
"handle2_map_json: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
4618 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP));
4619 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(
RFLOW));
4620 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
4621 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RKCL|
RVAL|
RNXT));
4622 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(
RKEY) + has_any(
RKCL) + has_any(
RVAL) + has_any(
RNXT)));
4624 _handle_flow_skip_whitespace();
4625 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// ---- RKEY: a key is expected ----
4631 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
4632 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
4633 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
4634 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
4635 const char first = rem.str[0];
4636 _c4dbgpf(
"mapjson[RKEY]: '{}'", first);
// double-quoted key: scan, pass through the dquot key filter, emit as key
4641 _c4dbgp(
"mapjson[RKEY]: scanning double-quoted scalar");
4642 ScannedScalar sc = _scan_scalar_dquot();
4643 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
4644 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
// end of this map
4650 _c4dbgp(
"mapjson[RKEY]: end!");
4652 _line_progressed(1);
4653 goto mapjson_finish;
// ---- RVAL: a value is expected ----
4659 else if(has_any(
RVAL))
4661 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
4662 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
4663 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
4664 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
4665 const char first = rem.str[0];
4666 _c4dbgpf(
"mapjson[RVAL]: '{}'", first);
// double-quoted value
4671 _c4dbgp(
"mapjson[RVAL]: scanning double-quoted scalar");
4672 ScannedScalar sc = _scan_scalar_dquot();
4673 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
4674 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
// value is a sequence: the child takes the parent's indentation reference
4680 _c4dbgp(
"mapjson[RVAL]: start val seqjson");
4682 m_evt_handler->begin_seq_val_flow();
4683 _set_indentation(m_evt_handler->m_parent->indref);
4685 _line_progressed(1);
4686 goto mapjson_finish;
// value is a map: the child takes the parent's indentation reference
4690 _c4dbgp(
"mapjson[RVAL]: start val mapjson");
4692 m_evt_handler->begin_map_val_flow();
4693 _set_indentation(m_evt_handler->m_parent->indref);
4695 _line_progressed(1);
// plain scalar value, if the scan succeeds
4702 if(_scan_scalar_map_json(&sc))
4704 _c4dbgp(
"mapjson[RVAL]: plain scalar.");
4705 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
4706 m_evt_handler->set_val_scalar_plain(maybe_filtered);
// ---- RKCL: the colon after a key is expected ----
4717 else if(has_any(
RKCL))
4719 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
4720 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
4721 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
4722 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
4723 const char first = rem.str[0];
4724 _c4dbgpf(
"mapjson[RKCL]: '{}'", first);
4727 _c4dbgp(
"mapjson[RKCL]: found the colon");
4729 _line_progressed(1);
// ---- RNXT: a separator or the end is expected ----
4736 else if(has_any(
RNXT))
4738 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
4739 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
4740 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
4741 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
4742 _c4dbgpf(
"mapjson[RNXT]: '{}'", rem.str[0]);
// ',' opens a sibling for the next key/value pair
4743 if(rem.begins_with(
','))
4745 _c4dbgp(
"mapjson[RNXT]: expect next keyval");
4746 m_evt_handler->add_sibling();
4748 _line_progressed(1);
// '}' ends this map
4750 else if(rem.begins_with(
'}'))
4752 _c4dbgp(
"mapjson[RNXT]: end!");
4754 _line_progressed(1);
4755 goto mapjson_finish;
// ---- continue with the rest of the line / next line ----
4764 _c4dbgt(
"mapjson: go again", 0);
4765 if(_finished_line())
4767 if(C4_LIKELY(!_finished_file()))
// NOTE(review): presumably reached when the file ends with the map still open - confirm
4775 _c4err(
"missing terminating }");
4781 _c4dbgp(
"mapjson: finish");
/** Handle a map nested in a flow sequence (state flag RSEQIMAP).
 *
 * Preconditions (asserted): RSEQIMAP is set, RKEY is not, exactly one of
 * RVAL / RNXT / QMRK / RKCL is set, and the handler stack holds at least
 * three entries. Flow whitespace is skipped first.
 *
 * Visible arms per state:
 *  - RVAL: single-quoted, double-quoted or plain scalar value; child flow
 *    sequence or map; ',' or ']' (empty value); '&' anchor; '*' reference.
 *  - RNXT: ',' or ']' finishes.
 *  - QMRK: the same kinds of tokens as RVAL, applied to the key; ',' or ']'
 *    sets both an empty key and an empty value.
 *  - RKCL: the ':' separator, or ',' / ']' which sets an empty value.
 */
4787 template<
class EventHandler>
4788 void ParseEngine<EventHandler>::_handle_seq_imap()
4791 _c4dbgpf(
"handle2_seq_imap: node_id={} level={} indref={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
4793 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(
RSEQIMAP));
4794 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
4795 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL|
RNXT|
QMRK|
RKCL));
4796 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, 1 == has_all(
RVAL) + has_all(
RNXT) + has_all(
QMRK) + has_all(
RKCL));
4797 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 3);
4799 _handle_flow_skip_whitespace();
4800 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// ---- RVAL: the value of the implicit map is expected ----
4806 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL));
4807 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
4808 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
4809 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
4810 const char first = rem.str[0];
4811 _c4dbgpf(
"seqimap[RVAL]: '{}'", _c4prc(first));
// single-quoted value
4815 _c4dbgp(
"seqimap[RVAL]: scanning single-quoted scalar");
4816 sc = _scan_scalar_squot();
4817 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
4818 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
4820 goto seqimap_finish;
// double-quoted value
4822 else if(first ==
'"')
4824 _c4dbgp(
"seqimap[RVAL]: scanning double-quoted scalar");
4825 sc = _scan_scalar_dquot();
4826 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
4827 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
4829 goto seqimap_finish;
// plain scalar value; tried before the structural characters below
4832 else if(_scan_scalar_plain_map_flow(&sc))
4834 _c4dbgp(
"seqimap[RVAL]: it's a scalar.");
4835 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
4836 m_evt_handler->set_val_scalar_plain(maybe_filtered);
4838 goto seqimap_finish;
// value is a flow sequence
4840 else if(first ==
'[')
4842 _c4dbgp(
"seqimap[RVAL]: start child seqflow");
4844 m_evt_handler->begin_seq_val_flow();
4846 _set_indentation(m_evt_handler->m_parent->indref);
4847 _line_progressed(1);
4848 goto seqimap_finish;
// value is a flow map
4850 else if(first ==
'{')
4852 _c4dbgp(
"seqimap[RVAL]: start child mapflow");
4854 m_evt_handler->begin_map_val_flow();
4856 _set_indentation(m_evt_handler->m_parent->indref);
4857 _line_progressed(1);
4858 goto seqimap_finish;
// separator or end of the enclosing sequence: the value is empty
4860 else if(first ==
',' || first ==
']')
4862 _c4dbgp(
"seqimap[RVAL]: finish without val.");
4863 m_evt_handler->set_val_scalar_plain_empty();
4865 goto seqimap_finish;
// anchor on the value
4867 else if(first ==
'&')
4869 csubstr anchor = _scan_anchor();
4870 _c4dbgp(
"seqimap[RVAL]: anchor!");
4871 m_evt_handler->set_val_anchor(anchor);
// value is a reference
4873 else if(first ==
'*')
4875 csubstr ref = _scan_ref_seq();
4876 _c4dbgp(
"seqimap[RVAL]: ref!");
4877 m_evt_handler->set_val_ref(ref);
// ---- RNXT: only a separator or the end of the sequence is handled ----
4885 else if(has_any(
RNXT))
4887 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT));
4888 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
4889 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
4890 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
4891 const char first = rem.str[0];
4892 _c4dbgpf(
"seqimap[RNXT]: '{}'", _c4prc(first));
4893 if(first ==
',' || first ==
']')
4897 _c4dbgp(
"seqimap: done");
4899 goto seqimap_finish;
// ---- QMRK: the key of the implicit map is expected ----
4906 else if(has_any(
QMRK))
4908 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
QMRK));
4909 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
4910 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
4911 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
4912 const char first = rem.str[0];
4913 _c4dbgpf(
"seqimap[QMRK]: '{}'", _c4prc(first));
// single-quoted key
4917 _c4dbgp(
"seqimap[QMRK]: scanning single-quoted scalar");
4918 sc = _scan_scalar_squot();
4919 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
4920 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
// double-quoted key
4924 else if(first ==
'"')
4926 _c4dbgp(
"seqimap[QMRK]: scanning double-quoted scalar");
4927 sc = _scan_scalar_dquot();
4928 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
4929 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
// plain scalar key
4934 else if(_scan_scalar_plain_map_flow(&sc))
4936 _c4dbgp(
"seqimap[QMRK]: it's a scalar.");
4937 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
4938 m_evt_handler->set_key_scalar_plain(maybe_filtered);
// key is a flow sequence
4942 else if(first ==
'[')
4944 _c4dbgp(
"seqimap[QMRK]: start child seqflow");
4946 m_evt_handler->begin_seq_key_flow();
4948 _set_indentation(m_evt_handler->m_parent->indref);
4949 _line_progressed(1);
4950 goto seqimap_finish;
// key is a flow map
4952 else if(first ==
'{')
4954 _c4dbgp(
"seqimap[QMRK]: start child mapflow");
4956 m_evt_handler->begin_map_key_flow();
4958 _set_indentation(m_evt_handler->m_parent->indref);
4959 _line_progressed(1);
4960 goto seqimap_finish;
// separator or end: both key and value are empty
4962 else if(first ==
',' || first ==
']')
4964 _c4dbgp(
"seqimap[QMRK]: finish without key.");
4965 m_evt_handler->set_key_scalar_plain_empty();
4966 m_evt_handler->set_val_scalar_plain_empty();
4968 goto seqimap_finish;
// anchor on the key
4970 else if(first ==
'&')
4972 csubstr anchor = _scan_anchor();
4973 _c4dbgp(
"seqimap[QMRK]: anchor!");
4974 m_evt_handler->set_key_anchor(anchor);
// key is a reference
4976 else if(first ==
'*')
4978 csubstr ref = _scan_ref_seq();
4979 _c4dbgp(
"seqimap[QMRK]: ref!");
4980 m_evt_handler->set_key_ref(ref);
// ---- RKCL: the ':' after the key is expected ----
4988 else if(has_any(
RKCL))
4990 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
4991 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
4992 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
4993 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RKCL));
4994 const char first = rem.str[0];
4995 _c4dbgpf(
"seqimap[RKCL]: '{}'", _c4prc(first));
4998 _c4dbgp(
"seqimap[RKCL]: found ':'");
5000 _line_progressed(1);
// separator or end instead of ':': the value is empty
5003 else if(first ==
',' || first ==
']')
5005 _c4dbgp(
"seqimap[RKCL]: found ','. finish without val");
5006 m_evt_handler->set_val_scalar_plain_empty();
5008 goto seqimap_finish;
// ---- continue with the rest of the line / next line ----
5017 _c4dbgt(
"seqimap: go again", 0);
5018 if(_finished_line())
5020 if(C4_LIKELY(!_finished_file()))
5034 _c4dbgp(
"seqimap: finish");
/** Handle a flow sequence ("[...]").
 *
 * Preconditions (asserted): RSEQ and RFLOW are set, RKEY is not, exactly one
 * of RVAL / RNXT is set, and the current indentation reference is not npos.
 * Flow whitespace is skipped first, and the empty-line case is tested before
 * the first character is read.
 *
 * Visible arms:
 *  - RVAL: single-quoted, double-quoted or plain scalar; child sequence or
 *    map; ']' end; '*' reference; '&' anchor and '!' tag (each of which may
 *    be followed by a comma, yielding an empty scalar); ':' (map with empty
 *    key); '?' (map with explicit key).
 *  - RNXT: separator before the next value; ']' end; ':' which turns the
 *    previous value into the first key of a new flow map.
 */
5040 template<
class EventHandler>
5041 void ParseEngine<EventHandler>::_handle_seq_flow()
5044 _c4dbgpf(
"handle_seq_flow: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5046 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
5047 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(
RSEQ));
5048 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(
RFLOW));
5049 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL|
RNXT));
5050 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(
RVAL) != has_all(
RNXT));
5051 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref !=
npos);
5053 _handle_flow_skip_whitespace();
// nothing left on this line after skipping whitespace
5055 if(!m_evt_handler->m_curr->line_contents.rem.len)
// ---- RVAL: a value is expected ----
5060 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
5061 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
// single-quoted value
5065 _c4dbgp(
"seqflow[RVAL]: scanning single-quoted scalar");
5066 sc = _scan_scalar_squot();
5067 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5068 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
// double-quoted value
5071 else if(first ==
'"')
5073 _c4dbgp(
"seqflow[RVAL]: scanning double-quoted scalar");
5074 sc = _scan_scalar_dquot();
5075 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5076 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
// plain scalar value; tried before the structural characters below
5080 else if(_scan_scalar_plain_seq_flow(&sc))
5082 _c4dbgp(
"seqflow[RVAL]: it's a scalar.");
5083 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5084 m_evt_handler->set_val_scalar_plain(maybe_filtered);
// nested flow sequence: the child takes the parent's indentation reference
5087 else if(first ==
'[')
5089 _c4dbgp(
"seqflow[RVAL]: start child seqflow");
5091 m_evt_handler->begin_seq_val_flow();
5092 _set_indentation(m_evt_handler->m_parent->indref);
5094 _line_progressed(1);
// nested flow map
5096 else if(first ==
'{')
5098 _c4dbgp(
"seqflow[RVAL]: start child mapflow");
5100 m_evt_handler->begin_map_val_flow();
5101 _set_indentation(m_evt_handler->m_parent->indref);
5103 _line_progressed(1);
5104 goto seqflow_finish;
// end of this sequence
5106 else if(first ==
']')
5108 _c4dbgp(
"seqflow[RVAL]: end!");
5109 _line_progressed(1);
5111 goto seqflow_finish;
// value is a reference
5113 else if(first ==
'*')
5115 csubstr ref = _scan_ref_seq();
5116 _c4dbgpf(
"seqflow[RVAL]: ref! [{}]~~~{}~~~", ref.len, ref);
5117 m_evt_handler->set_val_ref(ref);
// anchor; if a comma follows, the anchored value is an empty scalar
5120 else if(first ==
'&')
5122 csubstr anchor = _scan_anchor();
5123 _c4dbgpf(
"seqflow[RVAL]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
5124 m_evt_handler->set_val_anchor(anchor);
5125 if(_maybe_scan_following_comma())
5127 _c4dbgp(
"seqflow[RVAL]: empty scalar!");
5128 m_evt_handler->set_val_scalar_plain_empty();
5129 m_evt_handler->add_sibling();
// tag; if a comma follows, the tagged value is an empty scalar
5132 else if(first ==
'!')
5134 csubstr tag = _scan_tag();
5135 _c4dbgpf(
"seqflow[RVAL]: tag! [{}]~~~{}~~~", tag.len, tag);
5137 m_evt_handler->set_val_tag(tag);
5138 if(_maybe_scan_following_comma())
5140 _c4dbgp(
"seqflow[RVAL]: empty scalar!");
5141 m_evt_handler->set_val_scalar_plain_empty();
5142 m_evt_handler->add_sibling();
// ':' with no key before it: start a map whose key is empty
5145 else if(first ==
':')
5147 _c4dbgpf(
"seqflow[RVAL]: actually seqimap at node[{}], with empty key", m_evt_handler->m_curr->node_id);
5149 m_evt_handler->begin_map_val_flow();
5150 _set_indentation(m_evt_handler->m_parent->indref);
5151 m_evt_handler->set_key_scalar_plain_empty();
5153 _line_progressed(1);
5154 goto seqflow_finish;
// '?': start a map with an explicit key; m_was_inside_qmrk records this
5156 else if(first ==
'?')
5158 _c4dbgp(
"seqflow[RVAL]: start child mapflow, explicit key");
5160 m_was_inside_qmrk =
true;
5161 m_evt_handler->begin_map_val_flow();
5162 _set_indentation(m_evt_handler->m_parent->indref);
5164 _line_progressed(1);
5165 _maybe_skip_whitespace_tokens();
5166 goto seqflow_finish;
// ---- RNXT: a separator or the end is expected ----
5175 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT));
5176 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
5177 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
// separator: open a sibling for the next value
5180 _c4dbgp(
"seqflow[RNXT]: expect next val");
5182 m_evt_handler->add_sibling();
5183 _line_progressed(1);
// end of this sequence
5185 else if(first ==
']')
5187 _c4dbgp(
"seqflow[RNXT]: end!");
5188 _line_progressed(1);
5190 goto seqflow_finish;
// ':' after a value: that value becomes the first key of a new flow map
5192 else if(first ==
':')
5194 _c4dbgpf(
"seqflow[RNXT]: actually seqimap at node[{}]", m_evt_handler->m_curr->node_id);
5195 m_evt_handler->actually_val_is_first_key_of_new_map_flow();
5196 _set_indentation(m_evt_handler->m_parent->indref);
5197 _line_progressed(1);
5199 goto seqflow_finish;
// ---- continue with the rest of the line / next line ----
5208 _c4dbgt(
"seqflow: go again", 0);
5209 if(_finished_line())
5211 if(C4_LIKELY(!_finished_file()))
// NOTE(review): presumably reached when the file ends with the sequence still open - confirm
5219 _c4err(
"missing terminating ]");
5225 _c4dbgp(
"seqflow: finish");
5231 template<
class EventHandler>
5232 void ParseEngine<EventHandler>::_handle_map_flow()
5235 _c4dbgpf(
"handle_map_flow: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5237 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP));
5238 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(
RFLOW));
5240 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(
RKEY) + has_any(
RKCL) + has_any(
RVAL) + has_any(
RNXT) + has_any(
QMRK)));
5242 _handle_flow_skip_whitespace();
5243 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
5249 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
5250 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
5251 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
5252 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
5253 const char first = rem.str[0];
5254 _c4dbgpf(
"mapflow[RKEY]: '{}'", first);
5258 _c4dbgp(
"mapflow[RKEY]: scanning single-quoted scalar");
5259 sc = _scan_scalar_squot();
5260 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5261 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
5264 else if(first ==
'"')
5266 _c4dbgp(
"mapflow[RKEY]: scanning double-quoted scalar");
5267 sc = _scan_scalar_dquot();
5268 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5269 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5273 else if(_scan_scalar_plain_map_flow(&sc))
5275 _c4dbgp(
"mapflow[RKEY]: plain scalar");
5276 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5277 m_evt_handler->set_key_scalar_plain(maybe_filtered);
5280 else if(first ==
'?')
5282 _c4dbgp(
"mapflow[RKEY]: explicit key");
5283 _line_progressed(1);
5285 _maybe_skip_whitespace_tokens();
5287 else if(first ==
':')
5289 _c4dbgp(
"mapflow[RKEY]: setting empty key");
5290 m_evt_handler->set_key_scalar_plain_empty();
5292 _line_progressed(1);
5293 _maybe_skip_whitespace_tokens();
5295 else if(first ==
',')
5297 _c4dbgp(
"mapflow[RKEY]: empty key+val!");
5298 m_evt_handler->set_key_scalar_plain_empty();
5299 m_evt_handler->set_val_scalar_plain_empty();
5303 else if(first ==
'}')
5305 _c4dbgp(
"mapflow[RKEY]: end!");
5306 _line_progressed(1);
5308 goto mapflow_finish;
5310 else if(first ==
'&')
5312 csubstr anchor = _scan_anchor();
5313 _c4dbgpf(
"mapflow[RKEY]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
5314 m_evt_handler->set_key_anchor(anchor);
5316 else if(first ==
'*')
5318 csubstr ref = _scan_ref_map();
5319 _c4dbgpf(
"mapflow[RKEY]: key ref! [{}]~~~{}~~~", ref.len, ref);
5320 m_evt_handler->set_key_ref(ref);
5323 else if(first ==
'[')
5328 _c4dbgp(
"mapflow[RKEY]: start child seqflow (!)");
5330 m_evt_handler->begin_seq_key_flow();
5332 _set_indentation(m_evt_handler->m_parent->indref);
5333 _line_progressed(1);
5334 goto mapflow_finish;
5336 else if(first ==
'{')
5341 _c4dbgp(
"mapflow[RKEY]: start child mapflow (!)");
5343 m_evt_handler->begin_map_key_flow();
5345 _set_indentation(m_evt_handler->m_parent->indref);
5346 _line_progressed(1);
5349 else if(first ==
'!')
5351 csubstr tag = _scan_tag();
5352 _c4dbgpf(
"mapflow[RKEY]: tag! [{}]~~~{}~~~", tag.len, tag);
5354 m_evt_handler->set_key_tag(tag);
5361 else if(has_any(
RKCL))
5363 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
5364 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
5365 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
5366 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
5367 const char first = rem.str[0];
5368 _c4dbgpf(
"mapflow[RKCL]: '{}'", first);
5371 _c4dbgp(
"mapflow[RKCL]: found the colon");
5373 _line_progressed(1);
5375 else if(first ==
'}')
5377 _c4dbgp(
"mapflow[RKCL]: end with missing val!");
5379 m_evt_handler->set_val_scalar_plain_empty();
5380 _line_progressed(1);
5382 goto mapflow_finish;
5384 else if(first ==
',')
5386 _c4dbgp(
"mapflow[RKCL]: got comma. val is missing");
5387 m_evt_handler->set_val_scalar_plain_empty();
5388 m_evt_handler->add_sibling();
5390 _line_progressed(1);
5397 else if(has_any(
RVAL))
5399 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
5400 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
5401 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
5402 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
5403 const char first = rem.str[0];
5404 _c4dbgpf(
"mapflow[RVAL]: '{}'", first);
5408 _c4dbgp(
"mapflow[RVAL]: scanning single-quoted scalar");
5409 sc = _scan_scalar_squot();
5410 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5411 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
5414 else if(first ==
'"')
5416 _c4dbgp(
"mapflow[RVAL]: scanning double-quoted scalar");
5417 sc = _scan_scalar_dquot();
5418 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5419 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5423 else if(_scan_scalar_plain_map_flow(&sc))
5425 _c4dbgp(
"mapflow[RVAL]: plain scalar.");
5426 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5427 m_evt_handler->set_val_scalar_plain(maybe_filtered);
5430 else if(first ==
'[')
5432 _c4dbgp(
"mapflow[RVAL]: start val seqflow");
5434 m_evt_handler->begin_seq_val_flow();
5435 _set_indentation(m_evt_handler->m_parent->indref);
5437 _line_progressed(1);
5438 goto mapflow_finish;
5440 else if(first ==
'{')
5442 _c4dbgp(
"mapflow[RVAL]: start val mapflow");
5444 m_evt_handler->begin_map_val_flow();
5445 _set_indentation(m_evt_handler->m_parent->indref);
5447 _line_progressed(1);
5450 else if(first ==
'}')
5452 _c4dbgp(
"mapflow[RVAL]: end!");
5453 m_evt_handler->set_val_scalar_plain_empty();
5454 _line_progressed(1);
5456 goto mapflow_finish;
5458 else if(first ==
',')
5460 _c4dbgp(
"mapflow[RVAL]: empty val!");
5461 m_evt_handler->set_val_scalar_plain_empty();
5465 else if(first ==
'*')
5467 csubstr ref = _scan_ref_map();
5468 _c4dbgpf(
"mapflow[RVAL]: key ref! [{}]~~~{}~~~", ref.len, ref);
5469 m_evt_handler->set_val_ref(ref);
5472 else if(first ==
'&')
5474 csubstr anchor = _scan_anchor();
5475 _c4dbgpf(
"mapflow[RVAL]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
5476 m_evt_handler->set_val_anchor(anchor);
5478 else if(first ==
'!')
5480 csubstr tag = _scan_tag();
5481 _c4dbgpf(
"mapflow[RVAL]: tag! [{}]~~~{}~~~", tag.len, tag);
5483 m_evt_handler->set_val_tag(tag);
5490 else if(has_any(
RNXT))
5492 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
5493 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
5494 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
5495 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
5496 _c4dbgpf(
"mapflow[RNXT]: '{}'", rem.str[0]);
5497 if(rem.begins_with(
','))
5499 _c4dbgp(
"mapflow[RNXT]: expect next keyval");
5500 m_evt_handler->add_sibling();
5502 _line_progressed(1);
5504 else if(rem.begins_with(
'}'))
5506 _c4dbgp(
"mapflow[RNXT]: end!");
5507 _line_progressed(1);
5509 goto mapflow_finish;
5516 else if(has_any(
QMRK))
5518 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
5519 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
5520 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
5521 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
5522 const char first = rem.str[0];
5523 _c4dbgpf(
"mapflow[QMRK]: '{}'", first);
5527 _c4dbgp(
"mapflow[QMRK]: scanning single-quoted scalar");
5528 sc = _scan_scalar_squot();
5529 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5530 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
5533 else if(first ==
'"')
5535 _c4dbgp(
"mapflow[QMRK]: scanning double-quoted scalar");
5536 sc = _scan_scalar_dquot();
5537 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5538 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5542 else if(_scan_scalar_plain_map_flow(&sc))
5544 _c4dbgp(
"mapflow[QMRK]: plain scalar");
5545 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5546 m_evt_handler->set_key_scalar_plain(maybe_filtered);
5549 else if(first ==
':')
5551 _c4dbgp(
"mapflow[QMRK]: setting empty key");
5552 m_evt_handler->set_key_scalar_plain_empty();
5554 _line_progressed(1);
5555 _maybe_skip_whitespace_tokens();
5557 else if(first ==
'}')
5559 _c4dbgp(
"mapflow[QMRK]: end!");
5560 m_evt_handler->set_key_scalar_plain_empty();
5561 m_evt_handler->set_val_scalar_plain_empty();
5563 _line_progressed(1);
5564 goto mapflow_finish;
5566 else if(first ==
',')
5568 _c4dbgp(
"mapflow[QMRK]: empty key+val!");
5569 m_evt_handler->set_key_scalar_plain_empty();
5570 m_evt_handler->set_val_scalar_plain_empty();
5573 else if(first ==
'&')
5575 csubstr anchor = _scan_anchor();
5576 _c4dbgpf(
"mapflow[QMRK]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
5577 m_evt_handler->set_key_anchor(anchor);
5579 else if(first ==
'*')
5581 csubstr ref = _scan_ref_map();
5582 _c4dbgpf(
"mapflow[QMRK]: key ref! [{}]~~~{}~~~", ref.len, ref);
5583 m_evt_handler->set_key_ref(ref);
5586 else if(first ==
'[')
5591 _c4dbgp(
"mapflow[QMRK]: start child seqflow (!)");
5593 m_evt_handler->begin_seq_key_flow();
5595 _set_indentation(m_evt_handler->m_parent->indref);
5596 _line_progressed(1);
5597 goto mapflow_finish;
5599 else if(first ==
'{')
5604 _c4dbgp(
"mapflow[QMRK]: start child mapflow (!)");
5606 m_evt_handler->begin_map_key_flow();
5607 _set_indentation(m_evt_handler->m_parent->indref);
5609 _line_progressed(1);
5612 else if(first ==
'!')
5614 csubstr tag = _scan_tag();
5615 _c4dbgpf(
"mapflow[QMRK]: tag! [{}]~~~{}~~~", tag.len, tag);
5617 m_evt_handler->set_key_tag(tag);
5626 _c4dbgt(
"mapflow: go again", 0);
5627 if(_finished_line())
5629 if(C4_LIKELY(!_finished_file()))
5637 _c4err(
"missing terminating }");
5643 _c4dbgp(
"mapflow: finish");
// Handles input while the parser state is a block sequence (RSEQ|RBLCK).
// The state is asserted to carry exactly one of:
//   RVAL - a value for the current entry is being read
//   RNXT - the next entry (or the end of the sequence) is being looked for
// Results are reported through calls on m_evt_handler; nothing is returned.
// NOTE(review): in this listing the braces and some branch headers
// (e.g. the test selecting RVAL vs RNXT, and the seqblck_finish label)
// are not visible; the section markers below follow the debug messages
// and the assertions that are visible.
5649 template<
class EventHandler>
5650 void ParseEngine<EventHandler>::_handle_seq_block()
5653 _c4dbgpf(
"handle_seq_block: seq_id={} node_id={} level={} indent={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
// preconditions: block-sequence state, with exactly one of RVAL/RNXT set
5655 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(
RSEQ));
5656 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(
RBLCK));
5657 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL|
RNXT));
5658 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(
RVAL) + has_any(
RNXT)));
// rem is a local snapshot of line_contents.rem; it is re-read below
// after calls that advance the line
5660 _maybe_skip_comment();
5661 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// ---------------- RVAL section ----------------
5667 _c4dbgpf(
"seqblck[RVAL]: col={}", m_evt_handler->m_curr->pos.col);
5668 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
// at the beginning of a line, the line's indentation decides what happens
5669 if(m_evt_handler->m_curr->at_line_beginning())
5671 _c4dbgpf(
"seqblck[RVAL]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
// indentation_ge(): consume the line's indentation and refresh rem
5672 if(m_evt_handler->m_curr->indentation_ge())
5674 _c4dbgpf(
"seqblck[RVAL]: skip {} from indentation", m_evt_handler->m_curr->line_contents.indentation);
5675 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
5676 rem = m_evt_handler->m_curr->line_contents.rem;
// indentation_lt(): pop out of this block sequence and finish this call
5680 else if(m_evt_handler->m_curr->indentation_lt())
5682 _c4dbgp(
"seqblck[RVAL]: smaller indentation!");
5683 _handle_indentation_pop_from_block_seq();
5684 goto seqblck_finish;
// indentation == npos is handled as an empty line: consume all of it
5686 else if(m_evt_handler->m_curr->line_contents.indentation ==
npos)
5688 _c4dbgp(
"seqblck[RVAL]: empty line!");
5689 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
// compiled only when RYML_NO_COVERAGE__TO_BE_DELETED is defined: a leading
// space is offered to _handle_indentation_from_annotations(), and rem is
// refreshed when that call returns true.
// NOTE(review): the end of this #ifdef is not visible in this listing.
5693 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
5701 if(rem.str[0] ==
' ')
5703 if(_handle_indentation_from_annotations())
5705 _c4dbgp(
"seqblck[RVAL]: annotations!");
5706 rem = m_evt_handler->m_curr->line_contents.rem;
// rem is asserted non-empty; the branches below dispatch on its first char
5713 _RYML_ASSERT_BASIC_(callbacks(), rem.len);
5714 _c4dbgpf(
"seqblck[RVAL]: '{}' node_id={}", rem.str[0], m_evt_handler->m_curr->node_id);
5715 const char first = rem.str[0];
// position where this token starts: startline is the current line,
// startindent is the current column minus m_bom_len. Both are passed to
// the annotation helpers; startindent is also compared against indref.
5716 const size_t startline = m_evt_handler->m_curr->pos.line;
5719 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col() - m_bom_len;
// single-quoted scalar: when no ':' follows it is set as the value;
// otherwise a child block map is started and the scalar is set as its key
5723 _c4dbgp(
"seqblck[RVAL]: single-quoted scalar");
5724 sc = _scan_scalar_squot();
5725 if(!_maybe_scan_following_colon())
5727 _c4dbgp(
"seqblck[RVAL]: set as val");
5728 _handle_annotations_before_blck_val_scalar();
5729 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5730 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
5735 _c4dbgp(
"seqblck[RVAL]: start mapblck, set scalar as key");
5737 _handle_annotations_before_start_mapblck(startline);
5739 m_evt_handler->begin_map_val_block();
5740 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5741 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5742 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
5744 _maybe_skip_whitespace_tokens();
5745 goto seqblck_finish;
// double-quoted scalar: same value-or-key decision as for single quotes
5748 else if(first ==
'"')
5750 _c4dbgp(
"seqblck[RVAL]: double-quoted scalar");
5751 sc = _scan_scalar_dquot();
5752 if(!_maybe_scan_following_colon())
5754 _c4dbgp(
"seqblck[RVAL]: set as val");
5755 _handle_annotations_before_blck_val_scalar();
5756 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5757 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5762 _c4dbgp(
"seqblck[RVAL]: start mapblck, set scalar as key");
5764 _handle_annotations_before_start_mapblck(startline);
5766 m_evt_handler->begin_map_val_block();
5767 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5768 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5769 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5771 _maybe_skip_whitespace_tokens();
5772 goto seqblck_finish;
// '|': block-literal scalar; _scan_block() is called with indref + 1 and
// the result is always set as the value
5778 else if(first ==
'|')
5780 _c4dbgp(
"seqblck[RVAL]: block-literal scalar");
5782 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
5783 _handle_annotations_before_blck_val_scalar();
5784 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
5785 m_evt_handler->set_val_scalar_literal(maybe_filtered);
// '>': block-folded scalar; same flow as '|' with the folded filter/setter
5788 else if(first ==
'>')
5790 _c4dbgp(
"seqblck[RVAL]: block-folded scalar");
5792 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
5793 _handle_annotations_before_blck_val_scalar();
5794 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
5795 m_evt_handler->set_val_scalar_folded(maybe_filtered);
// plain scalar: set as the value when no ':' follows; when a ':' follows,
// the two cases below decide which map the scalar is a key of
5798 else if(_scan_scalar_plain_seq_blck(&sc))
5800 _c4dbgp(
"seqblck[RVAL]: plain scalar.");
5801 if(!_maybe_scan_following_colon())
5803 _c4dbgp(
"seqblck[RVAL]: set as val");
5804 _handle_annotations_before_blck_val_scalar();
5805 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5806 m_evt_handler->set_val_scalar_plain(maybe_filtered);
// scalar starts deeper than indref: start a child block map keyed by it
5811 if(startindent > m_evt_handler->m_curr->indref)
5813 _c4dbgp(
"seqblck[RVAL]: start mapblck, set scalar as key");
5815 _handle_annotations_before_start_mapblck(startline);
5817 m_evt_handler->begin_map_val_block();
5818 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5819 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5820 m_evt_handler->set_key_scalar_plain(maybe_filtered);
5822 _maybe_skip_whitespace_tokens();
5823 goto seqblck_finish;
// scalar starts at the parent's indref: the current value is set empty, this
// sequence is ended, and the scalar is set as the key of a new sibling.
// NOTE(review): this uses has_any(RMAP|RBLCK, parent), while the similar
// indentless-seq test in the RNXT section uses has_all(...) - confirm
// that the difference is intended.
5825 else if(m_evt_handler->m_parent && m_evt_handler->m_parent->indref == startindent && has_any(
RMAP|
RBLCK, m_evt_handler->m_parent))
5827 _c4dbgp(
"seqblck[RVAL]: empty val + end indentless seq + set key");
5828 m_evt_handler->set_val_scalar_plain_empty();
5829 m_evt_handler->end_seq_block();
5830 m_evt_handler->add_sibling();
5831 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5832 m_evt_handler->set_key_scalar_plain(maybe_filtered);
5834 _maybe_skip_whitespace_tokens();
5835 goto seqblck_finish;
// '[': start a flow sequence as the value; consume the bracket and set the
// indentation to the parent's indref + 1
5843 else if(first ==
'[')
5845 _c4dbgp(
"seqblck[RVAL]: start child seqflow");
5847 _handle_annotations_before_blck_val_scalar();
5848 m_evt_handler->begin_seq_val_flow();
5850 _line_progressed(1);
5851 _set_indentation(m_evt_handler->m_parent->indref + 1u);
5852 goto seqblck_finish;
// '{': start a flow map as the value; same steps as for '['
5854 else if(first ==
'{')
5856 _c4dbgp(
"seqblck[RVAL]: start child mapflow");
5858 _handle_annotations_before_blck_val_scalar();
5859 m_evt_handler->begin_map_val_flow();
5861 _line_progressed(1);
5862 _set_indentation(m_evt_handler->m_parent->indref + 1u);
5863 goto seqblck_finish;
// '-': at this sequence's own indref the pending value is set empty and a
// sibling is added; otherwise (asserted deeper) a nested block sequence is
// started at startindent. The dash is then consumed.
5865 else if(first ==
'-')
5867 if(startindent == m_evt_handler->m_curr->indref)
5869 _c4dbgp(
"seqblck[RVAL]: prev val was empty");
5870 _handle_annotations_before_blck_val_scalar();
5871 m_evt_handler->set_val_scalar_plain_empty();
5873 m_evt_handler->add_sibling();
5877 _c4dbgp(
"seqblck[RVAL]: start child seqblck");
5878 _RYML_ASSERT_BASIC_(this->callbacks(), startindent > m_evt_handler->m_curr->indref);
5880 _handle_annotations_before_blck_val_scalar();
5881 m_evt_handler->begin_seq_val_block();
5883 _set_indentation(startindent);
5886 _line_progressed(1);
5887 _maybe_skip_whitespace_tokens();
// ':': start a child block map whose first key is empty, then consume
// the colon
5889 else if(first ==
':')
5891 _c4dbgp(
"seqblck[RVAL]: start child mapblck with empty key");
5893 _handle_annotations_before_start_mapblck(startline);
5895 m_evt_handler->begin_map_val_block();
5896 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5897 m_evt_handler->set_key_scalar_plain_empty();
5899 _line_progressed(1);
5900 _maybe_skip_whitespace_tokens();
5901 goto seqblck_finish;
// '&': the anchor is not applied here; it is added to m_pending_anchors
// together with its start column and line
5903 else if(first ==
'&')
5905 const csubstr anchor = _scan_anchor();
5906 _c4dbgpf(
"seqblck[RVAL]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
5909 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
// '*': reference; set as the value when no ':' follows, otherwise it
// becomes the key of a new child block map at startindent
5911 else if(first ==
'*')
5913 csubstr ref = _scan_ref_seq();
5914 _c4dbgpf(
"seqblck[RVAL]: ref! [{}]~~~{}~~~", ref.len, ref);
5915 if(!_maybe_scan_following_colon())
5917 _c4dbgp(
"seqblck[RVAL]: set ref as val!");
5918 _handle_annotations_before_blck_val_scalar();
5919 m_evt_handler->set_val_ref(ref);
5924 _c4dbgp(
"seqblck[RVAL]: ref is key of map");
5926 _handle_annotations_before_start_mapblck(startline);
5927 m_evt_handler->begin_map_val_block();
5928 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5929 m_evt_handler->set_key_ref(ref);
5931 _set_indentation(startindent);
5932 _maybe_skip_whitespace_tokens();
5933 goto seqblck_finish;
// '!': the tag is added to m_pending_tags, like the anchor above
5936 else if(first ==
'!')
5938 csubstr tag = _scan_tag();
5939 _c4dbgpf(
"seqblck[RVAL]: val tag! [{}]~~~{}~~~", tag.len, tag);
5942 _add_annotation(&m_pending_tags, tag, startindent, startline);
// '?': explicit key; set m_was_inside_qmrk, start a child block map at
// startindent and consume the '?'
5944 else if(first ==
'?')
5946 _c4dbgp(
"seqblck[RVAL]: start child mapblck, explicit key");
5948 m_was_inside_qmrk =
true;
5949 m_evt_handler->begin_map_val_block();
5951 _set_indentation(startindent);
5952 _line_progressed(1);
5953 _maybe_skip_whitespace_tokens();
5954 goto seqblck_finish;
// ---------------- RNXT section ----------------
5963 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT));
5964 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
5968 _c4dbgpf(
"seqblck[RNXT]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
5969 if(C4_LIKELY(_at_line_begin()))
5971 _c4dbgp(
"seqblck[RNXT]: at line begin");
// indentation_ge(): skip indref characters (the RVAL section skips
// line_contents.indentation instead), then whitespace tokens
5972 if(m_evt_handler->m_curr->indentation_ge())
5974 _c4dbgpf(
"seqblck[RNXT]: skip {} from indref", m_evt_handler->m_curr->indref);
5975 _line_progressed(m_evt_handler->m_curr->indref);
5976 _maybe_skip_whitespace_tokens();
5977 rem = m_evt_handler->m_curr->line_contents.rem;
// indentation_lt(): pop; per the debug messages below, afterwards the state
// is either still a block seq (consume the indentation and continue) or no
// longer one (finish). The test between the two is not visible here.
5981 else if(m_evt_handler->m_curr->indentation_lt())
5983 _c4dbgp(
"seqblck[RNXT]: smaller indentation!");
5984 _handle_indentation_pop_from_block_seq();
5987 _c4dbgp(
"seqblck[RNXT]: still seqblck!");
5988 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT));
5989 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
5990 rem = m_evt_handler->m_curr->line_contents.rem;
5996 _c4dbgp(
"seqblck[RNXT]: no longer seqblck!");
5997 goto seqblck_finish;
// indentation == npos: blank line, consume all of it.
// NOTE(review): the debug format says len={} but is passed rem, not rem.len.
6000 else if(m_evt_handler->m_curr->line_contents.indentation ==
npos)
6002 _c4dbgpf(
"seqblck[RNXT]: blank line, len={}", m_evt_handler->m_curr->line_contents.rem);
6003 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
6004 rem = m_evt_handler->m_curr->line_contents.rem;
// not at line begin: the body of the whitespace test below is not visible
// in this listing; rem is refreshed afterwards
6011 _c4dbgp(
"seqblck[RNXT]: NOT at line begin");
6012 if(!rem.begins_with_any(
" \t"))
6019 rem = m_evt_handler->m_curr->line_contents.rem;
6022 _c4dbgp(
"seqblck[RNXT]: again");
// dispatch on the first remaining character
6030 const char first = rem.str[0];
6031 _c4dbgpf(
"seqblck[RNXT]: '{}' node_id={}", first, m_evt_handler->m_curr->node_id);
// next entry: taken when indref > 0, or the line is indented, or rem is not
// a doc-begin token; a sibling is added and one character is consumed.
// Otherwise a new document is started and three characters are consumed.
6034 if(m_evt_handler->m_curr->indref > 0 || m_evt_handler->m_curr->line_contents.indentation > 0 || !_is_doc_begin_token(rem))
6036 _c4dbgp(
"seqblck[RNXT]: expect next val");
6038 m_evt_handler->add_sibling();
6039 _line_progressed(1);
6040 _maybe_skip_whitespace_tokens();
6044 _c4dbgp(
"seqblck[RNXT]: start doc");
6045 _start_doc_suddenly();
6046 _line_progressed(3);
6047 _maybe_skip_whitespace_tokens();
6048 goto seqblck_finish;
// ':' while the parent state has RMAP: this sequence was the '?' key of the
// parent map, so the sequence is ended (the ':' is not consumed here)
6051 else if(first ==
':')
6057 auto const *C4_RESTRICT prev_state = m_evt_handler->m_parent;
6058 if(C4_LIKELY(prev_state && (prev_state->flags &
RMAP)))
6060 _c4dbgp(
"seqblck[RNXT]: actually this seq was '?' key of parent map");
6061 m_evt_handler->end_seq_block();
6062 goto seqblck_finish;
// '.': when rem is exactly "..." or begins with "... ", end the document
// and consume the three dots
6069 else if(first ==
'.')
6071 _c4dbgp(
"seqblck[RNXT]: maybe doc?");
6072 csubstr rs = rem.sub(1);
6073 if(rs ==
".." || rs.begins_with(
".. "))
6075 _c4dbgp(
"seqblck[RNXT]: end+start doc");
6076 _end_doc_suddenly();
6077 _line_progressed(3);
6078 _maybe_skip_whitespace_tokens();
6079 goto seqblck_finish;
// prints every state in the handler's stack via _dbg_printf
6092 for(
auto const& s : m_evt_handler->m_stack)
6094 _dbg_printf(
"state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(flagbuf_, s.flags));
// indentless sequence: the parent is a block map (RMAP|RBLCK) with the same
// indref as this sequence, so pop back to the parent map and add a sibling
6097 if(m_evt_handler->m_parent && has_all(
RMAP|
RBLCK, m_evt_handler->m_parent) && m_evt_handler->m_curr->indref == m_evt_handler->m_parent->indref)
6099 _c4dbgpf(
"seqblck[RNXT]: end indentless seq, go to parent={}. node={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id);
6100 _RYML_ASSERT_BASIC_(this->callbacks(), m_evt_handler->m_curr != m_evt_handler->m_parent);
6101 _handle_indentation_pop(m_evt_handler->m_parent);
6102 _RYML_ASSERT_BASIC_(this->callbacks(), has_all(
RMAP|
RBLCK));
6103 m_evt_handler->add_sibling();
6105 goto seqblck_finish;
// end of one pass: checks for end of line and end of file.
// NOTE(review): the statements between these checks and the jump target for
// "go again" are not visible in this listing.
6115 _c4dbgt(
"seqblck: go again", 0);
6116 if(_finished_line())
6121 if(_finished_file())
6123 _c4dbgp(
"seqblck: finish!");
6125 goto seqblck_finish;
6132 _c4dbgp(
"seqblck: finish");
6138 template<
class EventHandler>
6139 void ParseEngine<EventHandler>::_handle_map_block()
6142 _c4dbgpf(
"handle_map_block: map_id={} node_id={} level={} indref={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
6145 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP));
6146 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(
RBLCK));
6148 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(
RKEY) + has_any(
RKCL) + has_any(
RVAL) + has_any(
RNXT) + has_any(
QMRK)));
6150 _maybe_skip_comment();
6151 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
6157 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
6158 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
6159 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
6160 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
6164 if(m_evt_handler->m_curr->at_line_beginning())
6166 if(m_evt_handler->m_curr->indentation_eq())
6168 _c4dbgpf(
"mapblck[RKEY]: skip {} from indref", m_evt_handler->m_curr->indref);
6169 _line_progressed(m_evt_handler->m_curr->indref);
6170 rem = m_evt_handler->m_curr->line_contents.rem;
6174 else if(m_evt_handler->m_curr->indentation_lt())
6176 _c4dbgp(
"mapblck[RKEY]: smaller indentation!");
6177 _handle_indentation_pop_from_block_map();
6178 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6181 _c4dbgp(
"mapblck[RKEY]: still mapblck!");
6182 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY));
6183 rem = m_evt_handler->m_curr->line_contents.rem;
6189 _c4dbgp(
"mapblck[RKEY]: no longer mapblck!");
6190 goto mapblck_finish;
6195 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_gt());
6196 _c4err(
"invalid indentation");
6202 const char first = rem.str[0];
6203 const size_t startline = m_evt_handler->m_curr->pos.line;
6204 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
6205 _c4dbgpf(
"mapblck[RKEY]: '{}'", first);
6209 _c4dbgp(
"mapblck[RKEY]: scanning single-quoted scalar");
6210 sc = _scan_scalar_squot();
6211 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
6212 _handle_annotations_before_blck_key_scalar();
6213 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6215 if(!_maybe_scan_following_colon())
6216 _c4err(
"could not find ':' colon after key");
6217 _maybe_skip_whitespace_tokens();
6219 else if(first ==
'"')
6221 _c4dbgp(
"mapblck[RKEY]: scanning double-quoted scalar");
6222 sc = _scan_scalar_dquot();
6223 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
6224 _handle_annotations_before_blck_key_scalar();
6225 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6227 if(!_maybe_scan_following_colon())
6228 _c4err(
"could not find ':' colon after key");
6229 _maybe_skip_whitespace_tokens();
6233 else if(C4_UNLIKELY(first ==
'|'))
6235 _c4err(
"block map: literal keys must be enclosed in '?'");
6237 else if(C4_UNLIKELY(first ==
'>'))
6239 _c4err(
"block map: folded keys must be enclosed in '?'");
6241 else if(_scan_scalar_plain_map_blck(&sc))
6243 _c4dbgp(
"mapblck[RKEY]: plain scalar");
6244 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
6245 _handle_annotations_before_blck_key_scalar();
6246 m_evt_handler->set_key_scalar_plain(maybe_filtered);
6248 if(!_maybe_scan_following_colon())
6249 _c4err(
"could not find ':' colon after key");
6250 _maybe_skip_whitespace_tokens();
6252 else if(first ==
'?')
6254 _c4dbgp(
"mapblck[RKEY]: key token!");
6256 _line_progressed(1);
6257 _maybe_skip_whitespace_tokens();
6258 m_was_inside_qmrk =
true;
6261 else if(first ==
':')
6263 _c4dbgp(
"mapblck[RKEY]: setting empty key");
6264 _handle_annotations_before_blck_key_scalar();
6265 m_evt_handler->set_key_scalar_plain_empty();
6267 _line_progressed(1);
6268 _maybe_skip_whitespace_tokens();
6270 else if(first ==
'*')
6272 csubstr ref = _scan_ref_map();
6273 _c4dbgpf(
"mapblck[RKEY]: key ref! [{}]~~~{}~~~", ref.len, ref);
6274 _handle_annotations_before_blck_key_scalar();
6275 m_evt_handler->set_key_ref(ref);
6277 if(!_maybe_scan_following_colon())
6278 _c4err(
"could not find ':' colon after key");
6279 _maybe_skip_whitespace_tokens();
6281 else if(first ==
'&')
6283 csubstr anchor = _scan_anchor();
6284 _c4dbgpf(
"mapblck[RKEY]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
6285 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
6287 else if(first ==
'!')
6289 csubstr tag = _scan_tag();
6290 _c4dbgpf(
"mapblck[RKEY]: key tag! [{}]~~~{}~~~", tag.len, tag);
6291 _add_annotation(&m_pending_tags, tag, startindent, startline);
6293 else if(first ==
'[')
6298 _c4dbgp(
"mapblck[RKEY]: start child seqflow (!)");
6300 _handle_annotations_before_blck_key_scalar();
6301 m_evt_handler->begin_seq_key_flow();
6303 _line_progressed(1);
6304 _set_indentation(startindent);
6305 goto mapblck_finish;
6307 else if(first ==
'{')
6312 _c4dbgp(
"mapblck[RKEY]: start child mapflow (!)");
6314 _handle_annotations_before_blck_key_scalar();
6315 m_evt_handler->begin_map_key_flow();
6317 _line_progressed(1);
6318 _set_indentation(startindent);
6319 goto mapblck_finish;
6321 else if(first ==
'-')
6323 _c4dbgp(
"mapblck[RKEY]: maybe doc?");
6324 if(m_evt_handler->m_curr->line_contents.indentation == 0 && _is_doc_begin_token(rem))
6326 _c4dbgp(
"mapblck[RKEY]: end+start doc");
6327 _start_doc_suddenly();
6328 _line_progressed(3);
6329 _maybe_skip_whitespace_tokens();
6330 goto mapblck_finish;
6337 else if(first ==
'.')
6339 _c4dbgp(
"mapblck[RKEY]: maybe end doc?");
6340 if(m_evt_handler->m_curr->line_contents.indentation == 0 && _is_doc_end_token(rem))
6342 _c4dbgp(
"mapblck[RKEY]: end doc");
6343 _end_doc_suddenly();
6344 _line_progressed(3);
6345 _maybe_skip_whitespace_tokens();
6346 goto mapblck_finish;
6354 else if(first ==
'\t')
6356 _c4dbgp(
"mapblck[RKEY]: skip tabs");
6357 _maybe_skipchars(
'\t');
6364 else if(has_any(
RKCL))
6366 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
6367 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
6368 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
6369 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
6373 if(m_evt_handler->m_curr->at_line_beginning())
6375 if(m_evt_handler->m_curr->indentation_eq())
6377 _c4dbgpf(
"mapblck[RKCL]: skip {} from indref", m_evt_handler->m_curr->indref);
6378 _line_progressed(m_evt_handler->m_curr->indref);
6379 rem = m_evt_handler->m_curr->line_contents.rem;
6383 else if(C4_UNLIKELY(m_evt_handler->m_curr->indentation_lt()))
6385 _c4err(
"invalid indentation");
6388 const char first = rem.str[0];
6389 _c4dbgpf(
"mapblck[RKCL]: '{}'", first);
6392 _c4dbgp(
"mapblck[RKCL]: found the colon");
6394 _line_progressed(1);
6395 _maybe_skip_whitespace_tokens();
6397 else if(first ==
'?')
6399 _c4dbgp(
"mapblck[RKCL]: got '?'. val was empty");
6400 _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_was_inside_qmrk);
6401 m_evt_handler->set_val_scalar_plain_empty();
6402 m_evt_handler->add_sibling();
6404 _line_progressed(1);
6405 _maybe_skip_whitespace_tokens();
6407 else if(first ==
'-')
6409 if(m_evt_handler->m_curr->indref == 0 || m_evt_handler->m_curr->line_contents.indentation == 0 || _is_doc_begin_token(rem))
6411 _c4dbgp(
"mapblck[RKCL]: end+start doc");
6412 _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, _is_doc_begin_token(rem));
6413 _start_doc_suddenly();
6414 _line_progressed(3);
6415 _maybe_skip_whitespace_tokens();
6416 goto mapblck_finish;
6423 else if(first ==
'.')
6425 _c4dbgp(
"mapblck[RKCL]: maybe end doc?");
6426 csubstr rs = rem.sub(1);
6427 if(rs ==
".." || rs.begins_with(
".. "))
6429 _c4dbgp(
"mapblck[RKCL]: end+start doc");
6430 _end_doc_suddenly();
6431 _line_progressed(3);
6432 goto mapblck_finish;
6439 else if(m_was_inside_qmrk)
6441 _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_eq());
6442 _c4dbgp(
"mapblck[RKCL]: missing :");
6443 m_evt_handler->set_val_scalar_plain_empty();
6444 m_evt_handler->add_sibling();
6445 m_was_inside_qmrk =
false;
6453 else if(has_any(
RVAL))
6455 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
6456 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
6457 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
6458 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
6462 if(m_evt_handler->m_curr->at_line_beginning())
6464 _c4dbgpf(
"mapblck[RVAL]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
6465 m_evt_handler->m_curr->more_indented =
false;
6466 if(m_evt_handler->m_curr->indref ==
npos)
6468 _c4dbgpf(
"mapblck[RVAL]: setting indentation={}", m_evt_handler->m_parent->indref);
6469 _set_indentation(m_evt_handler->m_curr->line_contents.indentation);
6470 _line_progressed(m_evt_handler->m_curr->indref);
6471 rem = m_evt_handler->m_curr->line_contents.rem;
6475 else if(m_evt_handler->m_curr->indentation_eq())
6477 _c4dbgp(
"mapblck[RVAL]: skip indentation!");
6478 _line_progressed(m_evt_handler->m_curr->indref);
6479 rem = m_evt_handler->m_curr->line_contents.rem;
6507 else if(m_evt_handler->m_curr->indentation_gt())
6509 _c4dbgp(
"mapblck[RVAL]: more indented!");
6510 m_evt_handler->m_curr->more_indented =
true;
6511 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6512 rem = m_evt_handler->m_curr->line_contents.rem;
6516 else if(m_evt_handler->m_curr->indentation_lt())
6518 _c4dbgp(
"mapblck[RVAL]: smaller indentation!");
6519 _handle_indentation_pop_from_block_map();
6522 _c4dbgp(
"mapblck[RVAL]: still mapblck!");
6523 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6526 _c4dbgp(
"mapblck[RVAL]: speculatively expect next keyval");
6527 m_evt_handler->add_sibling();
6534 _c4dbgp(
"mapblck[RVAL]: no longer mapblck!");
6535 goto mapblck_finish;
6538 else if(m_evt_handler->m_curr->line_contents.indentation ==
npos)
6540 _c4dbgp(
"mapblck[RVAL]: empty line!");
6541 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
6548 const char first = rem.str[0];
6549 const size_t startline = m_evt_handler->m_curr->pos.line;
6550 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
6551 _c4dbgpf(
"mapblck[RVAL]: '{}'", first);
6555 _c4dbgp(
"mapblck[RVAL]: scanning single-quoted scalar");
6556 sc = _scan_scalar_squot();
6557 if(!_maybe_scan_following_colon())
6559 _c4dbgp(
"mapblck[RVAL]: set as val");
6560 _handle_annotations_before_blck_val_scalar();
6561 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
6562 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
6567 if(startindent != m_evt_handler->m_curr->indref)
6569 _c4dbgp(
"mapblck[RVAL]: start new block map, set scalar as key");
6570 _handle_annotations_before_start_mapblck(startline);
6573 m_evt_handler->begin_map_val_block();
6574 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6575 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
6576 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6577 _maybe_skip_whitespace_tokens();
6583 _c4dbgp(
"mapblck[RVAL]: prev val empty+this is a key");
6584 m_evt_handler->set_val_scalar_plain_empty();
6585 m_evt_handler->add_sibling();
6586 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
6587 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6589 _maybe_skip_whitespace_tokens();
6593 else if(first ==
'"')
6595 _c4dbgp(
"mapblck[RVAL]: scanning double-quoted scalar");
6596 sc = _scan_scalar_dquot();
6597 if(!_maybe_scan_following_colon())
6599 _c4dbgp(
"mapblck[RVAL]: set as val");
6600 _handle_annotations_before_blck_val_scalar();
6601 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
6602 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
6607 if(startindent != m_evt_handler->m_curr->indref)
6609 _c4dbgp(
"mapblck[RVAL]: start new block map, set scalar as key");
6610 _handle_annotations_before_start_mapblck(startline);
6613 m_evt_handler->begin_map_val_block();
6614 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6615 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
6616 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6617 _maybe_skip_whitespace_tokens();
6623 _c4dbgp(
"mapblck[RVAL]: prev val empty+this is a key");
6624 m_evt_handler->set_val_scalar_plain_empty();
6625 m_evt_handler->add_sibling();
6626 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
6627 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6629 _maybe_skip_whitespace_tokens();
6635 else if(first ==
'|')
6637 _c4dbgp(
"mapblck[RVAL]: scanning block-literal scalar");
6639 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6640 _handle_annotations_before_blck_val_scalar();
6641 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
6642 m_evt_handler->set_val_scalar_literal(maybe_filtered);
6645 else if(first ==
'>')
6647 _c4dbgp(
"mapblck[RVAL]: scanning block-folded scalar");
6649 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6650 _handle_annotations_before_blck_val_scalar();
6651 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
6652 m_evt_handler->set_val_scalar_folded(maybe_filtered);
6655 else if(_scan_scalar_plain_map_blck(&sc))
6657 _c4dbgp(
"mapblck[RVAL]: plain scalar.");
6658 if(!_maybe_scan_following_colon())
6660 _c4dbgp(
"mapblck[RVAL]: set as val");
6661 _handle_annotations_before_blck_val_scalar();
6662 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
6663 m_evt_handler->set_val_scalar_plain(maybe_filtered);
6668 if(startindent != m_evt_handler->m_curr->indref)
6670 _c4dbgpf(
"mapblck[RVAL]: start new block map, set scalar as key {}", m_evt_handler->m_curr->indref);
6672 _handle_annotations_before_start_mapblck(startline);
6674 m_evt_handler->begin_map_val_block();
6675 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6676 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
6677 m_evt_handler->set_key_scalar_plain(maybe_filtered);
6678 _maybe_skip_whitespace_tokens();
6684 _c4dbgp(
"mapblck[RVAL]: prev val empty+this is a key");
6685 _handle_annotations_before_blck_val_scalar();
6686 m_evt_handler->set_val_scalar_plain_empty();
6687 m_evt_handler->add_sibling();
6688 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
6689 m_evt_handler->set_key_scalar_plain(maybe_filtered);
6691 _maybe_skip_whitespace_tokens();
6695 else if(first ==
'-')
6699 _c4dbgp(
"mapblck[RVAL]: start val seqblck");
6701 _handle_annotations_before_blck_val_scalar();
6702 m_evt_handler->begin_seq_val_block();
6704 _set_indentation(startindent);
6705 _line_progressed(1);
6706 _maybe_skip_whitespace_tokens();
6707 goto mapblck_finish;
6709 else if(m_evt_handler->m_curr->indref == 0 || m_evt_handler->m_curr->line_contents.indentation == 0 || _is_doc_begin_token(rem))
6711 _c4dbgp(
"mapblck[RVAL]: end+start doc");
6712 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, _is_doc_begin_token(rem));
6713 _start_doc_suddenly();
6714 _line_progressed(3);
6715 _maybe_skip_whitespace_tokens();
6716 goto mapblck_finish;
6723 else if(first ==
'[')
6725 _c4dbgp(
"mapblck[RVAL]: start val seqflow");
6727 _handle_annotations_before_blck_val_scalar();
6728 m_evt_handler->begin_seq_val_flow();
6730 _set_indentation(m_evt_handler->m_curr->indref + 1u);
6731 _line_progressed(1);
6732 goto mapblck_finish;
6734 else if(first ==
'{')
6736 _c4dbgp(
"mapblck[RVAL]: start val mapflow");
6738 _handle_annotations_before_blck_val_scalar();
6739 m_evt_handler->begin_map_val_flow();
6741 m_evt_handler->m_curr->scalar_col = m_evt_handler->m_curr->line_contents.indentation;
6742 _set_indentation(m_evt_handler->m_curr->indref + 1u);
6743 _line_progressed(1);
6744 goto mapblck_finish;
6746 else if(first ==
'*')
6748 csubstr ref = _scan_ref_map();
6749 _c4dbgpf(
"mapblck[RVAL]: ref! [{}]~~~{}~~~", ref.len, ref);
6750 if(startindent == m_evt_handler->m_curr->indref)
6752 _c4dbgpf(
"mapblck[RVAL]: same indentation {}", startindent);
6753 m_evt_handler->set_val_ref(ref);
6758 _c4dbgpf(
"mapblck[RVAL]: larger indentation {}>{}", startindent, m_evt_handler->m_curr->indref);
6759 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, startindent > m_evt_handler->m_curr->indref);
6760 if(_maybe_scan_following_colon())
6762 _c4dbgp(
"mapblck[RVAL]: start child map, block");
6764 _handle_annotations_before_blck_val_scalar();
6765 m_evt_handler->begin_map_val_block();
6766 m_evt_handler->set_key_ref(ref);
6767 _set_indentation(startindent);
6773 _c4dbgp(
"mapblck[RVAL]: was val ref");
6774 _handle_annotations_before_blck_val_scalar();
6775 m_evt_handler->set_val_ref(ref);
6779 _maybe_skip_whitespace_tokens();
6781 else if(first ==
'&')
6783 csubstr anchor = _scan_anchor();
6784 _c4dbgpf(
"mapblck[RVAL]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
6785 if(startindent == m_evt_handler->m_curr->indref)
6787 _c4dbgp(
"mapblck[RVAL]: anchor for next key. val is missing!");
6788 m_evt_handler->set_val_scalar_plain_empty();
6789 m_evt_handler->add_sibling();
6794 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
6796 else if(first ==
'!')
6798 csubstr tag = _scan_tag();
6799 _c4dbgpf(
"mapblck[RVAL]: tag! [{}]~~~{}~~~", tag.len, tag);
6800 if(startindent == m_evt_handler->m_curr->indref)
6802 _c4dbgp(
"mapblck[RVAL]: tag for next key. val is missing!");
6803 _handle_annotations_before_blck_val_scalar();
6804 m_evt_handler->set_val_scalar_plain_empty();
6805 m_evt_handler->add_sibling();
6810 _add_annotation(&m_pending_tags, tag, startindent, startline);
6812 else if(first ==
'?')
6814 if(startindent == m_evt_handler->m_curr->indref)
6816 _c4dbgp(
"mapblck[RVAL]: got '?'. val was empty");
6817 _handle_annotations_before_blck_val_scalar();
6818 m_evt_handler->set_val_scalar_plain_empty();
6819 m_evt_handler->add_sibling();
6822 else if(startindent > m_evt_handler->m_curr->indref)
6824 _c4dbgp(
"mapblck[RVAL]: start val mapblck");
6826 _handle_annotations_before_blck_val_scalar();
6827 m_evt_handler->begin_map_val_block();
6829 _set_indentation(startindent);
6835 m_was_inside_qmrk =
true;
6836 _line_progressed(1);
6837 _maybe_skip_whitespace_tokens();
6840 else if(first ==
':')
6842 if(startindent == m_evt_handler->m_curr->indref)
6844 _c4dbgp(
"mapblck[RVAL]: got ':'. val was empty, next key as well");
6845 m_evt_handler->set_val_scalar_plain_empty();
6846 m_evt_handler->add_sibling();
6847 m_evt_handler->set_key_scalar_plain_empty();
6849 else if(startindent > m_evt_handler->m_curr->indref)
6851 _c4dbgp(
"mapblck[RVAL]: start val mapblck");
6853 _handle_annotations_before_start_mapblck(startline);
6855 m_evt_handler->begin_map_val_block();
6856 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6857 m_evt_handler->set_key_scalar_plain_empty();
6865 _line_progressed(1);
6866 _maybe_skip_whitespace_tokens();
6869 else if(first ==
'.')
6871 _c4dbgp(
"mapblck[RVAL]: maybe doc?");
6872 csubstr rs = rem.sub(1);
6873 if(rs ==
".." || rs.begins_with(
".. "))
6875 _c4dbgp(
"seqblck[RVAL]: end doc expl");
6876 _end_doc_suddenly();
6877 _line_progressed(3);
6878 _maybe_skip_whitespace_tokens();
6879 goto mapblck_finish;
6887 else if(first ==
'\t')
6889 _c4dbgp(
"mapblck[RVAL]: skip tabs");
6890 _maybe_skipchars(
'\t');
6897 else if(has_any(
RNXT))
6899 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
6900 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
6901 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
6902 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
6906 if(m_evt_handler->m_curr->at_line_beginning())
6908 _c4dbgpf(
"mapblck[RNXT]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
6909 if(m_evt_handler->m_curr->indentation_eq())
6911 _c4dbgpf(
"mapblck[RNXT]: skip {} from indref", m_evt_handler->m_curr->indref);
6912 _line_progressed(m_evt_handler->m_curr->indref);
6913 _c4dbgp(
"mapblck[RNXT]: speculatively expect next keyval");
6914 m_evt_handler->add_sibling();
6918 else if(m_evt_handler->m_curr->indentation_lt())
6920 _c4dbgp(
"mapblck[RNXT]: smaller indentation!");
6921 _handle_indentation_pop_from_block_map();
6924 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6927 _c4dbgp(
"mapblck[RNXT]: speculatively expect next keyval");
6928 m_evt_handler->add_sibling();
6935 goto mapblck_finish;
6941 _c4dbgp(
"mapblck[RNXT]: NOT at line begin");
6942 if(!rem.begins_with_any(
" \t"))
6949 rem = m_evt_handler->m_curr->line_contents.rem;
6952 _c4dbgp(
"seqblck[RNXT]: again");
6960 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, rem.len > 0);
6961 const char first = rem.str[0];
6962 _c4dbgpf(
"mapblck[RNXT]: '{}'", _c4prc(first));
6965 if(m_evt_handler->m_curr->more_indented)
6967 _c4dbgp(
"mapblck[RNXT]: start child block map");
6968 C4_NOT_IMPLEMENTED();
6970 _line_progressed(1);
6971 _set_indentation(m_evt_handler->m_curr->scalar_col);
6972 m_evt_handler->m_curr->more_indented =
false;
6980 else if(first ==
' ')
6982 _c4dbgp(
"mapblck[RNXT]: skip spaces");
6983 _maybe_skip_whitespace_tokens();
6990 else if(has_any(
QMRK))
6992 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
6993 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
6994 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
6995 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
6999 if(m_evt_handler->m_curr->at_line_beginning())
7001 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.indentation !=
npos);
7002 if(m_evt_handler->m_curr->indentation_eq())
7004 _c4dbgpf(
"mapblck[QMRK]: skip {} from indref", m_evt_handler->m_curr->indref);
7005 _line_progressed(m_evt_handler->m_curr->indref);
7006 rem = m_evt_handler->m_curr->line_contents.rem;
7010 else if(m_evt_handler->m_curr->indentation_lt())
7012 _c4dbgp(
"mapblck[QMRK]: smaller indentation!");
7013 _handle_indentation_pop_from_block_map();
7014 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
7017 _c4dbgp(
"mapblck[QMRK]: still mapblck!");
7022 _c4dbgp(
"mapblck[QMRK]: no longer mapblck!");
7023 goto mapblck_finish;
7029 _c4dbgp(
"mapblck[QMRK]: larger indentation !");
7030 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
7031 rem = m_evt_handler->m_curr->line_contents.rem;
7039 const char first = rem.str[0];
7040 const size_t startline = m_evt_handler->m_curr->pos.line;
7041 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
7042 _c4dbgpf(
"mapblck[QMRK]: '{}'", first);
7046 _c4dbgp(
"mapblck[QMRK]: scanning single-quoted scalar");
7047 sc = _scan_scalar_squot();
7048 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
7049 if(!_maybe_scan_following_colon())
7051 _c4dbgp(
"mapblck[QMRK]: set as key");
7052 _handle_annotations_before_blck_key_scalar();
7053 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7058 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), set scalar as key");
7060 _handle_annotations_before_start_mapblck_as_key();
7061 m_evt_handler->begin_map_key_block();
7062 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7063 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7064 _maybe_skip_whitespace_tokens();
7065 _set_indentation(startindent);
7070 else if(first ==
'"')
7072 _c4dbgp(
"mapblck[QMRK]: scanning double-quoted scalar");
7073 sc = _scan_scalar_dquot();
7074 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
7075 if(!_maybe_scan_following_colon())
7077 _c4dbgp(
"mapblck[QMRK]: set as key");
7078 _handle_annotations_before_blck_key_scalar();
7079 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7084 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), set scalar as key");
7086 _handle_annotations_before_start_mapblck_as_key();
7087 m_evt_handler->begin_map_key_block();
7088 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7089 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7090 _maybe_skip_whitespace_tokens();
7091 _set_indentation(startindent);
7096 else if(first ==
'|')
7098 _c4dbgp(
"mapblck[QMRK]: scanning block-literal scalar");
7100 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
7101 csubstr maybe_filtered = _maybe_filter_key_scalar_literal(sb);
7102 _handle_annotations_before_blck_key_scalar();
7103 m_evt_handler->set_key_scalar_literal(maybe_filtered);
7106 else if(first ==
'>')
7108 _c4dbgp(
"mapblck[QMRK]: scanning block-literal scalar");
7110 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
7111 csubstr maybe_filtered = _maybe_filter_key_scalar_folded(sb);
7112 _handle_annotations_before_blck_key_scalar();
7113 m_evt_handler->set_key_scalar_folded(maybe_filtered);
7116 else if(_scan_scalar_plain_map_blck(&sc))
7118 _c4dbgp(
"mapblck[QMRK]: plain scalar");
7119 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
7120 if(!_maybe_scan_following_colon())
7122 _c4dbgp(
"mapblck[QMRK]: set as key");
7123 _handle_annotations_before_blck_key_scalar();
7124 m_evt_handler->set_key_scalar_plain(maybe_filtered);
7129 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), set scalar as key");
7131 _handle_annotations_before_start_mapblck_as_key();
7132 m_evt_handler->begin_map_key_block();
7133 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7134 m_evt_handler->set_key_scalar_plain(maybe_filtered);
7135 _maybe_skip_whitespace_tokens();
7136 _set_indentation(startindent);
7141 else if(first ==
':')
7143 if(startindent == m_evt_handler->m_curr->indref)
7145 _c4dbgp(
"mapblck[QMRK]: empty key");
7147 _handle_annotations_before_blck_key_scalar();
7148 m_evt_handler->set_key_scalar_plain_empty();
7149 _line_progressed(1);
7150 _maybe_skip_whitespace_tokens();
7154 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), empty key");
7156 _handle_annotations_before_start_mapblck_as_key();
7157 m_evt_handler->begin_map_key_block();
7158 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7159 m_evt_handler->set_key_scalar_plain_empty();
7160 _line_progressed(1);
7161 _maybe_skip_whitespace_tokens();
7162 _set_indentation(startindent);
7167 else if(first ==
'*')
7169 csubstr ref = _scan_ref_map();
7170 _c4dbgpf(
"mapblck[QMRK]: key ref! [{}]~~~{}~~~", ref.len, ref);
7171 if(!_maybe_scan_following_colon())
7173 _c4dbgp(
"mapblck[QMRK]: set ref as key");
7174 _handle_annotations_before_blck_key_scalar();
7175 m_evt_handler->set_key_ref(ref);
7180 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), set ref as key");
7182 _handle_annotations_before_blck_key_scalar();
7183 m_evt_handler->begin_map_key_block();
7184 m_evt_handler->set_key_ref(ref);
7185 _set_indentation(startindent);
7189 _maybe_skip_whitespace_tokens();
7191 else if(first ==
'&')
7193 csubstr anchor = _scan_anchor();
7194 _c4dbgpf(
"mapblck[QMRK]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
7195 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
7197 else if(first ==
'!')
7199 csubstr tag = _scan_tag();
7200 _c4dbgpf(
"mapblck[QMRK]: key tag! [{}]~~~{}~~~", tag.len, tag);
7201 _add_annotation(&m_pending_tags, tag, startindent, startline);
7203 else if(first ==
'-')
7205 _c4dbgp(
"mapblck[QMRK]: maybe doc?");
7206 csubstr rs = rem.sub(1);
7207 if(rs ==
"--" || rs.begins_with(
"-- "))
7209 _c4dbgp(
"mapblck[QMRK]: end+start doc");
7210 _start_doc_suddenly();
7211 _line_progressed(3);
7215 _c4dbgp(
"mapblck[QMRK]: start child seqblck (!)");
7217 _handle_annotations_before_blck_key_scalar();
7218 m_evt_handler->begin_seq_key_block();
7220 _set_indentation(startindent);
7221 _line_progressed(1);
7223 _maybe_skip_whitespace_tokens();
7224 goto mapblck_finish;
7226 else if(first ==
'[')
7228 _c4dbgp(
"mapblck[QMRK]: start child seqflow (!)");
7230 m_evt_handler->begin_seq_key_flow();
7232 _set_indentation(startindent);
7233 _line_progressed(1);
7234 goto mapblck_finish;
7236 else if(first ==
'{')
7238 _c4dbgp(
"mapblck[QMRK]: start child mapflow (!)");
7240 m_evt_handler->begin_map_key_flow();
7242 _set_indentation(startindent);
7243 _line_progressed(1);
7244 goto mapblck_finish;
7246 else if(first ==
'?')
7248 _c4dbgp(
"mapblck[QMRK]: another QMRK '?'");
7249 if(m_evt_handler->m_curr->indentation_eq())
7251 _c4dbgp(
"mapblck[QMRK]: ? indent eq - prev ? was for an empty keyval");
7252 m_evt_handler->set_key_scalar_plain_empty();
7253 m_evt_handler->set_val_scalar_plain_empty();
7254 m_evt_handler->add_sibling();
7258 _RYML_ASSERT_BASIC_(callbacks(), m_evt_handler->m_curr->indentation_gt());
7259 _c4dbgp(
"mapblck[QMRK]: ? indent gt - start child mapblck (!)");
7261 m_evt_handler->begin_map_key_block();
7263 _set_indentation(startindent);
7266 _line_progressed(1);
7267 _maybe_skip_whitespace_tokens();
7269 else if(first ==
'.')
7271 _c4dbgp(
"mapblck[QMRK]: maybe end doc?");
7272 csubstr rs = rem.sub(1);
7273 if(rs ==
".." || rs.begins_with(
".. "))
7275 _c4dbgp(
"mapblck[QMRK]: end+start doc");
7276 _end_doc_suddenly();
7277 _line_progressed(3);
7278 goto mapblck_finish;
7292 _c4dbgt(
"mapblck: again", 0);
7293 if(_finished_line())
7297 if(_finished_file())
7299 _c4dbgp(
"mapblck: file finished!");
7301 goto mapblck_finish;
7308 _c4dbgp(
"mapblck: finish");
// Handle the current line while the parser is at the top level and the kind
// of the next node is still unknown. The branches visible here cover only a
// flow sequence ('['), a flow map ('{'), a byte order mark, a double-quoted
// scalar and a plain scalar.
// NOTE(review): presumably the JSON-restricted counterpart of _handle_unk()
// (inferred from the name and the reduced set of branches) - confirm.
7314 template<
class EventHandler>
7315 void ParseEngine<EventHandler>::_handle_unk_json()
7317 _c4dbgpf(
"handle_unk_json indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
// Preconditions: none of RNXT/RSEQ/RMAP may be set, and RTOP must be set.
7319 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT|
RSEQ|
RMAP));
7320 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(
RTOP));
7322 _maybe_skip_comment();
7323 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// Skip leading spaces and tabs. When the remainder consists only of
// whitespace, first_not_of() yields npos and the whole remainder is consumed.
7327 size_t pos = rem.first_not_of(
" \t");
7330 pos = pos !=
npos ? pos : rem.len;
7331 _c4dbgpf(
"skipping indentation of {}", pos);
7332 _line_progressed(pos);
// Re-read the remainder after advancing the line position.
7333 rem = m_evt_handler->m_curr->line_contents.rem;
7336 _c4dbgpf(
"rem is now [{}]~~~{}~~~", rem.len, rem);
// '[' opens a flow sequence as the value.
7339 if(rem.begins_with(
'['))
7341 _c4dbgp(
"it's a seq");
7342 m_evt_handler->check_trailing_doc_token();
7344 m_evt_handler->begin_seq_val_flow();
// The indentation reference becomes the column of the opening bracket.
7346 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7347 m_doc_empty =
false;
// Consume the '[' character.
7348 _line_progressed(1);
// '{' opens a flow map as the value.
7350 else if(rem.begins_with(
'{'))
7352 _c4dbgp(
"it's a map");
7353 m_evt_handler->check_trailing_doc_token();
7355 m_evt_handler->begin_map_val_flow();
7357 m_doc_empty =
false;
7358 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
// Consume the '{' character.
7359 _line_progressed(1);
// A byte order mark: only the debug message is visible in this branch.
7361 else if(_handle_bom())
7363 _c4dbgp(
"byte order mark");
// Otherwise try to read a scalar; no scalar may already be stored (SSCL unset).
7367 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, ! has_any(
SSCL));
7368 _maybe_skip_whitespace_tokens();
7369 csubstr s = m_evt_handler->m_curr->line_contents.rem;
7372 const size_t startindent = m_evt_handler->m_curr->line_contents.indentation;
7373 const char first = s.str[0];
// Double-quoted scalar.
// NOTE(review): the condition guarding this branch is not visible in this
// excerpt; presumably a test of `first` against '"' - confirm.
7377 _c4dbgp(
"runk_json: scanning double-quoted scalar");
7378 m_evt_handler->check_trailing_doc_token();
7381 m_doc_empty =
false;
7382 sc = _scan_scalar_dquot();
7383 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
// With no ':' following, the scalar is the value.
// NOTE(review): handling of the case where a colon does follow is not visible here.
7384 if(!_maybe_scan_following_colon())
7386 _c4dbgp(
"runk_json: set as val");
7387 _handle_annotations_before_blck_val_scalar();
7388 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
// Plain (unquoted) scalar.
7395 else if(_scan_scalar_plain_unk(&sc))
7397 _c4dbgp(
"runk_json: got a plain scalar");
7398 m_evt_handler->check_trailing_doc_token();
7401 m_doc_empty =
false;
// With no ':' following, the scalar is the value; it is filtered using the
// indentation of the line on which it started.
7402 if(!_maybe_scan_following_colon())
7404 _c4dbgp(
"runk_json: set as val");
7405 _handle_annotations_before_blck_val_scalar();
7406 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
7407 m_evt_handler->set_val_scalar_plain(maybe_filtered);
// Handle the current line while the parser is at the top level and the kind
// of the next node is still unknown. After skipping comments and leading
// whitespace, the visible code handles document markers and directives (only
// for unindented lines), then dispatches on the first remaining character:
// flow seq/map, block seq ('-'), complex key ('?'), empty key (':'), anchor
// ('&'), reference ('*'), tag ('!'), and finally the scalar styles.
// NOTE(review): several guarding conditions and all brace lines are missing
// from this excerpt; comments below are hedged where the guard is not visible.
7424 template<
class EventHandler>
7425 void ParseEngine<EventHandler>::_handle_unk()
7427 _c4dbgpf(
"handle_unk indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
// Preconditions: none of RNXT/RSEQ/RMAP may be set, and RTOP must be set.
7429 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT|
RSEQ|
RMAP));
7430 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(
RTOP));
7432 _maybe_skip_comment();
7433 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// Skip leading spaces and tabs. When the remainder consists only of
// whitespace, first_not_of() yields npos and the whole remainder is consumed.
7437 size_t pos = rem.first_not_of(
" \t");
7440 pos = pos !=
npos ? pos : rem.len;
7441 _c4dbgpf(
"skipping {} whitespace characters", pos);
7442 _line_progressed(pos);
// Re-read the remainder after advancing the line position.
7443 rem = m_evt_handler->m_curr->line_contents.rem;
7446 _c4dbgpf(
"rem is now [{}]~~~{}~~~", rem.len, rem);
// Document markers, directives and the BOM are only considered when the line
// has zero indentation and we are either at the line beginning or on the
// line where a BOM was previously recorded.
7449 if(m_evt_handler->m_curr->line_contents.indentation == 0u && (_at_line_begin() || (m_bom_len && (m_evt_handler->m_curr->pos.line == m_bom_line))))
7451 _c4dbgpf(
"rtop: zero indent + at line begin. offset={}", m_evt_handler->m_curr->pos.offset);
7452 _c4dbgp(
"check BOM");
// Remember the line on which the BOM was found.
// NOTE(review): the BOM test guarding this assignment is not visible here.
7455 m_bom_line = m_evt_handler->m_curr->pos.line;
7456 _c4dbgpf(
"byte order mark! line={} offset={}", m_bom_line, m_evt_handler->m_curr->pos.offset);
7459 const char first = rem.str[0];
// Possible document start marker.
// NOTE(review): the test of `first` guarding this branch is not visible here.
7462 _c4dbgp(
"rtop: suspecting doc");
7463 if(_is_doc_begin_token(rem))
7465 _c4dbgp(
"rtop: begin doc");
7468 _set_indentation(0);
// Consume the three characters of the marker and any whitespace after it.
7470 _line_progressed(3u);
7471 _maybe_skip_whitespace_tokens();
// Possible document end marker.
7475 else if(first ==
'.')
7477 _c4dbgp(
"rtop: suspecting doc end");
7478 if(_is_doc_end_token(rem))
// NOTE(review): the condition choosing between "end doc" and
// "ignore end doc" is not visible in this excerpt.
7480 _c4dbgp(
"rtop: end doc");
7487 _c4dbgp(
"rtop: ignore end doc");
7490 _line_progressed(3u);
7491 _maybe_skip_whitespace_tokens();
// Directive line.
7495 else if(first ==
'%')
7497 _c4dbgpf(
"directive: {}", rem);
// A directive is rejected when the current document is not empty and the
// NDOC flag is not set.
7498 if(C4_UNLIKELY(!m_doc_empty && has_none(
NDOC)))
7499 _c4err(
"need document footer before directives");
7500 _handle_directive(rem);
// From here on, dispatch on the first character of the remainder.
7506 char first = rem.str[0];
// startindent is the indentation of the line; remindent is the column of the
// current remainder.
7508 const size_t startindent = m_evt_handler->m_curr->line_contents.indentation;
7509 size_t remindent = m_evt_handler->m_curr->line_contents.current_col(rem);
7512 _c4dbgpf(
"prev BOMlen={}", m_bom_len);
// On the BOM line, discount the BOM length from the remainder column.
7513 if(m_evt_handler->m_curr->pos.line == m_bom_line)
7515 _c4dbgpf(
"BOM remindent={} offset={}", remindent, m_evt_handler->m_curr->pos.offset);
7516 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, remindent >= m_bom_len);
7517 remindent -= m_bom_len;
// Flow sequence.
// NOTE(review): the guard for this branch is not visible; presumably a test
// of `first` against '[' (the next branch tests '{') - confirm.
7527 m_evt_handler->check_trailing_doc_token();
7529 m_doc_empty =
false;
// Usual case: the flow sequence is the value itself.
7530 if(C4_LIKELY( ! _annotations_require_key_container()))
7532 _c4dbgp(
"it's a seq, flow");
7533 _handle_annotations_before_blck_val_scalar();
7534 m_evt_handler->begin_seq_val_flow();
7536 _set_indentation(remindent);
// Otherwise a block map is started first and the flow sequence becomes its key.
7540 _c4dbgp(
"start new block map, set flow seq as key (!)");
7541 _handle_annotations_before_start_mapblck(m_evt_handler->m_curr->pos.line);
7542 m_evt_handler->begin_map_val_block();
7544 _handle_annotations_and_indentation_after_start_mapblck(remindent, m_evt_handler->m_curr->pos.line);
7545 m_evt_handler->begin_seq_key_flow();
7547 _set_indentation(remindent);
// Consume the opening character.
7549 _line_progressed(1);
// Flow map: same structure as the flow sequence handling above.
7551 else if(first ==
'{')
7553 m_evt_handler->check_trailing_doc_token();
7555 m_doc_empty =
false;
7556 if(C4_LIKELY( ! _annotations_require_key_container()))
7558 _c4dbgp(
"it's a map, flow");
7559 _handle_annotations_before_blck_val_scalar();
7560 m_evt_handler->begin_map_val_flow();
7562 _set_indentation(remindent);
// Otherwise a block map is started first and the flow map becomes its key.
7566 _c4dbgp(
"start new block map, set flow map as key (!)");
7567 _handle_annotations_before_start_mapblck(m_evt_handler->m_curr->pos.line);
7568 m_evt_handler->begin_map_val_block();
7570 _handle_annotations_and_indentation_after_start_mapblck(remindent, m_evt_handler->m_curr->pos.line);
7571 m_evt_handler->begin_map_key_flow();
7573 _set_indentation(remindent);
7575 _line_progressed(1);
// '-' accepted by _is_blck_token(): start a block sequence.
7577 else if(first ==
'-' && _is_blck_token(rem))
7579 _c4dbgp(
"it's a seq, block");
7580 m_evt_handler->check_trailing_doc_token();
7582 _handle_annotations_before_blck_val_scalar();
7583 m_evt_handler->begin_seq_val_block();
7585 m_doc_empty =
false;
7586 _set_indentation(remindent);
7587 _line_progressed(1);
7588 _maybe_skip_whitespace_tokens();
// '?' accepted by _is_blck_token(): start a block map whose key is complex.
7590 else if(first ==
'?' && _is_blck_token(rem))
7592 _c4dbgp(
"it's a map + this key is complex");
7593 m_evt_handler->check_trailing_doc_token();
7595 _handle_annotations_before_blck_val_scalar();
7596 m_evt_handler->begin_map_val_block();
7598 m_doc_empty =
false;
// Record that a '?' key indicator was entered.
7599 m_was_inside_qmrk =
true;
7600 _set_indentation(remindent);
7601 _line_progressed(1);
7602 _maybe_skip_whitespace_tokens();
// ':' accepted by _is_blck_token().
// NOTE(review): two alternatives follow ("empty key" and "prev val is a
// key"); the condition selecting between them is not visible here.
7604 else if(first ==
':' && _is_blck_token(rem))
7608 _c4dbgp(
"it's a map with an empty key");
7609 const size_t startline = m_evt_handler->m_curr->pos.line;
7610 m_evt_handler->check_trailing_doc_token();
7612 _handle_annotations_before_start_mapblck(startline);
7614 m_evt_handler->begin_map_val_block();
7615 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7616 m_evt_handler->set_key_scalar_plain_empty();
7617 m_doc_empty =
false;
7618 _set_indentation(startindent);
// Alternative: the previously set value becomes the first key of a new block
// map. The indentation reference is read before the handler call and then
// set again afterwards.
7622 _c4dbgp(
"actually prev val is a key!");
7623 size_t prev_indentation = m_evt_handler->m_curr->indref;
7624 m_evt_handler->actually_val_is_first_key_of_new_map_block();
7625 _set_indentation(prev_indentation);
// Consume the ':' and any whitespace after it.
7628 _line_progressed(1);
7629 _maybe_skip_whitespace_tokens();
// Anchor: stored as a pending annotation together with its column and line.
7631 else if(first ==
'&')
7633 csubstr anchor = _scan_anchor();
7634 _c4dbgpf(
"anchor! [{}]~~~{}~~~", anchor.len, anchor);
7635 m_evt_handler->check_trailing_doc_token();
7637 const size_t line = m_evt_handler->m_curr->pos.line;
7638 _add_annotation(&m_pending_anchors, anchor, remindent, line);
7639 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7640 m_doc_empty =
false;
// Reference: either the value itself or, when a ':' follows, the first key of
// a new block map.
7642 else if(first ==
'*')
7644 csubstr ref = _scan_ref_map();
7645 _c4dbgpf(
"ref! [{}]~~~{}~~~", ref.len, ref);
7646 m_evt_handler->check_trailing_doc_token();
7648 m_doc_empty =
false;
7649 if(!_maybe_scan_following_colon())
7651 _c4dbgp(
"runk: set val ref");
7652 _handle_annotations_before_blck_val_scalar();
7653 m_evt_handler->set_val_ref(ref);
7657 _c4dbgp(
"runk: start new block map, set ref as key");
7658 const size_t startline = m_evt_handler->m_curr->pos.line;
7659 _handle_annotations_before_start_mapblck(startline);
7660 m_evt_handler->begin_map_val_block();
7661 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7662 m_evt_handler->set_key_ref(ref);
7663 _maybe_skip_whitespace_tokens();
7664 _set_indentation(startindent);
// Tag: stored as a pending annotation together with its column and line.
7668 else if(first ==
'!')
7670 csubstr tag = _scan_tag();
7671 _c4dbgpf(
"unk: val tag! [{}]~~~{}~~~", tag.len, tag);
7674 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
7675 const size_t line = m_evt_handler->m_curr->pos.line;
7676 _add_annotation(&m_pending_tags, tag, indentation, line);
// Otherwise try the scalar styles; no scalar may already be stored (SSCL unset).
7680 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, ! has_any(
SSCL));
7681 _maybe_skip_whitespace_tokens();
7682 csubstr s = m_evt_handler->m_curr->line_contents.rem;
7685 const size_t startline = m_evt_handler->m_curr->pos.line;
// Single-quoted scalar.
// NOTE(review): the guard for this branch is not visible in this excerpt.
7690 _c4dbgp(
"runk: scanning single-quoted scalar");
7691 m_evt_handler->check_trailing_doc_token();
7694 m_doc_empty =
false;
7695 sc = _scan_scalar_squot();
// Filtering is deferred until it is known whether the scalar is a value or a
// key, because a different filter function is used for each.
7696 if(!_maybe_scan_following_colon())
7698 _c4dbgp(
"runk: set as val");
7699 _handle_annotations_before_blck_val_scalar();
7700 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
7701 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
7705 _c4dbgp(
"runk: start new block map, set scalar as key");
7706 _handle_annotations_before_start_mapblck(startline);
7708 m_evt_handler->begin_map_val_block();
7709 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7710 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
7711 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7712 _maybe_skip_whitespace_tokens();
7713 _set_indentation(startindent);
// Double-quoted scalar: same value-or-key decision as the single-quoted case.
7717 else if(first ==
'"')
7719 _c4dbgp(
"runk: scanning double-quoted scalar");
7720 m_evt_handler->check_trailing_doc_token();
7723 m_doc_empty =
false;
7724 sc = _scan_scalar_dquot();
7725 if(!_maybe_scan_following_colon())
7727 _c4dbgp(
"runk: set as val");
7728 _handle_annotations_before_blck_val_scalar();
7729 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
7730 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
7734 _c4dbgp(
"runk: start new block map, set double-quoted scalar as key");
7735 _handle_annotations_before_start_mapblck(startline);
7736 m_evt_handler->begin_map_val_block();
7738 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7739 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
7740 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7741 _maybe_skip_whitespace_tokens();
7742 _set_indentation(startindent);
// Block literal scalar: accepted only as a value; a following ':' is an error.
7746 else if(first ==
'|')
7748 _c4dbgp(
"runk: scanning block-literal scalar");
7749 m_evt_handler->check_trailing_doc_token();
7752 m_doc_empty =
false;
7754 _scan_block(&sb, startindent);
7755 if(C4_LIKELY(!_maybe_scan_following_colon()))
7757 _c4dbgp(
"runk: set as val");
7758 _handle_annotations_before_blck_val_scalar();
7759 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
7760 m_evt_handler->set_val_scalar_literal(maybe_filtered);
7764 _c4err(
"block literal keys must be enclosed in '?'");
// Block folded scalar: accepted only as a value; a following ':' is an error.
7767 else if(first ==
'>')
7769 _c4dbgp(
"runk: scanning block-folded scalar");
7770 m_evt_handler->check_trailing_doc_token();
7773 m_doc_empty =
false;
7775 _scan_block(&sb, startindent);
7776 if(C4_LIKELY(!_maybe_scan_following_colon()))
7778 _c4dbgp(
"runk: set as val");
7779 _handle_annotations_before_blck_val_scalar();
7780 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
7781 m_evt_handler->set_val_scalar_folded(maybe_filtered);
7785 _c4err(
"block folded keys must be enclosed in '?'");
// Plain scalar: the value itself or, when a ':' follows, the first key of a
// new block map.
7788 else if(_scan_scalar_plain_unk(&sc))
7790 _c4dbgp(
"runk: got a plain scalar");
7791 m_evt_handler->check_trailing_doc_token();
7794 m_doc_empty =
false;
7795 if(!_maybe_scan_following_colon())
7797 _c4dbgp(
"runk: set as val");
7798 _handle_annotations_before_blck_val_scalar();
7799 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
7800 m_evt_handler->set_val_scalar_plain(maybe_filtered);
7804 _c4dbgp(
"runk: start new block map, set scalar as key");
7805 _handle_annotations_before_start_mapblck(startline);
7807 m_evt_handler->begin_map_val_block();
7808 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7809 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
7810 m_evt_handler->set_key_scalar_plain(maybe_filtered);
7811 _maybe_skip_whitespace_tokens();
7812 _set_indentation(startindent);
7822 template<
class EventHandler>
7823 C4_COLD
void ParseEngine<EventHandler>::_handle_usty()
7825 _c4dbgpf(
"handle_usty target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
7827 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RBLCK|
RFLOW));
7829 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
7832 _c4dbgp(
"usty[RNXT]: finishing!");
7837 _maybe_skip_comment();
7838 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
7842 size_t pos = rem.first_not_of(
" \t");
7845 pos = pos !=
npos ? pos : rem.len;
7846 _c4dbgpf(
"skipping indentation of {}", pos);
7847 _line_progressed(pos);
7848 rem = m_evt_handler->m_curr->line_contents.rem;
7851 _c4dbgpf(
"rem is now [{}]~~~{}~~~", rem.len, rem);
7854 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, rem.len > 0);
7855 size_t startindent = m_evt_handler->m_curr->line_contents.indentation;
7856 char first = rem.str[0];
7859 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, ! has_any(
RMAP));
7860 _c4dbgpf(
"usty[RSEQ]: first='{}'", _c4prc(first));
7863 _c4dbgp(
"usty[RSEQ]: it's a flow seq. merging it");
7865 m_evt_handler->_push();
7867 _set_indentation(startindent);
7868 _line_progressed(1);
7869 _maybe_skip_whitespace_tokens();
7871 else if(first ==
'-' && _is_blck_token(rem))
7873 _c4dbgp(
"usty[RSEQ]: it's a block seq. merging it");
7875 m_evt_handler->_push();
7877 _set_indentation(startindent);
7878 _line_progressed(1);
7879 _maybe_skip_whitespace_tokens();
7883 _c4err(
"can only parse a seq into an existing seq");
7886 else if(has_any(
RMAP))
7888 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, ! has_any(
RSEQ));
7889 _c4dbgpf(
"usty[RMAP]: first='{}'", _c4prc(first));
7892 _c4dbgp(
"usty[RMAP]: it's a flow map. merging it");
7894 _handle_annotations_before_blck_val_scalar();
7895 m_evt_handler->_push();
7897 _set_indentation(startindent);
7898 _line_progressed(1);
7899 _maybe_skip_whitespace_tokens();
7901 else if(first ==
'?' && _is_blck_token(rem))
7903 _c4dbgp(
"usty[RMAP]: it's a block map + this key is complex");
7905 _handle_annotations_before_blck_val_scalar();
7906 m_evt_handler->_push();
7908 m_was_inside_qmrk =
true;
7909 _save_indentation();
7910 _line_progressed(1);
7911 _maybe_skip_whitespace_tokens();
7913 else if(first ==
':' && _is_blck_token(rem))
7915 _c4dbgp(
"usty[RMAP]: it's a map with an empty key");
7917 _handle_annotations_before_blck_val_scalar();
7918 m_evt_handler->_push();
7919 m_evt_handler->set_key_scalar_plain_empty();
7921 _save_indentation();
7922 _line_progressed(1);
7923 _maybe_skip_whitespace_tokens();
7925 else if(rem.begins_with(
'&'))
7927 csubstr anchor = _scan_anchor();
7928 _c4dbgpf(
"usty[RMAP]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
7929 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
7930 const size_t line = m_evt_handler->m_curr->pos.line;
7931 _add_annotation(&m_pending_anchors, anchor, indentation, line);
7932 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7934 else if(first ==
'*')
7936 csubstr ref = _scan_ref_map();
7937 _c4dbgpf(
"usty[RMAP]: ref! [{}]~~~{}~~~", ref.len, ref);
7938 if(!_maybe_scan_following_colon())
7940 _c4err(
"cannot read a VAL to a map");
7944 _c4dbgp(
"usty[RMAP]: start new block map, set ref as key");
7945 const size_t startline = m_evt_handler->m_curr->pos.line;
7947 _handle_annotations_before_start_mapblck(startline);
7948 m_evt_handler->_push();
7949 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7950 m_evt_handler->set_key_ref(ref);
7951 _maybe_skip_whitespace_tokens();
7952 _set_indentation(startindent);
7956 else if(first ==
'!')
7958 csubstr tag = _scan_tag();
7959 _c4dbgpf(
"usty[RMAP]: val tag! [{}]~~~{}~~~", tag.len, tag);
7962 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
7963 const size_t line = m_evt_handler->m_curr->pos.line;
7964 _add_annotation(&m_pending_tags, tag, indentation, line);
7966 else if(first ==
'[' || (first ==
'-' && _is_blck_token(rem)))
7968 _c4err(
"cannot parse a seq into an existing map");
7972 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, ! has_any(
SSCL));
7973 startindent = m_evt_handler->m_curr->line_contents.indentation;
7974 const size_t startline = m_evt_handler->m_curr->pos.line;
7976 _c4dbgpf(
"usty[RMAP]: maybe scalar. first='{}'", _c4prc(first));
7979 _c4dbgp(
"usty[RMAP]: scanning single-quoted scalar");
7980 sc = _scan_scalar_squot();
7981 if(!_maybe_scan_following_colon())
7983 _c4err(
"cannot read a VAL to a map");
7987 _c4dbgp(
"usty[RMAP]: start new block map, set scalar as key");
7989 _handle_annotations_before_start_mapblck(startline);
7990 m_evt_handler->_push();
7991 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7992 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
7993 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7994 _set_indentation(startindent);
7996 _maybe_skip_whitespace_tokens();
7999 else if(first ==
'"')
8001 _c4dbgp(
"usty[RMAP]: scanning double-quoted scalar");
8002 sc = _scan_scalar_dquot();
8003 if(!_maybe_scan_following_colon())
8005 _c4err(
"cannot read a VAL to a map");
8009 _c4dbgp(
"usty[RMAP]: start new block map, set double-quoted scalar as key");
8011 _handle_annotations_before_start_mapblck(startline);
8012 m_evt_handler->_push();
8013 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8014 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
8015 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
8016 _set_indentation(startindent);
8018 _maybe_skip_whitespace_tokens();
8021 else if(first ==
'|')
8023 _c4err(
"block literal keys must be enclosed in '?'");
8025 else if(first ==
'>')
8027 _c4err(
"block literal keys must be enclosed in '?'");
8029 else if(_scan_scalar_plain_unk(&sc))
8031 _c4dbgp(
"usty[RMAP]: got a plain scalar");
8032 if(!_maybe_scan_following_colon())
8034 _c4err(
"cannot read a VAL to a map");
8038 _c4dbgp(
"usty[RMAP]: start new block map, set scalar as key");
8040 _handle_annotations_before_start_mapblck(startline);
8041 m_evt_handler->_push();
8042 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8043 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
8044 m_evt_handler->set_key_scalar_plain(maybe_filtered);
8045 _set_indentation(startindent);
8047 _maybe_skip_whitespace_tokens();
// NOTE(review): this is the interior of a ParseEngine member whose signature, braces and
// several condition lines are not visible in this listing. The usty[UNK] debug tags suggest
// it is the handler for the unknown-style (USTY) state -- confirm against the full source.
//
// The visible code dispatches on `first` (presumably the first character of `rem`, the
// unparsed remainder of the current line -- its declaration is not visible here) and either
// opens a container, records a pending anchor/tag, or scans a scalar. A scanned scalar (or
// reference) becomes a value when no colon follows it, and otherwise becomes the first key
// of a newly started block map.
//
// Precondition: the parser must not be in sequence-reading state (RSEQ).
8058 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, ! has_any(
RSEQ));
8059 _c4dbgpf(
"usty[UNK]: first='{}'", _c4prc(first));
// Flow-sequence case (the guarding condition is not visible in this listing): flush pending
// annotations, open a flow sequence as the value, set the indentation to startindent,
// advance the line by 1 and skip whitespace tokens.
8062 _c4dbgp(
"usty[UNK]: it's a flow seq");
8064 _handle_annotations_before_blck_val_scalar();
8065 m_evt_handler->begin_seq_val_flow();
8067 _set_indentation(startindent);
8068 _line_progressed(1);
8069 _maybe_skip_whitespace_tokens();
// Block-sequence case: a dash that _is_blck_token() accepts as a block token.
// Same bookkeeping as above, but the sequence is opened in block style.
8071 else if(first ==
'-' && _is_blck_token(rem))
8073 _c4dbgp(
"usty[UNK]: it's a block seq");
8075 _handle_annotations_before_blck_val_scalar();
8076 m_evt_handler->begin_seq_val_block();
8078 _set_indentation(startindent);
8079 _line_progressed(1);
8080 _maybe_skip_whitespace_tokens();
// Flow-map case: an opening curly brace starts a flow map as the value.
8082 else if(first ==
'{')
8084 _c4dbgp(
"usty[UNK]: it's a flow map");
8086 _handle_annotations_before_blck_val_scalar();
8087 m_evt_handler->begin_map_val_flow();
8089 _set_indentation(startindent);
8090 _line_progressed(1);
8091 _maybe_skip_whitespace_tokens();
// Explicit-key case: a question mark accepted as a block token starts a block map whose
// first key is complex. m_was_inside_qmrk is raised, and the indentation is saved via
// _save_indentation() instead of being set to startindent.
8093 else if(first ==
'?' && _is_blck_token(rem))
8095 _c4dbgp(
"usty[UNK]: it's a map + this key is complex");
8097 _handle_annotations_before_blck_val_scalar();
8098 m_evt_handler->begin_map_val_block();
8100 m_was_inside_qmrk =
true;
8101 _save_indentation();
8102 _line_progressed(1);
8103 _maybe_skip_whitespace_tokens();
// Empty-key case: a colon accepted as a block token starts a block map and immediately
// emits an empty plain scalar as its key.
8105 else if(first ==
':' && _is_blck_token(rem))
8107 _c4dbgp(
"usty[UNK]: it's a map with an empty key");
8109 _handle_annotations_before_blck_val_scalar();
8110 m_evt_handler->begin_map_val_block();
8111 m_evt_handler->set_key_scalar_plain_empty();
8113 _save_indentation();
8114 _line_progressed(1);
8115 _maybe_skip_whitespace_tokens();
// Anchor case: scan the anchor and queue it in m_pending_anchors together with the column
// of rem and the current line; nothing is emitted to the handler yet. The indentation is
// then set to that same column expression.
8117 else if(first ==
'&')
8119 csubstr anchor = _scan_anchor();
8120 _c4dbgpf(
"usty[UNK]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
8121 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8122 const size_t line = m_evt_handler->m_curr->pos.line;
8123 _add_annotation(&m_pending_anchors, anchor, indentation, line);
8124 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
// Reference (alias) case: scan the reference, then check whether a colon follows it.
8126 else if(first ==
'*')
8128 csubstr ref = _scan_ref_map();
8129 _c4dbgpf(
"usty[UNK]: ref! [{}]~~~{}~~~", ref.len, ref);
8130 if(!_maybe_scan_following_colon())
// No colon follows: the reference is emitted as a value.
8132 _c4dbgp(
"usty[UNK]: set val ref");
8133 _handle_annotations_before_blck_val_scalar();
8134 m_evt_handler->set_val_ref(ref);
// Otherwise (the else line is not visible in this listing): start a block map and use the
// reference as its first key. Annotation helpers run before and after the map is opened.
8138 _c4dbgp(
"usty[UNK]: start new block map, set ref as key");
8139 const size_t startline = m_evt_handler->m_curr->pos.line;
8141 _handle_annotations_before_start_mapblck(startline);
8142 m_evt_handler->begin_map_val_block();
8143 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8144 m_evt_handler->set_key_ref(ref);
8145 _maybe_skip_whitespace_tokens();
8146 _set_indentation(startindent);
// Tag case: scan the tag and queue it in m_pending_tags with the column of rem and the
// current line; as with anchors, nothing is emitted to the handler here.
8150 else if(first ==
'!')
8152 csubstr tag = _scan_tag();
8153 _c4dbgpf(
"usty[UNK]: val tag! [{}]~~~{}~~~", tag.len, tag);
8156 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8157 const size_t line = m_evt_handler->m_curr->pos.line;
8158 _add_annotation(&m_pending_tags, tag, indentation, line);
// Scalar handling (the enclosing branch header is not visible in this listing).
// No scalar may be stored at this point (SSCL must be clear). startindent is re-read from
// the indentation of the current line, and the current line number is captured because the
// map-start annotation helpers below take it as an argument.
8162 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, ! has_any(
SSCL));
8163 startindent = m_evt_handler->m_curr->line_contents.indentation;
8164 const size_t startline = m_evt_handler->m_curr->pos.line;
8167 _c4dbgpf(
"usty[UNK]: maybe scalar. first='{}'", _c4prc(first));
// Single-quoted scalar (guarding condition not visible): scan it, then decide between
// value and key depending on whether a colon follows.
8170 _c4dbgp(
"usty[UNK]: scanning single-quoted scalar");
8171 sc = _scan_scalar_squot();
8172 if(!_maybe_scan_following_colon())
// No colon: filter with the val-flavored helper and emit as a single-quoted value.
8174 _c4dbgp(
"usty[UNK]: set as val");
8175 _handle_annotations_before_blck_val_scalar();
8176 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
8177 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
// Colon follows: open a block map, filter with the key-flavored helper and emit as key.
8182 _c4dbgp(
"usty[UNK]: start new block map, set scalar as key");
8184 _handle_annotations_before_start_mapblck(startline);
8185 m_evt_handler->begin_map_val_block();
8186 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8187 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
8188 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
8189 _set_indentation(startindent);
8191 _maybe_skip_whitespace_tokens();
// Double-quoted scalar: same value-or-key decision as the single-quoted case, using the
// dquot scan/filter helpers and the dquoted handler events.
8194 else if(first ==
'"')
8196 _c4dbgp(
"usty[UNK]: scanning double-quoted scalar");
8197 sc = _scan_scalar_dquot();
8198 if(!_maybe_scan_following_colon())
8200 _c4dbgp(
"usty[UNK]: set as val");
8201 _handle_annotations_before_blck_val_scalar();
8202 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
8203 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
8208 _c4dbgp(
"usty[UNK]: start new block map, set double-quoted scalar as key");
8210 _handle_annotations_before_start_mapblck(startline);
8211 m_evt_handler->begin_map_val_block();
8212 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8213 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
8214 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
8215 _set_indentation(startindent);
8217 _maybe_skip_whitespace_tokens();
// Block-literal scalar (pipe indicator): scanned into `sb` (declared on a line not visible
// here) relative to startindent. This branch only emits a value; there is no key path.
8220 else if(first ==
'|')
8222 _c4dbgp(
"usty[UNK]: scanning block-literal scalar");
8224 _scan_block(&sb, startindent);
8225 _c4dbgp(
"usty[UNK]: set as val");
8226 _handle_annotations_before_blck_val_scalar();
8227 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
8228 m_evt_handler->set_val_scalar_literal(maybe_filtered);
// Block-folded scalar (greater-than indicator): same flow as the literal case, using the
// folded filter helper and the folded handler event.
8231 else if(first ==
'>')
8233 _c4dbgp(
"usty[UNK]: scanning block-folded scalar");
8235 _scan_block(&sb, startindent);
8236 _c4dbgp(
"usty[UNK]: set as val");
8237 _handle_annotations_before_blck_val_scalar();
8238 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
8239 m_evt_handler->set_val_scalar_folded(maybe_filtered);
// Plain scalar: taken only when _scan_scalar_plain_unk() reports success and fills `sc`.
// The plain filters additionally receive startindent.
8242 else if(_scan_scalar_plain_unk(&sc))
8244 _c4dbgp(
"usty[UNK]: got a plain scalar");
8245 if(!_maybe_scan_following_colon())
8247 _c4dbgp(
"usty[UNK]: set as val");
8248 _handle_annotations_before_blck_val_scalar();
8249 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
8250 m_evt_handler->set_val_scalar_plain(maybe_filtered);
8255 _c4dbgp(
"usty[UNK]: start new block map, set scalar as key");
8257 _handle_annotations_before_start_mapblck(startline);
8258 m_evt_handler->begin_map_val_block();
8259 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8260 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
8261 m_evt_handler->set_key_scalar_plain(maybe_filtered);
8262 _set_indentation(startindent);
8264 _maybe_skip_whitespace_tokens();
// NOTE(review): the signature lines of this member template are missing from this listing;
// the body reads `filename` and `src`. The reduced state dispatch visible below (RMAP and
// RUNK only, no RBLCK or USTY) suggests this is the JSON entry point -- confirm against the
// full source. The handler calls inside each dispatch branch are also not visible here.
//
// Visible flow: start the parse and the stream on the event handler, loop over the input
// line by line, dispatch on the current parser state, and finish the parse.
8278 template<
class EventHandler>
// Precondition: the event handler stack holds at least one state.
8281 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
// Hand the source buffer to the handler, passing _s_relocate_arena and this engine along
// with it.
8285 m_evt_handler->start_parse(filename.str, src, &_s_relocate_arena,
this);
8286 m_evt_handler->begin_stream();
// Outer loop: until the whole source is consumed.
8287 while( ! _finished_file())
// Inner loop: until the current line is consumed.
8290 while( ! _finished_line())
// Inside the inner loop the remainder of the current line must be non-empty.
8293 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, ! m_evt_handler->m_curr->line_contents.rem.empty());
8298 else if(has_any(
RMAP))
8302 else if(has_any(
RUNK))
// No known state flag matched: report an internal error.
8308 _c4err(
"internal error");
8311 if(_finished_file())
8316 m_evt_handler->finish_parse();
// NOTE(review): the signature lines of this member template are missing from this listing;
// the body reads `filename` and `src`. The wider state dispatch visible below (RBLCK, RUNK,
// USTY) suggests this is the YAML entry point -- confirm against the full source. Several
// branch headers and handler calls are not visible here.
//
// Visible flow: start the parse and the stream on the event handler, loop over the input
// line by line, dispatch on the current parser state, and finish the parse.
8322 template<
class EventHandler>
// Precondition: the event handler stack holds at least one state.
8325 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
// Hand the source buffer to the handler, passing _s_relocate_arena and this engine along
// with it.
8329 m_evt_handler->start_parse(filename.str, src, &_s_relocate_arena,
this);
8330 m_evt_handler->begin_stream();
// Outer loop: until the whole source is consumed.
8331 while( ! _finished_file())
// Inner loop: until the current line is consumed.
8334 while( ! _finished_line())
// Inside the inner loop the remainder of the current line must be non-empty.
8337 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, ! m_evt_handler->m_curr->line_contents.rem.empty());
// In a branch whose header is not visible, the state is asserted to be a map (RMAP).
8348 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP));
// Block mode: sequences go to _handle_seq_block(); otherwise the state is asserted to be
// a map and goes to _handle_map_block(). The condition choosing between the two is not
// visible in this listing.
8357 else if(has_any(
RBLCK))
8361 _handle_seq_block();
8365 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP));
8366 _handle_map_block();
8369 else if(has_any(
RUNK))
8373 else if(has_any(
USTY))
// No known state flag matched: report an internal error.
8379 _c4err(
"internal error");
8382 if(_finished_file())
8387 m_evt_handler->finish_parse();
8396 #undef _c4dbgnextline
8398 #if defined(_MSC_VER)
8399 # pragma warning(pop)
8400 #elif defined(__clang__)
8401 # pragma clang diagnostic pop
8402 #elif defined(__GNUC__)
8403 # pragma GCC diagnostic pop
Lightweight generic type-safe wrappers for converting individual values to/from strings.
This is the main driver of parsing logic: it scans the YAML or JSON source for tokens,...
FilterResult filter_scalar_plain(csubstr scalar, substr dst, size_t indentation)
filter a plain scalar
csubstr location_contents(Location const &loc) const
Get the string starting at a particular location, to the end of the parsed source buffer.
FilterResult filter_scalar_squoted(csubstr scalar, substr dst)
filter a single-quoted scalar
ParseEngine(EventHandler *evt_handler, ParserOptions opts={})
FilterResult filter_scalar_dquoted(csubstr scalar, substr dst)
filter a double-quoted scalar
void parse_json_in_place_ev(csubstr filename, substr src)
parse JSON in place, emitting events to the current handler
Location val_location(const char *val) const
Given a pointer to a buffer position, get the location.
FilterResult filter_scalar_plain_in_place(substr scalar, size_t cap, size_t indentation)
filter a plain scalar in place
FilterResult filter_scalar_squoted_in_place(substr scalar, size_t cap)
filter a single-quoted scalar in place
FilterResultExtending filter_scalar_dquoted_in_place(substr scalar, size_t cap)
filter a double-quoted scalar in place
void parse_in_place_ev(csubstr filename, substr src)
parse YAML in place, emitting events to the current handler
FilterResult filter_scalar_block_literal_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
filter a block-literal scalar in place
FilterResult filter_scalar_block_literal(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
filter a block-literal scalar
FilterResult filter_scalar_block_folded_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
filter a block-folded scalar in place
ParseEngine & operator=(ParseEngine &&) noexcept
FilterResult filter_scalar_block_folded(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
filter a block-folded scalar
#define RYML_LOCATIONS_SMALL_THRESHOLD
threshold at which a location search will revert from linear to binary search.
bool atou(csubstr str, T *v) noexcept
Convert a trimmed string to an unsigned integral value.
void err_parse(ErrorDataParse const &errdata, const char *msg)
trigger a parse error to its respective handler, with a non-formatted error message.
enum c4::yml::BlockChomp_ BlockChomp_e
@ CHOMP_CLIP
single newline at end (default)
@ CHOMP_KEEP
all newlines from end (+)
@ CHOMP_STRIP
no newline at end (-)
bool read_hex(csubstr s, I *v) noexcept
read a hexadecimal integer from a string.
substr decode_code_point(substr out, csubstr code_point)
decode the given code_point, writing into the output string in out.
@ npos
a null string position
@ RTOP
reading at top level
@ RSET
the (implicit) map being read is a !!set.
@ RNXT
read next val or keyval
@ RUNK
reading unknown state (when starting): must determine whether scalar, map or seq
@ RKEY
reading a scalar as key
@ RKCL
reading the key colon (ie the : after the key in the map)
@ NDOC
no document mode. a document has ended and another has not started yet.
@ QSCL
stored scalar was quoted
@ RBLCK
reading in block mode
@ USTY
reading in unknown style mode - must determine FLOW or BLCK reading an implicit map nested in an expl...
@ QMRK
reading an explicit key (? key)
@ SSCL
there's a stored scalar
@ RVAL
reading a scalar as val
@ RFLOW
reading is inside explicit flow chars: [] or {}
int ParserFlag_t
data type for ParserState_e
size_t to_chars(substr buf, escaped_scalar e)
formatting implementation to escape a scalar with x
@ UTF16BE
UTF16, Big-Endian.
@ UTF16LE
UTF16, Little-Endian.
@ NOBOM
No Byte Order Mark was found.
@ UTF32BE
UTF32, Big-Endian.
@ UTF32LE
UTF32, Little-Endian.
enum c4::yml::Encoding_ Encoding_e
(Undefined by default) Use shorter error message from checks/asserts: do not show the check condition...
#define _RYML_WITHOUT_TAB_TOKENS(...)
#define _ryml_relocate(s)
#define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without)
#define _RYML_WITH_TAB_TOKENS(...)
Options to give to the parser to control its behavior.
utilities for UTF and Byte Order Mark