1 #ifndef _C4_YML_PARSE_ENGINE_DEF_HPP_
2 #define _C4_YML_PARSE_ENGINE_DEF_HPP_
5 #include "c4/error.hpp"
11 #include "c4/yml/detail/dbgprint.hpp"
14 #include <c4/dump.hpp>
15 #include "c4/yml/detail/print.hpp"
17 do { RYML_DEBUG_BREAK(); this->_err(RYML_LOC_HERE(), __VA_ARGS__); } while(0)
20 this->_err(RYML_LOC_HERE(), __VA_ARGS__)
24 #if defined(RYML_WITH_TAB_TOKENS)
25 #define _RYML_WITH_TAB_TOKENS(...) __VA_ARGS__
26 #define _RYML_WITHOUT_TAB_TOKENS(...)
27 #define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) with
29 #define _RYML_WITH_TAB_TOKENS(...)
30 #define _RYML_WITHOUT_TAB_TOKENS(...) __VA_ARGS__
31 #define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) without
36 #define _c4dbgnextline() \
38 _c4dbgq("\n-----------"); \
39 _c4dbgt("handling line={}, offset={}B", \
40 m_evt_handler->m_curr->pos.line, \
41 m_evt_handler->m_curr->pos.offset); \
46 # pragma warning(push)
47 # pragma warning(disable: 4296)
48 # pragma warning(disable: 4702)
49 #elif defined(__clang__)
50 # pragma clang diagnostic push
51 # pragma clang diagnostic ignored "-Wtype-limits"
52 # pragma clang diagnostic ignored "-Wformat-nonliteral"
53 # pragma clang diagnostic ignored "-Wold-style-cast"
54 #elif defined(__GNUC__)
55 # pragma GCC diagnostic push
56 # pragma GCC diagnostic ignored "-Wtype-limits"
57 # pragma GCC diagnostic ignored "-Wformat-nonliteral"
58 # pragma GCC diagnostic ignored "-Wold-style-cast"
60 # pragma GCC diagnostic ignored "-Wduplicated-branches"
// Token test on the start of `s`.
// Preconditions asserted below: `s` is non-empty and its first character
// is one of '-', ':' or '?'.
// NOTE(review): the return expression is not visible in this excerpt.
71 C4_HOT C4_ALWAYS_INLINE
bool _is_blck_token(csubstr s) noexcept
73 _RYML_ASSERT_BASIC(s.len > 0);
74 _RYML_ASSERT_BASIC(s.str[0] ==
'-' || s.str[0] ==
':' || s.str[0] ==
'?');
// Tests whether `s` starts with the three characters "---".
// Preconditions asserted below: `s` begins with '-' and does not end with
// a '\n' or '\r' (i.e. the caller must pass a line stripped of its newline).
78 inline bool _is_doc_begin_token(csubstr s)
80 _RYML_ASSERT_BASIC(s.begins_with(
'-'));
81 _RYML_ASSERT_BASIC(!s.ends_with(
"\n"));
82 _RYML_ASSERT_BASIC(!s.ends_with(
"\r"));
// The first '-' is guaranteed by the assertion above, so only the 2nd and
// 3rd characters are compared here.
// NOTE(review): the expression may continue on lines not shown in this
// excerpt (e.g. a check on what follows the marker) — confirm.
83 return (s.len >= 3 && s.str[1] ==
'-' && s.str[2] ==
'-')
// Tests whether `s` starts with the three characters "...".
// Preconditions asserted below: `s` begins with '.' and does not end with
// a '\n' or '\r' (i.e. the caller must pass a line stripped of its newline).
87 inline bool _is_doc_end_token(csubstr s)
89 _RYML_ASSERT_BASIC(s.begins_with(
'.'));
90 _RYML_ASSERT_BASIC(!s.ends_with(
"\n"));
91 _RYML_ASSERT_BASIC(!s.ends_with(
"\r"));
// The first '.' is guaranteed by the assertion above, so only the 2nd and
// 3rd characters are compared here.
// NOTE(review): the expression may continue on lines not shown in this
// excerpt — confirm.
92 return (s.len >= 3 && s.str[1] ==
'.' && s.str[2] ==
'.')
// Checks whether `s` looks like a document marker.
// Two return expressions are visible: one tests that the 2nd and 3rd
// characters are both '-', the other that they are both '.'.
// NOTE(review): the dispatch on the first character and any length checks
// guarding the s.str[1]/s.str[2] accesses are not visible in this excerpt.
96 inline bool _is_doc_token(csubstr s) noexcept
124 return (s.str[1] ==
'-' && s.str[2] ==
'-')
128 return (s.str[1] ==
'.' && s.str[2] ==
'.')
// Recognizes the JSON literals "false", "true" and "null" at the start of
// `s`. `s` must be non-empty (asserted).
// NOTE(review): the return statements are not visible in this excerpt;
// callers in this file pass the result to s.first(len), so it is presumably
// the length of the matched literal (and 0 when nothing matches) — confirm.
135 inline size_t _is_special_json_scalar(csubstr s)
137 _RYML_ASSERT_BASIC(s.len);
141 if(s.len >= 5 && s.begins_with(
"false"))
145 if(s.len >= 4 && s.begins_with(
"true"))
149 if(s.len >= 4 && s.begins_with(
"null"))
// Returns 1 when `nl` followed by `following` forms a two-character newline
// ("\n\r" or "\r\n"), and 0 otherwise. The boolean result is converted to
// size_t so that callers can add it directly to a position/length.
159 C4_ALWAYS_INLINE
size_t _extend_from_combined_newline(
char nl,
char following)
161 return (nl ==
'\n' && following ==
'\r') || (nl ==
'\r' && following ==
'\n');
// Advances `rem` past its first newline character.
// Visible steps: locate the first '\r' or '\n', keep what lies to the right
// of it, then test whether the newline is a two-character combination
// (see _extend_from_combined_newline).
// NOTE(review): the handling of "no newline found", of an empty remainder
// before rem.front(), and the final return are not visible in this excerpt.
165 inline substr from_next_line(substr rem)
167 size_t nlpos = rem.first_of(
"\r\n");
170 const char nl = rem[nlpos];
171 rem = rem.right_of(nlpos);
174 if(_extend_from_combined_newline(nl, rem.front()))
// Scans `r` starting at index *i (which must point at a '\n', asserted) and
// returns a newline count. *i is an in/out cursor: the loop advances it in
// place.
// The loop distinguishes '\n' from the whitespace characters ' ', '\t' and
// '\r'.
// NOTE(review): the bodies of the branches (counting, loop exit) are not
// visible in this excerpt.
182 inline size_t _count_following_newlines(csubstr r,
size_t *C4_RESTRICT i)
184 _RYML_ASSERT_BASIC(r[*i] ==
'\n');
185 size_t numnl_following = 0;
187 for( ; *i < r.len; ++(*i))
189 if(r.str[*i] ==
'\n')
192 else if(r.str[*i] ==
' ' || r.str[*i] ==
'\t' || r.str[*i] ==
'\r')
197 return numnl_following;
// Indentation-aware overload: scans `r` starting at index *i (which must
// point at a '\n', asserted) and returns a newline count. *i is an in/out
// cursor advanced in place.
// NOTE(review): the condition selecting between the two visible loops and
// the bodies of most branches are not visible in this excerpt.
202 inline size_t _count_following_newlines(csubstr r,
size_t *C4_RESTRICT i,
size_t indentation)
204 _RYML_ASSERT_BASIC(r[*i] ==
'\n');
205 size_t numnl_following = 0;
// First visible loop: same character classes as the two-argument overload
// ('\n' versus ' ', '\t', '\r').
209 for( ; *i < r.len; ++(*i))
211 if(r.str[*i] ==
'\n')
214 else if(r.str[*i] ==
' ' || r.str[*i] ==
'\t' || r.str[*i] ==
'\r')
// Second visible loop: after a '\n', an inner loop walks over ' ' and '\r'
// characters; `stop` marks the position `indentation` characters further on.
222 for( ; *i < r.len; ++(*i))
224 if(r.str[*i] ==
'\n')
228 size_t stop = *i + indentation;
229 for( ; *i < r.len; ++(*i))
231 if(r.str[*i] !=
' ' && r.str[*i] !=
'\r')
233 _RYML_ASSERT_BASIC(*i < stop);
238 else if(r.str[*i] ==
' ' || r.str[*i] ==
'\t' || r.str[*i] ==
'\r')
244 return numnl_following;
254 template<
class EventHandler>
261 template<
class EventHandler>
266 , m_evt_handler(evt_handler)
267 , m_pending_anchors()
269 , m_was_inside_qmrk(false)
273 , m_newline_offsets()
274 , m_newline_offsets_size(0)
275 , m_newline_offsets_capacity(0)
276 , m_newline_offsets_buf()
278 _RYML_CHECK_BASIC(evt_handler);
281 template<
class EventHandler>
283 : m_options(that.m_options)
284 , m_file(that.m_file)
286 , m_evt_handler(that.m_evt_handler)
287 , m_pending_anchors(that.m_pending_anchors)
288 , m_pending_tags(that.m_pending_tags)
289 , m_was_inside_qmrk(
false)
293 , m_newline_offsets(that.m_newline_offsets)
294 , m_newline_offsets_size(that.m_newline_offsets_size)
295 , m_newline_offsets_capacity(that.m_newline_offsets_capacity)
296 , m_newline_offsets_buf(that.m_newline_offsets_buf)
301 template<
class EventHandler>
303 : m_options(that.m_options)
304 , m_file(that.m_file)
306 , m_evt_handler(that.m_evt_handler)
307 , m_pending_anchors(that.m_pending_anchors)
308 , m_pending_tags(that.m_pending_tags)
309 , m_was_inside_qmrk(false)
313 , m_newline_offsets()
314 , m_newline_offsets_size()
315 , m_newline_offsets_capacity()
316 , m_newline_offsets_buf()
318 if(that.m_newline_offsets_capacity)
320 _resize_locations(that.m_newline_offsets_capacity);
321 _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity == that.m_newline_offsets_capacity);
322 memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size *
sizeof(
size_t));
323 m_newline_offsets_size = that.m_newline_offsets_size;
327 template<
class EventHandler>
331 m_options = (that.m_options);
332 m_file = (that.m_file);
333 m_buf = (that.m_buf);
334 m_evt_handler = that.m_evt_handler;
335 m_pending_anchors = that.m_pending_anchors;
336 m_pending_tags = that.m_pending_tags;
337 m_was_inside_qmrk = that.m_was_inside_qmrk;
338 m_doc_empty = that.m_doc_empty;
339 m_prev_colon = that.m_prev_colon;
340 m_encoding = that.m_encoding;
341 m_newline_offsets = (that.m_newline_offsets);
342 m_newline_offsets_size = (that.m_newline_offsets_size);
343 m_newline_offsets_capacity = (that.m_newline_offsets_capacity);
344 m_newline_offsets_buf = (that.m_newline_offsets_buf);
349 template<
class EventHandler>
355 m_options = (that.m_options);
356 m_file = (that.m_file);
357 m_buf = (that.m_buf);
358 m_evt_handler = that.m_evt_handler;
359 m_pending_anchors = that.m_pending_anchors;
360 m_pending_tags = that.m_pending_tags;
361 m_was_inside_qmrk = that.m_was_inside_qmrk;
362 m_doc_empty = that.m_doc_empty;
363 m_prev_colon = that.m_prev_colon;
364 m_encoding = that.m_encoding;
365 if(that.m_newline_offsets_capacity > m_newline_offsets_capacity)
366 _resize_locations(that.m_newline_offsets_capacity);
367 _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_capacity);
368 _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_size);
369 memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size *
sizeof(
size_t));
370 m_newline_offsets_size = that.m_newline_offsets_size;
371 m_newline_offsets_buf = that.m_newline_offsets_buf;
376 template<
class EventHandler>
383 m_pending_anchors = {};
385 m_was_inside_qmrk =
false;
389 m_newline_offsets = {};
390 m_newline_offsets_size = {};
391 m_newline_offsets_capacity = {};
392 m_newline_offsets_buf = {};
// Releases the newline-offsets array, if one is held, through the event
// handler's callbacks, and resets the related bookkeeping members
// (pointer, size, capacity and buffer) to null/zero.
395 template<
class EventHandler>
396 void ParseEngine<EventHandler>::_free()
398 if(m_newline_offsets)
400 _RYML_CB_FREE(m_evt_handler->m_stack.m_callbacks, m_newline_offsets,
size_t, m_newline_offsets_capacity);
401 m_newline_offsets =
nullptr;
402 m_newline_offsets_size = 0u;
403 m_newline_offsets_capacity = 0u;
404 m_newline_offsets_buf =
nullptr;
// Resets per-parse state: clears the pending anchors and the
// "was inside question mark" flag, and prepares location tracking when the
// parser options request locations.
// NOTE(review): other members are presumably reset on lines not visible in
// this excerpt.
411 template<
class EventHandler>
412 void ParseEngine<EventHandler>::_reset()
414 m_pending_anchors = {};
417 m_was_inside_qmrk =
false;
422 if(m_options.locations())
424 _prepare_locations();
// Re-targets strings held by the parser after the arena moved from
// `prev_arena` to `next_arena`.
// The local macro _ryml_relocate rebases a string that lies inside
// prev_arena so that it points at the same offset inside next_arena; the
// two loops walk the pending tag and pending anchor entries.
// NOTE(review): the loop bodies (which apply the macro) are not visible in
// this excerpt.
431 template<
class EventHandler>
432 void ParseEngine<EventHandler>::_relocate_arena(csubstr prev_arena, substr next_arena)
434 #define _ryml_relocate(s) \
435 if((s).is_sub(prev_arena)) \
437 (s).str = next_arena.str + ((s).str - prev_arena.str); \
441 for(
size_t i = 0; i < m_pending_tags.num_entries; ++i)
443 for(
size_t i = 0; i < m_pending_anchors.num_entries; ++i)
445 #undef _ryml_relocate
// Callback-style entry point for arena relocation: `data` is an opaque
// pointer that is cast back to the ParseEngine instance, to which the call
// is forwarded.
448 template<
class EventHandler>
449 void ParseEngine<EventHandler>::_s_relocate_arena(
void* data, csubstr prev_arena, substr next_arena)
451 ((ParseEngine*)data)->_relocate_arena(prev_arena, next_arena);
// Writes a human-readable description of the current parse position through
// `dumpfn`: a "file:line:col: " prefix, the current line (truncated to 80
// characters), a marker line pointing at the unparsed remainder, and the
// flags of the top parser state.
// NOTE(review): several lines (conditions around the file name, the flags
// buffer declaration) are not visible in this excerpt.
458 template<
class EventHandler>
459 template<
class DumpFn>
460 C4_NO_INLINE
void ParseEngine<EventHandler>::_fmt_msg(DumpFn &&dumpfn)
const
462 auto const *
const C4_RESTRICT st = m_evt_handler->m_curr;
463 auto const& lc = st->line_contents;
// Only the first num_cols characters of the full line are shown.
464 csubstr contents = lc.full.first(lc.num_cols);
// `offs` accumulates the width of the printed prefix so the marker line
// below can be aligned with the echoed line contents.
// presumably to_chars() with an empty buffer yields the number of
// characters required — TODO confirm.
468 size_t offs = 3u +
to_chars(substr{}, st->pos.line) +
to_chars(substr{}, st->pos.col);
471 _dbg_dump(std::forward<DumpFn>(dumpfn),
"{}:", m_file);
472 offs += m_file.len + 1;
474 _dbg_dump(std::forward<DumpFn>(dumpfn),
"{}:{}: ", st->pos.line, st->pos.col);
// Lines of 80 or more characters are cut to 80 and followed by "...".
// NOTE(review): a line of exactly 80 characters also gets the ellipsis
// although nothing is cut off.
475 csubstr maybe_full_content = (contents.len < 80u ? contents : contents.first(80u));
476 csubstr maybe_ellipsis = (contents.len < 80u ? csubstr{} : csubstr(
"..."));
477 _dbg_dump(std::forward<DumpFn>(dumpfn),
"{}{} (size={})\n", maybe_full_content, maybe_ellipsis, contents.len);
// Marker line: pad up to the column where the remainder starts, print '^'
// under its first character and '~' under the following ones (capped at 80
// characters in total), then report the column range 1-based.
479 size_t firstcol = (size_t)(lc.rem.begin() - lc.full.begin());
480 size_t lastcol = firstcol + lc.rem.len;
481 for(
size_t i = 0; i < offs + firstcol; ++i)
482 std::forward<DumpFn>(dumpfn)(
" ");
483 std::forward<DumpFn>(dumpfn)(
"^");
484 for(
size_t i = 1, e = (lc.rem.len < 80u ? lc.rem.len : 80u); i < e; ++i)
485 std::forward<DumpFn>(dumpfn)(
"~");
486 _dbg_dump(std::forward<DumpFn>(dumpfn),
"{} (cols {}-{})\n", maybe_ellipsis, firstcol+1, lastcol+1);
490 std::forward<DumpFn>(dumpfn)(
"\n");
// Finally, the flags of the current (top) parser state in textual form.
495 _dbg_dump(std::forward<DumpFn>(dumpfn),
"top state: {}\n", detail::_parser_flags_to_str(flagbuf_, m_evt_handler->m_curr->flags));
// Reports a parse error with an explicit YAML location.
//   cpploc: location in the C++ source raising the error
//   ymlloc: location in the YAML source being parsed
//   fmt, args: format string and its arguments for the error message
// The event handler is told to cancel the parse before the error is raised
// through err_parse() with the handler's callbacks. Declared C4_NORETURN.
503 template<
class EventHandler>
504 template<
class ...Args>
505 C4_NORETURN C4_NO_INLINE
void ParseEngine<EventHandler>::_err(Location
const& cpploc, Location
const& ymlloc,
const char* fmt, Args
const& ...args)
const
507 m_evt_handler->cancel_parse();
508 err_parse(m_evt_handler->m_stack.m_callbacks, ErrorDataParse{cpploc, ymlloc}, fmt, args...);
// Reports a parse error at the parser's current position.
//   cpploc: location in the C++ source raising the error
//   fmt, args: format string and its arguments for the error message
// Same as the overload taking an explicit YAML location, except that the
// YAML location is taken from the current state's position.
// Declared C4_NORETURN.
511 template<
class EventHandler>
512 template<
class ...Args>
513 C4_NORETURN C4_NO_INLINE
void ParseEngine<EventHandler>::_err(Location
const& cpploc,
const char *fmt, Args
const& ...args)
const
515 m_evt_handler->cancel_parse();
516 err_parse(m_evt_handler->m_stack.m_callbacks, ErrorDataParse{cpploc, m_evt_handler->m_curr->pos}, fmt, args...);
// Debug output helper: prints the formatted message, then the description
// of the current parse position produced by _fmt_msg().
// NOTE(review): any guards around these calls are not visible in this
// excerpt.
522 template<
class EventHandler>
523 template<
class ...Args>
524 void ParseEngine<EventHandler>::_dbg(csubstr fmt, Args
const& ...args)
const
528 _dbg_printf(fmt, args...);
530 _fmt_msg(_dbg_dumper);
// Computes whether the current offset has reached or passed the end of the
// source buffer, logging a debug message.
// NOTE(review): the condition guarding the debug message and the return
// statement are not visible in this excerpt.
537 template<
class EventHandler>
538 bool ParseEngine<EventHandler>::_finished_file()
const
540 bool ret = m_evt_handler->m_curr->pos.offset >= m_buf.len;
543 _c4dbgp(
"finished file!!!");
// Returns true when nothing remains to be parsed on the current line,
// i.e. the remainder (`rem`) of the current line contents is empty.
548 template<
class EventHandler>
549 C4_HOT C4_ALWAYS_INLINE
bool ParseEngine<EventHandler>::_finished_line()
const
551 return m_evt_handler->m_curr->line_contents.rem.empty();
// Skips leading whitespace on the remainder of the current line, advancing
// the line position by the number of characters skipped.
// NOTE(review): the computation of `pos` (which characters count as
// whitespace, and the no-whitespace case) is not visible in this excerpt.
557 template<
class EventHandler>
558 void ParseEngine<EventHandler>::_maybe_skip_whitespace_tokens()
560 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
566 _c4dbgpf(
"skip {} whitespace characters", pos);
567 _line_progressed(pos);
// If the remainder of the current line starts with character `c`, skips the
// whole leading run of `c` and advances the line position accordingly.
// Does nothing when the remainder is empty or starts with another character.
// NOTE(review): handling of a remainder made up entirely of `c` (where
// first_not_of finds nothing) is presumably on lines not visible here.
571 template<
class EventHandler>
572 void ParseEngine<EventHandler>::_maybe_skipchars(
char c)
574 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
575 if(rem.len && rem.str[0] == c)
577 size_t pos = rem.first_not_of(c);
580 _c4dbgpf(
"skip {}x'{}'", pos, c);
581 _line_progressed(pos);
585 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
// Like _maybe_skipchars(), but with an upper bound: skips a leading run of
// character `c` on the remainder of the current line, taking `max_to_skip`
// into account.
// NOTE(review): the statement executed when pos > max_to_skip is not
// visible in this excerpt (presumably it clamps pos to max_to_skip).
586 template<
class EventHandler>
587 void ParseEngine<EventHandler>::_maybe_skipchars_up_to(
char c,
size_t max_to_skip)
589 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
590 if(rem.len && rem.str[0] == c)
592 size_t pos = rem.first_not_of(c);
595 if(pos > max_to_skip)
597 _c4dbgpf(
"skip {}x'{}'", pos, c);
598 _line_progressed(pos);
// Skips the leading run of characters belonging to the set `chars` on the
// remainder of the current line. The remainder must start with one of them
// (asserted).
// NOTE(review): the template parameter N and the condition under which pos
// falls back to the full remainder length are not visible in this excerpt.
603 template<
class EventHandler>
605 void ParseEngine<EventHandler>::_skipchars(
const char (&chars)[N])
607 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.begins_with_any(chars));
608 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(chars);
610 pos = m_evt_handler->m_curr->line_contents.rem.len;
611 _c4dbgpf(
"skip {} characters", pos);
612 _line_progressed(pos);
// Consumes a comment that starts at the current position and runs to the
// end of the line. The remainder must start with '#' and lie inside the
// full line (both asserted).
// A comment that does not start the line must be preceded by a space or a
// tab; otherwise a parse error is raised.
615 template<
class EventHandler>
616 void ParseEngine<EventHandler>::_skip_comment()
618 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.begins_with(
'#'));
619 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.is_sub(m_evt_handler->m_curr->line_contents.full));
620 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
621 csubstr line = m_evt_handler->m_curr->line_contents.full;
623 if(!line.begins_with(
'#'))
625 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, rem.str > line.str);
// `c` is the character immediately before the '#'.
626 const char c = line[(size_t)(rem.str - line.str - 1)];
627 if(C4_UNLIKELY(c !=
' ' && c !=
'\t'))
628 _c4err(
"comment not preceded by whitespace");
632 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, rem.str == line.str);
// The comment extends to the end of the line: consume all of the remainder.
634 _c4dbgpf(
"comment was '{}'", rem);
635 _line_progressed(rem.len);
// If, after leading spaces, the remainder of the current line starts with
// '#', advances the line position past those leading spaces.
// NOTE(review): the statement that then consumes the comment itself is not
// visible in this excerpt.
638 template<
class EventHandler>
639 void ParseEngine<EventHandler>::_maybe_skip_comment()
641 csubstr s = m_evt_handler->m_curr->line_contents.rem.triml(
' ');
642 if(s.begins_with(
'#'))
644 _line_progressed((
size_t)(s.str - m_evt_handler->m_curr->line_contents.rem.str));
// Looks for a ':' following the current position on the current line.
// Leading spaces and tabs are skipped first (advancing the line position);
// then the next character is compared with ':'.
// NOTE(review): the return statements and whether the ':' itself is
// consumed are not visible in this excerpt.
649 template<
class EventHandler>
650 bool ParseEngine<EventHandler>::_maybe_scan_following_colon() noexcept
652 if(m_evt_handler->m_curr->line_contents.rem.len)
654 if(m_evt_handler->m_curr->line_contents.rem.str[0] ==
' ' || m_evt_handler->m_curr->line_contents.rem.str[0] ==
'\t')
656 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(
" \t");
658 pos = m_evt_handler->m_curr->line_contents.rem.len;
659 _c4dbgpf(
"skip {}x'{}'", pos,
' ');
660 _line_progressed(pos);
662 if(m_evt_handler->m_curr->line_contents.rem.len && (m_evt_handler->m_curr->line_contents.rem.str[0] ==
':'))
664 _c4dbgp(
"found ':' colon next");
// Looks for a ',' following the current position on the current line.
// Leading spaces and tabs are skipped first (advancing the line position);
// then the next character is compared with ','.
// NOTE(review): the return statements and whether the ',' itself is
// consumed are not visible in this excerpt.
672 template<
class EventHandler>
673 bool ParseEngine<EventHandler>::_maybe_scan_following_comma() noexcept
675 if(m_evt_handler->m_curr->line_contents.rem.len)
677 if(m_evt_handler->m_curr->line_contents.rem.str[0] ==
' ' || m_evt_handler->m_curr->line_contents.rem.str[0] ==
'\t')
679 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(
" \t");
681 pos = m_evt_handler->m_curr->line_contents.rem.len;
682 _c4dbgpf(
"skip {}x'{}'", pos,
' ');
683 _line_progressed(pos);
685 if(m_evt_handler->m_curr->line_contents.rem.len && (m_evt_handler->m_curr->line_contents.rem.str[0] ==
','))
687 _c4dbgp(
"found ',' comma next");
// Scans an anchor at the current position. The remainder must start with
// '&' (asserted). The anchor name is the text between the '&' and the first
// following space; the '&' and the name are consumed, then any spaces after
// it are skipped.
// NOTE(review): the return statement is not visible in this excerpt.
698 template<
class EventHandler>
699 csubstr ParseEngine<EventHandler>::_scan_anchor()
701 csubstr s = m_evt_handler->m_curr->line_contents.rem;
702 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'&'));
703 csubstr anchor = s.range(1, s.first_of(
' '));
704 _line_progressed(1u + anchor.len);
705 _maybe_skipchars(
' ');
// Scans a reference at the current position. The remainder must start with
// '*' (asserted). The reference runs up to the first of ',', ']', ' ' or
// ':', and includes the leading '*'; the line position is advanced by its
// length.
// NOTE(review): the return statement is not visible in this excerpt.
709 template<
class EventHandler>
710 csubstr ParseEngine<EventHandler>::_scan_ref_seq()
712 csubstr s = m_evt_handler->m_curr->line_contents.rem;
713 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'*'));
714 csubstr ref = s.first(s.first_of(
",] :"));
715 _line_progressed(ref.len);
// Scans a reference at the current position. The remainder must start with
// '*' (asserted). The reference runs up to the first of ',', '}' or ' ',
// and includes the leading '*'; the line position is advanced by its length.
// NOTE(review): the return statement is not visible in this excerpt.
719 template<
class EventHandler>
720 csubstr ParseEngine<EventHandler>::_scan_ref_map()
722 csubstr s = m_evt_handler->m_curr->line_contents.rem;
723 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'*'));
724 csubstr ref = s.first(s.first_of(
",} "));
725 _line_progressed(ref.len);
// Scans a tag at the current position. After trimming leading spaces the
// remainder must start with '!' (asserted). Three tag forms are told apart
// by prefix ("!!", "!<", and plain "!"), plus a "!h!" form that is only
// compiled under RYML_NO_COVERAGE__TO_BE_DELETED. The line position is
// advanced by the tag length and trailing whitespace is skipped.
// NOTE(review): the declaration of `t`, the conditions selecting between
// the alternative terminator sets (" ," versus ' '), and the return
// statement are not visible in this excerpt.
729 template<
class EventHandler>
730 csubstr ParseEngine<EventHandler>::_scan_tag()
732 csubstr rem = m_evt_handler->m_curr->line_contents.rem.triml(
' ');
733 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, rem.begins_with(
'!'));
735 if(rem.begins_with(
"!!"))
737 _c4dbgp(
"begins with '!!'");
739 t = rem.left_of(rem.first_of(
" ,"));
741 t = rem.left_of(rem.first_of(
' '));
743 else if(rem.begins_with(
"!<"))
745 _c4dbgp(
"begins with '!<'");
// Here the tag runs up to and including the closing '>'.
746 t = rem.left_of(rem.first_of(
'>'),
true);
748 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
749 else if(rem.begins_with(
"!h!"))
751 _c4dbgp(
"begins with '!h!'");
752 t = rem.left_of(rem.first_of(
' '));
757 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, rem.begins_with(
'!'));
758 _c4dbgp(
"begins with '!'");
760 t = rem.left_of(rem.first_of(
" ,"));
762 t = rem.left_of(rem.first_of(
' '));
764 _line_progressed(t.len);
765 _maybe_skip_whitespace_tokens();
// Decides whether `s` (non-empty, asserted) can start a plain scalar in a
// flow context. Judging by the visible messages, some leading tokens make
// it "not a scalar", while certain tokens starting with ':' or '?' raise a
// parse error ("invalid token").
// NOTE(review): the character tests and return statements are not visible
// in this excerpt.
772 template<
class EventHandler>
773 bool ParseEngine<EventHandler>::_is_valid_start_scalar_plain_flow(csubstr s)
775 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, !s.empty());
792 _c4dbgpf(
"not a scalar: found non-scalar token '{}'", _c4prc(s.str[0]));
809 _c4dbgpf(
"not a scalar: found non-scalar token '{}{}'", s.str[0], s.str[1]);
816 _c4err(
"invalid token \":{}\"", _c4prc(s.str[1]));
836 _c4dbgpf(
"not a scalar: found non-scalar token '?{}'", _c4prc(s.str[1]));
842 _c4err(
"invalid token \"?{}\"", _c4prc(s.str[1]));
// Scans a plain (unquoted) scalar inside a flow sequence and stores the
// result in *sc (the scalar text and whether it needs filtering).
// Required parser state (asserted): flow sequence (RSEQ or RSEQIMAP with
// RFLOW), expecting a value (RVAL), not in a map or block context.
// Unlike the map variant, scanning runs over the rest of the buffer from
// the current offset, so the scalar can cross line boundaries.
// NOTE(review): many statements (the per-character dispatch, assignments
// to sc->scalar, return statements) are not visible in this excerpt.
861 template<
class EventHandler>
862 bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_flow(ScannedScalar *C4_RESTRICT sc)
864 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RMAP));
865 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RBLCK));
866 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RSEQ|
RSEQIMAP));
867 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RFLOW));
868 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL));
// `s` is everything from the current offset to the end of the buffer; it
// must begin with the remainder of the current line (asserted below).
870 substr s = m_buf.sub(m_evt_handler->m_curr->pos.offset);
871 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, !s.begins_with(
' '));
872 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, !s.begins_with(
'\n'));
874 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.begins_with(m_evt_handler->m_curr->line_contents.rem));
876 if(!s.len || !_is_valid_start_scalar_plain_flow(s))
879 _c4dbgp(
"scanning seqflow scalar...");
881 bool needs_filter =
false;
// `offs` indexes into `s`; `col` tracks the progress within the current line.
885 for( ; offs < s.len; ++offs, ++col)
887 const char c = s.str[offs];
892 _c4dbgpf(
"found terminating character at {}: '{}'", offs, c);
893 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, offs > 0);
896 _c4dbgpf(
"found newline. offs={} col={}", offs, col);
// A following line that starts with ',', ']' or '#' ends the scalar.
901 if(next_line.begins_with_any(
",]#"))
903 _c4dbgpf(
"found terminating character beginning next line: '{}'", next_line.str[0]);
909 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
// A ':' is only significant depending on the character after it.
918 _c4dbgp(
"found suspicious ':'");
922 char next = s.str[offsp1];
923 _c4dbgpf(
"next char is '{}'", _c4prc(next));
926 csubstr after = s.sub(offsp1).triml(
'\r');
930 _c4dbgpf(
"skip \\r to '{}'", _c4prc(next));
936 _c4dbgp(
"map starting!");
941 _c4dbgp(
"':' nothing to see here");
946 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.len == offsp1);
947 _line_progressed(col);
948 _c4err(
"missing termination: '{}'", c);
// A '#' is judged using the character before it.
953 _c4dbgp(
"found suspicious '#'");
954 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, offs > 0);
955 char prev = s.str[offs - 1];
958 _c4dbgpf(
"found terminating character at {}: '{}'", offs, c);
966 _line_progressed(col);
967 _c4err(
"invalid character: '{}'", c);
975 _line_progressed(col);
978 sc->needs_filter = needs_filter;
980 _c4prscalar(
"scanned plain scalar", sc->scalar,
true);
// Scans a plain (unquoted) scalar inside a flow map and stores the result
// in *sc (the scalar text and whether it needs filtering). Returns true
// when the scanned scalar is non-empty.
// Required parser state (asserted): flow map (RMAP or RSEQIMAP with RFLOW),
// expecting a key or value (RKEY, RVAL or QMRK), not in a block context.
// Scanning works line by line; when a line is exhausted without finding a
// terminator, the scan continues on the next line and the scalar is marked
// as needing filtering.
// NOTE(review): the per-character dispatch and the assignment of
// sc->scalar are not visible in this excerpt.
985 template<
class EventHandler>
986 bool ParseEngine<EventHandler>::_scan_scalar_plain_map_flow(ScannedScalar *C4_RESTRICT sc)
988 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQ) || has_any(
RSEQIMAP));
989 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RBLCK));
990 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RMAP|
RSEQIMAP));
991 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RFLOW));
992 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RVAL|
QMRK));
994 substr s = m_evt_handler->m_curr->line_contents.rem;
995 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, !s.begins_with(
' '));
1000 if(!_is_valid_start_scalar_plain_flow(s))
1003 _c4dbgp(
"scanning scalar...");
// Remember where the scalar starts in the buffer.
1005 const size_t start_offset = m_evt_handler->m_curr->pos.offset;
1006 bool needs_filter =
false;
1009 for(
size_t i = 0; i < s.len; ++i)
1011 const char c = s.str[i];
1016 _line_progressed(i);
1017 _c4dbgpf(
"found terminating character: '{}'", c);
// This branch terminates only when the character is the last one on the
// line or is followed by ' ', ',' or '}' (and by '\t' when tab tokens are
// enabled).
1020 if(s.len == i+1 || s.str[i+1] ==
' ' || s.str[i+1] ==
',' || s.str[i+1] ==
'}' _RYML_WITH_TAB_TOKENS(|| s.str[i+1] ==
'\t'))
1022 _line_progressed(i);
1023 _c4dbgpf(
"found terminating character: '{}'", c);
1029 _line_progressed(i);
1030 _c4err(
"invalid character: '{}'", c);
1033 _line_progressed(i);
1037 _c4err(
"invalid character: '{}'", c);
1042 _line_progressed(i);
1043 _c4dbgpf(
"found terminating character: '{}'", c);
// No terminator on this line: consume it and continue on the next line,
// unless the file has ended.
1051 _c4dbgp(
"next line!");
1052 _line_progressed(s.len);
1053 if(!_finished_file())
1055 _c4dbgp(
"next line!");
1061 _c4dbgp(
"file finished!");
1064 s = m_evt_handler->m_curr->line_contents.rem;
1065 needs_filter =
true;
1071 sc->needs_filter = needs_filter;
1073 _c4dbgpf(
"scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
1075 return sc->scalar.len > 0u;
// Scans an unquoted JSON scalar inside a flow sequence and stores the
// result in *sc. Scalars found here never need filtering.
// Required parser state (asserted): flow sequence (RSEQ with RFLOW), not in
// a map or block context.
// The special literals recognized by _is_special_json_scalar() are handled
// first; otherwise the remainder of the current line is scanned character
// by character for a terminator.
// NOTE(review): the terminator tests, the condition on `len`, and the
// return statements are not visible in this excerpt.
1078 template<
class EventHandler>
1079 bool ParseEngine<EventHandler>::_scan_scalar_seq_json(ScannedScalar *C4_RESTRICT sc)
1081 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RMAP));
1082 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RBLCK));
1083 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RSEQ));
1084 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RFLOW));
1086 substr s = m_evt_handler->m_curr->line_contents.rem;
1087 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, !s.begins_with(
' '));
1092 _c4dbgp(
"scanning scalar...");
1099 _c4dbgp(
"not a scalar.");
// Special literal: take exactly `len` characters as the scalar.
1104 const size_t len = _is_special_json_scalar(s);
1107 sc->scalar = s.first(len);
1108 sc->needs_filter =
false;
1109 _c4dbgpf(
"special json scalar: '{}'", sc->scalar);
1110 _line_progressed(len);
// General case: scan for a terminating character.
1117 for( ; i < s.len; ++i)
1119 const char c = s.str[i];
1126 _c4dbgpf(
"found terminating character: '{}'", c);
1129 if(!i || s.str[i-1] ==
' ')
1131 _c4dbgpf(
"found terminating character: '{}'", c);
// Only a non-empty prefix counts as a scalar.
1142 if(C4_LIKELY(i > 0))
1144 _line_progressed(i);
1145 sc->scalar = s.first(i);
1146 sc->needs_filter =
false;
1147 _c4dbgpf(
"scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
// Scans an unquoted JSON scalar inside a flow map and stores the result in
// *sc. Scalars found here never need filtering.
// Required parser state (asserted): flow map (RMAP with RFLOW), expecting a
// key or value (RKEY or RVAL), not in a sequence or block context.
// The special literals recognized by _is_special_json_scalar() are handled
// first; otherwise the remainder of the current line is scanned character
// by character for a terminator.
// NOTE(review): the terminator tests, the condition on `len`, and the
// return statements are not visible in this excerpt.
1154 template<
class EventHandler>
1155 bool ParseEngine<EventHandler>::_scan_scalar_map_json(ScannedScalar *C4_RESTRICT sc)
1157 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQ));
1158 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RBLCK));
1159 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RMAP));
1160 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RFLOW));
1161 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RVAL));
1163 substr s = m_evt_handler->m_curr->line_contents.rem;
1164 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, !s.begins_with(
' '));
1169 _c4dbgp(
"scanning scalar...");
// Special literal: take exactly `len` characters as the scalar.
1172 const size_t len = _is_special_json_scalar(s);
1175 sc->scalar = s.first(len);
1176 sc->needs_filter =
false;
1177 _c4dbgpf(
"special json scalar: '{}'", sc->scalar);
1178 _line_progressed(len);
// General case: scan for a terminating character.
1185 for( ; i < s.len; ++i)
1187 const char c = s.str[i];
1194 _c4dbgpf(
"found terminating character: '{}'", c);
1197 if(!i || s.str[i-1] ==
' ')
1199 _c4dbgpf(
"found terminating character: '{}'", c);
// Only a non-empty prefix counts as a scalar.
1210 if(C4_LIKELY(i > 0))
1212 _line_progressed(i);
1213 sc->scalar = s.first(i);
1214 sc->needs_filter =
false;
1215 _c4dbgpf(
"scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
// Returns true when `s` (which must start with '-', asserted) is a document
// begin marker in the current parser position: the current line has zero
// indentation, the parser is at the beginning of the line, and
// _is_doc_begin_token(s) accepts the token.
1222 template<
class EventHandler>
1223 bool ParseEngine<EventHandler>::_is_doc_begin(csubstr s)
1225 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s[0] ==
'-');
1226 return (m_evt_handler->m_curr->line_contents.indentation == 0u && _at_line_begin() && _is_doc_begin_token(s));
// Returns true when `s` (which must start with '.', asserted) is a document
// end marker in the current parser position: the current line has zero
// indentation, the parser is at the beginning of the line, and
// _is_doc_end_token(s) accepts the token.
1229 template<
class EventHandler>
1230 bool ParseEngine<EventHandler>::_is_doc_end(csubstr s)
1232 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s[0] ==
'.');
1233 return (m_evt_handler->m_curr->line_contents.indentation == 0u && _at_line_begin() && _is_doc_end_token(s));
// Scans a plain (unquoted) scalar in block context and stores the result in
// *sc (the scalar text and whether it needs filtering).
//   sc:          receives the scanned scalar
//   indentation: minimum indentation for continuation lines; a following
//                line indented less than this ends the scalar
// Required parser state (asserted): one of RBLCK, RUNK or USTY, and neither
// RFLOW nor RSEQIMAP.
// The scalar may span several lines. It ends at a significant ':' or '#',
// at a less-indented line, at a document begin/end marker on an unindented
// line, or at the end of the file. Multi-line scalars are marked as needing
// filtering.
// NOTE(review): many statements (character tests, several conditions and
// the return statements) are not visible in this excerpt.
1236 template<
class EventHandler>
1237 bool ParseEngine<EventHandler>::_scan_scalar_plain_blck(ScannedScalar *C4_RESTRICT sc,
size_t indentation)
1239 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RFLOW));
1240 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQIMAP));
1241 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RBLCK|
RUNK|
USTY));
1243 substr s = m_evt_handler->m_curr->line_contents.rem;
1244 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, !s.begins_with(
' '));
// Early checks on the leading token: block tokens and document markers.
1252 if(_is_blck_token(s))
1256 else if(_is_doc_begin(s))
1258 _c4dbgp(
"token is doc start");
1264 if(_is_blck_token(s))
1277 _c4dbgp(
"token is doc end");
1283 _c4dbgpf(
"plain scalar! indentation={}", indentation);
// Remember where the scalar starts (buffer offset and line number).
1285 const size_t start_offset = m_evt_handler->m_curr->pos.offset;
1286 const size_t start_line = m_evt_handler->m_curr->pos.line;
1288 bool needs_filter =
false;
1291 _c4dbgpf(
"plain scalar line: [{}]~~~{}~~~", s.len, s);
1292 for(
size_t i = 0; i < s.len; ++i)
1294 const char curr = s.str[i];
1299 _c4dbgpf(
"[{}]: got suspicious ':'", i);
1303 _c4dbgpf(
"followed by '{}'", i+1 == s.len ? csubstr(
"\\n") : _c4prc(s.str[i+1]));
1304 _line_progressed(i);
1306 if(C4_LIKELY(m_evt_handler->m_curr->pos.line == start_line))
1308 _c4dbgp(
"start line. scalar ends here");
// Skip over a run of consecutive ':' characters.
1319 while(j + 1 < s.len && s.str[j+1] ==
':')
1321 _c4dbgp(
"skip colon");
1324 i = j > i ? j-1 : i;
1325 _c4dbgp(
"nothing to see here");
// A '#' starts a comment only at the line start or after a space or tab.
1329 _c4dbgp(
"got suspicious '#'");
1330 if(!i || (s.str[i-1] ==
' ' || s.str[i-1] ==
'\t'))
1332 _c4dbgp(
"comment! scalar ends here");
1333 _line_progressed(i);
1338 _c4dbgp(
"nothing to see here");
// End of the current line: peek at the next line to decide whether the
// scalar continues.
1343 _line_progressed(s.len);
1344 csubstr next_peeked = _peek_next_line(m_evt_handler->m_curr->pos.offset);
1345 next_peeked = next_peeked.trimr(
"\n\r");
1346 const size_t next_indentation = next_peeked.first_not_of(
' ');
1347 _c4dbgpf(
"indentation curr={} next={}", indentation, next_indentation);
1348 if(next_indentation < indentation)
1350 _c4dbgp(
"smaller indentation! scalar ended");
// An unindented next line may be a document begin/end marker.
1353 else if(next_indentation == 0 && next_peeked.len > 0)
1355 const char first = next_peeked.str[0];
1359 next_peeked = next_peeked.trimr(
"\n\r");
1360 _c4dbgpf(
"doc begin? peeked=[{}]~~~{}{}~~~", next_peeked.len, next_peeked.len >= 3 ? next_peeked.first(3) : next_peeked, next_peeked.len > 3 ?
"..." :
"");
1361 if(_is_doc_begin_token(next_peeked))
1363 _c4dbgp(
"doc begin! scalar ended");
1368 next_peeked = next_peeked.trimr(
"\n\r");
1369 _c4dbgpf(
"doc end? peeked=[{}]~~~{}{}~~~", next_peeked.len, next_peeked.len >= 3 ? next_peeked.first(3) : next_peeked, next_peeked.len > 3 ?
"..." :
"");
1370 if(_is_doc_end_token(next_peeked))
1372 _c4dbgp(
"doc end! scalar ended");
1379 _c4dbgp(
"next line!");
1380 if(!_finished_file())
1382 _c4dbgp(
"next line!");
1388 _c4dbgp(
"file finished!");
1391 s = m_evt_handler->m_curr->line_contents.rem;
1392 needs_filter =
true;
// The scalar is the buffer range from the start offset to the current
// offset, with trailing spaces, newlines and tabs removed.
1397 sc->scalar = m_buf.range(start_offset, m_evt_handler->m_curr->pos.offset).trimr(
" \n\r\t");
1398 sc->needs_filter = needs_filter;
1400 _c4dbgpf(
"scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
// Scans a plain scalar that is a value of a block sequence.
// Required parser state (asserted): RSEQ, RBLCK and RVAL set; RMAP, RFLOW
// and RSEQIMAP not set.
// Delegates to _scan_scalar_plain_blck() with a required indentation one
// greater than the current state's reference indentation (indref).
1405 template<
class EventHandler>
1406 bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_blck(ScannedScalar *C4_RESTRICT sc)
1408 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RMAP));
1409 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RFLOW));
1410 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQIMAP));
1411 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RSEQ));
1412 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RBLCK));
1413 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL));
1414 return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref + 1u);
// Scans a plain scalar that is a key or value of a block map.
// Required parser state (asserted): RMAP and RBLCK set, one of RKEY, RVAL
// or QMRK set; RSEQ and RFLOW not set.
// Delegates to _scan_scalar_plain_blck() with a required indentation one
// greater than the current state's reference indentation (indref).
1417 template<
class EventHandler>
1418 bool ParseEngine<EventHandler>::_scan_scalar_plain_map_blck(ScannedScalar *C4_RESTRICT sc)
1420 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQ));
1421 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RFLOW));
1422 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RMAP));
1423 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RBLCK));
1424 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RVAL|
QMRK));
1425 return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref + 1u);
// Scans a plain scalar while the container type is not yet known.
// Required parser state (asserted): RUNK or USTY set.
// Delegates to _scan_scalar_plain_blck() using the current state's
// reference indentation (indref) as is, without the +1 used by the
// sequence/map variants.
1428 template<
class EventHandler>
1429 bool ParseEngine<EventHandler>::_scan_scalar_plain_unk(ScannedScalar *C4_RESTRICT sc)
1431 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RUNK|
USTY));
1432 return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref);
// Returns the line following the one containing `pos`, without changing
// the parser position (const member).
//   pos: buffer offset to start from; npos means the current offset
// The returned line includes its terminating newline character(s), when
// present.
// NOTE(review): the declarations of `rem`/`nlpos`, the guard on the
// rem[nlpos+1] access, and the return statements are not visible in this
// excerpt.
1438 template<
class EventHandler>
1439 substr ParseEngine<EventHandler>::_peek_next_line(
size_t pos)
const
1443 pos = pos ==
npos ? m_evt_handler->m_curr->pos.offset : pos;
1444 if(pos >= m_buf.len)
// Skip the rest of the line that contains `pos`.
1448 rem = from_next_line(m_buf.sub(pos));
// Cut at the next newline, keeping it (and a combined "\r\n"/"\n\r" pair).
1453 nlpos = rem.first_of(
"\r\n");
1455 nlpos += _extend_from_combined_newline(rem[nlpos], rem[nlpos+1]);
1456 rem = rem.left_of(nlpos,
true);
1458 _c4dbgpf(
"peek next line @ {}: (len={})'{}'", pos, rem.len, rem.trimr(
"\r\n"));
1462 _c4dbgpf(
"peek next line @ {}: (len=0)''", pos);
// Loads the line starting at the current offset into the current state's
// line contents. When the offset is at or past the end of the buffer, the
// line contents are reset with an empty string taken from the buffer's end.
1468 template<
class EventHandler>
1469 void ParseEngine<EventHandler>::_scan_line()
1471 if(C4_LIKELY(m_evt_handler->m_curr->pos.offset < m_buf.len))
1472 m_evt_handler->m_curr->line_contents.reset_with_next_line(m_buf, m_evt_handler->m_curr->pos.offset);
1474 m_evt_handler->m_curr->line_contents.reset_with_next_line(m_buf.last(0), 0);
// Advances the parser position by `ahead` characters within the current
// line: the buffer offset and the column both grow by `ahead`, and the
// unparsed remainder of the line shrinks from the front by the same amount.
// The resulting column may be at most one past the line's column count
// (asserted).
1477 template<
class EventHandler>
1478 void ParseEngine<EventHandler>::_line_progressed(
size_t ahead)
1480 _c4dbgpf(
"line[{}] ({} cols) progressed by {}: col {}-->{} offset {}-->{}",
1481 m_evt_handler->m_curr->pos.line,
1482 m_evt_handler->m_curr->line_contents.full.len,
1483 ahead, m_evt_handler->m_curr->pos.col,
1484 m_evt_handler->m_curr->pos.col+ahead,
1485 m_evt_handler->m_curr->pos.offset,
1486 m_evt_handler->m_curr->pos.offset+ahead);
1487 m_evt_handler->m_curr->pos.offset += ahead;
1488 m_evt_handler->m_curr->pos.col += ahead;
1489 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col <= m_evt_handler->m_curr->line_contents.num_cols+1);
1490 m_evt_handler->m_curr->line_contents.rem = m_evt_handler->m_curr->line_contents.rem.sub(ahead);
// Moves the parser position to the start of the next line. The current
// line must have been fully consumed (column == num_cols + 1, asserted).
// The offset advances by full.len - num_cols (presumably the width of the
// line's trailing newline characters — confirm), the line number is
// incremented and the column is reset to 1.
1493 template<
class EventHandler>
1494 void ParseEngine<EventHandler>::_line_ended()
1496 _c4dbgpf(
"line[{}] ({} cols) ended! offset {}-->{} / col {}-->{}",
1497 m_evt_handler->m_curr->pos.line,
1498 m_evt_handler->m_curr->line_contents.full.len,
1499 m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset + m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols,
1500 m_evt_handler->m_curr->pos.col, 1);
1501 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col == m_evt_handler->m_curr->line_contents.num_cols + 1);
1502 m_evt_handler->m_curr->pos.offset += m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols;
1503 ++m_evt_handler->m_curr->pos.line;
1504 m_evt_handler->m_curr->pos.col = 1;
// Reverts the effect of _line_ended(): the parser must be at column 1 of a
// line other than line 0 (asserted). The offset is moved back by
// full.len - num_cols, the line number is decremented, the column is set to
// one past the line's column count, and the remainder becomes an empty
// string positioned at the restored offset.
// Relies on the current line contents still describing the line that was
// ended.
1507 template<
class EventHandler>
1508 void ParseEngine<EventHandler>::_line_ended_undo()
1510 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col == 1u);
1511 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.line > 0u);
1512 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.offset >= m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols);
1513 const size_t delta = m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols;
1514 _c4dbgpf(
"line[{}] undo ended! line {}-->{}, offset {}-->{}", m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->pos.line - 1, m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset - delta);
1515 m_evt_handler->m_curr->pos.offset -= delta;
1516 --m_evt_handler->m_curr->pos.line;
1517 m_evt_handler->m_curr->pos.col = m_evt_handler->m_curr->line_contents.num_cols + 1u;
1520 m_evt_handler->m_curr->line_contents.rem = m_buf.sub(m_evt_handler->m_curr->pos.offset, 0);
// Stores `indentation` as the reference indentation (indref) of the current
// parser state.
1525 template<
class EventHandler>
1526 void ParseEngine<EventHandler>::_set_indentation(
size_t indentation)
1528 m_evt_handler->m_curr->indref = indentation;
1529 _c4dbgpf(
"state[{}]: saving indentation: {}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
// Stores the current column of the line (line_contents.current_col()) as the
// reference indentation (indref) of the current parser state.
// Requires that rem is a sub-range of the full line.
1532 template<
class EventHandler>
1533 void ParseEngine<EventHandler>::_save_indentation()
1535 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.is_sub(m_evt_handler->m_curr->line_contents.full));
1536 m_evt_handler->m_curr->indref = m_evt_handler->m_curr->line_contents.current_col();
1537 _c4dbgpf(
"state[{}]: saving indentation: {}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
// Ends a flow map in the event handler.
// The map is reported as multiline when the detect_flow_ml() option is on and
// the parent state's line is smaller than the current state's line.
1543 template<
class EventHandler>
1544 void ParseEngine<EventHandler>::_end_map_flow()
1546 bool multiline = m_options.detect_flow_ml() && m_evt_handler->m_parent->pos.line < m_evt_handler->m_curr->pos.line;
1547 _c4dbgpf(
"mapflow: end, multiline={}", multiline);
1548 m_evt_handler->end_map_flow(multiline);
// Ends a flow sequence in the event handler.
// The sequence is reported as multiline when the detect_flow_ml() option is on
// and the parent state's line is smaller than the current state's line.
1551 template<
class EventHandler>
1552 void ParseEngine<EventHandler>::_end_seq_flow()
1554 bool multiline = m_options.detect_flow_ml() && m_evt_handler->m_parent->pos.line < m_evt_handler->m_curr->pos.line;
1555 _c4dbgpf(
"seqflow: end, multiline={}", multiline);
1556 m_evt_handler->end_seq_flow(multiline);
// Ends a block map in the event handler, first filling in whatever is still
// missing from the last entry with empty plain scalars.
// NOTE(review): the condition guarding the first branch is not visible in this
// view; per its trace message it covers a key that is still missing its val.
1559 template<
class EventHandler>
1560 void ParseEngine<EventHandler>::_end_map_blck()
1562 _c4dbgp(
"mapblck: end");
1565 _c4dbgp(
"mapblck: set missing val");
// pending annotations are handled before the empty val is emitted
1566 _handle_annotations_before_blck_val_scalar();
1567 m_evt_handler->set_val_scalar_plain_empty();
// QMRK set: neither key nor val was provided, so both are emitted empty
1569 else if(has_any(
QMRK))
1571 _c4dbgp(
"mapblck: set missing keyval");
1572 _handle_annotations_before_blck_key_scalar();
1573 m_evt_handler->set_key_scalar_plain_empty();
1574 _handle_annotations_before_blck_val_scalar();
1575 m_evt_handler->set_val_scalar_plain_empty();
1577 m_evt_handler->end_map_block();
// Ends a block sequence in the event handler. The visible branch first emits
// an empty plain val scalar (after handling pending annotations) for a
// missing value.
// NOTE(review): the condition guarding that branch is not visible in this view.
1580 template<
class EventHandler>
1581 void ParseEngine<EventHandler>::_end_seq_blck()
1585 _c4dbgp(
"seqblck: set missing val");
1586 _handle_annotations_before_blck_val_scalar();
1587 m_evt_handler->set_val_scalar_plain_empty();
1589 m_evt_handler->end_seq_block();
// Closes the current map state and pops it from the handler's state stack.
// Requires the RMAP flag to be set.
// NOTE(review): several lines between the RMAP assertion and the later ones
// are not visible here, so the context of the RFLOW/USTY assertions (and of
// the final _pop()) cannot be confirmed from this view.
1592 template<
class EventHandler>
1593 void ParseEngine<EventHandler>::_end2_map()
1595 _c4dbgp(
"map: end");
1596 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RMAP));
1603 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RFLOW));
1604 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
USTY));
1605 m_evt_handler->_pop();
// Closes the current sequence state and pops it from the handler's state
// stack. Requires the RSEQ flag to be set.
// NOTE(review): several lines between the RSEQ assertion and the later ones
// are not visible here, so the context of the RFLOW/USTY assertions (and of
// the final _pop()) cannot be confirmed from this view.
1609 template<
class EventHandler>
1610 void ParseEngine<EventHandler>::_end2_seq()
1612 _c4dbgp(
"seq: end");
1613 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RSEQ));
1620 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RFLOW));
1621 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
USTY));
1622 m_evt_handler->_pop();
// Begins a document in the event handler and resets the reference
// indentation of the (new) current state to 0.
1626 template<
class EventHandler>
1627 void ParseEngine<EventHandler>::_begin2_doc()
1629 _c4dbgp(
"begin_doc");
1632 m_evt_handler->begin_doc();
1633 m_evt_handler->m_curr->indref = 0;
// Begins an explicit document (handler's begin_doc_expl()) and resets the
// reference indentation of the (new) current state to 0.
1636 template<
class EventHandler>
1637 void ParseEngine<EventHandler>::_begin2_doc_expl()
1639 _c4dbgp(
"begin_doc_expl");
1642 m_evt_handler->begin_doc_expl();
1643 m_evt_handler->m_curr->indref = 0;
// Ends the current document in the event handler. Requires the RDOC flag.
// When the document is empty, or tags/anchors are still pending, an empty
// plain val scalar is emitted first so those annotations have a node to
// attach to.
1646 template<
class EventHandler>
1647 void ParseEngine<EventHandler>::_end2_doc()
1649 _c4dbgp(
"doc: end");
1650 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RDOC));
1651 if(m_doc_empty || (m_pending_tags.num_entries || m_pending_anchors.num_entries))
1653 _c4dbgp(
"doc was empty; add empty val");
1654 _handle_annotations_before_blck_val_scalar();
1655 m_evt_handler->set_val_scalar_plain_empty();
1657 m_evt_handler->end_doc();
// Ends the current document explicitly (handler's end_doc_expl()).
// When the document is empty, or tags/anchors are still pending, an empty
// plain val scalar is emitted first.
1661 template<
class EventHandler>
1662 void ParseEngine<EventHandler>::_end2_doc_expl()
1664 _c4dbgp(
"doc: end");
1665 if(m_doc_empty || (m_pending_tags.num_entries || m_pending_anchors.num_entries))
1667 _c4dbgp(
"doc: no children; add empty val");
1668 _handle_annotations_before_blck_val_scalar();
1669 m_evt_handler->set_val_scalar_plain_empty();
1671 m_evt_handler->end_doc_expl();
// NOTE(review): only the trace statement of this function is visible in this
// view. Judging by the name and the message, it presumably begins a document
// when one is not already open -- confirm against the full source.
1675 template<
class EventHandler>
1676 void ParseEngine<EventHandler>::_maybe_begin_doc()
1680 _c4dbgp(
"doc must be started");
// Finishes the current document when needed.
// NOTE(review): the first condition and its action are not visible in this
// view; only its trace message ("doc must be finished") is.
1684 template<
class EventHandler>
1685 void ParseEngine<EventHandler>::_maybe_end_doc()
1689 _c4dbgp(
"doc must be finished");
// no open doc, but tags/anchors are pending: emit a whole document holding a
// single empty plain scalar so the annotations are not lost
1692 else if(m_doc_empty && (m_pending_tags.num_entries || m_pending_anchors.num_entries))
1694 _c4dbgp(
"no doc to finish, but pending annotations");
1695 m_evt_handler->begin_doc();
1696 _handle_annotations_before_blck_val_scalar();
1697 m_evt_handler->set_val_scalar_plain_empty();
1698 m_evt_handler->end_doc();
// Pops the state stack back to the state that carries the RDOC flag.
// That state is looked for at stack[0], then at stack[1]; if neither has
// RDOC, an internal error is raised. On exit the current state must have RDOC.
1702 template<
class EventHandler>
1703 void ParseEngine<EventHandler>::_end_doc_suddenly__pop()
1705 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
1706 if(m_evt_handler->m_stack[0].flags &
RDOC)
1708 _c4dbgp(
"root is RDOC");
// pop only when the current state is not already the doc state
1709 if(m_evt_handler->m_curr->level != 0)
1710 _handle_indentation_pop(&m_evt_handler->m_stack[0]);
1712 else if((m_evt_handler->m_stack.size() > 1) && (m_evt_handler->m_stack[1].flags &
RDOC))
1714 _c4dbgp(
"root is STREAM");
1715 if(m_evt_handler->m_curr->level != 1)
1716 _handle_indentation_pop(&m_evt_handler->m_stack[1]);
1720 _c4err(
"internal error");
1722 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RDOC));
// Handles a document that ends while nested states are still open: first
// pops back to the doc-level state via _end_doc_suddenly__pop().
// NOTE(review): the statements following the pop are not visible in this view.
1725 template<
class EventHandler>
1726 void ParseEngine<EventHandler>::_end_doc_suddenly()
1728 _c4dbgp(
"end doc suddenly");
1729 _end_doc_suddenly__pop();
// Handles a new document starting while nested states are still open: first
// pops back to the doc-level state via _end_doc_suddenly__pop().
// NOTE(review): the statements following the pop are not visible in this view.
1734 template<
class EventHandler>
1735 void ParseEngine<EventHandler>::_start_doc_suddenly()
1737 _c4dbgp(
"start doc suddenly");
1738 _end_doc_suddenly__pop();
// Finishes parsing the whole stream and notifies the handler (end_stream()).
// NOTE(review): the conditions guarding the two "missing terminating" errors
// and several later lines are not visible in this view; presumably the errors
// fire when a flow sequence / flow map is still open.
1743 template<
class EventHandler>
1744 void ParseEngine<EventHandler>::_end_stream()
1746 _c4dbgpf(
"end_stream, level={} node_id={}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->node_id);
1748 _c4err(
"missing terminating ]");
1750 _c4err(
"missing terminating }");
// unwind every nested state down to the bottom of the stack
1751 if(m_evt_handler->m_stack.size() > 1)
1752 _handle_indentation_pop(m_evt_handler->m_stack.begin());
// leftover anchors/tags get a document of their own holding an empty scalar
1759 if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
1763 m_evt_handler->begin_doc();
1764 _handle_annotations_before_blck_val_scalar();
1765 m_evt_handler->set_val_scalar_plain_empty();
1766 m_evt_handler->end_doc();
1770 m_evt_handler->end_stream();
// Unwinds the state stack until `popto` is the current state.
// @param popto the state to return to; the loop runs until m_curr == popto.
// NOTE(review): the statements that actually close each seq/map are not
// visible in this view; only the trace for each case is.
1774 template<
class EventHandler>
1775 void ParseEngine<EventHandler>::_handle_indentation_pop(ParserState
const* popto)
1777 _c4dbgpf(
"popping {} level{}: from level {}(@ind={}) to level {}(@ind={})", m_evt_handler->m_curr->level - popto->level, (((m_evt_handler->m_curr->level - popto->level) > 1) ?
"s" :
""), m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, popto->level, popto->indref);
1778 while(m_evt_handler->m_curr != popto)
1782 _c4dbgpf(
"popping seq at level {} (indentation={},addr={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, m_evt_handler->m_curr);
1785 else if(has_any(
RMAP))
1787 _c4dbgpf(
"popping map at level {} (indentation={},addr={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, m_evt_handler->m_curr);
1795 _c4dbgpf(
"current level is {} (indentation={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
// Called from a block sequence when the indentation decreased: finds the
// enclosing state whose reference indentation equals the current line's
// indentation and pops back to it.
// Raises a parse error when no such state exists, or when the candidate is
// not strictly below the current state in the stack.
1798 template<
class EventHandler>
1799 void ParseEngine<EventHandler>::_handle_indentation_pop_from_block_seq()
1802 using state_type =
typename EventHandler::state;
1803 state_type
const* popto =
nullptr;
1804 auto &stack = m_evt_handler->m_stack;
// the backwards pointer walk below relies on contiguous stack storage
1805 _RYML_ASSERT_BASIC_(stack.m_callbacks, stack.is_contiguous());
1806 _RYML_ASSERT_BASIC_(stack.m_callbacks, m_evt_handler->m_curr >= stack.begin() && m_evt_handler->m_curr < stack.end());
1807 const size_t ind = m_evt_handler->m_curr->line_contents.indentation;
// debug dump of the whole stack
1812 for(state_type
const& s : stack)
1813 _dbg_printf(
"state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(flagbuf_, s.flags));
// walk from the state just below the current one down to the stack bottom
1816 for(state_type
const* s = m_evt_handler->m_curr-1; s >= stack.begin(); --s)
1818 _c4dbgpf(
"searching for state with indentation {}. curr={} (level={},node={})", ind, s->indref, s->level, s->node_id);
1819 if(s->indref == ind)
1821 _c4dbgpf(
"gotit!!! level={} node={}", s->level, s->node_id);
1826 if(!popto || popto >= m_evt_handler->m_curr || popto->level >= m_evt_handler->m_curr->level)
1828 _c4err(
"parse error: incorrect indentation?");
1830 _handle_indentation_pop(popto);
// Called from a block map when the indentation decreased: searches the
// enclosing states for the one to return to, based on the current line's
// indentation, and pops back to it.
// Raises a parse error when no suitable state is found, or when the candidate
// is not strictly below the current state in the stack.
// NOTE(review): several branches of the search loop are not visible in this
// view; the comments below describe only what is shown.
1833 template<
class EventHandler>
1834 void ParseEngine<EventHandler>::_handle_indentation_pop_from_block_map()
1837 using state_type =
typename EventHandler::state;
1838 auto &stack = m_evt_handler->m_stack;
// the backwards pointer walk below relies on contiguous stack storage
1839 _RYML_ASSERT_BASIC_(stack.m_callbacks, stack.is_contiguous());
1840 _RYML_ASSERT_BASIC_(stack.m_callbacks, m_evt_handler->m_curr >= stack.begin() && m_evt_handler->m_curr < stack.end());
1841 const size_t ind = m_evt_handler->m_curr->line_contents.indentation;
1842 state_type
const* popto =
nullptr;
// debug dump of the whole stack
1847 for(state_type
const& s : stack)
1848 _dbg_printf(
"state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(flagbuf_, s.flags));
// walk from the state just below the current one downwards; unlike the
// block-seq variant, the bottom state (stack.begin()) is not visited
1851 for(state_type
const* s = m_evt_handler->m_curr-1; s > stack.begin(); --s)
1853 _c4dbgpf(
"searching for state with indentation {}. current: ind={},level={},node={},flags={}", ind, s->indref, s->level, s->node_id, detail::_parser_flags_to_str(flagbuf_, s->flags));
1858 else if(s->indref == ind)
1860 _c4dbgpf(
"same indentation!!! level={} node={}", s->level, s->node_id);
// a candidate already exists and this state is a top-level non-container
1861 if(popto && has_any(
RTOP, s) && has_none(
RMAP|
RSEQ, s))
// look at what follows the leading spaces on the current line
1868 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
1869 const size_t first = rem.first_not_of(
' ');
1870 _RYML_ASSERT_BASIC_(stack.m_callbacks, first == ind || first ==
npos);
1871 rem = rem.right_of(first,
true);
1872 _c4dbgpf(
"indentless? rem='{}' first={}", rem, first);
// a '-' block token at this indentation means the parent seq was indentless
1873 if(rem.begins_with(
'-') && _is_blck_token(rem))
1875 _c4dbgp(
"parent was indentless seq");
1881 if(!popto || popto >= m_evt_handler->m_curr || popto->level >= m_evt_handler->m_curr->level)
1883 _c4err(
"parse error: incorrect indentation?");
1885 _handle_indentation_pop(popto);
// Scans a single-quoted scalar starting at the current offset, possibly
// spanning several lines, and advances the parse position past it.
// @return a ScannedScalar holding the scalar's range in m_buf together with a
//         flag telling whether it still needs filtering.
// Raises an error when the end of the file is reached before a closing quote.
// NOTE(review): many lines of this function (quote detection, line stepping,
// declaration of `pos`) are not visible in this view.
1890 template<
class EventHandler>
1891 typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_scan_scalar_squot()
1897 size_t b = m_evt_handler->m_curr->pos.offset;
1898 substr s = m_buf.sub(b);
// leading spaces before the opening quote are skipped and accounted for
1899 if(s.begins_with(
' '))
1902 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_buf.sub(b).is_super(s));
1903 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.begin() >= m_buf.sub(b).begin());
1904 _line_progressed((
size_t)(s.begin() - m_buf.sub(b).begin()));
1906 b = m_evt_handler->m_curr->pos.offset;
1907 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'\''));
// step over the opening quote
1910 _line_progressed(1);
1913 bool needs_filter =
false;
1915 size_t numlines = 1;
// one iteration per line until the closing quote is found or the file ends
1917 while( ! _finished_file())
1919 const csubstr line = m_evt_handler->m_curr->line_contents.rem;
1920 bool line_is_blank =
true;
1921 _c4dbgpf(
"scanning single quoted scalar @ line[{}]: ~~~{}~~~", m_evt_handler->m_curr->pos.line, line);
1922 for(
size_t i = 0; i < line.len; ++i)
1924 const char curr = line.str[i];
// '~' stands in for "no next character" at the end of the line
1927 const char next = i+1 < line.len ? line.str[i+1] :
'~';
1935 needs_filter =
true;
1939 else if(curr !=
' ')
1941 line_is_blank =
false;
1946 needs_filter = needs_filter
1949 || (_at_line_begin() && line.begins_with(
' '));
1953 _line_progressed(line.len);
1958 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, pos >= 0 && pos < m_buf.len);
1959 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_buf[m_evt_handler->m_curr->pos.offset + pos] ==
'\'');
// consume up to and including the closing quote, then re-express pos
// relative to b (the offset recorded before the opening quote was consumed)
1960 _line_progressed(pos + 1);
1961 pos = m_evt_handler->m_curr->pos.offset - b - 1;
1971 _c4err(
"reached end of file while looking for closing quote");
1975 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, pos > 0);
1976 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.end() >= m_buf.begin() && s.end() <= m_buf.end());
1977 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.end() == m_buf.end() || *s.end() ==
'\'');
1978 s = s.sub(0, pos-1);
1981 _c4prscalar(
"scanned squoted scalar", s,
true);
1983 return ScannedScalar { s, needs_filter };
// Scans a double-quoted scalar starting at the current offset, possibly
// spanning several lines, and advances the parse position past it.
// @return a ScannedScalar holding the scalar's range in m_buf together with a
//         flag telling whether it still needs filtering.
// Raises an error when the end of the file is reached before a closing quote.
// NOTE(review): many lines of this function (escape detection, line stepping,
// declaration of `pos`) are not visible in this view.
1988 template<
class EventHandler>
1989 typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_scan_scalar_dquot()
1995 size_t b = m_evt_handler->m_curr->pos.offset;
1996 substr s = m_buf.sub(b);
// leading spaces before the opening quote are skipped and accounted for
1997 if(s.begins_with(
' '))
2000 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_buf.sub(b).is_super(s));
2001 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.begin() >= m_buf.sub(b).begin());
2002 _line_progressed((
size_t)(s.begin() - m_buf.sub(b).begin()));
2004 b = m_evt_handler->m_curr->pos.offset;
2005 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'"'));
// step over the opening quote
2008 _line_progressed(1);
2011 bool needs_filter =
false;
2013 size_t numlines = 1;
2015 auto *st = m_evt_handler->m_curr;
// one iteration per line until the closing quote is found or the file ends
2016 while( ! _finished_file())
2018 const csubstr line = st->line_contents.rem;
// NOTE(review): optimizer barrier applied only for GCC 8 and 11; presumably a
// workaround for a miscompilation on those versions -- confirm
2019 #if defined(__GNUC__) && (__GNUC__ == 11 || __GNUC__ == 8)
2020 C4_DONT_OPTIMIZE(line);
2022 bool line_is_blank =
true;
2023 _c4dbgpf(
"scanning double quoted scalar @ line[{}]: line='{}'", st->pos.line, line);
2024 for(
size_t i = 0; i < line.len; ++i)
2026 const char curr = line.str[i];
2028 line_is_blank =
false;
// '~' stands in for "no next character" at the end of the line
2032 const char next = i+1 < line.len ? line.str[i+1] :
'~';
2033 needs_filter =
true;
2034 if(next ==
'"' || next ==
'\\')
2037 else if(curr ==
'"')
2045 needs_filter = needs_filter
2048 || (_at_line_begin() && line.begins_with(
' '));
2052 _line_progressed(line.len);
2057 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, pos >= 0 && pos < m_buf.len);
2058 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_buf[st->pos.offset + pos] ==
'"');
// consume up to and including the closing quote, then re-express pos
// relative to b (the offset recorded before the opening quote was consumed)
2059 _line_progressed(pos + 1);
2060 pos = st->pos.offset - b - 1;
2070 _c4err(
"reached end of file looking for closing quote");
2074 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, pos > 0);
2075 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.end() == m_buf.end() || *s.end() ==
'"');
2076 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.end() >= m_buf.begin() && s.end() <= m_buf.end());
2077 s = s.sub(0, pos-1);
2080 _c4prscalar(
"scanned dquoted scalar", s,
true);
2082 return ScannedScalar{s, needs_filter};
// Scans a block scalar (literal '|' or folded '>') starting at the current
// position and stores the raw, unfiltered block and its indentation in *sb.
// @param sb     receives the raw block range (sb->scalar) and the block
//               indentation (sb->indentation).
// @param indref reference indentation of the enclosing construct; must not
//               be npos.
// Raises parse errors for invalid indentation indicators and for an
// indentation decrease before any scalar content.
// NOTE(review): large parts of this function (chomp handling, loop exits,
// several conditions) are not visible in this view; the comments below only
// describe what is shown.
2087 template<
class EventHandler>
2088 void ParseEngine<EventHandler>::_scan_block(ScannedBlock *C4_RESTRICT sb,
size_t indref)
2090 _c4dbgpf(
"blck: indref={}", indref);
2091 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, indref !=
npos);
// skip the spaces preceding the block indicator
2094 csubstr s = m_evt_handler->m_curr->line_contents.rem;
2095 csubstr trimmed = s.triml(
' ');
2096 if(trimmed.str > s.str)
2098 _c4dbgp(
"skipping whitespace");
2099 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, trimmed.str >= s.str);
2100 _line_progressed(
static_cast<size_t>(trimmed.str - s.str));
2103 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'|') || s.begins_with(
'>'));
2105 _c4dbgpf(
"blck: specs=[{}]~~~{}~~~", s.len, s);
// npos means "no explicit indentation given in the block header"
2109 size_t indentation =
npos;
2113 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.begins_with_any(
"|>"));
// t: the header characters following the '|' or '>' indicator
2114 csubstr t = s.sub(1);
2115 _c4dbgpf(
"blck: spec is multichar: '{}'", t);
2116 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, t.len >= 1);
// chomping indicator: '-' or '+'
2117 size_t pos = t.first_of(
"-+");
2118 _c4dbgpf(
"blck: spec chomp char at {}", pos);
2123 else if(t[pos] ==
'+')
// explicit indentation indicator: only a single non-zero digit is accepted
2131 digits = t.left_of(t.first_not_of(
"0123456789"));
2132 if( ! digits.empty())
2134 if(C4_UNLIKELY(digits.len > 1))
2135 _c4err(
"parse error: invalid indentation");
2136 _c4dbgpf(
"blck: parse indentation digits: [{}]~~~{}~~~", digits.len, digits);
2137 if(C4_UNLIKELY( !
c4::atou(digits, &indentation)))
2138 _c4err(
"parse error: could not read indentation as decimal");
2139 if(C4_UNLIKELY( ! indentation))
2140 _c4err(
"parse error: null indentation");
2141 _c4dbgpf(
"blck: indentation specified: {}. add {} from curr state -> {}", indentation, m_evt_handler->m_curr->indref, indentation+indref);
// the digit is relative: it is added to the current state's indref
// NOTE(review): the trace above prints indentation+indref (the parameter)
// while the code adds m_curr->indref -- confirm both are always equal here
2142 indentation += m_evt_handler->m_curr->indref;
2146 _c4dbgpf(
"blck: style={} chomp={} indentation={}", s.begins_with(
'>') ?
"fold" :
"literal", chomp==
CHOMP_CLIP ?
"clip" : (chomp==
CHOMP_STRIP ?
"strip" :
"keep"), indentation);
// consume the rest of the header line
2149 _line_progressed(s.len);
// the raw block starts empty at the current offset and grows line by line
2154 substr raw_block(m_buf.data() + m_evt_handler->m_curr->pos.offset,
size_t(0));
2155 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, raw_block.begin() == m_evt_handler->m_curr->line_contents.full.str);
2163 size_t num_lines = 0;
2164 size_t first = m_evt_handler->m_curr->pos.line;
// indentation guess kept while only empty/blank lines have been seen
2165 size_t provisional_indentation =
npos;
2167 while(( ! _finished_file()))
// peek at the next line through a local LineContents (lc); it is only
// committed to the parser state further below
2170 lc.reset_with_next_line(m_buf, m_evt_handler->m_curr->pos.offset);
// NOTE(review): optimizer barrier applied only for GCC 12 and 13; presumably
// a workaround for a miscompilation on those versions -- confirm
2171 #if defined(__GNUC__) && (__GNUC__ == 12 || __GNUC__ == 13)
2172 C4_DONT_OPTIMIZE(lc.rem);
2174 _c4dbgpf(
"blck: peeking at [{}]~~~{}~~~", lc.rem.trimr(
"\r\n").len, lc.rem.trimr(
"\r\n"));
// case 1: the block indentation is already known
2176 if(indentation !=
npos)
2178 _c4dbgpf(
"blck: indentation={}", indentation);
// a non-blank line with less indentation than the block's
2180 if(lc.indentation < indentation && ( ! lc.rem.trim(
" \t").empty()))
2184 _c4dbgpf(
"blck: indentation decreased ref={} thisline={}", indentation, lc.indentation);
2188 _c4err(
"indentation decreased without any scalar");
2192 else if(indentation == 0)
2194 _c4dbgpf(
"blck: noindent. lc.rem=[{}]~~~{}~~~", lc.rem.len, lc.rem);
// with zero indentation, a document token ends the block
2195 if(_is_doc_token(lc.rem))
2197 _c4dbgp(
"blck: stop. indentation=0 and doc ended");
// case 2: the block indentation is not yet known
2204 const size_t fns = lc.rem.first_not_of(
' ');
2205 _c4dbgpf(
"blck: indentation ref not set. firstnonws={}", fns);
2209 if(C4_UNLIKELY(lc.full.begins_with(
'\t')))
2212 _c4dbgpf(
"blck: line not empty. indref={} indprov={} indentation={}", indref, provisional_indentation, lc.indentation);
2213 if(provisional_indentation ==
npos)
2215 if(lc.indentation < indref)
2217 _c4dbgpf(
"blck: block terminated indentation={} < indref={}", lc.indentation, indref);
2218 if(raw_block.len == 0)
2220 _c4dbgp(
"blck: was empty, undo next line");
2225 else if(lc.indentation == m_evt_handler->m_curr->indref)
2229 _c4dbgpf(
"blck: block terminated. reading container and indentation={}==indref={}", lc.indentation, m_evt_handler->m_curr->indref);
// the first non-empty line fixes the block indentation
2233 _c4dbgpf(
"blck: set indentation ref from this line: ref={}", lc.indentation);
2234 indentation = lc.indentation;
2238 if(lc.indentation >= provisional_indentation)
2240 _c4dbgpf(
"blck: set indentation ref from provisional indentation: provisional_ref={}, thisline={}", provisional_indentation, lc.indentation);
2242 indentation = lc.indentation;
// empty or all-space line: only the provisional indentation is updated
2253 _c4dbgpf(
"blck: line empty or {} spaces. line_indentation={} prov_indentation={}", lc.rem.len, lc.indentation, provisional_indentation);
2254 if(provisional_indentation !=
npos)
2256 if(lc.rem.len >= provisional_indentation)
2258 _c4dbgpf(
"blck: increase provisional_ref {} -> {}", provisional_indentation, lc.rem.len);
2259 provisional_indentation = lc.rem.len;
2261 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
2262 else if(lc.indentation >= provisional_indentation && lc.indentation !=
npos)
2264 _c4dbgpf(
"blck: increase provisional_ref {} -> {}", provisional_indentation, lc.indentation);
2265 provisional_indentation = lc.indentation;
// has_any(RSEQ|RVAL) is used here as a number (bool converted to 0 or 1)
2271 provisional_indentation = lc.indentation ? lc.indentation : has_any(
RSEQ|
RVAL);
2272 _c4dbgpf(
"blck: initialize provisional_ref={}", provisional_indentation);
2273 if(provisional_indentation ==
npos)
2275 provisional_indentation = lc.rem.len ? lc.rem.len : has_any(
RSEQ|
RVAL);
2276 _c4dbgpf(
"blck: initialize provisional_ref={}", provisional_indentation);
// the provisional indentation is never allowed below indref
2278 if(provisional_indentation < indref)
2280 provisional_indentation = indref;
2281 _c4dbgpf(
"blck: initialize provisional_ref={}", provisional_indentation);
// commit the peeked line: it becomes the current line and joins the block
2287 m_evt_handler->m_curr->line_contents = lc;
2288 _c4dbgpf(
"blck: append '{}'", m_evt_handler->m_curr->line_contents.rem);
2289 raw_block.len += m_evt_handler->m_curr->line_contents.full.len;
2290 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
2294 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.line == (first + num_lines) || (raw_block.len == 0));
2295 C4_UNUSED(num_lines);
// no content line fixed the indentation: fall back to the provisional one
2298 if(indentation ==
npos)
2300 _c4dbgpf(
"blck: set indentation from provisional: {}", provisional_indentation);
2301 indentation = provisional_indentation;
2307 _c4prscalar(
"scanned block", raw_block,
true);
2309 sb->scalar = raw_block;
2310 sb->indentation = indentation;
// Trace helper for the whitespace-filtering routines: prefixes each message
// with the processor's read and write positions (proc.rpos, proc.wpos).
2322 #define _c4dbgfws(fmt, ...) _c4dbgpf("filt_ws[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
// Variant expanding to nothing; presumably selected by a preprocessor
// conditional that is not visible in this view.
2324 #define _c4dbgfws(...)
// Handles a run of whitespace at the processor's current read position.
// Looks for the first non-whitespace character at or after proc.rpos (spaces
// and tabs when rpos > 0, spaces only when rpos == 0). When that character is
// a line break, the whitespace is trailing on its line and is skipped.
// @return bool; callers treat false as "only whitespace remains in the
//         source" (the return statements are not visible in this view).
2327 template<
class EventHandler>
2328 template<
class FilterProcessor>
2329 bool ParseEngine<EventHandler>::_filter_ws_handle_to_first_non_space(FilterProcessor &proc)
2331 _c4dbgfws(
"found whitespace '{}'", _c4prc(proc.curr()));
2332 _RYML_ASSERT_BASIC_(this->callbacks(), proc.curr() ==
' ' || proc.curr() ==
'\t');
2334 const size_t first_pos = proc.rpos > 0 ? proc.src.first_not_of(
" \t", proc.rpos) : proc.src.first_not_of(
' ', proc.rpos);
2335 if(first_pos !=
npos)
2337 const char first_char = proc.src[first_pos];
2338 _c4dbgfws(
"firstnonws='{}'@{}", _c4prc(first_char), first_pos);
// whitespace directly followed by a line break is dropped
2339 if(first_char ==
'\n' || first_char ==
'\r')
2341 _c4dbgfws(
"whitespace is trailing on line",
"");
2342 proc.skip(first_pos - proc.rpos);
2347 _c4dbgfws(
"legit whitespace. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
2351 _c4dbgfws(
"whitespace is trailing on line",
"");
// Processes whitespace at the current read position; when
// _filter_ws_handle_to_first_non_space() returns false, all remaining source
// characters (src.len - rpos) are copied to the output.
2355 template<
class EventHandler>
2356 template<
class FilterProcessor>
2357 void ParseEngine<EventHandler>::_filter_ws_copy_trailing(FilterProcessor &proc)
2359 if(!_filter_ws_handle_to_first_non_space(proc))
2361 _c4dbgfws(
"... everything else is trailing whitespace - copy {} chars", proc.src.len - proc.rpos);
2362 proc.copy(proc.src.len - proc.rpos);
// Processes whitespace at the current read position; when
// _filter_ws_handle_to_first_non_space() returns false, all remaining source
// characters (src.len - rpos) are skipped, i.e. left out of the output.
2366 template<
class EventHandler>
2367 template<
class FilterProcessor>
2368 void ParseEngine<EventHandler>::_filter_ws_skip_trailing(FilterProcessor &proc)
2370 if(!_filter_ws_handle_to_first_non_space(proc))
2372 _c4dbgfws(
"... everything else is trailing whitespace - skip {} chars", proc.src.len - proc.rpos);
2373 proc.skip(proc.src.len - proc.rpos);
// Trace helper for the plain-scalar filter: prefixes each message with the
// processor's read and write positions (proc.rpos, proc.wpos).
2387 #define _c4dbgfps(fmt, ...) _c4dbgpf("filt_plain[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
// Variant expanding to nothing; presumably selected by a preprocessor
// conditional that is not visible in this view.
2389 #define _c4dbgfps(fmt, ...)
// Handles a newline found while filtering a plain scalar. proc.curr() must
// be '\n'. Counts the newlines that follow it (taking `indentation` into
// account); when there are any, that many '\n' are written. Per the trace
// messages, a single newline is converted to a space, and a last newline
// followed only by whitespace is treated separately.
// NOTE(review): the branch conditions and the writes for the single/last
// newline cases are not visible in this view.
2392 template<
class EventHandler>
2393 template<
class FilterProcessor>
2394 void ParseEngine<EventHandler>::_filter_nl_plain(FilterProcessor &C4_RESTRICT proc,
size_t indentation)
2396 _RYML_ASSERT_BASIC_(this->callbacks(), proc.curr() ==
'\n');
2398 _c4dbgfps(
"found newline. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
// ii is passed by pointer to _count_following_newlines (presumably advanced
// past the counted newlines -- confirm)
2399 size_t ii = proc.rpos;
2400 const size_t numnl_following = _count_following_newlines(proc.src, &ii, indentation);
2403 proc.set(
'\n', numnl_following);
2404 _c4dbgfps(
"{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ?
"in the middle" :
"at the end", proc.rpos-ii);
2408 const size_t ret = proc.src.first_not_of(
" \t", proc.rpos+1);
2412 _c4dbgfps(
"single newline. convert to space. ret={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2416 _c4dbgfps(
"last newline, everything else is whitespace. ii={}/{}", ii, proc.src.len);
// Filters a plain scalar through `proc`, one character at a time, until the
// processor has no more characters.
// Per the visible dispatch: whitespace goes through
// _filter_ws_skip_trailing(), newlines through _filter_nl_plain(), and
// carriage returns are ignored.
// @param indentation must not be npos; forwarded to _filter_nl_plain().
// @return proc.result().
// NOTE(review): the dispatch conditions and the default-character handling
// are not visible in this view.
2423 template<
class EventHandler>
2424 template<
class FilterProcessor>
2425 auto ParseEngine<EventHandler>::_filter_plain(FilterProcessor &C4_RESTRICT proc,
size_t indentation) -> decltype(proc.result())
2427 _RYML_ASSERT_BASIC_(this->callbacks(), indentation !=
npos);
2428 _c4dbgfps(
"before=[{}]~~~{}~~~", proc.src.len, proc.src);
2430 while(proc.has_more_chars())
2432 const char curr = proc.curr();
2433 _c4dbgfps(
"'{}', sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
2438 _c4dbgfps(
"whitespace", curr);
2439 _filter_ws_skip_trailing(proc);
2442 _c4dbgfps(
"newline", curr);
2443 _filter_nl_plain(proc, indentation);
2446 _c4dbgfps(
"carriage return, ignore", curr);
2455 _c4dbgfps(
"after[{}]=~~~{}~~~", proc.wpos, proc.sofar());
2457 return proc.result();
// Plain-scalar filter entry point that reads from `scalar` and writes to
// `dst` through a FilterProcessorSrcDst, delegating to _filter_plain().
// NOTE(review): the function signature is not visible in this view.
2463 template<
class EventHandler>
2466 FilterProcessorSrcDst proc(scalar, dst);
2467 return _filter_plain(proc, indentation);
// Plain-scalar filter entry point operating on `dst` (with capacity `cap`)
// through a FilterProcessorInplaceEndExtending, delegating to _filter_plain().
// NOTE(review): the function signature is not visible in this view.
2470 template<
class EventHandler>
2473 FilterProcessorInplaceEndExtending proc(dst, cap);
2474 return _filter_plain(proc, indentation);
// Trace helper for the single-quoted scalar filter: prefixes each message
// with the processor's read and write positions (proc.rpos, proc.wpos).
2485 #define _c4dbgfsq(fmt, ...) _c4dbgpf("filt_squo[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
// Variant expanding to nothing; presumably selected by a preprocessor
// conditional that is not visible in this view.
2487 #define _c4dbgfsq(fmt, ...)
// Handles a newline found while filtering a single-quoted scalar.
// proc.curr() must be '\n'. Counts the newlines that follow it; when there
// are any, that many '\n' are written. Per the trace messages, a single
// newline is converted to a space.
// NOTE(review): the branch conditions and the writes for the single-newline
// cases are not visible in this view.
2490 template<
class EventHandler>
2491 template<
class FilterProcessor>
2492 void ParseEngine<EventHandler>::_filter_nl_squoted(FilterProcessor &C4_RESTRICT proc)
2494 _RYML_ASSERT_BASIC_(this->callbacks(), proc.curr() ==
'\n');
2496 _c4dbgfsq(
"found newline. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
// ii is passed by pointer to _count_following_newlines (presumably advanced
// past the counted newlines -- confirm)
2497 size_t ii = proc.rpos;
2498 const size_t numnl_following = _count_following_newlines(proc.src, &ii);
2501 proc.set(
'\n', numnl_following);
2502 _c4dbgfsq(
"{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ?
"in the middle" :
"at the end", proc.rpos-ii);
2506 const size_t ret = proc.src.first_not_of(
" \t", proc.rpos+1);
2510 _c4dbgfsq(
"single newline. convert to space. ret={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2515 _c4dbgfsq(
"single newline. convert to space. ii={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
// Filters a single-quoted scalar through `proc`, one character at a time,
// until the processor has no more characters.
// Per the visible dispatch: whitespace goes through
// _filter_ws_copy_trailing(), newlines through _filter_nl_squoted(), carriage
// returns are skipped, and a quote followed by another quote is detected as
// "two consecutive squotes".
// @return proc.result().
// NOTE(review): the dispatch conditions, the handling of the doubled quote
// and the default-character handling are not visible in this view.
2521 template<
class EventHandler>
2522 template<
class FilterProcessor>
2523 auto ParseEngine<EventHandler>::_filter_squoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result())
2525 _c4dbgfsq(
"before=[{}]~~~{}~~~", proc.src.len, proc.src);
2529 while(proc.has_more_chars())
2531 const char curr = proc.curr();
2532 _c4dbgfsq(
"'{}', sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
2537 _c4dbgfsq(
"whitespace", curr);
2538 _filter_ws_copy_trailing(proc);
2541 _c4dbgfsq(
"newline", curr);
2542 _filter_nl_squoted(proc);
2545 _c4dbgfsq(
"skip cr", curr);
2549 _c4dbgfsq(
"squote", curr);
2550 if(proc.next() ==
'\'')
2552 _c4dbgfsq(
"two consecutive squotes", curr);
2567 _c4dbgfsq(
": #filteredchars={} after=~~~[{}]{}~~~", proc.src.len-proc.sofar().len, proc.sofar().len, proc.sofar());
2569 return proc.result();
// Single-quoted filter entry point that reads from `scalar` and writes to
// `dst` through a FilterProcessorSrcDst, delegating to _filter_squoted().
// NOTE(review): the function signature is not visible in this view.
2574 template<
class EventHandler>
2577 FilterProcessorSrcDst proc(scalar, dst);
2578 return _filter_squoted(proc);
// Single-quoted filter entry point operating on `dst` (with capacity `cap`)
// through a FilterProcessorInplaceEndExtending, delegating to
// _filter_squoted().
// NOTE(review): the function signature is not visible in this view.
2581 template<
class EventHandler>
2584 FilterProcessorInplaceEndExtending proc(dst, cap);
2585 return _filter_squoted(proc);
// Trace helper for the double-quoted scalar filter: prefixes each message
// with the processor's read and write positions (proc.rpos, proc.wpos).
2596 #define _c4dbgfdq(fmt, ...) _c4dbgpf("filt_dquo[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
// Variant expanding to nothing; presumably selected by a preprocessor
// conditional that is not visible in this view.
2598 #define _c4dbgfdq(...)
// Handles a newline found while filtering a double-quoted scalar.
// proc.curr() must be '\n'. Counts the newlines that follow it; when there
// are any, that many '\n' are written. Per the trace messages, a single
// newline is converted to a space. Afterwards, a backslash at position ii
// that is followed by a space or tab gets special treatment ("extend skip to
// backslash").
// NOTE(review): the branch conditions and most writes/skips are not visible
// in this view.
2601 template<
class EventHandler>
2602 template<
class FilterProcessor>
2603 void ParseEngine<EventHandler>::_filter_nl_dquoted(FilterProcessor &C4_RESTRICT proc)
2605 _RYML_ASSERT_BASIC_(this->callbacks(), proc.curr() ==
'\n');
2607 _c4dbgfdq(
"found newline. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
// ii is passed by pointer to _count_following_newlines (presumably advanced
// past the counted newlines -- confirm)
2608 size_t ii = proc.rpos;
2609 const size_t numnl_following = _count_following_newlines(proc.src, &ii);
2612 proc.set(
'\n', numnl_following);
2613 _c4dbgfdq(
"{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ?
"in the middle" :
"at the end", proc.rpos-ii);
2617 const size_t ret = proc.src.first_not_of(
" \t", proc.rpos+1);
2621 _c4dbgfdq(
"single newline. convert to space. ret={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2626 _c4dbgfdq(
"single newline. convert to space. ii={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2628 if(ii < proc.src.len && proc.src.str[ii] ==
'\\')
2630 _c4dbgfdq(
"backslash at [{}]", ii);
// '\0' stands in for "no next character" at the end of the source
2631 const char next = ii+1 < proc.src.len ? proc.src.str[ii+1] :
'\0';
2632 if(next ==
' ' || next ==
'\t')
2634 _c4dbgfdq(
"extend skip to backslash",
"");
// Translates one backslash escape of a double-quoted scalar. The processor's
// current character is the backslash and `next` is the character after it.
// Visible translations: \" \/ \<space> \<tab> (kept as the character itself),
// \n \r \t \\ \0 \b \f \a \v \e, the numeric forms \xHH, \uHHHH and
// \UHHHHHHHH (decoded to UTF-8 bytes), and \_ \N \L \P (fixed UTF-8 byte
// sequences).
// Raises an error for truncated or unparsable numeric escapes and for an
// unknown character after the backslash.
// NOTE(review): some branch conditions (including the first one, and the
// declaration of readbuf) are not visible in this view.
2642 template<
class EventHandler>
2643 template<
class FilterProcessor>
2644 void ParseEngine<EventHandler>::_filter_dquoted_backslash(FilterProcessor &C4_RESTRICT proc)
2646 char next = proc.next();
2647 _c4dbgfdq(
"backslash, next='{}'", _c4prc(next));
// a backslash followed by "\r\n" is treated like a backslash followed by '\n'
2650 if(proc.rpos+2 < proc.src.len && proc.src.str[proc.rpos+2] ==
'\n')
2654 _c4dbgfdq(
"[{}]: was \\r\\n, now next='\\n'", proc.rpos);
// scan forward from the character after the escape over spaces and tabs,
// then skip everything up to ii
2660 size_t ii = proc.rpos + 2;
2661 for( ; ii < proc.src.len; ++ii)
2664 if(proc.src.str[ii] ==
' ' || proc.src.str[ii] ==
'\t')
2669 proc.skip(ii - proc.rpos);
// escapes that stand for the escaped character itself
2671 else if(next ==
'"' || next ==
'/' || next ==
' ' || next ==
'\t')
2674 proc.translate_esc(next);
2675 _c4dbgfdq(
"here, used '{}'", _c4prc(next));
2677 else if(next ==
'\r')
2681 else if(next ==
'n')
2683 proc.translate_esc(
'\n');
2685 else if(next ==
'r')
2687 proc.translate_esc(
'\r');
2689 else if(next ==
't')
2691 proc.translate_esc(
'\t');
2693 else if(next ==
'\\')
2695 proc.translate_esc(
'\\');
// \xHH: two hex digits must follow the 'x' (which sits at rpos+1)
2697 else if(next ==
'x')
2699 if(C4_UNLIKELY(proc.rpos + 1u + 2u >= proc.src.len))
2700 _c4err(
"\\x requires 2 hex digits. scalar pos={}", proc.rpos);
2702 csubstr codepoint = proc.src.sub(proc.rpos + 2u, 2u);
2703 _c4dbgfdq(
"utf8 ~~~{}~~~ rpos={} rem=~~~{}~~~", codepoint, proc.rpos, proc.src.sub(proc.rpos));
2704 uint32_t codepoint_val = {};
2705 if(C4_UNLIKELY(!
read_hex(codepoint, &codepoint_val)))
2706 _c4err(
"failed to read \\x codepoint. scalar pos={}", proc.rpos);
2707 const size_t numbytes =
decode_code_point((uint8_t*)readbuf,
sizeof(readbuf), codepoint_val);
2708 if(C4_UNLIKELY(numbytes == 0))
2709 _c4err(
"failed to decode code point={}", proc.rpos);
2710 _RYML_ASSERT_BASIC_(callbacks(), numbytes <= 4);
// 3u = 'x' plus the two hex digits (presumably the count of source
// characters consumed after the backslash -- confirm against the processor)
2711 proc.translate_esc_bulk(readbuf, numbytes, 3u);
2712 _c4dbgfdq(
"utf8 after rpos={} rem=~~~{}~~~", proc.rpos, proc.src.sub(proc.rpos));
// \uHHHH: four hex digits must follow the 'u'
2714 else if(next ==
'u')
2716 if(C4_UNLIKELY(proc.rpos + 1u + 4u >= proc.src.len))
2717 _c4err(
"\\u requires 4 hex digits. scalar pos={}", proc.rpos);
2719 csubstr codepoint = proc.src.sub(proc.rpos + 2u, 4u);
2720 uint32_t codepoint_val = {};
2721 if(C4_UNLIKELY(!
read_hex(codepoint, &codepoint_val)))
2722 _c4err(
"failed to parse \\u codepoint. scalar pos={}", proc.rpos);
2723 const size_t numbytes =
decode_code_point((uint8_t*)readbuf,
sizeof(readbuf), codepoint_val);
2724 if(C4_UNLIKELY(numbytes == 0))
2725 _c4err(
"failed to decode code point={}", proc.rpos);
2726 _RYML_ASSERT_BASIC_(callbacks(), numbytes <= 4);
2727 proc.translate_esc_bulk(readbuf, numbytes, 5u);
// \UHHHHHHHH: eight hex digits must follow the 'U'
2729 else if(next ==
'U')
2731 if(C4_UNLIKELY(proc.rpos + 1u + 8u >= proc.src.len))
2732 _c4err(
"\\U requires 8 hex digits. scalar pos={}", proc.rpos);
2734 csubstr codepoint = proc.src.sub(proc.rpos + 2u, 8u);
2735 uint32_t codepoint_val = {};
2736 if(C4_UNLIKELY(!
read_hex(codepoint, &codepoint_val)))
2737 _c4err(
"failed to parse \\U codepoint. scalar pos={}", proc.rpos);
2738 const size_t numbytes =
decode_code_point((uint8_t*)readbuf,
sizeof(readbuf), codepoint_val);
2739 if(C4_UNLIKELY(numbytes == 0))
2740 _c4err(
"failed to decode code point={}", proc.rpos);
2741 _RYML_ASSERT_BASIC_(callbacks(), numbytes <= 4);
2742 proc.translate_esc_bulk(readbuf, numbytes, 9u);
// single-character control escapes
2745 else if(next ==
'0')
2747 proc.translate_esc(
'\0');
2749 else if(next ==
'b')
2751 proc.translate_esc(
'\b');
2753 else if(next ==
'f')
2755 proc.translate_esc(
'\f');
2757 else if(next ==
'a')
2759 proc.translate_esc(
'\a');
2761 else if(next ==
'v')
2763 proc.translate_esc(
'\v');
2765 else if(next ==
'e')
2767 proc.translate_esc(
'\x1b');
// \_ : bytes 0xC2 0xA0, the UTF-8 encoding of U+00A0 (non-breaking space)
2769 else if(next ==
'_')
2772 const char payload[] = {
2773 _RYML_CHCONST(-0x3e, 0xc2),
2774 _RYML_CHCONST(-0x60, 0xa0),
2776 proc.translate_esc_bulk(payload, 2, 1);
// \N : bytes 0xC2 0x85, the UTF-8 encoding of U+0085 (next line)
2778 else if(next ==
'N')
2781 const char payload[] = {
2782 _RYML_CHCONST(-0x3e, 0xc2),
2783 _RYML_CHCONST(-0x7b, 0x85),
2785 proc.translate_esc_bulk(payload, 2, 1);
// \L : bytes 0xE2 0x80 0xA8, the UTF-8 encoding of U+2028 (line separator);
// the 3-byte output is longer than the 2-character escape, hence the
// "extending" call
2787 else if(next ==
'L')
2790 const char payload[] = {
2791 _RYML_CHCONST(-0x1e, 0xe2),
2792 _RYML_CHCONST(-0x80, 0x80),
2793 _RYML_CHCONST(-0x58, 0xa8),
2795 proc.translate_esc_extending(payload, 3, 1);
// \P : bytes 0xE2 0x80 0xA9, the UTF-8 encoding of U+2029 (paragraph
// separator)
2797 else if(next ==
'P')
2800 const char payload[] = {
2801 _RYML_CHCONST(-0x1e, 0xe2),
2802 _RYML_CHCONST(-0x80, 0x80),
2803 _RYML_CHCONST(-0x57, 0xa9),
2805 proc.translate_esc_extending(payload, 3, 1);
2807 else if(next ==
'\0')
2813 _c4err(
"unknown character '{}' after '\\' pos={}", _c4prc(next), proc.rpos);
2815 _c4dbgfdq(
"backslash...sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
// Filters a double-quoted scalar through `proc`, one character at a time,
// until the processor has no more characters.
// Per the visible dispatch: whitespace goes through
// _filter_ws_copy_trailing(), newlines through _filter_nl_dquoted(), carriage
// returns are ignored, and backslash escapes are translated by
// _filter_dquoted_backslash().
// @return proc.result().
// NOTE(review): the dispatch conditions and the default-character handling
// are not visible in this view.
2819 template<
class EventHandler>
2820 template<
class FilterProcessor>
2821 auto ParseEngine<EventHandler>::_filter_dquoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result())
2823 _c4dbgfdq(
"before=[{}]~~~{}~~~", proc.src.len, proc.src);
2826 while(proc.has_more_chars())
2828 const char curr = proc.curr();
2829 _c4dbgfdq(
"'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
2835 _c4dbgfdq(
"whitespace", curr);
2836 _filter_ws_copy_trailing(proc);
2841 _c4dbgfdq(
"newline", curr);
2842 _filter_nl_dquoted(proc);
2847 _c4dbgfdq(
"carriage return, ignore", curr);
2853 _filter_dquoted_backslash(proc);
2863 _c4dbgfdq(
"after[{}]=~~~{}~~~", proc.wpos, proc.sofar());
2864 return proc.result();
// Source-to-destination variant of double-quoted filtering: wraps
// (scalar, dst) in a FilterProcessorSrcDst and runs _filter_dquoted() on it.
// NOTE(review): presumably this is filter_scalar_dquoted(scalar, dst), as
// called from _filter_scalar_dquot() -- confirm.
2870 template<
class EventHandler>
2873 FilterProcessorSrcDst proc(scalar, dst);
2874 return _filter_dquoted(proc);
// In-place variant of double-quoted filtering: wraps (dst, cap) in a
// FilterProcessorInplaceMidExtending and runs _filter_dquoted() on it.
// NOTE(review): presumably `cap` is the capacity available behind `dst`, so
// the result may grow beyond the scalar's length -- confirm.
2877 template<
class EventHandler>
2880 FilterProcessorInplaceMidExtending proc(dst, cap);
2881 return _filter_dquoted(proc);
// Searches `s` backwards for a '\n' whose following text starts with more
// than `indentation` spaces; a tail made only of spaces counts with its full
// length. The search starts `indentation + 1` characters before the end of s.
// NOTE(review): presumably returns the index of that newline, or npos when s
// is too short or nothing matches -- confirm against the return statements.
2890 C4_NO_INLINE
inline size_t _find_last_newline_and_larger_indentation(csubstr s,
size_t indentation) noexcept
2892 if(indentation + 1 > s.len)
// unsigned countdown: the loop ends once i wraps around past index 0
2894 for(
size_t i = s.len-indentation-1; i !=
size_t(-1); --i)
2896 if(s.str[i] ==
'\n')
2898 csubstr rem = s.sub(i + 1);
2899 size_t first = rem.first_not_of(
' ');
// no non-space character after the newline: treat the whole tail as indentation
2900 first = (first !=
npos) ? first : rem.len;
2901 if(first > indentation)
// Applies block-scalar chomping to the trailing part of the scalar held by
// `proc`. On entry the unread remainder must contain only spaces, '\n' and
// '\r' (asserted). The trace messages name three modes handled here:
// CLIP, KEEP and STRIP.
//   proc        : filter processor positioned at the start of the trailing whitespace
//   chomp       : chomping mode
//   indentation : indentation of the block scalar, in spaces
2908 template<
class EventHandler>
2909 template<
class FilterProcessor>
2910 void ParseEngine<EventHandler>::_filter_chomp(FilterProcessor &C4_RESTRICT proc,
BlockChomp_e chomp,
size_t indentation)
2913 _RYML_ASSERT_BASIC_(this->callbacks(), proc.rem().first_not_of(
" \n\r") ==
npos);
// function-local trace helper; the second definition expands to nothing
2917 #define _c4dbgchomp(fmt, ...) _c4dbgpf("chomp[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2919 #define _c4dbgchomp(...)
// look in the remainder for the last line indented deeper than `indentation`
2924 size_t last = _find_last_newline_and_larger_indentation(proc.rem(), indentation);
2927 _c4dbgchomp(
"found newline and larger indentation. last={}", last);
// turn the offset (relative to proc.rem()) into an absolute source position:
// one past the newline, plus the indentation of that line
2928 last = proc.rpos + last + size_t(1) + indentation;
2929 _RYML_ASSERT_BASIC_(this->callbacks(), last <= proc.src.len);
2931 while((proc.rpos < last) && proc.has_more_chars())
2933 const char curr = proc.curr();
2934 _c4dbgchomp(
"curr='{}'", _c4prc(curr));
2939 _c4dbgchomp(
"newline! remlen={}", proc.rem().len);
2942 csubstr at_next_line = proc.rem();
2943 if(at_next_line.begins_with(
' '))
2945 _c4dbgchomp(
"next line begins with spaces. indentation={}", indentation);
2947 size_t first_non_space = at_next_line.first_not_of(
' ');
2948 _c4dbgchomp(
"first_non_space={}", first_non_space);
2949 if(first_non_space ==
npos)
2951 _c4dbgchomp(
"{} spaces, to the end", at_next_line.len);
2952 first_non_space = at_next_line.len;
2954 if(first_non_space <= indentation)
2956 _c4dbgchomp(
"skip spaces={}<=indentation={}", first_non_space, indentation);
2957 proc.skip(first_non_space);
2961 _c4dbgchomp(
"skip indentation={}<spaces={}", indentation, first_non_space);
// drop the indentation, keep the spaces beyond it
2962 proc.skip(indentation);
2964 _c4dbgchomp(
"copy {}={}-{} spaces", first_non_space - indentation, first_non_space, indentation);
2965 proc.copy(first_non_space - indentation);
2986 bool had_one =
false;
2987 while(proc.has_more_chars())
2989 const char curr = proc.curr();
2990 _c4dbgchomp(
"CLIP: '{}'", _c4prc(curr));
2995 _c4dbgchomp(
"copy newline!", curr);
3003 _c4dbgchomp(
"skip!", curr);
3010 _c4dbgchomp(
"chomp=CLIP: add missing newline @{}", proc.wpos);
3017 _c4dbgchomp(
"chomp=KEEP: copy all remaining new lines of {} characters", proc.rem().len);
3018 while(proc.has_more_chars())
3020 const char curr = proc.curr();
3021 _c4dbgchomp(
"KEEP: '{}'", _c4prc(curr));
3025 _c4dbgchomp(
"copy newline!", curr);
3030 _c4dbgchomp(
"skip!", curr);
3039 _c4dbgchomp(
"chomp=STRIP: strip {} characters", proc.rem().len);
// Trace helper for the block-scalar filtering helpers: prefixes each message
// with the processor's read and write positions. The second definition
// expands to nothing.
3051 #define _c4dbgfb(fmt, ...) _c4dbgpf("filt_block[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3053 #define _c4dbgfb(...)
// Skips the indentation spaces found at the processor's current read
// position. The number of leading spaces in proc.rem() is counted with
// first_not_of(' ') and compared with `indentation`; when at least
// `indentation` spaces are present, exactly `indentation` are skipped.
// NOTE(review): with fewer spaces than `indentation`, the trace message
// suggests only those spaces are skipped -- confirm.
3056 template<
class EventHandler>
3057 template<
class FilterProcessor>
3058 void ParseEngine<EventHandler>::_filter_block_indentation(FilterProcessor &C4_RESTRICT proc,
size_t indentation)
3060 csubstr rem = proc.rem();
3063 size_t first = rem.first_not_of(
' ');
3066 _c4dbgfb(
"{} spaces follow before next nonws character", first);
3067 if(first < indentation)
3069 _c4dbgfb(
"skip {}<{} spaces from indentation", first, indentation);
3074 _c4dbgfb(
"skip {} spaces from indentation", indentation);
3075 proc.skip(indentation);
// the handling below (remainder is all spaces) is compiled only when
// RYML_NO_COVERAGE__TO_BE_DELETED is defined
3078 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
3081 _c4dbgfb(
"all spaces to the end: {} spaces", first);
3085 if(first < indentation)
3087 _c4dbgfb(
"skip everything", first);
3088 proc.skip(proc.src.len - proc.rpos);
3092 _c4dbgfb(
"skip {} spaces from indentation", indentation);
3093 proc.skip(indentation);
// Measures the block scalar's contents after trimming trailing ' ', '\n'
// and '\r' from the right, and returns that length. Per the trace messages,
// a scalar that is entirely whitespace is dealt with here, and with
// chomp=KEEP its newlines are retained.
// NOTE(review): callers presumably treat a zero return as "already done"
// and return proc.result() immediately -- confirm.
3101 template<
class EventHandler>
3102 template<
class FilterProcessor>
3103 size_t ParseEngine<EventHandler>::_handle_all_whitespace(FilterProcessor &C4_RESTRICT proc,
BlockChomp_e chomp)
3105 csubstr contents = proc.src.trimr(
" \n\r");
3106 _c4dbgfb(
"ws: contents_len={} wslen={}", contents.len, proc.src.len-contents.len);
3109 _c4dbgfb(
"ws: all whitespace: len={}", proc.src.len);
3112 _c4dbgfb(
"ws: chomp=KEEP all {} newlines", proc.src.count(
'\n'));
3113 while(proc.has_more_chars())
3115 const char curr = proc.curr();
3127 return contents.len;
// Extends `contents_len` forward to the first '\n' found at or after that
// position in proc.src; per the trace messages, when no newline follows it
// is extended to proc.src.len instead. Returns the adjusted length.
// Requires contents_len > 0 (asserted).
3130 template<
class EventHandler>
3131 template<
class FilterProcessor>
3132 size_t ParseEngine<EventHandler>::_extend_to_chomp(FilterProcessor &C4_RESTRICT proc,
size_t contents_len)
3134 _c4dbgfb(
"contents_len={}", contents_len);
3136 _RYML_ASSERT_BASIC_(this->callbacks(), contents_len > 0u);
3140 size_t firstnewl = proc.src.first_of(
'\n', contents_len);
3141 if(firstnewl !=
npos)
3143 contents_len = firstnewl;
3144 _c4dbgfb(
"contents_len={} <--- firstnewl={}", contents_len, firstnewl);
3148 contents_len = proc.src.len;
3149 _c4dbgfb(
"contents_len={} <--- src.len={}", contents_len, proc.src.len);
3152 return contents_len;
// Trace helper for literal block filtering: prefixes each message with the
// processor's read and write positions. The second definition expands to
// nothing.
3164 #define _c4dbgfbl(fmt, ...) _c4dbgpf("filt_block_lit[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3166 #define _c4dbgfbl(...)
// Filters a literal block scalar through `proc` and returns proc.result().
// Steps visible here: (1) _handle_all_whitespace() measures the contents,
// (2) _extend_to_chomp() extends that length to the end of the last content
// line, (3) indentation is skipped at the start and again after each newline
// while characters below contents_len remain, (4) _filter_chomp() processes
// the trailing whitespace according to `chomp`.
3169 template<
class EventHandler>
3170 template<
class FilterProcessor>
3171 auto ParseEngine<EventHandler>::_filter_block_literal(FilterProcessor &C4_RESTRICT proc,
size_t indentation,
BlockChomp_e chomp) -> decltype(proc.result())
3173 _c4dbgfbl(
"indentation={} before=[{}]~~~{}~~~", indentation, proc.src.len, proc.src);
3175 size_t contents_len = _handle_all_whitespace(proc, chomp);
3177 return proc.result();
3179 contents_len = _extend_to_chomp(proc, contents_len);
3181 _c4dbgfbl(
"to filter=[{}]~~~{}~~~", contents_len, proc.src.first(contents_len));
3183 _filter_block_indentation(proc, indentation);
3186 while(proc.has_more_chars(contents_len))
3188 const char curr = proc.curr();
3189 _c4dbgfbl(
"'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3194 _c4dbgfbl(
"found newline. skip indentation on the next line", curr);
3196 _filter_block_indentation(proc, indentation);
3208 _c4dbgfbl(
"before chomp: #tochomp={} sofar=[{}]~~~{}~~~", proc.rem().len, proc.sofar().len, proc.sofar());
3210 _filter_chomp(proc, chomp, indentation);
3212 _c4dbgfbl(
"final=[{}]~~~{}~~~", proc.sofar().len, proc.sofar());
3214 return proc.result();
// Source-to-destination variant of literal block filtering: wraps
// (scalar, dst) in a FilterProcessorSrcDst and runs _filter_block_literal().
// NOTE(review): presumably this is filter_scalar_block_literal() -- confirm.
3219 template<
class EventHandler>
3222 FilterProcessorSrcDst proc(scalar, dst);
3223 return _filter_block_literal(proc, indentation, chomp);
// In-place variant of literal block filtering: wraps (scalar, cap) in a
// FilterProcessorInplaceEndExtending and runs _filter_block_literal().
// NOTE(review): presumably this is filter_scalar_block_literal_in_place(),
// as called from _filter_scalar_literal() -- confirm.
3226 template<
class EventHandler>
3229 FilterProcessorInplaceEndExtending proc(scalar, cap);
3230 return _filter_block_literal(proc, indentation, chomp);
// Trace helper for folded block filtering: prefixes each message with the
// processor's read and write positions. The second definition expands to
// nothing.
3240 #define _c4dbgfbf(fmt, ...) _c4dbgpf("filt_block_folded[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3242 #define _c4dbgfbf(...)
// Handles the leading lines of a folded block scalar. Skips the indentation,
// then loops while characters below `len` remain: after a newline the
// indentation is skipped again; on a space, the extent of the leading
// whitespace (" \t") is measured and _filter_block_folded_indented_block()
// takes over; any other character ends the loop (per the trace message).
3246 template<
class EventHandler>
3247 template<
class FilterProcessor>
3248 void ParseEngine<EventHandler>::_filter_block_folded_newlines_leading(FilterProcessor &C4_RESTRICT proc,
size_t indentation,
size_t len)
3250 _filter_block_indentation(proc, indentation);
3251 while(proc.has_more_chars(len))
3253 const char curr = proc.curr();
3254 _c4dbgfbf(
"'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3258 _c4dbgfbf(
"newline.", curr);
3260 _filter_block_indentation(proc, indentation);
3268 size_t first = proc.rem().first_not_of(
" \t");
3269 _c4dbgfbf(
"space. first={}", first);
3271 first = proc.rem().len;
3272 _c4dbgfbf(
"... indentation increased to {}", first);
3273 _filter_block_folded_indented_block(proc, indentation, len, first);
3277 _c4dbgfbf(
"newl leading: not space, not newline. stop.", 0);
// Folds one newline of a run of newlines in a folded block scalar.
// Per the trace messages: the first newline of a run becomes a space and the
// write position is remembered in wpos_at_first_newl; on the second newline
// that space is rewritten to '\n' with proc.set_at(); later newlines are
// copied. Returns the (possibly updated) wpos_at_first_newl so the caller
// can pass it back in on the next call.
//   num_newl           : count of newlines seen so far in the run, including this one
//   wpos_at_first_newl : write position of the run's first newline, or npos
3283 template<
class EventHandler>
3284 template<
class FilterProcessor>
3285 size_t ParseEngine<EventHandler>::_filter_block_folded_newlines_compress(FilterProcessor &C4_RESTRICT proc,
size_t num_newl,
size_t wpos_at_first_newl)
3290 _c4dbgfbf(
"... this is the first newline. turn into space. wpos={}", proc.wpos);
3291 wpos_at_first_newl = proc.wpos;
3296 _c4dbgfbf(
"... this is the second newline. prev space (at wpos={}) must be newline", wpos_at_first_newl);
3297 _RYML_ASSERT_BASIC_(this->callbacks(), wpos_at_first_newl !=
npos);
3298 _RYML_ASSERT_BASIC_(this->callbacks(), proc.sofar()[wpos_at_first_newl] ==
' ');
3299 _RYML_ASSERT_BASIC_(this->callbacks(), wpos_at_first_newl + 1u == proc.wpos);
3301 proc.set_at(wpos_at_first_newl,
'\n');
3302 _RYML_ASSERT_BASIC_(this->callbacks(), proc.sofar()[wpos_at_first_newl] ==
'\n');
3305 _c4dbgfbf(
"... subsequent newline (num_newl={}). copy", num_newl);
3309 return wpos_at_first_newl;
// Processes a run of newlines inside a folded block scalar; must be entered
// with proc.curr() == '\n' (asserted). Each newline is passed to
// _filter_block_folded_newlines_compress() and followed by an indentation
// skip. When a more-indented line follows, the space written for the first
// newline is turned back into '\n' and _filter_block_folded_indented_block()
// handles that line, after which the first-newline position is reset to
// npos. Per the trace message, any other character ends the loop.
3312 template<
class EventHandler>
3313 template<
class FilterProcessor>
3314 void ParseEngine<EventHandler>::_filter_block_folded_newlines(FilterProcessor &C4_RESTRICT proc,
size_t indentation,
size_t len)
3316 _RYML_ASSERT_BASIC_(this->callbacks(), proc.curr() ==
'\n');
3317 size_t num_newl = 0;
3318 size_t wpos_at_first_newl =
npos;
3319 while(proc.has_more_chars(len))
3321 const char curr = proc.curr();
3322 _c4dbgfbf(
"'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3327 _c4dbgfbf(
"newline. sofar={}", num_newl);
3363 wpos_at_first_newl = _filter_block_folded_newlines_compress(proc, ++num_newl, wpos_at_first_newl);
3364 _filter_block_indentation(proc, indentation);
3370 size_t first = proc.rem().first_not_of(
" \t");
3371 _c4dbgfbf(
"space. first={}", first);
3373 first = proc.rem().len;
3374 _c4dbgfbf(
"... indentation increased to {}", first);
3377 _c4dbgfbf(
"... prev space (at wpos={}) must be newline", wpos_at_first_newl);
3378 proc.set_at(wpos_at_first_newl,
'\n');
3382 _c4dbgfbf(
"... add missing newline", wpos_at_first_newl);
3385 _filter_block_folded_indented_block(proc, indentation, len, first);
3387 wpos_at_first_newl =
npos;
3394 _c4dbgfbf(
"not space, not newline. stop.", 0);
// Handles a more-indented run of lines inside a folded block scalar.
// First copies `curr_indentation` leading whitespace characters of the
// current line, then loops while characters below `len` remain. After each
// newline the block indentation is skipped and the next line's leading
// spaces are examined to decide whether the indented block continues.
// On entry, the leading " \t" run of proc.rem() must equal curr_indentation,
// or the remainder must be entirely whitespace (asserted).
3401 template<
class EventHandler>
3402 template<
class FilterProcessor>
3403 void ParseEngine<EventHandler>::_filter_block_folded_indented_block(FilterProcessor &C4_RESTRICT proc,
size_t indentation,
size_t len,
size_t curr_indentation) noexcept
3405 _RYML_ASSERT_BASIC_(this->callbacks(), (proc.rem().first_not_of(
" \t") == curr_indentation) || (proc.rem().first_not_of(
" \t") ==
npos));
3406 if(curr_indentation)
3407 proc.copy(curr_indentation);
3408 while(proc.has_more_chars(len))
3410 const char curr = proc.curr();
3411 _c4dbgfbf(
"'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3417 _filter_block_indentation(proc, indentation);
3418 csubstr rem = proc.rem();
3419 const size_t first = rem.first_not_of(
' ');
3420 _c4dbgfbf(
"newline. firstns={}", first);
3423 const char c = rem[first];
3424 _c4dbgfbf(
"firstns={}='{}'", first, _c4prc(c));
3425 if(c ==
'\n' || c ==
'\r')
3431 _c4dbgfbf(
"done with indented block", first);
3435 else if(first !=
npos)
3438 _c4dbgfbf(
"copy all {} spaces", first);
// Filters a folded block scalar through `proc` and returns proc.result().
// Steps visible here: (1) _handle_all_whitespace() measures the contents,
// (2) _extend_to_chomp() extends that length to the end of the last content
// line, (3) _filter_block_folded_newlines_leading() handles the leading
// lines, (4) the main loop hands newline runs to
// _filter_block_folded_newlines(), (5) _filter_chomp() processes the
// trailing whitespace according to `chomp`.
3456 template<
class EventHandler>
3457 template<
class FilterProcessor>
3458 auto ParseEngine<EventHandler>::_filter_block_folded(FilterProcessor &C4_RESTRICT proc,
size_t indentation,
BlockChomp_e chomp) -> decltype(proc.result())
3460 _c4dbgfbf(
"indentation={} before=[{}]~~~{}~~~", indentation, proc.src.len, proc.src);
3462 size_t contents_len = _handle_all_whitespace(proc, chomp);
3464 return proc.result();
3466 contents_len = _extend_to_chomp(proc, contents_len);
3468 _c4dbgfbf(
"to filter=[{}]~~~{}~~~", contents_len, proc.src.first(contents_len));
3470 _filter_block_folded_newlines_leading(proc, indentation, contents_len);
3473 while(proc.has_more_chars(contents_len))
3475 const char curr = proc.curr();
3476 _c4dbgfbf(
"'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3481 _c4dbgfbf(
"found newline", curr);
3482 _filter_block_folded_newlines(proc, indentation, contents_len);
3494 _c4dbgfbf(
"before chomp: #tochomp={} sofar=[{}]~~~{}~~~", proc.rem().len, proc.sofar().len, proc.sofar());
3496 _filter_chomp(proc, chomp, indentation);
3498 _c4dbgfbf(
"final=[{}]~~~{}~~~", proc.sofar().len, proc.sofar());
3500 return proc.result();
// Source-to-destination variant of folded block filtering: wraps
// (scalar, dst) in a FilterProcessorSrcDst and runs _filter_block_folded().
// NOTE(review): presumably this is filter_scalar_block_folded() -- confirm.
3505 template<
class EventHandler>
3508 FilterProcessorSrcDst proc(scalar, dst);
3509 return _filter_block_folded(proc, indentation, chomp);
// In-place variant of folded block filtering: wraps (scalar, cap) in a
// FilterProcessorInplaceEndExtending and runs _filter_block_folded().
// NOTE(review): presumably this is filter_scalar_block_folded_in_place(),
// as called from _filter_scalar_folded() -- confirm.
3512 template<
class EventHandler>
3515 FilterProcessorInplaceEndExtending proc(scalar, cap);
3516 return _filter_block_folded(proc, indentation, chomp);
// Filters a plain scalar in place, using s.len as the capacity. The result
// is asserted to be valid.
// NOTE(review): presumably returns r.get() -- confirm.
3524 template<
class EventHandler>
3525 csubstr ParseEngine<EventHandler>::_filter_scalar_plain(substr s,
size_t indentation)
3527 _c4dbgpf(
"filtering plain scalar: s=[{}]~~~{}~~~", s.len, s);
3528 FilterResult r = this->filter_scalar_plain_in_place(s, s.len, indentation);
3529 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, r.valid());
3530 _c4dbgpf(
"filtering plain scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
// Filters a single-quoted scalar in place, using s.len as the capacity. The
// result is asserted to be valid.
// NOTE(review): presumably returns r.get() -- confirm.
3536 template<
class EventHandler>
3537 csubstr ParseEngine<EventHandler>::_filter_scalar_squot(substr s)
3539 _c4dbgpf(
"filtering squo scalar: s=[{}]~~~{}~~~", s.len, s);
3540 FilterResult r = this->filter_scalar_squoted_in_place(s, s.len);
3541 _RYML_ASSERT_BASIC_(this->callbacks(), r.valid());
3542 _c4dbgpf(
"filtering squo scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
// Filters a double-quoted scalar. First tries in place with s.len as the
// capacity. If that result is not valid (the filtered text needs more room
// than s offers), a buffer of r.required_len() bytes is requested from the
// event handler's arena and the scalar is filtered from s into it.
3549 template<
class EventHandler>
3550 csubstr ParseEngine<EventHandler>::_filter_scalar_dquot(substr s)
3552 _c4dbgpf(
"filtering dquo scalar: s=[{}]~~~{}~~~", s.len, s);
3553 FilterResultExtending r = this->filter_scalar_dquoted_in_place(s, s.len);
3554 if(C4_LIKELY(r.valid()))
3556 _c4dbgpf(
"filtering dquo scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
3561 const size_t len = r.required_len();
3562 _c4dbgpf(
"filtering dquo scalar: not enough space: needs {}, have {}", len, s.len);
// NOTE(review): &s is passed along, presumably so that s can be relocated if
// the arena is reallocated -- confirm against alloc_arena()
3563 substr dst = m_evt_handler->alloc_arena(len, &s);
3564 _c4dbgpf(
"filtering dquo scalar: dst.len={}", dst.len);
3567 _RYML_ASSERT_BASIC_(this->callbacks(), dst.len == len);
3568 FilterResult rsd = this->filter_scalar_dquoted(s, dst);
3569 _c4dbgpf(
"filtering dquo scalar: ... result now needs {} was {}", rsd.required_len(), len);
3570 _RYML_ASSERT_BASIC_(this->callbacks(), rsd.required_len() <= len);
3571 _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, rsd.valid());
3572 _c4dbgpf(
"filtering dquo scalar: success! s=[{}]~~~{}~~~", rsd.get().len, rsd.get());
// Produces a copy of `s` with one extra '\n' appended, for the case where
// filtering needs one byte more than the scalar occupies. Two strategies are
// visible: shifting the scalar one byte to the left inside m_buf (asserted
// to have at least one byte before s) and writing the newline into the freed
// last byte, or copying the scalar into an arena allocation of s.len + 1
// bytes.
// NOTE(review): which strategy is chosen presumably depends on whether s
// lies inside m_buf -- confirm.
3582 template<
class EventHandler>
3583 csubstr ParseEngine<EventHandler>::_move_scalar_left_and_add_newline(substr s)
3587 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.str > m_buf.str);
3588 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, s.str-1 >= m_buf.str);
3590 memmove(s.str - 1, s.str, s.len);
3592 s.str[s.len] =
'\n';
3598 substr dst = m_evt_handler->alloc_arena(s.len + 1);
3600 memcpy(dst.str, s.str, s.len);
// Filters a literal block scalar in place, using s.len as the capacity.
// When the in-place result is not valid, the filtered text is asserted to
// need exactly one byte more than s.len, and
// _move_scalar_left_and_add_newline() supplies the result instead.
3606 template<
class EventHandler>
3607 csubstr ParseEngine<EventHandler>::_filter_scalar_literal(substr s,
size_t indentation,
BlockChomp_e chomp)
3609 _c4dbgpf(
"filtering block literal scalar: s=[{}]~~~{}~~~", s.len, s);
3610 FilterResult r = this->filter_scalar_block_literal_in_place(s, s.len, indentation, chomp);
3612 if(C4_LIKELY(r.valid()))
3618 _c4dbgpf(
"filtering block literal scalar: not enough space: needs {}, have {}", r.required_len(), s.len);
3619 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, r.required_len() == s.len + 1);
3622 result = _move_scalar_left_and_add_newline(s);
3624 _c4dbgpf(
"filtering block literal scalar: success! s=[{}]~~~{}~~~", result.len, result);
// Filters a folded block scalar in place, using s.len as the capacity.
// When the in-place result is not valid, the filtered text is asserted to
// need exactly one byte more than s.len, and
// _move_scalar_left_and_add_newline() supplies the result instead.
3630 template<
class EventHandler>
3631 csubstr ParseEngine<EventHandler>::_filter_scalar_folded(substr s,
size_t indentation,
BlockChomp_e chomp)
3633 _c4dbgpf(
"filtering block folded scalar: s=[{}]~~~{}~~~", s.len, s);
3634 FilterResult r = this->filter_scalar_block_folded_in_place(s, s.len, indentation, chomp);
3636 if(C4_LIKELY(r.valid()))
3642 _c4dbgpf(
"filtering block folded scalar: not enough space: needs {}, have {}", r.required_len(), s.len);
3643 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, r.required_len() == s.len + 1);
3646 result = _move_scalar_left_and_add_newline(s);
3648 _c4dbgpf(
"filtering block folded scalar: success! s=[{}]~~~{}~~~", result.len, result);
// Returns the text of a plain key scalar. With scalar filtering enabled in
// the options, the scalar is run through _filter_scalar_plain(); otherwise
// the event handler is told the key scalar is unfiltered. Per the last trace
// message there is also a path for scalars that need no filtering.
3655 template<
class EventHandler>
3656 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_plain(ScannedScalar
const& C4_RESTRICT sc,
size_t indentation)
3660 if(m_options.scalar_filtering())
3662 return _filter_scalar_plain(sc.scalar, indentation);
3666 _c4dbgp(
"plain scalar left unfiltered");
3667 m_evt_handler->mark_key_scalar_unfiltered();
3672 _c4dbgp(
"plain scalar doesn't need filtering");
// Returns the text of a plain value scalar. With scalar filtering enabled in
// the options, the scalar is run through _filter_scalar_plain(); otherwise
// the event handler is told the value scalar is unfiltered. Per the last
// trace message there is also a path for scalars that need no filtering.
3677 template<
class EventHandler>
3678 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_plain(ScannedScalar
const& C4_RESTRICT sc,
size_t indentation)
3682 if(m_options.scalar_filtering())
3684 return _filter_scalar_plain(sc.scalar, indentation);
3688 _c4dbgp(
"plain scalar left unfiltered");
3689 m_evt_handler->mark_val_scalar_unfiltered();
3694 _c4dbgp(
"plain scalar doesn't need filtering");
// Returns the text of a single-quoted key scalar. With scalar filtering
// enabled in the options, the scalar is run through _filter_scalar_squot();
// otherwise the event handler is told the key scalar is unfiltered. Per the
// last trace message there is also a path for scalars that need no
// filtering.
3702 template<
class EventHandler>
3703 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_squot(ScannedScalar
const& C4_RESTRICT sc)
3707 if(m_options.scalar_filtering())
3709 return _filter_scalar_squot(sc.scalar);
3713 _c4dbgp(
"squo key scalar left unfiltered");
3714 m_evt_handler->mark_key_scalar_unfiltered();
3719 _c4dbgp(
"squo key scalar doesn't need filtering");
// Returns the text of a single-quoted value scalar. With scalar filtering
// enabled in the options, the scalar is run through _filter_scalar_squot();
// otherwise the event handler is told the value scalar is unfiltered. Per
// the last trace message there is also a path for scalars that need no
// filtering.
3724 template<
class EventHandler>
3725 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_squot(ScannedScalar
const& C4_RESTRICT sc)
3729 if(m_options.scalar_filtering())
3731 return _filter_scalar_squot(sc.scalar);
3735 _c4dbgp(
"squo val scalar left unfiltered");
3736 m_evt_handler->mark_val_scalar_unfiltered();
3741 _c4dbgp(
"squo val scalar doesn't need filtering");
// Returns the text of a double-quoted key scalar. With scalar filtering
// enabled in the options, the scalar is run through _filter_scalar_dquot();
// otherwise the event handler is told the key scalar is unfiltered. Per the
// last trace message there is also a path for scalars that need no
// filtering.
3749 template<
class EventHandler>
3750 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_dquot(ScannedScalar
const& C4_RESTRICT sc)
3754 if(m_options.scalar_filtering())
3756 return _filter_scalar_dquot(sc.scalar);
3760 _c4dbgp(
"dquo scalar left unfiltered");
3761 m_evt_handler->mark_key_scalar_unfiltered();
3766 _c4dbgp(
"dquo scalar doesn't need filtering");
// Returns the text of a double-quoted value scalar. With scalar filtering
// enabled in the options, the scalar is run through _filter_scalar_dquot();
// otherwise the event handler is told the value scalar is unfiltered. Per
// the last trace message there is also a path for scalars that need no
// filtering.
3771 template<
class EventHandler>
3772 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_dquot(ScannedScalar
const& C4_RESTRICT sc)
3776 if(m_options.scalar_filtering())
3778 return _filter_scalar_dquot(sc.scalar);
3782 _c4dbgp(
"dquo scalar left unfiltered");
3783 m_evt_handler->mark_val_scalar_unfiltered();
3788 _c4dbgp(
"dquo scalar doesn't need filtering");
// Returns the text of a literal block key scalar. With scalar filtering
// enabled in the options, the block is run through _filter_scalar_literal()
// with its scanned indentation and chomp mode; otherwise the event handler
// is told the key scalar is unfiltered.
3796 template<
class EventHandler>
3797 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_literal(ScannedBlock
const& C4_RESTRICT sb)
3799 if(m_options.scalar_filtering())
3801 return _filter_scalar_literal(sb.scalar, sb.indentation, sb.chomp);
3805 _c4dbgp(
"literal scalar left unfiltered");
3806 m_evt_handler->mark_key_scalar_unfiltered();
// Returns the text of a literal block value scalar. With scalar filtering
// enabled in the options, the block is run through _filter_scalar_literal()
// with its scanned indentation and chomp mode; otherwise the event handler
// is told the value scalar is unfiltered.
3811 template<
class EventHandler>
3812 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_literal(ScannedBlock
const& C4_RESTRICT sb)
3814 if(m_options.scalar_filtering())
3816 return _filter_scalar_literal(sb.scalar, sb.indentation, sb.chomp);
3820 _c4dbgp(
"literal scalar left unfiltered");
3821 m_evt_handler->mark_val_scalar_unfiltered();
// Returns the text of a folded block key scalar. With scalar filtering
// enabled in the options, the block is run through _filter_scalar_folded()
// with its scanned indentation and chomp mode; otherwise the event handler
// is told the key scalar is unfiltered.
3829 template<
class EventHandler>
3830 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_folded(ScannedBlock
const& C4_RESTRICT sb)
3832 if(m_options.scalar_filtering())
3834 return _filter_scalar_folded(sb.scalar, sb.indentation, sb.chomp);
3838 _c4dbgp(
"folded scalar left unfiltered");
3839 m_evt_handler->mark_key_scalar_unfiltered();
// Returns the text of a folded block value scalar. With scalar filtering
// enabled in the options, the block is run through _filter_scalar_folded()
// with its scanned indentation and chomp mode; otherwise the event handler
// is told the value scalar is unfiltered.
3844 template<
class EventHandler>
3845 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_folded(ScannedBlock
const& C4_RESTRICT sb)
3847 if(m_options.scalar_filtering())
3849 return _filter_scalar_folded(sb.scalar, sb.indentation, sb.chomp);
3853 _c4dbgp(
"folded scalar left unfiltered");
3854 m_evt_handler->mark_val_scalar_unfiltered();
// Adds the flags in `on` to the parser state `s`. The statements here build
// the debug trace: the flags being added, the state's current flags, and the
// resulting flags (s->flags|on) are each rendered into a 64-byte buffer.
// NOTE(review): the assignment to s->flags presumably follows the trace --
// confirm.
3866 template<
class EventHandler>
3867 void ParseEngine<EventHandler>::add_flags(
ParserFlag_t on, ParserState * s)
3869 char buf1_[64], buf2_[64], buf3_[64];
3870 csubstr buf1 = detail::_parser_flags_to_str(buf1_, on);
3871 csubstr buf2 = detail::_parser_flags_to_str(buf2_, s->flags);
3872 csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags|on);
3873 _c4dbgpf(
"state[{}]: add {}: before={} after={}", s->level, buf1, buf2, buf3);
// Combined add/remove of parser-state flags. The statements here build the
// debug trace: the flags to add (`on`), the flags to remove (`off`), the
// current flags, and the result ((s->flags|on)&(~off)) are each rendered
// into a 64-byte buffer. The result expression applies `on` first and `off`
// second, so a flag present in both ends up cleared.
// NOTE(review): presumably this is addrem_flags(on, off, s) and the
// assignment to s->flags follows the trace -- confirm.
3877 template<
class EventHandler>
3880 char buf1_[64], buf2_[64], buf3_[64], buf4_[64];
3881 csubstr buf1 = detail::_parser_flags_to_str(buf1_, on);
3882 csubstr buf2 = detail::_parser_flags_to_str(buf2_, off);
3883 csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags);
3884 csubstr buf4 = detail::_parser_flags_to_str(buf4_, ((s->flags|on)&(~off)));
3885 _c4dbgpf(
"state[{}]: add {} / rem {}: before={} after={}", s->level, buf1, buf2, buf3, buf4);
// Removes the flags in `off` from the parser state `s`. The statements here
// build the debug trace: the flags being removed, the state's current flags,
// and the resulting flags (s->flags&(~off)) are each rendered into a 64-byte
// buffer.
// NOTE(review): the assignment to s->flags presumably follows the trace --
// confirm.
3890 template<
class EventHandler>
3891 void ParseEngine<EventHandler>::rem_flags(
ParserFlag_t off, ParserState * s)
3893 char buf1_[64], buf2_[64], buf3_[64];
3894 csubstr buf1 = detail::_parser_flags_to_str(buf1_, off);
3895 csubstr buf2 = detail::_parser_flags_to_str(buf2_, s->flags);
3896 csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags&(~off));
3897 _c4dbgpf(
"state[{}]: rem {}: before={} after={}", s->level, buf1, buf2, buf3);
// Renders the set bits of `flags` as text into `buf` and returns the written
// prefix, buf.first(pos). The local _prflag(fl) macro tests one flag with
// (flags & fl) == (fl) and appends its stringized name (#fl); the memcpy is
// guarded so that it never writes past buf.len. `gotone` tracks whether a
// flag has already been emitted (presumably to place a separator between
// names -- TODO confirm). A final check enforces pos <= buf.len.
3901 inline C4_NO_INLINE csubstr detail::_parser_flags_to_str(substr buf,
ParserFlag_t flags)
3904 bool gotone =
false;
3906 #define _prflag(fl) \
3907 if((flags & fl) == (fl)) \
3911 if(pos + 1 < buf.len) \
3915 csubstr fltxt = #fl; \
3916 if(pos + fltxt.len <= buf.len) \
3917 memcpy(buf.str + pos, fltxt.str, fltxt.len); \
3947 _RYML_CHECK_BASIC(pos <= buf.len);
3949 return buf.first(pos);
// Returns the part of the source buffer m_buf starting at loc.offset and
// running to its end. The offset must lie inside the buffer (asserted).
// NOTE(review): presumably this is location_contents(loc) -- confirm.
3959 template<
class EventHandler>
3962 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, loc.offset < m_buf.len);
3963 return m_buf.sub(loc.offset);
// Maps a pointer `val` into the source buffer to a location (offset, line,
// column) using the table of newline offsets. A null pointer yields the
// location {m_file, 0, 0, 0}. The locations option must be enabled
// (checked), the newline table must be built for the current m_buf
// (asserted), and val must lie within [src.begin(), src.end()] (checked).
// NOTE(review): presumably this is val_location(val) -- confirm.
3966 template<
class EventHandler>
3969 if(C4_UNLIKELY(val ==
nullptr))
3970 return {m_file, 0, 0, 0};
3971 _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_options.locations());
3974 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_buf.str == m_newline_offsets_buf.str);
3975 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_buf.len == m_newline_offsets_buf.len);
3976 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_options.locations());
3977 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, !_locations_dirty());
3978 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets !=
nullptr);
3979 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_size > 0);
3981 csubstr src = m_buf;
3982 _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, val !=
nullptr || src.str ==
nullptr);
3983 _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, (val >= src.begin() && val <= src.end()) || (src.str ==
nullptr && val ==
nullptr));
3985 using lineptr_type =
size_t const* C4_RESTRICT;
3986 lineptr_type lineptr =
nullptr;
3987 size_t offset = (size_t)(val - src.begin());
// two lookup strategies over the newline table follow: a linear scan and a
// step-halving search (presumably chosen by table size -- TODO confirm)
3991 for(lineptr_type curr = m_newline_offsets, last = m_newline_offsets + m_newline_offsets_size; curr < last; ++curr)
4006 size_t count = m_newline_offsets_size;
4009 lineptr = m_newline_offsets;
4013 it = lineptr + step;
// lineptr must now point at a table entry that is greater than `offset`
4025 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, lineptr >= m_newline_offsets);
4026 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, lineptr <= m_newline_offsets + m_newline_offsets_size);
4027 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, *lineptr > offset);
4030 loc.offset = offset;
// the line number is the index of that entry in the table
4031 loc.line = (size_t)(lineptr - m_newline_offsets);
// past the first line, the column is the distance from the previous newline
4032 if(lineptr > m_newline_offsets)
4033 loc.col = (offset - *(lineptr-1) - 1u);
// Rebuilds the newline-offset table for the current source buffer. Records
// m_buf as the buffer the table belongs to, sizes the storage for one entry
// per '\n' plus one, then stores the byte offset of every '\n' followed by a
// final entry equal to m_buf.len.
4039 template<
class EventHandler>
4040 void ParseEngine<EventHandler>::_prepare_locations()
4042 m_newline_offsets_buf = m_buf;
4043 size_t numnewlines = 1u + m_buf.count(
'\n');
4044 _resize_locations(numnewlines);
4045 m_newline_offsets_size = 0;
4046 for(
size_t i = 0; i < m_buf.len; i++)
4047 if(m_buf[i] ==
'\n')
4048 m_newline_offsets[m_newline_offsets_size++] = i;
// closing entry: the end of the buffer terminates the last line
4049 m_newline_offsets[m_newline_offsets_size++] = m_buf.len;
4050 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_size == numnewlines);
// Ensures the newline-offset storage can hold `numnewlines` entries. When
// the current capacity is too small, the old array (if any) is freed and a
// new one of exactly `numnewlines` entries is allocated through the
// callbacks. The old contents are not copied over. The capacity never
// shrinks.
4053 template<
class EventHandler>
4054 void ParseEngine<EventHandler>::_resize_locations(
size_t numnewlines)
4056 if(numnewlines > m_newline_offsets_capacity)
4058 if(m_newline_offsets)
4059 _RYML_CB_FREE(m_evt_handler->m_stack.m_callbacks, m_newline_offsets,
size_t, m_newline_offsets_capacity);
// the previous pointer value is passed as the allocation hint
4060 m_newline_offsets = _RYML_CB_ALLOC_HINT(m_evt_handler->m_stack.m_callbacks,
size_t, numnewlines, m_newline_offsets);
4061 m_newline_offsets_capacity = numnewlines;
// Returns true when the newline-offset table is empty
// (m_newline_offsets_size == 0), i.e. it has to be built before locations
// can be looked up.
4065 template<
class EventHandler>
4066 bool ParseEngine<EventHandler>::_locations_dirty()
const
4068 return !m_newline_offsets_size;
// Skips insignificant text at the front of the current line's remainder
// while in flow context. If the remainder is non-empty and begins with a
// space or tab, that leading whitespace is handled first; if the remainder
// then begins with '#', it is a comment and the rest of the line is
// consumed with _line_progressed().
4076 template<
class EventHandler>
4077 void ParseEngine<EventHandler>::_handle_flow_skip_whitespace()
4080 if(m_evt_handler->m_curr->line_contents.rem.len > 0)
4082 if(m_evt_handler->m_curr->line_contents.rem.str[0] ==
' ' || m_evt_handler->m_curr->line_contents.rem.str[0] ==
'\t')
4084 _c4dbgpf(
"starts with whitespace: '{}'", _c4prc(m_evt_handler->m_curr->line_contents.rem.str[0]));
4088 if(m_evt_handler->m_curr->line_contents.rem.begins_with(
'#'))
4090 _c4dbgpf(
"it's a comment: {}", m_evt_handler->m_curr->line_contents.rem);
4091 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
// Records the line on which a colon was just seen. If a previous colon was
// recorded (m_prev_colon != npos) on the same line, an error is raised:
// "two colons on same line".
4100 template<
class EventHandler>
4101 void ParseEngine<EventHandler>::_handle_colon()
4103 size_t curr = m_evt_handler->m_curr->pos.line;
4104 if(m_prev_colon !=
npos)
4106 if(curr == m_prev_colon)
4107 _c4err(
"two colons on same line");
4109 m_prev_colon = curr;
// Stores a pending annotation in the next free slot of `dst`: its text, the
// indentation it was found at, and its line. Raises "too many annotations"
// when every slot of dst->annotations is already in use.
// NOTE(review): presumably dst->num_entries is incremented after the slot is
// filled -- confirm.
4112 template<
class EventHandler>
4113 void ParseEngine<EventHandler>::_add_annotation(Annotation *C4_RESTRICT dst, csubstr str,
size_t indentation,
size_t line)
4115 _c4dbgpf(
"store annotation[{}]: '{}' indentation={} line={}", dst->num_entries, str, indentation, line);
4116 if(C4_UNLIKELY(dst->num_entries >= C4_COUNTOF(dst->annotations)))
4117 _c4err(
"too many annotations");
4118 dst->annotations[dst->num_entries].str = str;
4119 dst->annotations[dst->num_entries].indentation = indentation;
4120 dst->annotations[dst->num_entries].line = line;
// Empties the pending-annotation list by resetting its entry count to zero.
// The slots themselves are left untouched.
4124 template<
class EventHandler>
4125 void ParseEngine<EventHandler>::_clear_annotations(Annotation *C4_RESTRICT dst)
4127 dst->num_entries = 0;
4130 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
// When exactly one anchor or one tag is pending, skips leading spaces on the
// current line up to the largest of: the current state's reference
// indentation (indref), the pending anchor's indentation, and the pending
// tag's indentation.
// NOTE(review): the tag assertion below reads annotations[1] while the
// anchor assertion reads annotations[0]; since both counts are asserted to
// be < 2, slot 1 never holds a live entry -- index 0 was probably intended.
// Also note both line assertions read a slot even when that list is empty.
4131 template<
class EventHandler>
4132 bool ParseEngine<EventHandler>::_handle_indentation_from_annotations()
4134 if(m_pending_anchors.num_entries == 1u || m_pending_tags.num_entries == 1u)
4136 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.num_entries < 2u && m_pending_tags.num_entries < 2u);
4137 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.annotations[0].line < m_evt_handler->m_curr->pos.line);
4138 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_pending_tags.annotations[1].line < m_evt_handler->m_curr->pos.line);
4139 size_t to_skip = m_evt_handler->m_curr->indref;
4140 if(m_pending_anchors.num_entries)
4141 to_skip = m_pending_anchors.annotations[0].indentation > to_skip ? m_pending_anchors.annotations[0].indentation : to_skip;
4142 if(m_pending_tags.num_entries)
4143 to_skip = m_pending_tags.annotations[0].indentation > to_skip ? m_pending_tags.annotations[0].indentation : to_skip;
4144 _c4dbgpf(
"annotations pending, skip indentation up to {}!", to_skip);
4145 _maybe_skipchars_up_to(
' ', to_skip);
// Returns true when more than one tag or more than one anchor is pending.
4152 template<
class EventHandler>
4153 bool ParseEngine<EventHandler>::_annotations_require_key_container()
const
4155 return m_pending_tags.num_entries > 1 || m_pending_anchors.num_entries > 1;
// Validates the text of a tag. A tag that does not start with "!<" must not
// contain any of the flow indicators '[', ']', '{', '}' or ','; otherwise an
// error is raised. A tag that does start with "!<" is checked for a closing
// '>' at its end.
// NOTE(review): presumably a missing '>' also raises an error -- confirm.
4158 template<
class EventHandler>
4159 void ParseEngine<EventHandler>::_check_tag(csubstr tag)
4161 if(!tag.begins_with(
"!<"))
4163 if(C4_UNLIKELY(tag.first_of(
"[]{},") !=
npos))
4164 _c4err(
"tags must not contain any of '[]{},'");
4168 if(C4_UNLIKELY(!tag.ends_with(
'>')))
// Applies pending annotations to a block key scalar that is about to be
// emitted. A single pending tag is validated with _check_tag(), set as the
// key tag on the event handler, and the pending tags are cleared. A single
// pending anchor is set as the key anchor and the pending anchors are
// cleared; more than one anchor raises "too many anchors".
4173 template<
class EventHandler>
4174 void ParseEngine<EventHandler>::_handle_annotations_before_blck_key_scalar()
4176 _c4dbgpf(
"annotations_before_blck_key_scalar, node={}", m_evt_handler->m_curr->node_id);
4177 if(m_pending_tags.num_entries)
4179 _c4dbgpf(
"annotations_before_blck_key_scalar, #tags={}", m_pending_tags.num_entries);
4180 if(C4_LIKELY(m_pending_tags.num_entries == 1))
4182 _check_tag(m_pending_tags.annotations[0].str);
4183 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4184 _clear_annotations(&m_pending_tags);
4191 if(m_pending_anchors.num_entries)
4193 _c4dbgpf(
"annotations_before_blck_key_scalar, #anchors={}", m_pending_anchors.num_entries);
4194 if(C4_LIKELY(m_pending_anchors.num_entries == 1))
4196 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4197 _clear_annotations(&m_pending_anchors);
4201 _c4err(
"too many anchors");
// Applies pending annotations to a block value scalar that is about to be
// emitted. A single pending tag is validated with _check_tag(), set as the
// value tag on the event handler, and the pending tags are cleared. A single
// pending anchor is set as the value anchor and the pending anchors are
// cleared; more than one anchor raises "too many anchors".
4206 template<
class EventHandler>
4207 void ParseEngine<EventHandler>::_handle_annotations_before_blck_val_scalar()
4209 _c4dbgpf(
"annotations_before_blck_val_scalar, node={}", m_evt_handler->m_curr->node_id);
4210 if(m_pending_tags.num_entries)
4212 _c4dbgpf(
"annotations_before_blck_val_scalar, #tags={}", m_pending_tags.num_entries);
4213 if(C4_LIKELY(m_pending_tags.num_entries == 1))
4215 _check_tag(m_pending_tags.annotations[0].str);
4216 m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4217 _clear_annotations(&m_pending_tags);
4224 if(m_pending_anchors.num_entries)
4226 _c4dbgpf(
"annotations_before_blck_val_scalar, #anchors={}", m_pending_anchors.num_entries);
4227 if(C4_LIKELY(m_pending_anchors.num_entries == 1))
4229 m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4230 _clear_annotations(&m_pending_anchors);
4234 _c4err(
"too many anchors");
// Applies pending annotations that belong to a block map about to be
// started. With two pending tags (or anchors), entry 0 is set on the map as
// its value tag (anchor); the list is not cleared here, so entry 1 remains
// pending. With exactly one pending tag (or anchor), it is given to the map
// only if it was found on a line before `current_line`, and the list is then
// cleared; otherwise it stays pending.
//   current_line : line on which the map starts
4239 template<
class EventHandler>
4240 void ParseEngine<EventHandler>::_handle_annotations_before_start_mapblck(
size_t current_line)
4242 _c4dbgpf(
"annotations_before_start_mapblck, current_line={}", current_line);
4243 if(m_pending_tags.num_entries == 2)
4245 _c4dbgp(
"2 tags, setting entry 0");
4246 _check_tag(m_pending_tags.annotations[0].str);
4247 m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4249 else if(m_pending_tags.num_entries == 1)
// the trace has two placeholders: pass the current line for "curr={}" as well
4251 _c4dbgpf(
"1 tag. line={}, curr={}", m_pending_tags.annotations[0].line, current_line);
4252 if(m_pending_tags.annotations[0].line < current_line)
4254 _c4dbgp(
"...tag is for the map. setting it.");
4255 _check_tag(m_pending_tags.annotations[0].str);
4256 m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4257 _clear_annotations(&m_pending_tags);
4261 if(m_pending_anchors.num_entries == 2)
4263 _c4dbgp(
"2 anchors, setting entry 0");
4264 m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4266 else if(m_pending_anchors.num_entries == 1)
// the trace has two placeholders: pass the current line for "curr={}" as well
4268 _c4dbgpf(
"1 anchor. line={}, curr={}", m_pending_anchors.annotations[0].line, current_line);
4269 if(m_pending_anchors.annotations[0].line < current_line)
4271 _c4dbgp(
"...anchor is for the map. setting it.");
4272 m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4273 _clear_annotations(&m_pending_anchors);
// Applies pending annotations to a block map that is itself used as a key.
// Only acts when two tags (or two anchors) are pending: entry 0 is then set
// as the key tag (key anchor) on the event handler. The pending lists are
// not cleared here.
4278 template<
class EventHandler>
4279 void ParseEngine<EventHandler>::_handle_annotations_before_start_mapblck_as_key()
4281 _c4dbgp(
"annotations_before_start_mapblck_as_key");
4282 if(m_pending_tags.num_entries == 2)
4284 _check_tag(m_pending_tags.annotations[0].str);
4285 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4287 if(m_pending_anchors.num_entries == 2)
4289 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
// Applies pending tag/anchor annotations as key annotations after a block
// map was started, and then sets the indentation from key_indentation.
//
// key_indentation: indentation passed to _set_indentation(); it is replaced
//                  by the result of _select_indentation_from_annotations()
//                  when any annotation is pending.
// key_line:        forwarded to _select_indentation_from_annotations().
4293 template<
class EventHandler>
4294 void ParseEngine<EventHandler>::_handle_annotations_and_indentation_after_start_mapblck(
size_t key_indentation,
size_t key_line)
4296 _c4dbgp(
"annotations_after_start_mapblck");
// at most two pending entries of each kind are expected
4297 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries <= 2);
4298 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.num_entries <= 2);
4299 if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
4301 key_indentation = _select_indentation_from_annotations(key_indentation, key_line);
// Tags: either entry 0 or entry 1 is validated, set as the key tag, and the
// pending tags are cleared.
// NOTE(review): presumably entry 0 is used for one pending tag and entry 1
// for two -- confirm against the switch's case labels.
4302 switch(m_pending_tags.num_entries)
4305 _check_tag(m_pending_tags.annotations[0].str);
4306 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4307 _clear_annotations(&m_pending_tags);
4310 _check_tag(m_pending_tags.annotations[1].str);
4311 m_evt_handler->set_key_tag(m_pending_tags.annotations[1].str);
4312 _clear_annotations(&m_pending_tags);
// Anchors: same scheme as for tags, but without validation.
4315 switch(m_pending_anchors.num_entries)
4318 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4319 _clear_annotations(&m_pending_anchors);
4322 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[1].str);
4323 _clear_annotations(&m_pending_anchors);
4327 _set_indentation(key_indentation);
// Chooses an indentation based on the pending annotations.
//
// Requires at least one pending tag or anchor (asserted). A candidate
// annotation is picked starting from the first pending anchor (or the first
// pending tag when there are no anchors), and every pending anchor and tag is
// compared with it by line and by indentation.
//
// Returns val_indentation when the candidate's line is before val_line,
// otherwise the candidate's own indentation.
4330 template<
class EventHandler>
4331 size_t ParseEngine<EventHandler>::_select_indentation_from_annotations(
size_t val_indentation,
size_t val_line)
4333 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries || m_pending_anchors.num_entries);
// initial candidate: first anchor if any, else first tag
4335 auto const *C4_RESTRICT curr = m_pending_anchors.num_entries ? &m_pending_anchors.annotations[0] : &m_pending_tags.annotations[0];
// NOTE(review): presumably curr is repointed to ann when ann is on a later
// line, or on the same-or-earlier line with smaller indentation -- confirm
// against the branch bodies.
4336 for(
size_t i = 0; i < m_pending_anchors.num_entries; ++i)
4338 auto const& C4_RESTRICT ann = m_pending_anchors.annotations[i];
4339 if(ann.line > curr->line)
4341 else if(ann.indentation < curr->indentation)
// same comparison for the pending tags
4344 for(
size_t j = 0; j < m_pending_tags.num_entries; ++j)
4346 auto const& C4_RESTRICT ann = m_pending_tags.annotations[j];
4347 if(ann.line > curr->line)
4349 else if(ann.indentation < curr->indentation)
4352 return curr->line < val_line ? val_indentation : curr->indentation;
// Passes a directive to the event handler and advances the current line.
//
// rem: the directive text; asserted to be a sub-range of the current line's
//      remaining contents.
//
// Two paths are visible: one forwards rem as-is and advances by rem.len; the
// other cuts rem at the first '#', trims trailing spaces/tabs, forwards the
// trimmed text and advances by the position of the '#'.
// NOTE(review): presumably the first path is taken when no '#' is found --
// confirm against the branch condition.
4355 template<
class EventHandler>
4356 void ParseEngine<EventHandler>::_handle_directive(csubstr rem)
4358 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, rem.is_sub(m_evt_handler->m_curr->line_contents.rem));
// position of the first '#' in rem
4359 const size_t pos = rem.find(
'#');
4360 _c4dbgpf(
"handle_directive: pos={} rem={}", pos, rem);
// whole remainder is the directive
4363 m_evt_handler->add_directive(rem);
4364 _line_progressed(rem.len);
// directive is the text before the '#', without trailing whitespace
4368 csubstr to_comment = rem.first(pos);
4369 csubstr trimmed = to_comment.trimr(
" \t");
4370 m_evt_handler->add_directive(trimmed);
4371 _line_progressed(pos);
// Looks for a byte order mark at the start of the current line's remaining
// contents and advances past it.
//
// Checks are made in this order: UTF32BE, UTF32LE, UTF16BE, UTF16LE, UTF8.
// Besides the explicit BOM byte sequences, the UTF32/UTF16 checks also accept
// a pattern of NUL bytes next to a single ASCII character (see _rymlisascii).
// The line is advanced by 4, 4, 2, 2 and 3 bytes respectively.
// NOTE(review): the return values are set outside the lines documented here;
// presumably true when a mark was recognized -- confirm.
4376 template<
class EventHandler>
4377 bool ParseEngine<EventHandler>::_handle_bom()
4379 const csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// rem without its first character; used for the little-endian patterns
4382 const csubstr rest = rem.sub(1);
// true for characters in the range (0x00, 0x7f]
4384 #define _rymlisascii(c) ((c) > '\0' && (c) <= '\x7f')
// UTF32BE: 00 00 FE FF, or three NULs followed by an ASCII character
4385 if(rem.begins_with(csubstr{
"\x00\x00\xfe\xff", 4}) || (rem.begins_with(csubstr{
"\x00\x00\x00", 3}) && rem.len >= 4u && _rymlisascii(rem.str[3])))
4387 _c4dbgp(
"byte order mark: UTF32BE");
4389 _line_progressed(4);
// UTF32LE: FF FE 00 00, or an ASCII character followed by three NULs
4393 else if(rem.begins_with(csubstr{
"\xff\xfe\x00\x00", 4}) || (rest.begins_with(csubstr{
"\x00\x00\x00", 3}) && rem.len >= 4u && _rymlisascii(rem.str[0])))
4395 _c4dbgp(
"byte order mark: UTF32LE");
4397 _line_progressed(4);
// UTF16BE: FE FF, or a NUL followed by an ASCII character
4401 else if(rem.begins_with(
"\xfe\xff") || (rem.begins_with(
'\x00') && rem.len >= 2u && _rymlisascii(rem.str[1])))
4403 _c4dbgp(
"byte order mark: UTF16BE");
4405 _line_progressed(2);
// UTF16LE: FF FE, or an ASCII character followed by a NUL
4409 else if(rem.begins_with(
"\xff\xfe") || (rest.begins_with(
'\x00') && rem.len >= 2u && _rymlisascii(rem.str[0])))
4411 _c4dbgp(
"byte order mark: UTF16LE");
4413 _line_progressed(2);
// UTF8: EF BB BF
4417 else if(rem.begins_with(
"\xef\xbb\xbf"))
4419 _c4dbgp(
"byte order mark: UTF8");
4421 _line_progressed(3);
// Validates a detected byte order mark encoding against the parser state.
//
// enc: the encoding announced by the byte order mark.
//
// While m_encoding is still NOBOM, the mark is accepted when it is UTF8 or
// when the current line starts at the very beginning of the buffer; the
// visible error for this case states that a non-UTF8 mark can appear only at
// the beginning of the file. When an encoding was already recorded, a
// different enc is reported as an error.
// NOTE(review): presumably m_encoding is assigned on acceptance -- confirm.
4430 template<
class EventHandler>
4431 void ParseEngine<EventHandler>::_handle_bom(
Encoding_e enc)
4433 if(m_encoding ==
NOBOM)
// pointer comparison: is the current line remainder at the buffer start?
4435 if(enc ==
UTF8 || (m_evt_handler->m_curr->line_contents.rem.str == m_buf.str))
4438 _c4err(
"non-UTF8 byte order mark can appear only at the beginning of the file");
4440 else if(enc != m_encoding)
4442 _c4err(
"byte order mark can only be set once");
// Parsing step(s) for a JSON flow sequence.
//
// Preconditions (asserted): RKEY is not set, RSEQ and RFLOW are set, and
// exactly one of RVAL / RNXT is set.
//
// Visible behaviour, per the debug traces:
//  - value expected: scan a double-quoted scalar, open a child sequence or
//    map, end the sequence, or scan a plain scalar;
//  - next expected: add a sibling for the next value, or end the sequence;
//  - reaching the end of the file raises "missing terminating ]".
// NOTE(review): the conditions selecting each branch and the state-flag
// updates are not part of the lines documented here.
4449 template<
class EventHandler>
4450 void ParseEngine<EventHandler>::_handle_seq_json()
4453 _c4dbgpf(
"handle2_seq_json: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
4455 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
4456 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(
RSEQ));
4457 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(
RFLOW));
4458 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL|
RNXT));
4459 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(
RVAL) != has_all(
RNXT));
4461 _handle_flow_skip_whitespace();
4462 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// --- value expected (RNXT asserted to be unset) ---
4468 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
// dispatch is made on the first character of the remainder
4469 const char first = rem.str[0];
4470 _c4dbgpf(
"mapjson[RVAL]: '{}'", first);
// double-quoted scalar: scan, filter if needed, hand over as the value
4475 _c4dbgp(
"seqjson[RVAL]: scanning double-quoted scalar");
4476 ScannedScalar sc = _scan_scalar_dquot();
4477 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
4478 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
// nested flow sequence as the value
4484 _c4dbgp(
"seqjson[RVAL]: start child seqjson");
4486 m_evt_handler->begin_seq_val_flow();
4488 _line_progressed(1);
// nested flow map as the value
4493 _c4dbgp(
"seqjson[RVAL]: start child mapjson");
4495 m_evt_handler->begin_map_val_flow();
4497 _line_progressed(1);
4498 goto seqjson_finish;
// end of this sequence
4502 _c4dbgp(
"seqjson[RVAL]: end!");
4505 _line_progressed(1);
4507 goto seqjson_finish;
// anything else: try a plain scalar
4513 if(_scan_scalar_seq_json(&sc))
4515 _c4dbgp(
"seqjson[RVAL]: it's a plain scalar.");
4516 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
4517 m_evt_handler->set_val_scalar_plain(maybe_filtered);
// --- next expected (RNXT set, RVAL unset) ---
4529 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT));
4530 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
4531 const char first = rem.str[0];
4532 _c4dbgpf(
"mapjson[RNXT]: '{}'", first);
// another value follows: add a sibling and consume one character
4537 _c4dbgp(
"seqjson[RNXT]: expect next val");
4539 m_evt_handler->add_sibling();
4540 _line_progressed(1);
// end of this sequence
4545 _c4dbgp(
"seqjson[RNXT]: end!");
4547 _line_progressed(1);
4548 goto seqjson_finish;
// --- continue with more input, or fail when the file ended ---
4556 _c4dbgt(
"seqjson: go again", 0);
4557 if(_finished_line())
4559 if(C4_LIKELY(!_finished_file()))
4567 _c4err(
"missing terminating ]");
4573 _c4dbgp(
"seqjson: finish");
// Parsing step(s) for a JSON flow map.
//
// Preconditions (asserted): RMAP and RFLOW are set, QMRK is not set, and
// exactly one of RKEY / RKCL / RVAL / RNXT is set.
//
// Visible behaviour, per the debug traces:
//  - RKEY: scan a double-quoted key, or end the map;
//  - RVAL: scan a double-quoted or plain value, or open a child seq/map;
//  - RKCL: consume the colon after the key;
//  - RNXT: ',' adds a sibling, '}' ends the map;
//  - reaching the end of the file raises "missing terminating }".
// NOTE(review): several branch conditions and the state-flag updates are not
// part of the lines documented here.
4579 template<
class EventHandler>
4580 void ParseEngine<EventHandler>::_handle_map_json()
4583 _c4dbgpf(
"handle2_map_json: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
4585 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP));
4586 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(
RFLOW));
4587 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
4588 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RKCL|
RVAL|
RNXT));
// exactly one of the four states is active
4589 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(
RKEY) + has_any(
RKCL) + has_any(
RVAL) + has_any(
RNXT)));
4591 _handle_flow_skip_whitespace();
4592 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// --- key expected (all other states asserted to be unset) ---
4598 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
4599 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
4600 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
4601 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
4602 const char first = rem.str[0];
4603 _c4dbgpf(
"mapjson[RKEY]: '{}'", first);
// double-quoted key: scan, filter if needed, hand over as the key
4608 _c4dbgp(
"mapjson[RKEY]: scanning double-quoted scalar");
4609 ScannedScalar sc = _scan_scalar_dquot();
4610 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
4611 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
// end of this map
4617 _c4dbgp(
"mapjson[RKEY]: end!");
4619 _line_progressed(1);
4620 goto mapjson_finish;
// --- value expected ---
4626 else if(has_any(
RVAL))
4628 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
4629 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
4630 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
4631 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
4632 const char first = rem.str[0];
4633 _c4dbgpf(
"mapjson[RVAL]: '{}'", first);
// double-quoted value
4638 _c4dbgp(
"mapjson[RVAL]: scanning double-quoted scalar");
4639 ScannedScalar sc = _scan_scalar_dquot();
4640 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
4641 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
// nested flow sequence as the value; the child takes the parent's indref
4647 _c4dbgp(
"mapjson[RVAL]: start val seqjson");
4649 m_evt_handler->begin_seq_val_flow();
4650 _set_indentation(m_evt_handler->m_parent->indref);
4652 _line_progressed(1);
4653 goto mapjson_finish;
// nested flow map as the value; the child takes the parent's indref
4657 _c4dbgp(
"mapjson[RVAL]: start val mapjson");
4659 m_evt_handler->begin_map_val_flow();
4660 _set_indentation(m_evt_handler->m_parent->indref);
4662 _line_progressed(1);
// anything else: try a plain scalar
4669 if(_scan_scalar_map_json(&sc))
4671 _c4dbgp(
"mapjson[RVAL]: plain scalar.");
4672 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
4673 m_evt_handler->set_val_scalar_plain(maybe_filtered);
// --- colon expected after the key ---
4684 else if(has_any(
RKCL))
4686 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
4687 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
4688 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
4689 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
4690 const char first = rem.str[0];
4691 _c4dbgpf(
"mapjson[RKCL]: '{}'", first);
4694 _c4dbgp(
"mapjson[RKCL]: found the colon");
4696 _line_progressed(1);
// --- separator or terminator expected ---
4703 else if(has_any(
RNXT))
4705 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
4706 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
4707 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
4708 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
4709 _c4dbgpf(
"mapjson[RNXT]: '{}'", rem.str[0]);
// ',': another key/value pair follows
4710 if(rem.begins_with(
','))
4712 _c4dbgp(
"mapjson[RNXT]: expect next keyval");
4713 m_evt_handler->add_sibling();
4715 _line_progressed(1);
// '}': end of this map
4717 else if(rem.begins_with(
'}'))
4719 _c4dbgp(
"mapjson[RNXT]: end!");
4721 _line_progressed(1);
4722 goto mapjson_finish;
// --- continue with more input, or fail when the file ended ---
4731 _c4dbgt(
"mapjson: go again", 0);
4732 if(_finished_line())
4734 if(C4_LIKELY(!_finished_file()))
4742 _c4err(
"missing terminating }");
4748 _c4dbgp(
"mapjson: finish");
// Parsing step(s) for the RSEQIMAP state (per the debug traces in
// _handle_seq_flow, a map implied by a "key: val" entry of a flow sequence).
//
// Preconditions (asserted): RSEQIMAP is set, RKEY is not set, exactly one of
// RVAL / RNXT / QMRK / RKCL is set, and the handler stack holds at least
// three states.
//
// Visible behaviour, per the debug traces:
//  - RVAL: scan a quoted or plain value, open a child seq/map, finish with
//    an empty value on ',' or ']', or record an anchor / reference;
//  - RNXT: ',' or ']' finishes the implicit map;
//  - QMRK: same choices as RVAL, but applied to the key;
//  - RKCL: consume ':' or finish with an empty value on ',' or ']'.
// NOTE(review): some branch conditions and the state-flag updates are not
// part of the lines documented here.
4754 template<
class EventHandler>
4755 void ParseEngine<EventHandler>::_handle_seq_imap()
4758 _c4dbgpf(
"handle2_seq_imap: node_id={} level={} indref={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
4760 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(
RSEQIMAP));
4761 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
4762 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL|
RNXT|
QMRK|
RKCL));
// exactly one of the four states is active
4763 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, 1 == has_all(
RVAL) + has_all(
RNXT) + has_all(
QMRK) + has_all(
RKCL));
4764 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 3);
4766 _handle_flow_skip_whitespace();
4767 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// --- value expected ---
4773 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL));
4774 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
4775 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
4776 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
4777 const char first = rem.str[0];
4778 _c4dbgpf(
"seqimap[RVAL]: '{}'", _c4prc(first));
// single-quoted value
4782 _c4dbgp(
"seqimap[RVAL]: scanning single-quoted scalar");
4783 sc = _scan_scalar_squot();
4784 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
4785 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
4787 goto seqimap_finish;
// double-quoted value
4789 else if(first ==
'"')
4791 _c4dbgp(
"seqimap[RVAL]: scanning double-quoted scalar");
4792 sc = _scan_scalar_dquot();
4793 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
4794 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
4796 goto seqimap_finish;
// plain value
4799 else if(_scan_scalar_plain_map_flow(&sc))
4801 _c4dbgp(
"seqimap[RVAL]: it's a scalar.");
4802 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
4803 m_evt_handler->set_val_scalar_plain(maybe_filtered);
4805 goto seqimap_finish;
// nested flow sequence as the value; the child takes the parent's indref
4807 else if(first ==
'[')
4809 _c4dbgp(
"seqimap[RVAL]: start child seqflow");
4811 m_evt_handler->begin_seq_val_flow();
4813 _set_indentation(m_evt_handler->m_parent->indref);
4814 _line_progressed(1);
4815 goto seqimap_finish;
// nested flow map as the value; the child takes the parent's indref
4817 else if(first ==
'{')
4819 _c4dbgp(
"seqimap[RVAL]: start child mapflow");
4821 m_evt_handler->begin_map_val_flow();
4823 _set_indentation(m_evt_handler->m_parent->indref);
4824 _line_progressed(1);
4825 goto seqimap_finish;
// separator/terminator right away: the value is empty
4827 else if(first ==
',' || first ==
']')
4829 _c4dbgp(
"seqimap[RVAL]: finish without val.");
4830 m_evt_handler->set_val_scalar_plain_empty();
4832 goto seqimap_finish;
// anchor on the value
4834 else if(first ==
'&')
4836 csubstr anchor = _scan_anchor();
4837 _c4dbgp(
"seqimap[RVAL]: anchor!");
4838 m_evt_handler->set_val_anchor(anchor);
// reference as the value
4840 else if(first ==
'*')
4842 csubstr ref = _scan_ref_seq();
4843 _c4dbgp(
"seqimap[RVAL]: ref!");
4844 m_evt_handler->set_val_ref(ref);
// --- separator or terminator expected ---
4852 else if(has_any(
RNXT))
4854 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT));
4855 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
4856 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
4857 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
4858 const char first = rem.str[0];
4859 _c4dbgpf(
"seqimap[RNXT]: '{}'", _c4prc(first));
4860 if(first ==
',' || first ==
']')
4864 _c4dbgp(
"seqimap: done");
4866 goto seqimap_finish;
// --- explicit key expected ---
4873 else if(has_any(
QMRK))
4875 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
QMRK));
4876 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
4877 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
4878 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
4879 const char first = rem.str[0];
4880 _c4dbgpf(
"seqimap[QMRK]: '{}'", _c4prc(first));
// single-quoted key
4884 _c4dbgp(
"seqimap[QMRK]: scanning single-quoted scalar");
4885 sc = _scan_scalar_squot();
4886 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
4887 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
// double-quoted key
4891 else if(first ==
'"')
4893 _c4dbgp(
"seqimap[QMRK]: scanning double-quoted scalar");
4894 sc = _scan_scalar_dquot();
4895 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
4896 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
// plain key
4901 else if(_scan_scalar_plain_map_flow(&sc))
4903 _c4dbgp(
"seqimap[QMRK]: it's a scalar.");
4904 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
4905 m_evt_handler->set_key_scalar_plain(maybe_filtered);
// flow sequence used as the key
4909 else if(first ==
'[')
4911 _c4dbgp(
"seqimap[QMRK]: start child seqflow");
4913 m_evt_handler->begin_seq_key_flow();
4915 _set_indentation(m_evt_handler->m_parent->indref);
4916 _line_progressed(1);
4917 goto seqimap_finish;
// flow map used as the key
4919 else if(first ==
'{')
4921 _c4dbgp(
"seqimap[QMRK]: start child mapflow");
4923 m_evt_handler->begin_map_key_flow();
4925 _set_indentation(m_evt_handler->m_parent->indref);
4926 _line_progressed(1);
4927 goto seqimap_finish;
// separator/terminator right away: both key and value are empty
4929 else if(first ==
',' || first ==
']')
4931 _c4dbgp(
"seqimap[QMRK]: finish without key.");
4932 m_evt_handler->set_key_scalar_plain_empty();
4933 m_evt_handler->set_val_scalar_plain_empty();
4935 goto seqimap_finish;
// anchor on the key
4937 else if(first ==
'&')
4939 csubstr anchor = _scan_anchor();
4940 _c4dbgp(
"seqimap[QMRK]: anchor!");
4941 m_evt_handler->set_key_anchor(anchor);
// reference as the key
4943 else if(first ==
'*')
4945 csubstr ref = _scan_ref_seq();
4946 _c4dbgp(
"seqimap[QMRK]: ref!");
4947 m_evt_handler->set_key_ref(ref);
// --- colon expected after the key ---
4955 else if(has_any(
RKCL))
4957 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
4958 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
4959 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
4960 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RKCL));
4961 const char first = rem.str[0];
4962 _c4dbgpf(
"seqimap[RKCL]: '{}'", _c4prc(first));
4965 _c4dbgp(
"seqimap[RKCL]: found ':'");
4967 _line_progressed(1);
// no colon before the separator/terminator: the value is empty
4970 else if(first ==
',' || first ==
']')
4972 _c4dbgp(
"seqimap[RKCL]: found ','. finish without val");
4973 m_evt_handler->set_val_scalar_plain_empty();
4975 goto seqimap_finish;
// --- continue with more input ---
4984 _c4dbgt(
"seqimap: go again", 0);
4985 if(_finished_line())
4987 if(C4_LIKELY(!_finished_file()))
5001 _c4dbgp(
"seqimap: finish");
// Parsing step(s) for a YAML flow sequence ("[ ... ]").
//
// Preconditions (asserted): RKEY is not set, RSEQ and RFLOW are set, exactly
// one of RVAL / RNXT is set, and the current indref is not npos.
//
// Visible behaviour, per the debug traces:
//  - RVAL: scan a quoted or plain value; open a child seq/map; end the
//    sequence on ']'; record a reference, anchor or tag; on ':' or '?' start
//    a map inside the sequence (implicit map entry);
//  - RNXT: add a sibling for the next value, end the sequence on ']', or on
//    ':' turn the previous value into the first key of a new flow map;
//  - reaching the end of the file raises "missing terminating ]".
// NOTE(review): some branch conditions and the state-flag updates are not
// part of the lines documented here.
5007 template<
class EventHandler>
5008 void ParseEngine<EventHandler>::_handle_seq_flow()
5011 _c4dbgpf(
"handle_seq_flow: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5013 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
5014 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(
RSEQ));
5015 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(
RFLOW));
5016 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL|
RNXT));
5017 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(
RVAL) != has_all(
RNXT));
5018 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref !=
npos);
5020 _handle_flow_skip_whitespace();
// nothing left on this line after skipping whitespace
5022 if(!m_evt_handler->m_curr->line_contents.rem.len)
// --- value expected (RNXT asserted to be unset) ---
5027 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
5028 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
// single-quoted value
5032 _c4dbgp(
"seqflow[RVAL]: scanning single-quoted scalar");
5033 sc = _scan_scalar_squot();
5034 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5035 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
// double-quoted value
5038 else if(first ==
'"')
5040 _c4dbgp(
"seqflow[RVAL]: scanning double-quoted scalar");
5041 sc = _scan_scalar_dquot();
5042 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5043 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
// plain value
5047 else if(_scan_scalar_plain_seq_flow(&sc))
5049 _c4dbgp(
"seqflow[RVAL]: it's a scalar.");
5050 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5051 m_evt_handler->set_val_scalar_plain(maybe_filtered);
// nested flow sequence; the child takes the parent's indref
5054 else if(first ==
'[')
5056 _c4dbgp(
"seqflow[RVAL]: start child seqflow");
5058 m_evt_handler->begin_seq_val_flow();
5059 _set_indentation(m_evt_handler->m_parent->indref);
5061 _line_progressed(1);
// nested flow map; the child takes the parent's indref
5063 else if(first ==
'{')
5065 _c4dbgp(
"seqflow[RVAL]: start child mapflow");
5067 m_evt_handler->begin_map_val_flow();
5068 _set_indentation(m_evt_handler->m_parent->indref);
5070 _line_progressed(1);
5071 goto seqflow_finish;
// end of this sequence
5073 else if(first ==
']')
5075 _c4dbgp(
"seqflow[RVAL]: end!");
5076 _line_progressed(1);
5078 goto seqflow_finish;
// reference as the value
5080 else if(first ==
'*')
5082 csubstr ref = _scan_ref_seq();
5083 _c4dbgpf(
"seqflow[RVAL]: ref! [{}]~~~{}~~~", ref.len, ref);
5084 m_evt_handler->set_val_ref(ref);
// anchor; when a comma follows directly, the anchored value is an empty
// scalar and a sibling is added for the next value
5087 else if(first ==
'&')
5089 csubstr anchor = _scan_anchor();
5090 _c4dbgpf(
"seqflow[RVAL]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
5091 m_evt_handler->set_val_anchor(anchor);
5092 if(_maybe_scan_following_comma())
5094 _c4dbgp(
"seqflow[RVAL]: empty scalar!");
5095 m_evt_handler->set_val_scalar_plain_empty();
5096 m_evt_handler->add_sibling();
// tag; same empty-scalar handling as for anchors
5099 else if(first ==
'!')
5101 csubstr tag = _scan_tag();
5102 _c4dbgpf(
"seqflow[RVAL]: tag! [{}]~~~{}~~~", tag.len, tag);
5104 m_evt_handler->set_val_tag(tag);
5105 if(_maybe_scan_following_comma())
5107 _c4dbgp(
"seqflow[RVAL]: empty scalar!");
5108 m_evt_handler->set_val_scalar_plain_empty();
5109 m_evt_handler->add_sibling();
// ':' with no preceding key: start a map as the value, with an empty key
5112 else if(first ==
':')
5114 _c4dbgpf(
"seqflow[RVAL]: actually seqimap at node[{}], with empty key", m_evt_handler->m_curr->node_id);
5116 m_evt_handler->begin_map_val_flow();
5117 _set_indentation(m_evt_handler->m_parent->indref);
5118 m_evt_handler->set_key_scalar_plain_empty();
5120 _line_progressed(1);
5121 goto seqflow_finish;
// '?': start a map as the value, remembering that an explicit key was opened
5123 else if(first ==
'?')
5125 _c4dbgp(
"seqflow[RVAL]: start child mapflow, explicit key");
5127 m_was_inside_qmrk =
true;
5128 m_evt_handler->begin_map_val_flow();
5129 _set_indentation(m_evt_handler->m_parent->indref);
5131 _line_progressed(1);
5132 _maybe_skip_whitespace_tokens();
5133 goto seqflow_finish;
// --- next expected (RNXT set, RVAL unset) ---
5142 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT));
5143 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
5144 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
// another value follows: add a sibling and consume one character
5147 _c4dbgp(
"seqflow[RNXT]: expect next val");
5149 m_evt_handler->add_sibling();
5150 _line_progressed(1);
// end of this sequence
5152 else if(first ==
']')
5154 _c4dbgp(
"seqflow[RNXT]: end!");
5156 _line_progressed(1);
5157 goto seqflow_finish;
// ':' after a value: that value becomes the first key of a new flow map
5159 else if(first ==
':')
5161 _c4dbgpf(
"seqflow[RNXT]: actually seqimap at node[{}]", m_evt_handler->m_curr->node_id);
5162 m_evt_handler->actually_val_is_first_key_of_new_map_flow();
5163 _set_indentation(m_evt_handler->m_parent->indref);
5164 _line_progressed(1);
5166 goto seqflow_finish;
// --- continue with more input, or fail when the file ended ---
5175 _c4dbgt(
"seqflow: go again", 0);
5176 if(_finished_line())
5178 if(C4_LIKELY(!_finished_file()))
5186 _c4err(
"missing terminating ]");
5192 _c4dbgp(
"seqflow: finish");
// Parsing step(s) for a YAML flow map ("{ ... }").
//
// Preconditions (asserted): RMAP and RFLOW are set, and exactly one of
// RKEY / RKCL / RVAL / RNXT / QMRK is set.
//
// Visible behaviour, per the debug traces:
//  - RKEY: scan a quoted or plain key; handle '?', ':', ',' and '}' (empty
//    key and/or value); record an anchor, reference or tag for the key;
//    open a flow seq/map used as key;
//  - RKCL: consume the colon, or treat '}' / ',' as a missing value;
//  - RVAL: scan a quoted or plain value; open a child seq/map; treat '}' /
//    ',' as an empty value; record a reference, anchor or tag;
//  - RNXT: ',' adds a sibling, '}' ends the map;
//  - QMRK: like RKEY, for a key introduced by '?';
//  - reaching the end of the file raises "missing terminating }".
// NOTE(review): some branch conditions and the state-flag updates are not
// part of the lines documented here.
5198 template<
class EventHandler>
5199 void ParseEngine<EventHandler>::_handle_map_flow()
5202 _c4dbgpf(
"handle_map_flow: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5204 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP));
5205 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(
RFLOW));
// exactly one of the five states is active
5207 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(
RKEY) + has_any(
RKCL) + has_any(
RVAL) + has_any(
RNXT) + has_any(
QMRK)));
5209 _handle_flow_skip_whitespace();
5210 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// --- key expected (all other states asserted to be unset) ---
5216 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
5217 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
5218 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
5219 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
5220 const char first = rem.str[0];
5221 _c4dbgpf(
"mapflow[RKEY]: '{}'", first);
// single-quoted key
5225 _c4dbgp(
"mapflow[RKEY]: scanning single-quoted scalar");
5226 sc = _scan_scalar_squot();
5227 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5228 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
// double-quoted key
5231 else if(first ==
'"')
5233 _c4dbgp(
"mapflow[RKEY]: scanning double-quoted scalar");
5234 sc = _scan_scalar_dquot();
5235 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5236 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
// plain key
5240 else if(_scan_scalar_plain_map_flow(&sc))
5242 _c4dbgp(
"mapflow[RKEY]: plain scalar");
5243 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5244 m_evt_handler->set_key_scalar_plain(maybe_filtered);
// '?': explicit key marker
5247 else if(first ==
'?')
5249 _c4dbgp(
"mapflow[RKEY]: explicit key");
5250 _line_progressed(1);
5252 _maybe_skip_whitespace_tokens();
// ':' right away: the key is empty
5254 else if(first ==
':')
5256 _c4dbgp(
"mapflow[RKEY]: setting empty key");
5257 m_evt_handler->set_key_scalar_plain_empty();
5259 _line_progressed(1);
5260 _maybe_skip_whitespace_tokens();
// ',' right away: both key and value are empty
5262 else if(first ==
',')
5264 _c4dbgp(
"mapflow[RKEY]: empty key+val!");
5265 m_evt_handler->set_key_scalar_plain_empty();
5266 m_evt_handler->set_val_scalar_plain_empty();
// end of this map
5270 else if(first ==
'}')
5272 _c4dbgp(
"mapflow[RKEY]: end!");
5274 _line_progressed(1);
5275 goto mapflow_finish;
// anchor on the key
5277 else if(first ==
'&')
5279 csubstr anchor = _scan_anchor();
5280 _c4dbgpf(
"mapflow[RKEY]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
5281 m_evt_handler->set_key_anchor(anchor);
// reference as the key
5283 else if(first ==
'*')
5285 csubstr ref = _scan_ref_map();
5286 _c4dbgpf(
"mapflow[RKEY]: key ref! [{}]~~~{}~~~", ref.len, ref);
5287 m_evt_handler->set_key_ref(ref);
// flow sequence used as the key
5290 else if(first ==
'[')
5295 _c4dbgp(
"mapflow[RKEY]: start child seqflow (!)");
5297 m_evt_handler->begin_seq_key_flow();
5299 _set_indentation(m_evt_handler->m_parent->indref);
5300 _line_progressed(1);
5301 goto mapflow_finish;
// flow map used as the key
5303 else if(first ==
'{')
5308 _c4dbgp(
"mapflow[RKEY]: start child mapflow (!)");
5310 m_evt_handler->begin_map_key_flow();
5312 _set_indentation(m_evt_handler->m_parent->indref);
5313 _line_progressed(1);
// tag on the key
5316 else if(first ==
'!')
5318 csubstr tag = _scan_tag();
5319 _c4dbgpf(
"mapflow[RKEY]: tag! [{}]~~~{}~~~", tag.len, tag);
5321 m_evt_handler->set_key_tag(tag);
// --- colon expected after the key ---
5328 else if(has_any(
RKCL))
5330 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
5331 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
5332 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
5333 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
5334 const char first = rem.str[0];
5335 _c4dbgpf(
"mapflow[RKCL]: '{}'", first);
5338 _c4dbgp(
"mapflow[RKCL]: found the colon");
5340 _line_progressed(1);
// '}' instead of a colon: the value is empty and the map ends
5342 else if(first ==
'}')
5344 _c4dbgp(
"mapflow[RKCL]: end with missing val!");
5346 m_evt_handler->set_val_scalar_plain_empty();
5348 _line_progressed(1);
5349 goto mapflow_finish;
// ',' instead of a colon: the value is empty and another pair follows
5351 else if(first ==
',')
5353 _c4dbgp(
"mapflow[RKCL]: got comma. val is missing");
5354 m_evt_handler->set_val_scalar_plain_empty();
5355 m_evt_handler->add_sibling();
5357 _line_progressed(1);
// --- value expected ---
5364 else if(has_any(
RVAL))
5366 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
5367 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
5368 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
5369 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
5370 const char first = rem.str[0];
5371 _c4dbgpf(
"mapflow[RVAL]: '{}'", first);
// single-quoted value
5375 _c4dbgp(
"mapflow[RVAL]: scanning single-quoted scalar");
5376 sc = _scan_scalar_squot();
5377 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5378 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
// double-quoted value
5381 else if(first ==
'"')
5383 _c4dbgp(
"mapflow[RVAL]: scanning double-quoted scalar");
5384 sc = _scan_scalar_dquot();
5385 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5386 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
// plain value
5390 else if(_scan_scalar_plain_map_flow(&sc))
5392 _c4dbgp(
"mapflow[RVAL]: plain scalar.");
5393 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5394 m_evt_handler->set_val_scalar_plain(maybe_filtered);
// nested flow sequence as the value; the child takes the parent's indref
5397 else if(first ==
'[')
5399 _c4dbgp(
"mapflow[RVAL]: start val seqflow");
5401 m_evt_handler->begin_seq_val_flow();
5402 _set_indentation(m_evt_handler->m_parent->indref);
5404 _line_progressed(1);
5405 goto mapflow_finish;
// nested flow map as the value; the child takes the parent's indref
5407 else if(first ==
'{')
5409 _c4dbgp(
"mapflow[RVAL]: start val mapflow");
5411 m_evt_handler->begin_map_val_flow();
5412 _set_indentation(m_evt_handler->m_parent->indref);
5414 _line_progressed(1);
// '}' right away: the value is empty and the map ends
5417 else if(first ==
'}')
5419 _c4dbgp(
"mapflow[RVAL]: end!");
5420 m_evt_handler->set_val_scalar_plain_empty();
5422 _line_progressed(1);
5423 goto mapflow_finish;
// ',' right away: the value is empty
5425 else if(first ==
',')
5427 _c4dbgp(
"mapflow[RVAL]: empty val!");
5428 m_evt_handler->set_val_scalar_plain_empty();
// reference as the value
5432 else if(first ==
'*')
5434 csubstr ref = _scan_ref_map();
5435 _c4dbgpf(
"mapflow[RVAL]: key ref! [{}]~~~{}~~~", ref.len, ref);
5436 m_evt_handler->set_val_ref(ref);
// anchor on the value
5439 else if(first ==
'&')
5441 csubstr anchor = _scan_anchor();
5442 _c4dbgpf(
"mapflow[RVAL]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
5443 m_evt_handler->set_val_anchor(anchor);
// tag on the value
5445 else if(first ==
'!')
5447 csubstr tag = _scan_tag();
5448 _c4dbgpf(
"mapflow[RVAL]: tag! [{}]~~~{}~~~", tag.len, tag);
5450 m_evt_handler->set_val_tag(tag);
// --- separator or terminator expected ---
5457 else if(has_any(
RNXT))
5459 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
5460 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
5461 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
5462 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
5463 _c4dbgpf(
"mapflow[RNXT]: '{}'", rem.str[0]);
// ',': another key/value pair follows
5464 if(rem.begins_with(
','))
5466 _c4dbgp(
"mapflow[RNXT]: expect next keyval");
5467 m_evt_handler->add_sibling();
5469 _line_progressed(1);
// '}': end of this map
5471 else if(rem.begins_with(
'}'))
5473 _c4dbgp(
"mapflow[RNXT]: end!");
5475 _line_progressed(1);
5476 goto mapflow_finish;
// --- explicit key expected (after '?') ---
5483 else if(has_any(
QMRK))
5485 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
5486 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
5487 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
5488 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
5489 const char first = rem.str[0];
5490 _c4dbgpf(
"mapflow[QMRK]: '{}'", first);
// single-quoted key
5494 _c4dbgp(
"mapflow[QMRK]: scanning single-quoted scalar");
5495 sc = _scan_scalar_squot();
5496 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5497 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
// double-quoted key
5500 else if(first ==
'"')
5502 _c4dbgp(
"mapflow[QMRK]: scanning double-quoted scalar");
5503 sc = _scan_scalar_dquot();
5504 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5505 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
// plain key
5509 else if(_scan_scalar_plain_map_flow(&sc))
5511 _c4dbgp(
"mapflow[QMRK]: plain scalar");
5512 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5513 m_evt_handler->set_key_scalar_plain(maybe_filtered);
// ':' right away: the key is empty
5516 else if(first ==
':')
5518 _c4dbgp(
"mapflow[QMRK]: setting empty key");
5519 m_evt_handler->set_key_scalar_plain_empty();
5521 _line_progressed(1);
5522 _maybe_skip_whitespace_tokens();
// '}' right away: key and value are empty and the map ends
5524 else if(first ==
'}')
5526 _c4dbgp(
"mapflow[QMRK]: end!");
5527 m_evt_handler->set_key_scalar_plain_empty();
5528 m_evt_handler->set_val_scalar_plain_empty();
5530 _line_progressed(1);
5531 goto mapflow_finish;
// ',' right away: both key and value are empty
5533 else if(first ==
',')
5535 _c4dbgp(
"mapflow[QMRK]: empty key+val!");
5536 m_evt_handler->set_key_scalar_plain_empty();
5537 m_evt_handler->set_val_scalar_plain_empty();
// anchor on the key
5540 else if(first ==
'&')
5542 csubstr anchor = _scan_anchor();
5543 _c4dbgpf(
"mapflow[QMRK]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
5544 m_evt_handler->set_key_anchor(anchor);
// reference as the key
5546 else if(first ==
'*')
5548 csubstr ref = _scan_ref_map();
5549 _c4dbgpf(
"mapflow[QMRK]: key ref! [{}]~~~{}~~~", ref.len, ref);
5550 m_evt_handler->set_key_ref(ref);
// flow sequence used as the key
5553 else if(first ==
'[')
5558 _c4dbgp(
"mapflow[QMRK]: start child seqflow (!)");
5560 m_evt_handler->begin_seq_key_flow();
5562 _set_indentation(m_evt_handler->m_parent->indref);
5563 _line_progressed(1);
5564 goto mapflow_finish;
// flow map used as the key
5566 else if(first ==
'{')
5571 _c4dbgp(
"mapflow[QMRK]: start child mapflow (!)");
5573 m_evt_handler->begin_map_key_flow();
5574 _set_indentation(m_evt_handler->m_parent->indref);
5576 _line_progressed(1);
// tag on the key
5579 else if(first ==
'!')
5581 csubstr tag = _scan_tag();
5582 _c4dbgpf(
"mapflow[QMRK]: tag! [{}]~~~{}~~~", tag.len, tag);
5584 m_evt_handler->set_key_tag(tag);
// --- continue with more input, or fail when the file ended ---
5593 _c4dbgt(
"mapflow: go again", 0);
5594 if(_finished_line())
5596 if(C4_LIKELY(!_finished_file()))
5604 _c4err(
"missing terminating }");
5610 _c4dbgp(
"mapflow: finish");
// Handles one step of parsing while the current state is a block-style
// sequence. The asserts at the top require the RSEQ and RBLCK flags and
// exactly one of RVAL / RNXT. The debug messages label the two sub-states
// "seqblck[RVAL]" and "seqblck[RNXT]". Events (scalars, containers, siblings,
// refs) are reported through m_evt_handler. Takes no arguments and returns
// nothing; it works on the parser members and on the handler's current
// state (m_curr) and parent state (m_parent).
5616 template<
class EventHandler>
5617 void ParseEngine<EventHandler>::_handle_seq_block()
5620 _c4dbgpf(
"handle_seq_block: seq_id={} node_id={} level={} indent={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
// Preconditions: block sequence, and exactly one of RVAL or RNXT is set.
5622 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(
RSEQ));
5623 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(
RBLCK));
5624 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL|
RNXT));
5625 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(
RVAL) + has_any(
RNXT)));
// Call _maybe_skip_comment(), then take the remainder of the current line.
5627 _maybe_skip_comment();
5628 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// ---- RVAL part: RNXT must not be set here (asserted).
5634 _c4dbgpf(
"seqblck[RVAL]: col={}", m_evt_handler->m_curr->pos.col);
5635 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
// At the beginning of a line, the line's indentation is compared with the
// state's reference indentation (indref).
5636 if(m_evt_handler->m_curr->at_line_beginning())
5638 _c4dbgpf(
"seqblck[RVAL]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
// indentation_ge(): consume the line's indentation and refresh rem.
5639 if(m_evt_handler->m_curr->indentation_ge())
5641 _c4dbgpf(
"seqblck[RVAL]: skip {} from indentation", m_evt_handler->m_curr->line_contents.indentation);
5642 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
5643 rem = m_evt_handler->m_curr->line_contents.rem;
// indentation_lt(): pop out of this block sequence and finish this step.
5647 else if(m_evt_handler->m_curr->indentation_lt())
5649 _c4dbgp(
"seqblck[RVAL]: smaller indentation!");
5650 _handle_indentation_pop_from_block_seq();
5651 goto seqblck_finish;
// indentation == npos is reported as an empty line: consume all of rem.
5653 else if(m_evt_handler->m_curr->line_contents.indentation ==
npos)
5655 _c4dbgp(
"seqblck[RVAL]: empty line!");
5656 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
// NOTE(review): the extent of this conditional section is not evident from
// the surrounding lines; presumably it guards the leading-space annotation
// handling that follows -- confirm.
5660 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
5668 if(rem.str[0] ==
' ')
5670 if(_handle_indentation_from_annotations())
5672 _c4dbgp(
"seqblck[RVAL]: annotations!");
5673 rem = m_evt_handler->m_curr->line_contents.rem;
// rem must be non-empty from here on (asserted); the branches below
// dispatch on its first character.
5680 _RYML_ASSERT_BASIC_(callbacks(), rem.len);
5681 _c4dbgpf(
"seqblck[RVAL]: '{}' node_id={}", rem.str[0], m_evt_handler->m_curr->node_id);
5682 const char first = rem.str[0];
// Remember the line and the column where this token starts; m_bom_len is
// subtracted from the column.
5683 const size_t startline = m_evt_handler->m_curr->pos.line;
5686 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col() - m_bom_len;
// Single-quoted scalar: when no colon follows it is set as the value;
// otherwise (per the debug messages) a child block map is started and the
// scalar is set as its key.
5690 _c4dbgp(
"seqblck[RVAL]: single-quoted scalar");
5691 sc = _scan_scalar_squot();
5692 if(!_maybe_scan_following_colon())
5694 _c4dbgp(
"seqblck[RVAL]: set as val");
5695 _handle_annotations_before_blck_val_scalar();
5696 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5697 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
5702 _c4dbgp(
"seqblck[RVAL]: start mapblck, set scalar as key");
5704 _handle_annotations_before_start_mapblck(startline);
5706 m_evt_handler->begin_map_val_block();
5707 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5708 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5709 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
5711 _maybe_skip_whitespace_tokens();
5712 goto seqblck_finish;
// Double-quoted scalar: same value-or-key decision as the single-quoted case.
5715 else if(first ==
'"')
5717 _c4dbgp(
"seqblck[RVAL]: double-quoted scalar");
5718 sc = _scan_scalar_dquot();
5719 if(!_maybe_scan_following_colon())
5721 _c4dbgp(
"seqblck[RVAL]: set as val");
5722 _handle_annotations_before_blck_val_scalar();
5723 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5724 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5729 _c4dbgp(
"seqblck[RVAL]: start mapblck, set scalar as key");
5731 _handle_annotations_before_start_mapblck(startline);
5733 m_evt_handler->begin_map_val_block();
5734 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5735 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5736 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5738 _maybe_skip_whitespace_tokens();
5739 goto seqblck_finish;
// '|': block-literal scalar, scanned with indref + 1 and set as the value.
5745 else if(first ==
'|')
5747 _c4dbgp(
"seqblck[RVAL]: block-literal scalar");
5749 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
5750 _handle_annotations_before_blck_val_scalar();
5751 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
5752 m_evt_handler->set_val_scalar_literal(maybe_filtered);
// '>': block-folded scalar, scanned with indref + 1 and set as the value.
5755 else if(first ==
'>')
5757 _c4dbgp(
"seqblck[RVAL]: block-folded scalar");
5759 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
5760 _handle_annotations_before_blck_val_scalar();
5761 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
5762 m_evt_handler->set_val_scalar_folded(maybe_filtered);
// Plain scalar: set as the value when no colon follows.
5765 else if(_scan_scalar_plain_seq_blck(&sc))
5767 _c4dbgp(
"seqblck[RVAL]: plain scalar.");
5768 if(!_maybe_scan_following_colon())
5770 _c4dbgp(
"seqblck[RVAL]: set as val");
5771 _handle_annotations_before_blck_val_scalar();
5772 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5773 m_evt_handler->set_val_scalar_plain(maybe_filtered);
// The scalar started to the right of indref: start a child block map and
// set the scalar as its key.
5778 if(startindent > m_evt_handler->m_curr->indref)
5780 _c4dbgp(
"seqblck[RVAL]: start mapblck, set scalar as key");
5782 _handle_annotations_before_start_mapblck(startline);
5784 m_evt_handler->begin_map_val_block();
5785 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5786 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5787 m_evt_handler->set_key_scalar_plain(maybe_filtered);
5789 _maybe_skip_whitespace_tokens();
5790 goto seqblck_finish;
// The parent state has the same indref as the scalar's column: the pending
// value is set empty, this (indentless) seq is ended, a sibling is added
// and the scalar is set as a key.
// NOTE(review): this test uses has_any(RMAP|RBLCK, parent), whereas the
// "end indentless seq" test in the RNXT part of this function uses
// has_all(RMAP|RBLCK, parent) -- confirm that the difference is intended.
5792 else if(m_evt_handler->m_parent && m_evt_handler->m_parent->indref == startindent && has_any(
RMAP|
RBLCK, m_evt_handler->m_parent))
5794 _c4dbgp(
"seqblck[RVAL]: empty val + end indentless seq + set key");
5795 m_evt_handler->set_val_scalar_plain_empty();
5796 m_evt_handler->end_seq_block();
5797 m_evt_handler->add_sibling();
5798 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5799 m_evt_handler->set_key_scalar_plain(maybe_filtered);
5801 _maybe_skip_whitespace_tokens();
5802 goto seqblck_finish;
// '[': start a flow sequence as the value, consume the bracket and set the
// indentation to the parent's indref + 1.
5810 else if(first ==
'[')
5812 _c4dbgp(
"seqblck[RVAL]: start child seqflow");
5814 _handle_annotations_before_blck_val_scalar();
5815 m_evt_handler->begin_seq_val_flow();
5817 _line_progressed(1);
5818 _set_indentation(m_evt_handler->m_parent->indref + 1u);
5819 goto seqblck_finish;
// '{': start a flow map as the value, handled like the '[' case.
5821 else if(first ==
'{')
5823 _c4dbgp(
"seqblck[RVAL]: start child mapflow");
5825 _handle_annotations_before_blck_val_scalar();
5826 m_evt_handler->begin_map_val_flow();
5828 _line_progressed(1);
5829 _set_indentation(m_evt_handler->m_parent->indref + 1u);
5830 goto seqblck_finish;
// '-': at the same column as indref the previous value was empty, so it is
// set empty and a sibling is added; otherwise a child block sequence is
// started at startindent (asserted to be larger than indref).
5832 else if(first ==
'-')
5834 if(startindent == m_evt_handler->m_curr->indref)
5836 _c4dbgp(
"seqblck[RVAL]: prev val was empty");
5837 _handle_annotations_before_blck_val_scalar();
5838 m_evt_handler->set_val_scalar_plain_empty();
5840 m_evt_handler->add_sibling();
5844 _c4dbgp(
"seqblck[RVAL]: start child seqblck");
5845 _RYML_ASSERT_BASIC_(this->callbacks(), startindent > m_evt_handler->m_curr->indref);
5847 _handle_annotations_before_blck_val_scalar();
5848 m_evt_handler->begin_seq_val_block();
5850 _set_indentation(startindent);
// Advance by one character, then skip whitespace tokens.
5853 _line_progressed(1);
5854 _maybe_skip_whitespace_tokens();
// ':' with no scalar before it: start a child block map with an empty key.
5856 else if(first ==
':')
5858 _c4dbgp(
"seqblck[RVAL]: start child mapblck with empty key");
5860 _handle_annotations_before_start_mapblck(startline);
5862 m_evt_handler->begin_map_val_block();
5863 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5864 m_evt_handler->set_key_scalar_plain_empty();
5866 _line_progressed(1);
5867 _maybe_skip_whitespace_tokens();
5868 goto seqblck_finish;
// '&': the anchor is stored in m_pending_anchors with its column and line.
5870 else if(first ==
'&')
5872 const csubstr anchor = _scan_anchor();
5873 _c4dbgpf(
"seqblck[RVAL]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
5876 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
// '*': a reference; set as the value when no colon follows, otherwise it is
// set as the key of a newly started child block map.
5878 else if(first ==
'*')
5880 csubstr ref = _scan_ref_seq();
5881 _c4dbgpf(
"seqblck[RVAL]: ref! [{}]~~~{}~~~", ref.len, ref);
5882 if(!_maybe_scan_following_colon())
5884 _c4dbgp(
"seqblck[RVAL]: set ref as val!");
5885 _handle_annotations_before_blck_val_scalar();
5886 m_evt_handler->set_val_ref(ref);
5891 _c4dbgp(
"seqblck[RVAL]: ref is key of map");
5893 _handle_annotations_before_start_mapblck(startline);
5894 m_evt_handler->begin_map_val_block();
5895 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5896 m_evt_handler->set_key_ref(ref);
5898 _set_indentation(startindent);
5899 _maybe_skip_whitespace_tokens();
5900 goto seqblck_finish;
// '!': the tag is stored in m_pending_tags with its column and line.
5903 else if(first ==
'!')
5905 csubstr tag = _scan_tag();
5906 _c4dbgpf(
"seqblck[RVAL]: val tag! [{}]~~~{}~~~", tag.len, tag);
5909 _add_annotation(&m_pending_tags, tag, startindent, startline);
// '?': explicit key; m_was_inside_qmrk is set and a child block map is
// started with its indentation at startindent.
5911 else if(first ==
'?')
5913 _c4dbgp(
"seqblck[RVAL]: start child mapblck, explicit key");
5915 m_was_inside_qmrk =
true;
5916 m_evt_handler->begin_map_val_block();
5918 _set_indentation(startindent);
5919 _line_progressed(1);
5920 _maybe_skip_whitespace_tokens();
5921 goto seqblck_finish;
// ---- RNXT part: RNXT must be set and RVAL must not (asserted).
5930 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT));
5931 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
5935 _c4dbgpf(
"seqblck[RNXT]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
// Expected (C4_LIKELY) case: we are at the beginning of a line.
5936 if(C4_LIKELY(_at_line_begin()))
5938 _c4dbgp(
"seqblck[RNXT]: at line begin");
// indentation_ge(): consume indref characters, skip whitespace tokens and
// refresh rem.
5939 if(m_evt_handler->m_curr->indentation_ge())
5941 _c4dbgpf(
"seqblck[RNXT]: skip {} from indref", m_evt_handler->m_curr->indref);
5942 _line_progressed(m_evt_handler->m_curr->indref);
5943 _maybe_skip_whitespace_tokens();
5944 rem = m_evt_handler->m_curr->line_contents.rem;
// indentation_lt(): pop states; per the debug messages, afterwards either
// the state is still a block seq (consume the indentation and continue) or
// it is not (finish this step).
5948 else if(m_evt_handler->m_curr->indentation_lt())
5950 _c4dbgp(
"seqblck[RNXT]: smaller indentation!");
5951 _handle_indentation_pop_from_block_seq();
5954 _c4dbgp(
"seqblck[RNXT]: still seqblck!");
5955 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT));
5956 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
5957 rem = m_evt_handler->m_curr->line_contents.rem;
5963 _c4dbgp(
"seqblck[RNXT]: no longer seqblck!");
5964 goto seqblck_finish;
// indentation == npos is reported as a blank line: consume all of rem.
5967 else if(m_evt_handler->m_curr->line_contents.indentation ==
npos)
5969 _c4dbgpf(
"seqblck[RNXT]: blank line, len={}", m_evt_handler->m_curr->line_contents.rem);
5970 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
5971 rem = m_evt_handler->m_curr->line_contents.rem;
// Not at the beginning of the line.
5978 _c4dbgp(
"seqblck[RNXT]: NOT at line begin");
5979 if(!rem.begins_with_any(
" \t"))
5986 rem = m_evt_handler->m_curr->line_contents.rem;
5989 _c4dbgp(
"seqblck[RNXT]: again");
// Dispatch on the first character of what is left of the line.
5997 const char first = rem.str[0];
5998 _c4dbgpf(
"seqblck[RNXT]: '{}' node_id={}", first, m_evt_handler->m_curr->node_id);
// Next entry vs. document start: unless indref and the line indentation are
// both 0 and rem is a doc-begin token, add a sibling and consume one
// character; otherwise start a new document and consume three characters.
6001 if(m_evt_handler->m_curr->indref > 0 || m_evt_handler->m_curr->line_contents.indentation > 0 || !_is_doc_begin_token(rem))
6003 _c4dbgp(
"seqblck[RNXT]: expect next val");
6005 m_evt_handler->add_sibling();
6006 _line_progressed(1);
6007 _maybe_skip_whitespace_tokens();
6011 _c4dbgp(
"seqblck[RNXT]: start doc");
6012 _start_doc_suddenly();
6013 _line_progressed(3);
6014 _maybe_skip_whitespace_tokens();
6015 goto seqblck_finish;
// ':' while the parent state has the RMAP flag: this seq was the '?' key of
// the parent map, so the seq is ended.
6018 else if(first ==
':')
6024 auto const *C4_RESTRICT prev_state = m_evt_handler->m_parent;
6025 if(C4_LIKELY(prev_state && (prev_state->flags &
RMAP)))
6027 _c4dbgp(
"seqblck[RNXT]: actually this seq was '?' key of parent map");
6028 m_evt_handler->end_seq_block();
6029 goto seqblck_finish;
// '.': when rem is "..." alone or "..." followed by a space, call
// _end_doc_suddenly() and consume the three dots.
6036 else if(first ==
'.')
6038 _c4dbgp(
"seqblck[RNXT]: maybe doc?");
6039 csubstr rs = rem.sub(1);
6040 if(rs ==
".." || rs.begins_with(
".. "))
6042 _c4dbgp(
"seqblck[RNXT]: end+start doc");
6043 _end_doc_suddenly();
6044 _line_progressed(3);
6045 _maybe_skip_whitespace_tokens();
6046 goto seqblck_finish;
// Print every state in the handler's stack (level, indref, node id, flags).
6059 for(
auto const& s : m_evt_handler->m_stack)
6061 _dbg_printf(
"state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(flagbuf_, s.flags));
// The parent has both RMAP and RBLCK and the same indref as this seq: this
// is the end of an indentless seq. Pop to the parent (asserted afterwards
// to be a block map) and add a sibling there.
6064 if(m_evt_handler->m_parent && has_all(
RMAP|
RBLCK, m_evt_handler->m_parent) && m_evt_handler->m_curr->indref == m_evt_handler->m_parent->indref)
6066 _c4dbgpf(
"seqblck[RNXT]: end indentless seq, go to parent={}. node={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id);
6067 _RYML_ASSERT_BASIC_(this->callbacks(), m_evt_handler->m_curr != m_evt_handler->m_parent);
6068 _handle_indentation_pop(m_evt_handler->m_parent);
6069 _RYML_ASSERT_BASIC_(this->callbacks(), has_all(
RMAP|
RBLCK));
6070 m_evt_handler->add_sibling();
6072 goto seqblck_finish;
// "go again" tail: checks whether the line is finished, and whether the
// file is finished, in which case this step finishes.
6082 _c4dbgt(
"seqblck: go again", 0);
6083 if(_finished_line())
6088 if(_finished_file())
6090 _c4dbgp(
"seqblck: finish!");
6092 goto seqblck_finish;
// Exit point of the function (target of the seqblck_finish jumps above).
6099 _c4dbgp(
"seqblck: finish");
6105 template<
class EventHandler>
6106 void ParseEngine<EventHandler>::_handle_map_block()
6109 _c4dbgpf(
"handle_map_block: map_id={} node_id={} level={} indref={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
6112 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP));
6113 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(
RBLCK));
6115 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(
RKEY) + has_any(
RKCL) + has_any(
RVAL) + has_any(
RNXT) + has_any(
QMRK)));
6117 _maybe_skip_comment();
6118 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
6124 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
6125 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
6126 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
6127 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
6131 if(m_evt_handler->m_curr->at_line_beginning())
6133 if(m_evt_handler->m_curr->indentation_eq())
6135 _c4dbgpf(
"mapblck[RKEY]: skip {} from indref", m_evt_handler->m_curr->indref);
6136 _line_progressed(m_evt_handler->m_curr->indref);
6137 rem = m_evt_handler->m_curr->line_contents.rem;
6141 else if(m_evt_handler->m_curr->indentation_lt())
6143 _c4dbgp(
"mapblck[RKEY]: smaller indentation!");
6144 _handle_indentation_pop_from_block_map();
6145 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6148 _c4dbgp(
"mapblck[RKEY]: still mapblck!");
6149 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY));
6150 rem = m_evt_handler->m_curr->line_contents.rem;
6156 _c4dbgp(
"mapblck[RKEY]: no longer mapblck!");
6157 goto mapblck_finish;
6162 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_gt());
6163 _c4err(
"invalid indentation");
6169 const char first = rem.str[0];
6170 const size_t startline = m_evt_handler->m_curr->pos.line;
6171 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
6172 _c4dbgpf(
"mapblck[RKEY]: '{}'", first);
6176 _c4dbgp(
"mapblck[RKEY]: scanning single-quoted scalar");
6177 sc = _scan_scalar_squot();
6178 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
6179 _handle_annotations_before_blck_key_scalar();
6180 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6182 if(!_maybe_scan_following_colon())
6183 _c4err(
"could not find ':' colon after key");
6184 _maybe_skip_whitespace_tokens();
6186 else if(first ==
'"')
6188 _c4dbgp(
"mapblck[RKEY]: scanning double-quoted scalar");
6189 sc = _scan_scalar_dquot();
6190 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
6191 _handle_annotations_before_blck_key_scalar();
6192 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6194 if(!_maybe_scan_following_colon())
6195 _c4err(
"could not find ':' colon after key");
6196 _maybe_skip_whitespace_tokens();
6200 else if(C4_UNLIKELY(first ==
'|'))
6202 _c4err(
"block literal keys must be enclosed in '?'");
6204 else if(C4_UNLIKELY(first ==
'>'))
6206 _c4err(
"block literal keys must be enclosed in '?'");
6208 else if(_scan_scalar_plain_map_blck(&sc))
6210 _c4dbgp(
"mapblck[RKEY]: plain scalar");
6211 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
6212 _handle_annotations_before_blck_key_scalar();
6213 m_evt_handler->set_key_scalar_plain(maybe_filtered);
6215 if(!_maybe_scan_following_colon())
6216 _c4err(
"could not find ':' colon after key");
6217 _maybe_skip_whitespace_tokens();
6219 else if(first ==
'?')
6221 _c4dbgp(
"mapblck[RKEY]: key token!");
6223 _line_progressed(1);
6224 _maybe_skip_whitespace_tokens();
6225 m_was_inside_qmrk =
true;
6228 else if(first ==
':')
6230 _c4dbgp(
"mapblck[RKEY]: setting empty key");
6231 _handle_annotations_before_blck_key_scalar();
6232 m_evt_handler->set_key_scalar_plain_empty();
6234 _line_progressed(1);
6235 _maybe_skip_whitespace_tokens();
6237 else if(first ==
'*')
6239 csubstr ref = _scan_ref_map();
6240 _c4dbgpf(
"mapblck[RKEY]: key ref! [{}]~~~{}~~~", ref.len, ref);
6241 _handle_annotations_before_blck_key_scalar();
6242 m_evt_handler->set_key_ref(ref);
6244 if(!_maybe_scan_following_colon())
6245 _c4err(
"could not find ':' colon after key");
6246 _maybe_skip_whitespace_tokens();
6248 else if(first ==
'&')
6250 csubstr anchor = _scan_anchor();
6251 _c4dbgpf(
"mapblck[RKEY]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
6252 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
6254 else if(first ==
'!')
6256 csubstr tag = _scan_tag();
6257 _c4dbgpf(
"mapblck[RKEY]: key tag! [{}]~~~{}~~~", tag.len, tag);
6258 _add_annotation(&m_pending_tags, tag, startindent, startline);
6260 else if(first ==
'[')
6265 _c4dbgp(
"mapblck[RKEY]: start child seqflow (!)");
6267 _handle_annotations_before_blck_key_scalar();
6268 m_evt_handler->begin_seq_key_flow();
6270 _line_progressed(1);
6271 _set_indentation(startindent);
6272 goto mapblck_finish;
6274 else if(first ==
'{')
6279 _c4dbgp(
"mapblck[RKEY]: start child mapflow (!)");
6281 _handle_annotations_before_blck_key_scalar();
6282 m_evt_handler->begin_map_key_flow();
6284 _line_progressed(1);
6285 _set_indentation(startindent);
6286 goto mapblck_finish;
6288 else if(first ==
'-')
6290 _c4dbgp(
"mapblck[RKEY]: maybe doc?");
6291 if(m_evt_handler->m_curr->line_contents.indentation == 0 && _is_doc_begin_token(rem))
6293 _c4dbgp(
"mapblck[RKEY]: end+start doc");
6294 _start_doc_suddenly();
6295 _line_progressed(3);
6296 _maybe_skip_whitespace_tokens();
6297 goto mapblck_finish;
6304 else if(first ==
'.')
6306 _c4dbgp(
"mapblck[RKEY]: maybe end doc?");
6307 if(m_evt_handler->m_curr->line_contents.indentation == 0 && _is_doc_end_token(rem))
6309 _c4dbgp(
"mapblck[RKEY]: end doc");
6310 _end_doc_suddenly();
6311 _line_progressed(3);
6312 _maybe_skip_whitespace_tokens();
6313 goto mapblck_finish;
6321 else if(first ==
'\t')
6323 _c4dbgp(
"mapblck[RKEY]: skip tabs");
6324 _maybe_skipchars(
'\t');
6331 else if(has_any(
RKCL))
6333 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
6334 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
6335 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
6336 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
6340 if(m_evt_handler->m_curr->at_line_beginning())
6342 if(m_evt_handler->m_curr->indentation_eq())
6344 _c4dbgpf(
"mapblck[RKCL]: skip {} from indref", m_evt_handler->m_curr->indref);
6345 _line_progressed(m_evt_handler->m_curr->indref);
6346 rem = m_evt_handler->m_curr->line_contents.rem;
6350 else if(C4_UNLIKELY(m_evt_handler->m_curr->indentation_lt()))
6352 _c4err(
"invalid indentation");
6355 const char first = rem.str[0];
6356 _c4dbgpf(
"mapblck[RKCL]: '{}'", first);
6359 _c4dbgp(
"mapblck[RKCL]: found the colon");
6361 _line_progressed(1);
6362 _maybe_skip_whitespace_tokens();
6364 else if(first ==
'?')
6366 _c4dbgp(
"mapblck[RKCL]: got '?'. val was empty");
6367 _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_was_inside_qmrk);
6368 m_evt_handler->set_val_scalar_plain_empty();
6369 m_evt_handler->add_sibling();
6371 _line_progressed(1);
6372 _maybe_skip_whitespace_tokens();
6374 else if(first ==
'-')
6376 if(m_evt_handler->m_curr->indref == 0 || m_evt_handler->m_curr->line_contents.indentation == 0 || _is_doc_begin_token(rem))
6378 _c4dbgp(
"mapblck[RKCL]: end+start doc");
6379 _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, _is_doc_begin_token(rem));
6380 _start_doc_suddenly();
6381 _line_progressed(3);
6382 _maybe_skip_whitespace_tokens();
6383 goto mapblck_finish;
6390 else if(first ==
'.')
6392 _c4dbgp(
"mapblck[RKCL]: maybe end doc?");
6393 csubstr rs = rem.sub(1);
6394 if(rs ==
".." || rs.begins_with(
".. "))
6396 _c4dbgp(
"mapblck[RKCL]: end+start doc");
6397 _end_doc_suddenly();
6398 _line_progressed(3);
6399 goto mapblck_finish;
6406 else if(m_was_inside_qmrk)
6408 _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_eq());
6409 _c4dbgp(
"mapblck[RKCL]: missing :");
6410 m_evt_handler->set_val_scalar_plain_empty();
6411 m_evt_handler->add_sibling();
6412 m_was_inside_qmrk =
false;
6420 else if(has_any(
RVAL))
6422 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
6423 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
6424 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
6425 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
6429 if(m_evt_handler->m_curr->at_line_beginning())
6431 _c4dbgpf(
"mapblck[RVAL]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
6432 m_evt_handler->m_curr->more_indented =
false;
6433 if(m_evt_handler->m_curr->indref ==
npos)
6435 _c4dbgpf(
"mapblck[RVAL]: setting indentation={}", m_evt_handler->m_parent->indref);
6436 _set_indentation(m_evt_handler->m_curr->line_contents.indentation);
6437 _line_progressed(m_evt_handler->m_curr->indref);
6438 rem = m_evt_handler->m_curr->line_contents.rem;
6442 else if(m_evt_handler->m_curr->indentation_eq())
6444 _c4dbgp(
"mapblck[RVAL]: skip indentation!");
6445 _line_progressed(m_evt_handler->m_curr->indref);
6446 rem = m_evt_handler->m_curr->line_contents.rem;
6474 else if(m_evt_handler->m_curr->indentation_gt())
6476 _c4dbgp(
"mapblck[RVAL]: more indented!");
6477 m_evt_handler->m_curr->more_indented =
true;
6478 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6479 rem = m_evt_handler->m_curr->line_contents.rem;
6483 else if(m_evt_handler->m_curr->indentation_lt())
6485 _c4dbgp(
"mapblck[RVAL]: smaller indentation!");
6486 _handle_indentation_pop_from_block_map();
6489 _c4dbgp(
"mapblck[RVAL]: still mapblck!");
6490 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6493 _c4dbgp(
"mapblck[RVAL]: speculatively expect next keyval");
6494 m_evt_handler->add_sibling();
6501 _c4dbgp(
"mapblck[RVAL]: no longer mapblck!");
6502 goto mapblck_finish;
6505 else if(m_evt_handler->m_curr->line_contents.indentation ==
npos)
6507 _c4dbgp(
"mapblck[RVAL]: empty line!");
6508 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
6515 const char first = rem.str[0];
6516 const size_t startline = m_evt_handler->m_curr->pos.line;
6517 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
6518 _c4dbgpf(
"mapblck[RVAL]: '{}'", first);
6522 _c4dbgp(
"mapblck[RVAL]: scanning single-quoted scalar");
6523 sc = _scan_scalar_squot();
6524 if(!_maybe_scan_following_colon())
6526 _c4dbgp(
"mapblck[RVAL]: set as val");
6527 _handle_annotations_before_blck_val_scalar();
6528 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
6529 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
6534 if(startindent != m_evt_handler->m_curr->indref)
6536 _c4dbgp(
"mapblck[RVAL]: start new block map, set scalar as key");
6537 _handle_annotations_before_start_mapblck(startline);
6540 m_evt_handler->begin_map_val_block();
6541 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6542 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
6543 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6544 _maybe_skip_whitespace_tokens();
6550 _c4dbgp(
"mapblck[RVAL]: prev val empty+this is a key");
6551 m_evt_handler->set_val_scalar_plain_empty();
6552 m_evt_handler->add_sibling();
6553 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
6554 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6556 _maybe_skip_whitespace_tokens();
6560 else if(first ==
'"')
6562 _c4dbgp(
"mapblck[RVAL]: scanning double-quoted scalar");
6563 sc = _scan_scalar_dquot();
6564 if(!_maybe_scan_following_colon())
6566 _c4dbgp(
"mapblck[RVAL]: set as val");
6567 _handle_annotations_before_blck_val_scalar();
6568 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
6569 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
6574 if(startindent != m_evt_handler->m_curr->indref)
6576 _c4dbgp(
"mapblck[RVAL]: start new block map, set scalar as key");
6577 _handle_annotations_before_start_mapblck(startline);
6580 m_evt_handler->begin_map_val_block();
6581 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6582 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
6583 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6584 _maybe_skip_whitespace_tokens();
6590 _c4dbgp(
"mapblck[RVAL]: prev val empty+this is a key");
6591 m_evt_handler->set_val_scalar_plain_empty();
6592 m_evt_handler->add_sibling();
6593 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
6594 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6596 _maybe_skip_whitespace_tokens();
6602 else if(first ==
'|')
6604 _c4dbgp(
"mapblck[RVAL]: scanning block-literal scalar");
6606 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6607 _handle_annotations_before_blck_val_scalar();
6608 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
6609 m_evt_handler->set_val_scalar_literal(maybe_filtered);
6612 else if(first ==
'>')
6614 _c4dbgp(
"mapblck[RVAL]: scanning block-folded scalar");
6616 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6617 _handle_annotations_before_blck_val_scalar();
6618 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
6619 m_evt_handler->set_val_scalar_folded(maybe_filtered);
6622 else if(_scan_scalar_plain_map_blck(&sc))
6624 _c4dbgp(
"mapblck[RVAL]: plain scalar.");
6625 if(!_maybe_scan_following_colon())
6627 _c4dbgp(
"mapblck[RVAL]: set as val");
6628 _handle_annotations_before_blck_val_scalar();
6629 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
6630 m_evt_handler->set_val_scalar_plain(maybe_filtered);
6635 if(startindent != m_evt_handler->m_curr->indref)
6637 _c4dbgpf(
"mapblck[RVAL]: start new block map, set scalar as key {}", m_evt_handler->m_curr->indref);
6639 _handle_annotations_before_start_mapblck(startline);
6641 m_evt_handler->begin_map_val_block();
6642 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6643 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
6644 m_evt_handler->set_key_scalar_plain(maybe_filtered);
6645 _maybe_skip_whitespace_tokens();
6651 _c4dbgp(
"mapblck[RVAL]: prev val empty+this is a key");
6652 _handle_annotations_before_blck_val_scalar();
6653 m_evt_handler->set_val_scalar_plain_empty();
6654 m_evt_handler->add_sibling();
6655 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
6656 m_evt_handler->set_key_scalar_plain(maybe_filtered);
6658 _maybe_skip_whitespace_tokens();
6662 else if(first ==
'-')
6666 _c4dbgp(
"mapblck[RVAL]: start val seqblck");
6668 _handle_annotations_before_blck_val_scalar();
6669 m_evt_handler->begin_seq_val_block();
6671 _set_indentation(startindent);
6672 _line_progressed(1);
6673 _maybe_skip_whitespace_tokens();
6674 goto mapblck_finish;
6676 else if(m_evt_handler->m_curr->indref == 0 || m_evt_handler->m_curr->line_contents.indentation == 0 || _is_doc_begin_token(rem))
6678 _c4dbgp(
"mapblck[RVAL]: end+start doc");
6679 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, _is_doc_begin_token(rem));
6680 _start_doc_suddenly();
6681 _line_progressed(3);
6682 _maybe_skip_whitespace_tokens();
6683 goto mapblck_finish;
6690 else if(first ==
'[')
6692 _c4dbgp(
"mapblck[RVAL]: start val seqflow");
6694 _handle_annotations_before_blck_val_scalar();
6695 m_evt_handler->begin_seq_val_flow();
6697 _set_indentation(m_evt_handler->m_curr->indref + 1u);
6698 _line_progressed(1);
6699 goto mapblck_finish;
6701 else if(first ==
'{')
6703 _c4dbgp(
"mapblck[RVAL]: start val mapflow");
6705 _handle_annotations_before_blck_val_scalar();
6706 m_evt_handler->begin_map_val_flow();
6708 m_evt_handler->m_curr->scalar_col = m_evt_handler->m_curr->line_contents.indentation;
6709 _set_indentation(m_evt_handler->m_curr->indref + 1u);
6710 _line_progressed(1);
6711 goto mapblck_finish;
6713 else if(first ==
'*')
6715 csubstr ref = _scan_ref_map();
6716 _c4dbgpf(
"mapblck[RVAL]: ref! [{}]~~~{}~~~", ref.len, ref);
6717 if(startindent == m_evt_handler->m_curr->indref)
6719 _c4dbgpf(
"mapblck[RVAL]: same indentation {}", startindent);
6720 m_evt_handler->set_val_ref(ref);
6725 _c4dbgpf(
"mapblck[RVAL]: larger indentation {}>{}", startindent, m_evt_handler->m_curr->indref);
6726 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, startindent > m_evt_handler->m_curr->indref);
6727 if(_maybe_scan_following_colon())
6729 _c4dbgp(
"mapblck[RVAL]: start child map, block");
6731 _handle_annotations_before_blck_val_scalar();
6732 m_evt_handler->begin_map_val_block();
6733 m_evt_handler->set_key_ref(ref);
6734 _set_indentation(startindent);
6740 _c4dbgp(
"mapblck[RVAL]: was val ref");
6741 _handle_annotations_before_blck_val_scalar();
6742 m_evt_handler->set_val_ref(ref);
6746 _maybe_skip_whitespace_tokens();
6748 else if(first ==
'&')
6750 csubstr anchor = _scan_anchor();
6751 _c4dbgpf(
"mapblck[RVAL]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
6752 if(startindent == m_evt_handler->m_curr->indref)
6754 _c4dbgp(
"mapblck[RVAL]: anchor for next key. val is missing!");
6755 m_evt_handler->set_val_scalar_plain_empty();
6756 m_evt_handler->add_sibling();
6761 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
6763 else if(first ==
'!')
6765 csubstr tag = _scan_tag();
6766 _c4dbgpf(
"mapblck[RVAL]: tag! [{}]~~~{}~~~", tag.len, tag);
6767 if(startindent == m_evt_handler->m_curr->indref)
6769 _c4dbgp(
"mapblck[RVAL]: tag for next key. val is missing!");
6770 _handle_annotations_before_blck_val_scalar();
6771 m_evt_handler->set_val_scalar_plain_empty();
6772 m_evt_handler->add_sibling();
6777 _add_annotation(&m_pending_tags, tag, startindent, startline);
6779 else if(first ==
'?')
6781 if(startindent == m_evt_handler->m_curr->indref)
6783 _c4dbgp(
"mapblck[RVAL]: got '?'. val was empty");
6784 _handle_annotations_before_blck_val_scalar();
6785 m_evt_handler->set_val_scalar_plain_empty();
6786 m_evt_handler->add_sibling();
6789 else if(startindent > m_evt_handler->m_curr->indref)
6791 _c4dbgp(
"mapblck[RVAL]: start val mapblck");
6793 _handle_annotations_before_blck_val_scalar();
6794 m_evt_handler->begin_map_val_block();
6796 _set_indentation(startindent);
6802 m_was_inside_qmrk =
true;
6803 _line_progressed(1);
6804 _maybe_skip_whitespace_tokens();
6807 else if(first ==
':')
6809 if(startindent == m_evt_handler->m_curr->indref)
6811 _c4dbgp(
"mapblck[RVAL]: got ':'. val was empty, next key as well");
6812 m_evt_handler->set_val_scalar_plain_empty();
6813 m_evt_handler->add_sibling();
6814 m_evt_handler->set_key_scalar_plain_empty();
6816 else if(startindent > m_evt_handler->m_curr->indref)
6818 _c4dbgp(
"mapblck[RVAL]: start val mapblck");
6820 _handle_annotations_before_start_mapblck(startline);
6822 m_evt_handler->begin_map_val_block();
6823 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6824 m_evt_handler->set_key_scalar_plain_empty();
6832 _line_progressed(1);
6833 _maybe_skip_whitespace_tokens();
6836 else if(first ==
'.')
6838 _c4dbgp(
"mapblck[RVAL]: maybe doc?");
6839 csubstr rs = rem.sub(1);
6840 if(rs ==
".." || rs.begins_with(
".. "))
6842 _c4dbgp(
"seqblck[RVAL]: end doc expl");
6843 _end_doc_suddenly();
6844 _line_progressed(3);
6845 _maybe_skip_whitespace_tokens();
6846 goto mapblck_finish;
6854 else if(first ==
'\t')
6856 _c4dbgp(
"mapblck[RVAL]: skip tabs");
6857 _maybe_skipchars(
'\t');
6864 else if(has_any(
RNXT))
6866 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
6867 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
6868 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
6869 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
6873 if(m_evt_handler->m_curr->at_line_beginning())
6875 _c4dbgpf(
"mapblck[RNXT]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
6876 if(m_evt_handler->m_curr->indentation_eq())
6878 _c4dbgpf(
"mapblck[RNXT]: skip {} from indref", m_evt_handler->m_curr->indref);
6879 _line_progressed(m_evt_handler->m_curr->indref);
6880 _c4dbgp(
"mapblck[RNXT]: speculatively expect next keyval");
6881 m_evt_handler->add_sibling();
6885 else if(m_evt_handler->m_curr->indentation_lt())
6887 _c4dbgp(
"mapblck[RNXT]: smaller indentation!");
6888 _handle_indentation_pop_from_block_map();
6891 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6894 _c4dbgp(
"mapblck[RNXT]: speculatively expect next keyval");
6895 m_evt_handler->add_sibling();
6902 goto mapblck_finish;
6908 _c4dbgp(
"mapblck[RNXT]: NOT at line begin");
6909 if(!rem.begins_with_any(
" \t"))
6916 rem = m_evt_handler->m_curr->line_contents.rem;
6919 _c4dbgp(
"seqblck[RNXT]: again");
6927 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, rem.len > 0);
6928 const char first = rem.str[0];
6929 _c4dbgpf(
"mapblck[RNXT]: '{}'", _c4prc(first));
6932 if(m_evt_handler->m_curr->more_indented)
6934 _c4dbgp(
"mapblck[RNXT]: start child block map");
6935 C4_NOT_IMPLEMENTED();
6937 _line_progressed(1);
6938 _set_indentation(m_evt_handler->m_curr->scalar_col);
6939 m_evt_handler->m_curr->more_indented =
false;
6947 else if(first ==
' ')
6949 _c4dbgp(
"mapblck[RNXT]: skip spaces");
6950 _maybe_skip_whitespace_tokens();
6957 else if(has_any(
QMRK))
6959 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
6960 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
6961 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
6962 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
6966 if(m_evt_handler->m_curr->at_line_beginning())
6968 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.indentation !=
npos);
6969 if(m_evt_handler->m_curr->indentation_eq())
6971 _c4dbgpf(
"mapblck[QMRK]: skip {} from indref", m_evt_handler->m_curr->indref);
6972 _line_progressed(m_evt_handler->m_curr->indref);
6973 rem = m_evt_handler->m_curr->line_contents.rem;
6977 else if(m_evt_handler->m_curr->indentation_lt())
6979 _c4dbgp(
"mapblck[QMRK]: smaller indentation!");
6980 _handle_indentation_pop_from_block_map();
6981 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6984 _c4dbgp(
"mapblck[QMRK]: still mapblck!");
6985 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_any(
QMRK));
6986 rem = m_evt_handler->m_curr->line_contents.rem;
6992 _c4dbgp(
"mapblck[QMRK]: no longer mapblck!");
6993 goto mapblck_finish;
6999 _c4dbgp(
"mapblck[QMRK]: larger indentation !");
7000 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
7001 rem = m_evt_handler->m_curr->line_contents.rem;
7009 const char first = rem.str[0];
7010 const size_t startline = m_evt_handler->m_curr->pos.line;
7011 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
7012 _c4dbgpf(
"mapblck[QMRK]: '{}'", first);
7016 _c4dbgp(
"mapblck[QMRK]: scanning single-quoted scalar");
7017 sc = _scan_scalar_squot();
7018 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
7019 if(!_maybe_scan_following_colon())
7021 _c4dbgp(
"mapblck[QMRK]: set as key");
7022 _handle_annotations_before_blck_key_scalar();
7023 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7028 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), set scalar as key");
7030 _handle_annotations_before_start_mapblck_as_key();
7031 m_evt_handler->begin_map_key_block();
7032 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7033 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7034 _maybe_skip_whitespace_tokens();
7035 _set_indentation(startindent);
7040 else if(first ==
'"')
7042 _c4dbgp(
"mapblck[QMRK]: scanning double-quoted scalar");
7043 sc = _scan_scalar_dquot();
7044 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
7045 if(!_maybe_scan_following_colon())
7047 _c4dbgp(
"mapblck[QMRK]: set as key");
7048 _handle_annotations_before_blck_key_scalar();
7049 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7054 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), set scalar as key");
7056 _handle_annotations_before_start_mapblck_as_key();
7057 m_evt_handler->begin_map_key_block();
7058 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7059 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7060 _maybe_skip_whitespace_tokens();
7061 _set_indentation(startindent);
7066 else if(first ==
'|')
7068 _c4dbgp(
"mapblck[QMRK]: scanning block-literal scalar");
7070 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
7071 csubstr maybe_filtered = _maybe_filter_key_scalar_literal(sb);
7072 _handle_annotations_before_blck_key_scalar();
7073 m_evt_handler->set_key_scalar_literal(maybe_filtered);
7076 else if(first ==
'>')
7078 _c4dbgp(
"mapblck[QMRK]: scanning block-literal scalar");
7080 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
7081 csubstr maybe_filtered = _maybe_filter_key_scalar_folded(sb);
7082 _handle_annotations_before_blck_key_scalar();
7083 m_evt_handler->set_key_scalar_folded(maybe_filtered);
7086 else if(_scan_scalar_plain_map_blck(&sc))
7088 _c4dbgp(
"mapblck[QMRK]: plain scalar");
7089 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
7090 if(!_maybe_scan_following_colon())
7092 _c4dbgp(
"mapblck[QMRK]: set as key");
7093 _handle_annotations_before_blck_key_scalar();
7094 m_evt_handler->set_key_scalar_plain(maybe_filtered);
7099 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), set scalar as key");
7101 _handle_annotations_before_start_mapblck_as_key();
7102 m_evt_handler->begin_map_key_block();
7103 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7104 m_evt_handler->set_key_scalar_plain(maybe_filtered);
7105 _maybe_skip_whitespace_tokens();
7106 _set_indentation(startindent);
7111 else if(first ==
':')
7113 if(startindent == m_evt_handler->m_curr->indref)
7115 _c4dbgp(
"mapblck[QMRK]: empty key");
7117 _handle_annotations_before_blck_key_scalar();
7118 m_evt_handler->set_key_scalar_plain_empty();
7119 _line_progressed(1);
7120 _maybe_skip_whitespace_tokens();
7124 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), empty key");
7126 _handle_annotations_before_start_mapblck_as_key();
7127 m_evt_handler->begin_map_key_block();
7128 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7129 m_evt_handler->set_key_scalar_plain_empty();
7130 _line_progressed(1);
7131 _maybe_skip_whitespace_tokens();
7132 _set_indentation(startindent);
7137 else if(first ==
'*')
7139 csubstr ref = _scan_ref_map();
7140 _c4dbgpf(
"mapblck[QMRK]: key ref! [{}]~~~{}~~~", ref.len, ref);
7141 if(!_maybe_scan_following_colon())
7143 _c4dbgp(
"mapblck[QMRK]: set ref as key");
7144 _handle_annotations_before_blck_key_scalar();
7145 m_evt_handler->set_key_ref(ref);
7150 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), set ref as key");
7152 _handle_annotations_before_blck_key_scalar();
7153 m_evt_handler->begin_map_key_block();
7154 m_evt_handler->set_key_ref(ref);
7155 _set_indentation(startindent);
7159 _maybe_skip_whitespace_tokens();
7161 else if(first ==
'&')
7163 csubstr anchor = _scan_anchor();
7164 _c4dbgpf(
"mapblck[QMRK]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
7165 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
7167 else if(first ==
'!')
7169 csubstr tag = _scan_tag();
7170 _c4dbgpf(
"mapblck[QMRK]: key tag! [{}]~~~{}~~~", tag.len, tag);
7171 _add_annotation(&m_pending_tags, tag, startindent, startline);
7173 else if(first ==
'-')
7175 _c4dbgp(
"mapblck[QMRK]: maybe doc?");
7176 csubstr rs = rem.sub(1);
7177 if(rs ==
"--" || rs.begins_with(
"-- "))
7179 _c4dbgp(
"mapblck[QMRK]: end+start doc");
7180 _start_doc_suddenly();
7181 _line_progressed(3);
7185 _c4dbgp(
"mapblck[QMRK]: start child seqblck (!)");
7187 _handle_annotations_before_blck_key_scalar();
7188 m_evt_handler->begin_seq_key_block();
7190 _set_indentation(startindent);
7191 _line_progressed(1);
7193 _maybe_skip_whitespace_tokens();
7194 goto mapblck_finish;
7196 else if(first ==
'[')
7198 _c4dbgp(
"mapblck[QMRK]: start child seqflow (!)");
7200 m_evt_handler->begin_seq_key_flow();
7202 _set_indentation(m_evt_handler->m_parent->indref);
7203 _line_progressed(1);
7204 goto mapblck_finish;
7206 else if(first ==
'{')
7208 _c4dbgp(
"mapblck[QMRK]: start child mapblck (!)");
7210 m_evt_handler->begin_map_key_flow();
7212 _set_indentation(m_evt_handler->m_parent->indref);
7213 _line_progressed(1);
7214 goto mapblck_finish;
7216 else if(first ==
'?')
7218 _c4dbgp(
"mapblck[QMRK]: another QMRK '?'");
7219 m_evt_handler->set_key_scalar_plain_empty();
7220 m_evt_handler->set_val_scalar_plain_empty();
7221 m_evt_handler->add_sibling();
7222 _line_progressed(1);
7224 else if(first ==
'.')
7226 _c4dbgp(
"mapblck[QMRK]: maybe end doc?");
7227 csubstr rs = rem.sub(1);
7228 if(rs ==
".." || rs.begins_with(
".. "))
7230 _c4dbgp(
"mapblck[QMRK]: end+start doc");
7231 _end_doc_suddenly();
7232 _line_progressed(3);
7233 goto mapblck_finish;
7247 _c4dbgt(
"mapblck: again", 0);
7248 if(_finished_line())
7252 if(_finished_file())
7254 _c4dbgp(
"mapblck: file finished!");
7256 goto mapblck_finish;
7263 _c4dbgp(
"mapblck: finish");
// Handles what is left of the current line while the state flags contain RTOP
// and none of RNXT/RSEQ/RMAP (asserted below). Presumably this is the
// JSON-only counterpart of _handle_unk(), judging by the name and the
// "runk_json" debug tags.
// Takes no parameters and returns nothing: results are reported through
// m_evt_handler and by advancing the current line position.
// NOTE(review): this listing seems to be missing lines (braces and some of the
// branch conditions); confirm the exact branch structure against the complete
// file.
7269 template<
class EventHandler>
7270 void ParseEngine<EventHandler>::_handle_unk_json()
7272 _c4dbgpf(
"handle_unk_json indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
// Preconditions on the state flags.
7274 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT|
RSEQ|
RMAP));
7275 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(
RTOP));
7277 _maybe_skip_comment();
// Work on the unread remainder of the current line.
7278 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// Index of the first character that is neither a space nor a tab.
7282 size_t pos = rem.first_not_of(
" \t");
// When the remainder is whitespace only, consume all of it.
7285 pos = pos !=
npos ? pos : rem.len;
7286 _c4dbgpf(
"skipping indentation of {}", pos);
7287 _line_progressed(pos);
// Re-read the remainder after advancing.
7288 rem = m_evt_handler->m_curr->line_contents.rem;
7291 _c4dbgpf(
"rem is now [{}]~~~{}~~~", rem.len, rem);
// '[' opens a flow sequence as the value.
7294 if(rem.begins_with(
'['))
7296 _c4dbgp(
"it's a seq");
7297 m_evt_handler->check_trailing_doc_token();
7299 m_evt_handler->begin_seq_val_flow();
// The indentation reference becomes the column reported by current_col(rem).
7301 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7302 m_doc_empty =
false;
// Step over the opening '['.
7303 _line_progressed(1);
// '{' opens a flow map as the value.
7305 else if(rem.begins_with(
'{'))
7307 _c4dbgp(
"it's a map");
7308 m_evt_handler->check_trailing_doc_token();
7310 m_evt_handler->begin_map_val_flow();
7312 m_doc_empty =
false;
7313 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
// Step over the opening '{'.
7314 _line_progressed(1);
// Otherwise try a byte order mark.
7316 else if(_handle_bom())
7318 _c4dbgp(
"byte order mark");
// Remaining case: a scalar. The SSCL flag must not be set at this point.
7322 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, ! has_any(
SSCL));
7323 _maybe_skip_whitespace_tokens();
7324 csubstr s = m_evt_handler->m_curr->line_contents.rem;
7327 const size_t startindent = m_evt_handler->m_curr->line_contents.indentation;
7328 const char first = s.str[0];
// NOTE(review): the condition selecting the double-quoted branch is not
// visible in this listing; presumably it tests `first` against '"'.
7332 _c4dbgp(
"runk_json: scanning double-quoted scalar");
7333 m_evt_handler->check_trailing_doc_token();
7336 m_doc_empty =
false;
7337 sc = _scan_scalar_dquot();
7338 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
// When _maybe_scan_following_colon() reports false, the scalar is emitted as
// the value. The other outcome is not visible in this listing.
7339 if(!_maybe_scan_following_colon())
7341 _c4dbgp(
"runk_json: set as val");
7342 _handle_annotations_before_blck_val_scalar();
7343 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
// Plain (unquoted) scalar.
7350 else if(_scan_scalar_plain_unk(&sc))
7352 _c4dbgp(
"runk_json: got a plain scalar");
7353 m_evt_handler->check_trailing_doc_token();
7356 m_doc_empty =
false;
7357 if(!_maybe_scan_following_colon())
7359 _c4dbgp(
"runk_json: set as val");
7360 _handle_annotations_before_blck_val_scalar();
// The plain scalar is filtered using the line indentation captured above.
7361 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
7362 m_evt_handler->set_val_scalar_plain(maybe_filtered);
// Handles what is left of the current line while the state flags contain RTOP
// and none of RNXT/RSEQ/RMAP (asserted below), i.e. before it is known which
// kind of node the content starts.
// Depending on the first character it: handles a BOM, document markers and
// directives (only at zero indentation); starts a flow or block container;
// records a pending anchor or tag; or scans a scalar or reference and emits it
// either as a value or as the first key of a new block map.
// Takes no parameters and returns nothing: results are reported through
// m_evt_handler and by advancing the current line position.
// NOTE(review): this listing seems to be missing lines (braces and some of the
// branch conditions); confirm the exact branch structure against the complete
// file.
7379 template<
class EventHandler>
7380 void ParseEngine<EventHandler>::_handle_unk()
7382 _c4dbgpf(
"handle_unk indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
// Preconditions on the state flags.
7384 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT|
RSEQ|
RMAP));
7385 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(
RTOP));
7387 _maybe_skip_comment();
// Work on the unread remainder of the current line.
7388 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// Index of the first character that is neither a space nor a tab.
7392 size_t pos = rem.first_not_of(
" \t");
// When the remainder is whitespace only, consume all of it.
7395 pos = pos !=
npos ? pos : rem.len;
7396 _c4dbgpf(
"skipping {} whitespace characters", pos);
7397 _line_progressed(pos);
// Re-read the remainder after advancing.
7398 rem = m_evt_handler->m_curr->line_contents.rem;
7401 _c4dbgpf(
"rem is now [{}]~~~{}~~~", rem.len, rem);
// BOM, document markers and directives are only considered when the line has
// zero indentation and we are either at the line begin or on the line where a
// BOM was recorded.
7404 if(m_evt_handler->m_curr->line_contents.indentation == 0u && (_at_line_begin() || (m_bom_len && (m_evt_handler->m_curr->pos.line == m_bom_line))))
7406 _c4dbgpf(
"rtop: zero indent + at line begin. offset={}", m_evt_handler->m_curr->pos.offset);
7407 _c4dbgp(
"check BOM");
// Remember the line on which the BOM was seen.
// NOTE(review): the condition guarding this assignment is not visible here.
7410 m_bom_line = m_evt_handler->m_curr->pos.line;
7411 _c4dbgpf(
"byte order mark! line={} offset={}", m_bom_line, m_evt_handler->m_curr->pos.offset);
7414 const char first = rem.str[0];
// NOTE(review): the test on `first` selecting this branch is not visible;
// presumably it checks for '-'.
7417 _c4dbgp(
"rtop: suspecting doc");
7418 if(_is_doc_begin_token(rem))
7420 _c4dbgp(
"rtop: begin doc");
7423 _set_indentation(0);
// Step over the three-character document-begin marker.
7425 _line_progressed(3u);
7426 _maybe_skip_whitespace_tokens();
// '.' may start a document-end marker.
7430 else if(first ==
'.')
7432 _c4dbgp(
"rtop: suspecting doc end");
7433 if(_is_doc_end_token(rem))
7435 _c4dbgp(
"rtop: end doc");
7442 _c4dbgp(
"rtop: ignore end doc");
// Step over the three-character document-end marker.
7445 _line_progressed(3u);
7446 _maybe_skip_whitespace_tokens();
// '%' starts a directive.
7450 else if(first ==
'%')
7452 _c4dbgpf(
"directive: {}", rem);
// A directive is an error when the document already has content and the NDOC
// flag is not set.
7453 if(C4_UNLIKELY(!m_doc_empty && has_none(
NDOC)))
7454 _c4err(
"need document footer before directives");
7455 _handle_directive(rem);
// From here on: containers, annotations and scalars.
7461 char first = rem.str[0];
7463 const size_t startindent = m_evt_handler->m_curr->line_contents.indentation;
7464 size_t remindent = m_evt_handler->m_curr->line_contents.current_col(rem);
7467 _c4dbgpf(
"prev BOMlen={}", m_bom_len);
// On the BOM line, remindent is reduced by the BOM length; presumably the BOM
// bytes are otherwise counted in the column (TODO confirm).
7468 if(m_evt_handler->m_curr->pos.line == m_bom_line)
7470 _c4dbgpf(
"BOM remindent={} offset={}", remindent, m_evt_handler->m_curr->pos.offset);
7471 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, remindent >= m_bom_len);
7472 remindent -= m_bom_len;
// Flow sequence.
// NOTE(review): the test on `first` selecting this branch is not visible;
// presumably it checks for '['.
7482 m_evt_handler->check_trailing_doc_token();
7484 m_doc_empty =
false;
// Common case: the flow sequence is the value itself.
7485 if(C4_LIKELY( ! _annotations_require_key_container()))
7487 _c4dbgp(
"it's a seq, flow");
7488 _handle_annotations_before_blck_val_scalar();
7489 m_evt_handler->begin_seq_val_flow();
7491 _set_indentation(remindent);
// Otherwise a block map is started and the flow sequence becomes its key.
7495 _c4dbgp(
"start new block map, set flow seq as key (!)");
7496 _handle_annotations_before_start_mapblck(m_evt_handler->m_curr->pos.line);
7497 m_evt_handler->begin_map_val_block();
7499 _handle_annotations_and_indentation_after_start_mapblck(remindent, m_evt_handler->m_curr->pos.line);
7500 m_evt_handler->begin_seq_key_flow();
7502 _set_indentation(remindent);
// Step over the opening bracket.
7504 _line_progressed(1);
// Flow map: same two cases as for the flow sequence above.
7506 else if(first ==
'{')
7508 m_evt_handler->check_trailing_doc_token();
7510 m_doc_empty =
false;
7511 if(C4_LIKELY( ! _annotations_require_key_container()))
7513 _c4dbgp(
"it's a map, flow");
7514 _handle_annotations_before_blck_val_scalar();
7515 m_evt_handler->begin_map_val_flow();
7517 _set_indentation(remindent);
7521 _c4dbgp(
"start new block map, set flow map as key (!)");
7522 _handle_annotations_before_start_mapblck(m_evt_handler->m_curr->pos.line);
7523 m_evt_handler->begin_map_val_block();
7525 _handle_annotations_and_indentation_after_start_mapblck(remindent, m_evt_handler->m_curr->pos.line);
7526 m_evt_handler->begin_map_key_flow();
7528 _set_indentation(remindent);
// Step over the opening brace.
7530 _line_progressed(1);
// '-' accepted by _is_blck_token(): block sequence.
7532 else if(first ==
'-' && _is_blck_token(rem))
7534 _c4dbgp(
"it's a seq, block");
7535 m_evt_handler->check_trailing_doc_token();
7537 _handle_annotations_before_blck_val_scalar();
7538 m_evt_handler->begin_seq_val_block();
7540 m_doc_empty =
false;
7541 _set_indentation(remindent);
7542 _line_progressed(1);
7543 _maybe_skip_whitespace_tokens();
// '?' accepted by _is_blck_token(): block map whose first key is given with '?'.
7545 else if(first ==
'?' && _is_blck_token(rem))
7547 _c4dbgp(
"it's a map + this key is complex");
7548 m_evt_handler->check_trailing_doc_token();
7550 _handle_annotations_before_blck_val_scalar();
7551 m_evt_handler->begin_map_val_block();
7553 m_doc_empty =
false;
// Record that a '?' key was entered.
7554 m_was_inside_qmrk =
true;
7555 _set_indentation(remindent);
7556 _line_progressed(1);
7557 _maybe_skip_whitespace_tokens();
// ':' accepted by _is_blck_token(): either a map with an empty key, or the
// previously emitted value turns out to be a key.
// NOTE(review): the condition that separates these two cases is not visible in
// this listing.
7559 else if(first ==
':' && _is_blck_token(rem))
7563 _c4dbgp(
"it's a map with an empty key");
7564 const size_t startline = m_evt_handler->m_curr->pos.line;
7565 m_evt_handler->check_trailing_doc_token();
7567 _handle_annotations_before_start_mapblck(startline);
7569 m_evt_handler->begin_map_val_block();
7570 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7571 m_evt_handler->set_key_scalar_plain_empty();
7572 m_doc_empty =
false;
7573 _set_indentation(startindent);
7577 _c4dbgp(
"actually prev val is a key!");
// Save indref before the handler call and set it again afterwards.
7578 size_t prev_indentation = m_evt_handler->m_curr->indref;
7579 m_evt_handler->actually_val_is_first_key_of_new_map_block();
7580 _set_indentation(prev_indentation);
// Step over the ':'.
7583 _line_progressed(1);
7584 _maybe_skip_whitespace_tokens();
// '&': scan an anchor and queue it in m_pending_anchors with its column and line.
7586 else if(first ==
'&')
7588 csubstr anchor = _scan_anchor();
7589 _c4dbgpf(
"anchor! [{}]~~~{}~~~", anchor.len, anchor);
7590 m_evt_handler->check_trailing_doc_token();
7592 const size_t line = m_evt_handler->m_curr->pos.line;
7593 _add_annotation(&m_pending_anchors, anchor, remindent, line);
7594 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7595 m_doc_empty =
false;
// '*': scan a reference; it is either the value or the first key of a new block map.
7597 else if(first ==
'*')
7599 csubstr ref = _scan_ref_map();
7600 _c4dbgpf(
"ref! [{}]~~~{}~~~", ref.len, ref);
7601 m_evt_handler->check_trailing_doc_token();
7603 m_doc_empty =
false;
7604 if(!_maybe_scan_following_colon())
7606 _c4dbgp(
"runk: set val ref");
7607 _handle_annotations_before_blck_val_scalar();
7608 m_evt_handler->set_val_ref(ref);
7612 _c4dbgp(
"runk: start new block map, set ref as key");
7613 const size_t startline = m_evt_handler->m_curr->pos.line;
7614 _handle_annotations_before_start_mapblck(startline);
7615 m_evt_handler->begin_map_val_block();
7616 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7617 m_evt_handler->set_key_ref(ref);
7618 _maybe_skip_whitespace_tokens();
7619 _set_indentation(startindent);
// '!': scan a tag and queue it in m_pending_tags with its column and line.
7623 else if(first ==
'!')
7625 csubstr tag = _scan_tag();
7626 _c4dbgpf(
"unk: val tag! [{}]~~~{}~~~", tag.len, tag);
7629 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
7630 const size_t line = m_evt_handler->m_curr->pos.line;
7631 _add_annotation(&m_pending_tags, tag, indentation, line);
// Remaining case: a scalar. The SSCL flag must not be set at this point.
7635 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, ! has_any(
SSCL));
7636 _maybe_skip_whitespace_tokens();
7637 csubstr s = m_evt_handler->m_curr->line_contents.rem;
7640 const size_t startline = m_evt_handler->m_curr->pos.line;
// Single-quoted scalar.
// NOTE(review): the test selecting this branch is not visible; presumably it
// checks the first character for '\''.
7645 _c4dbgp(
"runk: scanning single-quoted scalar");
7646 m_evt_handler->check_trailing_doc_token();
7649 m_doc_empty =
false;
7650 sc = _scan_scalar_squot();
// Without a following colon the scalar is the value; it goes through the
// val-scalar filter.
7651 if(!_maybe_scan_following_colon())
7653 _c4dbgp(
"runk: set as val");
7654 _handle_annotations_before_blck_val_scalar();
7655 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
7656 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
// Otherwise a block map is started and the scalar becomes its first key; it
// goes through the key-scalar filter instead.
7660 _c4dbgp(
"runk: start new block map, set scalar as key");
7661 _handle_annotations_before_start_mapblck(startline);
7663 m_evt_handler->begin_map_val_block();
7664 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7665 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
7666 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7667 _maybe_skip_whitespace_tokens();
7668 _set_indentation(startindent);
// Double-quoted scalar: same value/key split as for single-quoted.
7672 else if(first ==
'"')
7674 _c4dbgp(
"runk: scanning double-quoted scalar");
7675 m_evt_handler->check_trailing_doc_token();
7678 m_doc_empty =
false;
7679 sc = _scan_scalar_dquot();
7680 if(!_maybe_scan_following_colon())
7682 _c4dbgp(
"runk: set as val");
7683 _handle_annotations_before_blck_val_scalar();
7684 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
7685 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
7689 _c4dbgp(
"runk: start new block map, set double-quoted scalar as key");
7690 _handle_annotations_before_start_mapblck(startline);
7691 m_evt_handler->begin_map_val_block();
7693 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7694 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
7695 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7696 _maybe_skip_whitespace_tokens();
7697 _set_indentation(startindent);
// '|': block literal scalar. It may only be a value here; using it as a key is
// reported as an error.
7701 else if(first ==
'|')
7703 _c4dbgp(
"runk: scanning block-literal scalar");
7704 m_evt_handler->check_trailing_doc_token();
7707 m_doc_empty =
false;
7709 _scan_block(&sb, startindent);
7710 if(C4_LIKELY(!_maybe_scan_following_colon()))
7712 _c4dbgp(
"runk: set as val");
7713 _handle_annotations_before_blck_val_scalar();
7714 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
7715 m_evt_handler->set_val_scalar_literal(maybe_filtered);
7719 _c4err(
"block literal keys must be enclosed in '?'");
// '>': block folded scalar. Same restriction as for the block literal.
7722 else if(first ==
'>')
7724 _c4dbgp(
"runk: scanning block-folded scalar");
7725 m_evt_handler->check_trailing_doc_token();
7728 m_doc_empty =
false;
7730 _scan_block(&sb, startindent);
7731 if(C4_LIKELY(!_maybe_scan_following_colon()))
7733 _c4dbgp(
"runk: set as val");
7734 _handle_annotations_before_blck_val_scalar();
7735 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
7736 m_evt_handler->set_val_scalar_folded(maybe_filtered);
7740 _c4err(
"block folded keys must be enclosed in '?'");
// Plain (unquoted) scalar: value, or first key of a new block map.
7743 else if(_scan_scalar_plain_unk(&sc))
7745 _c4dbgp(
"runk: got a plain scalar");
7746 m_evt_handler->check_trailing_doc_token();
7749 m_doc_empty =
false;
7750 if(!_maybe_scan_following_colon())
7752 _c4dbgp(
"runk: set as val");
7753 _handle_annotations_before_blck_val_scalar();
7754 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
7755 m_evt_handler->set_val_scalar_plain(maybe_filtered);
7759 _c4dbgp(
"runk: start new block map, set scalar as key");
7760 _handle_annotations_before_start_mapblck(startline);
7762 m_evt_handler->begin_map_val_block();
7763 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7764 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
7765 m_evt_handler->set_key_scalar_plain(maybe_filtered);
7766 _maybe_skip_whitespace_tokens();
7767 _set_indentation(startindent);
7777 template<
class EventHandler>
7778 C4_COLD
void ParseEngine<EventHandler>::_handle_usty()
7780 _c4dbgpf(
"handle_usty target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
7782 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_none(
RBLCK|
RFLOW));
7784 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
7787 _c4dbgp(
"usty[RNXT]: finishing!");
7792 _maybe_skip_comment();
7793 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
7797 size_t pos = rem.first_not_of(
" \t");
7800 pos = pos !=
npos ? pos : rem.len;
7801 _c4dbgpf(
"skipping indentation of {}", pos);
7802 _line_progressed(pos);
7803 rem = m_evt_handler->m_curr->line_contents.rem;
7806 _c4dbgpf(
"rem is now [{}]~~~{}~~~", rem.len, rem);
7809 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, rem.len > 0);
7810 size_t startindent = m_evt_handler->m_curr->line_contents.indentation;
7811 char first = rem.str[0];
7814 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, ! has_any(
RMAP));
7815 _c4dbgpf(
"usty[RSEQ]: first='{}'", _c4prc(first));
7818 _c4dbgp(
"usty[RSEQ]: it's a flow seq. merging it");
7820 m_evt_handler->_push();
7822 _set_indentation(startindent);
7823 _line_progressed(1);
7824 _maybe_skip_whitespace_tokens();
7826 else if(first ==
'-' && _is_blck_token(rem))
7828 _c4dbgp(
"usty[RSEQ]: it's a block seq. merging it");
7830 m_evt_handler->_push();
7832 _set_indentation(startindent);
7833 _line_progressed(1);
7834 _maybe_skip_whitespace_tokens();
7838 _c4err(
"can only parse a seq into an existing seq");
7841 else if(has_any(
RMAP))
7843 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, ! has_any(
RSEQ));
7844 _c4dbgpf(
"usty[RMAP]: first='{}'", _c4prc(first));
7847 _c4dbgp(
"usty[RMAP]: it's a flow map. merging it");
7849 _handle_annotations_before_blck_val_scalar();
7850 m_evt_handler->_push();
7852 _set_indentation(startindent);
7853 _line_progressed(1);
7854 _maybe_skip_whitespace_tokens();
7856 else if(first ==
'?' && _is_blck_token(rem))
7858 _c4dbgp(
"usty[RMAP]: it's a block map + this key is complex");
7860 _handle_annotations_before_blck_val_scalar();
7861 m_evt_handler->_push();
7863 m_was_inside_qmrk =
true;
7864 _save_indentation();
7865 _line_progressed(1);
7866 _maybe_skip_whitespace_tokens();
7868 else if(first ==
':' && _is_blck_token(rem))
7870 _c4dbgp(
"usty[RMAP]: it's a map with an empty key");
7872 _handle_annotations_before_blck_val_scalar();
7873 m_evt_handler->_push();
7874 m_evt_handler->set_key_scalar_plain_empty();
7876 _save_indentation();
7877 _line_progressed(1);
7878 _maybe_skip_whitespace_tokens();
7880 else if(rem.begins_with(
'&'))
7882 csubstr anchor = _scan_anchor();
7883 _c4dbgpf(
"usty[RMAP]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
7884 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
7885 const size_t line = m_evt_handler->m_curr->pos.line;
7886 _add_annotation(&m_pending_anchors, anchor, indentation, line);
7887 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7889 else if(first ==
'*')
7891 csubstr ref = _scan_ref_map();
7892 _c4dbgpf(
"usty[RMAP]: ref! [{}]~~~{}~~~", ref.len, ref);
7893 if(!_maybe_scan_following_colon())
7895 _c4err(
"cannot read a VAL to a map");
7899 _c4dbgp(
"usty[RMAP]: start new block map, set ref as key");
7900 const size_t startline = m_evt_handler->m_curr->pos.line;
7902 _handle_annotations_before_start_mapblck(startline);
7903 m_evt_handler->_push();
7904 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7905 m_evt_handler->set_key_ref(ref);
7906 _maybe_skip_whitespace_tokens();
7907 _set_indentation(startindent);
7911 else if(first ==
'!')
7913 csubstr tag = _scan_tag();
7914 _c4dbgpf(
"usty[RMAP]: val tag! [{}]~~~{}~~~", tag.len, tag);
7917 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
7918 const size_t line = m_evt_handler->m_curr->pos.line;
7919 _add_annotation(&m_pending_tags, tag, indentation, line);
7921 else if(first ==
'[' || (first ==
'-' && _is_blck_token(rem)))
7923 _c4err(
"cannot parse a seq into an existing map");
7927 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, ! has_any(
SSCL));
7928 startindent = m_evt_handler->m_curr->line_contents.indentation;
7929 const size_t startline = m_evt_handler->m_curr->pos.line;
7931 _c4dbgpf(
"usty[RMAP]: maybe scalar. first='{}'", _c4prc(first));
7934 _c4dbgp(
"usty[RMAP]: scanning single-quoted scalar");
7935 sc = _scan_scalar_squot();
7936 if(!_maybe_scan_following_colon())
7938 _c4err(
"cannot read a VAL to a map");
7942 _c4dbgp(
"usty[RMAP]: start new block map, set scalar as key");
7944 _handle_annotations_before_start_mapblck(startline);
7945 m_evt_handler->_push();
7946 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7947 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
7948 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7949 _set_indentation(startindent);
7951 _maybe_skip_whitespace_tokens();
7954 else if(first ==
'"')
7956 _c4dbgp(
"usty[RMAP]: scanning double-quoted scalar");
7957 sc = _scan_scalar_dquot();
7958 if(!_maybe_scan_following_colon())
7960 _c4err(
"cannot read a VAL to a map");
7964 _c4dbgp(
"usty[RMAP]: start new block map, set double-quoted scalar as key");
7966 _handle_annotations_before_start_mapblck(startline);
7967 m_evt_handler->_push();
7968 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7969 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
7970 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7971 _set_indentation(startindent);
7973 _maybe_skip_whitespace_tokens();
7976 else if(first ==
'|')
7978 _c4err(
"block literal keys must be enclosed in '?'");
7980 else if(first ==
'>')
7982 _c4err(
"block literal keys must be enclosed in '?'");
7984 else if(_scan_scalar_plain_unk(&sc))
7986 _c4dbgp(
"usty[RMAP]: got a plain scalar");
7987 if(!_maybe_scan_following_colon())
7989 _c4err(
"cannot read a VAL to a map");
7993 _c4dbgp(
"usty[RMAP]: start new block map, set scalar as key");
7995 _handle_annotations_before_start_mapblck(startline);
7996 m_evt_handler->_push();
7997 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7998 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
7999 m_evt_handler->set_key_scalar_plain(maybe_filtered);
8000 _set_indentation(startindent);
8002 _maybe_skip_whitespace_tokens();
8013 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, ! has_any(
RSEQ));
8014 _c4dbgpf(
"usty[UNK]: first='{}'", _c4prc(first));
8017 _c4dbgp(
"usty[UNK]: it's a flow seq");
8019 _handle_annotations_before_blck_val_scalar();
8020 m_evt_handler->begin_seq_val_flow();
8022 _set_indentation(startindent);
8023 _line_progressed(1);
8024 _maybe_skip_whitespace_tokens();
8026 else if(first ==
'-' && _is_blck_token(rem))
8028 _c4dbgp(
"usty[UNK]: it's a block seq");
8030 _handle_annotations_before_blck_val_scalar();
8031 m_evt_handler->begin_seq_val_block();
8033 _set_indentation(startindent);
8034 _line_progressed(1);
8035 _maybe_skip_whitespace_tokens();
8037 else if(first ==
'{')
8039 _c4dbgp(
"usty[UNK]: it's a flow map");
8041 _handle_annotations_before_blck_val_scalar();
8042 m_evt_handler->begin_map_val_flow();
8044 _set_indentation(startindent);
8045 _line_progressed(1);
8046 _maybe_skip_whitespace_tokens();
8048 else if(first ==
'?' && _is_blck_token(rem))
8050 _c4dbgp(
"usty[UNK]: it's a map + this key is complex");
8052 _handle_annotations_before_blck_val_scalar();
8053 m_evt_handler->begin_map_val_block();
8055 m_was_inside_qmrk =
true;
8056 _save_indentation();
8057 _line_progressed(1);
8058 _maybe_skip_whitespace_tokens();
8060 else if(first ==
':' && _is_blck_token(rem))
8062 _c4dbgp(
"usty[UNK]: it's a map with an empty key");
8064 _handle_annotations_before_blck_val_scalar();
8065 m_evt_handler->begin_map_val_block();
8066 m_evt_handler->set_key_scalar_plain_empty();
8068 _save_indentation();
8069 _line_progressed(1);
8070 _maybe_skip_whitespace_tokens();
8072 else if(first ==
'&')
8074 csubstr anchor = _scan_anchor();
8075 _c4dbgpf(
"usty[UNK]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
8076 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8077 const size_t line = m_evt_handler->m_curr->pos.line;
8078 _add_annotation(&m_pending_anchors, anchor, indentation, line);
8079 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
8081 else if(first ==
'*')
8083 csubstr ref = _scan_ref_map();
8084 _c4dbgpf(
"usty[UNK]: ref! [{}]~~~{}~~~", ref.len, ref);
8085 if(!_maybe_scan_following_colon())
8087 _c4dbgp(
"usty[UNK]: set val ref");
8088 _handle_annotations_before_blck_val_scalar();
8089 m_evt_handler->set_val_ref(ref);
8093 _c4dbgp(
"usty[UNK]: start new block map, set ref as key");
8094 const size_t startline = m_evt_handler->m_curr->pos.line;
8096 _handle_annotations_before_start_mapblck(startline);
8097 m_evt_handler->begin_map_val_block();
8098 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8099 m_evt_handler->set_key_ref(ref);
8100 _maybe_skip_whitespace_tokens();
8101 _set_indentation(startindent);
8105 else if(first ==
'!')
8107 csubstr tag = _scan_tag();
8108 _c4dbgpf(
"usty[UNK]: val tag! [{}]~~~{}~~~", tag.len, tag);
8111 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8112 const size_t line = m_evt_handler->m_curr->pos.line;
8113 _add_annotation(&m_pending_tags, tag, indentation, line);
8117 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, ! has_any(
SSCL));
8118 startindent = m_evt_handler->m_curr->line_contents.indentation;
8119 const size_t startline = m_evt_handler->m_curr->pos.line;
8122 _c4dbgpf(
"usty[UNK]: maybe scalar. first='{}'", _c4prc(first));
8125 _c4dbgp(
"usty[UNK]: scanning single-quoted scalar");
8126 sc = _scan_scalar_squot();
8127 if(!_maybe_scan_following_colon())
8129 _c4dbgp(
"usty[UNK]: set as val");
8130 _handle_annotations_before_blck_val_scalar();
8131 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
8132 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
8137 _c4dbgp(
"usty[UNK]: start new block map, set scalar as key");
8139 _handle_annotations_before_start_mapblck(startline);
8140 m_evt_handler->begin_map_val_block();
8141 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8142 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
8143 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
8144 _set_indentation(startindent);
8146 _maybe_skip_whitespace_tokens();
8149 else if(first ==
'"')
8151 _c4dbgp(
"usty[UNK]: scanning double-quoted scalar");
8152 sc = _scan_scalar_dquot();
8153 if(!_maybe_scan_following_colon())
8155 _c4dbgp(
"usty[UNK]: set as val");
8156 _handle_annotations_before_blck_val_scalar();
8157 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
8158 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
8163 _c4dbgp(
"usty[UNK]: start new block map, set double-quoted scalar as key");
8165 _handle_annotations_before_start_mapblck(startline);
8166 m_evt_handler->begin_map_val_block();
8167 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8168 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
8169 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
8170 _set_indentation(startindent);
8172 _maybe_skip_whitespace_tokens();
8175 else if(first ==
'|')
8177 _c4dbgp(
"usty[UNK]: scanning block-literal scalar");
8179 _scan_block(&sb, startindent);
8180 _c4dbgp(
"usty[UNK]: set as val");
8181 _handle_annotations_before_blck_val_scalar();
8182 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
8183 m_evt_handler->set_val_scalar_literal(maybe_filtered);
8186 else if(first ==
'>')
8188 _c4dbgp(
"usty[UNK]: scanning block-folded scalar");
8190 _scan_block(&sb, startindent);
8191 _c4dbgp(
"usty[UNK]: set as val");
8192 _handle_annotations_before_blck_val_scalar();
8193 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
8194 m_evt_handler->set_val_scalar_folded(maybe_filtered);
8197 else if(_scan_scalar_plain_unk(&sc))
8199 _c4dbgp(
"usty[UNK]: got a plain scalar");
8200 if(!_maybe_scan_following_colon())
8202 _c4dbgp(
"usty[UNK]: set as val");
8203 _handle_annotations_before_blck_val_scalar();
8204 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
8205 m_evt_handler->set_val_scalar_plain(maybe_filtered);
8210 _c4dbgp(
"usty[UNK]: start new block map, set scalar as key");
8212 _handle_annotations_before_start_mapblck(startline);
8213 m_evt_handler->begin_map_val_block();
8214 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8215 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
8216 m_evt_handler->set_key_scalar_plain(maybe_filtered);
8217 _set_indentation(startindent);
8219 _maybe_skip_whitespace_tokens();
// Event-emitting parse driver, templated on the event handler type.
// NOTE(review): the signature line and several body lines are absent from this
// listing (the embedded line numbers jump). The dispatch below only shows
// RMAP/RUNK branches and no RBLCK/USTY branches, so this is presumably the
// JSON entry point (parse_json_in_place_ev) -- confirm against the full file.
8233 template<
class EventHandler>
// Precondition: the handler's state stack already holds at least one entry.
8236 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
// Start the parse on the handler, passing the file name, the source buffer,
// and the _s_relocate_arena callback together with this engine.
8240 m_evt_handler->start_parse(filename.str, src, &_s_relocate_arena,
this);
8241 m_evt_handler->begin_stream();
// Outer loop: keeps going until _finished_file() reports true.
8242 while( ! _finished_file())
// Inner loop: keeps going until _finished_line() reports true.
8245 while( ! _finished_line())
// Inside the inner loop the remainder of the current line must not be empty.
8248 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, ! m_evt_handler->m_curr->line_contents.rem.empty());
// Dispatch on the parser state flags. The first branch and the handler call
// made in each branch are not visible in this listing.
8253 else if(has_any(
RMAP))
8257 else if(has_any(
RUNK))
// Fallback when none of the tested state flags is set: report an internal error.
8263 _c4err(
"internal error");
// End-of-input is checked again after the inner loop; the statement guarded
// by this condition is not visible in this listing.
8266 if(_finished_file())
// Tell the handler that the parse is finished.
8271 m_evt_handler->finish_parse();
// Event-emitting parse driver, templated on the event handler type.
// NOTE(review): the signature line and several body lines are absent from this
// listing (the embedded line numbers jump). The dispatch below has RBLCK and
// USTY branches and calls the block handlers, so this is presumably the YAML
// entry point (parse_in_place_ev) -- confirm against the full file.
8277 template<
class EventHandler>
// Precondition: the handler's state stack already holds at least one entry.
8280 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
// Start the parse on the handler, passing the file name, the source buffer,
// and the _s_relocate_arena callback together with this engine.
8284 m_evt_handler->start_parse(filename.str, src, &_s_relocate_arena,
this);
8285 m_evt_handler->begin_stream();
// Outer loop: keeps going until _finished_file() reports true.
8286 while( ! _finished_file())
// Inner loop: keeps going until _finished_line() reports true.
8289 while( ! _finished_line())
// Inside the inner loop the remainder of the current line must not be empty.
8292 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, ! m_evt_handler->m_curr->line_contents.rem.empty());
// The branch(es) preceding this assertion are not visible in this listing;
// at this point the RMAP flags are required to be set.
8303 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP));
// Block-mode branch: the visible calls go to the block sequence handler and,
// after asserting RMAP, to the block map handler. The condition selecting
// between the two is not visible here.
8312 else if(has_any(
RBLCK))
8316 _handle_seq_block();
8320 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP));
8321 _handle_map_block();
// Remaining state branches; the handler calls they make are not visible here.
8324 else if(has_any(
RUNK))
8328 else if(has_any(
USTY))
// Fallback when none of the tested state flags is set: report an internal error.
8334 _c4err(
"internal error");
// End-of-input is checked again after the inner loop; the statement guarded
// by this condition is not visible in this listing.
8337 if(_finished_file())
// Tell the handler that the parse is finished.
8342 m_evt_handler->finish_parse();
// _c4dbgnextline is a helper macro #defined near the top of this file;
// undefine it so it does not leak past this header.
8351 #undef _c4dbgnextline
// Pop the per-compiler warning/diagnostic state; this pairs with the
// push pragmas (MSVC / clang / GCC) issued near the top of this file.
8353 #if defined(_MSC_VER)
8354 # pragma warning(pop)
8355 #elif defined(__clang__)
8356 # pragma clang diagnostic pop
8357 #elif defined(__GNUC__)
8358 # pragma GCC diagnostic pop
Lightweight generic type-safe wrappers for converting individual values to/from strings.
This is the main driver of parsing logic: it scans the YAML or JSON source for tokens,...
FilterResult filter_scalar_plain(csubstr scalar, substr dst, size_t indentation)
filter a plain scalar
csubstr location_contents(Location const &loc) const
Get the string starting at a particular location, to the end of the parsed source buffer.
FilterResult filter_scalar_squoted(csubstr scalar, substr dst)
filter a single-quoted scalar
ParseEngine(EventHandler *evt_handler, ParserOptions opts={})
FilterResult filter_scalar_dquoted(csubstr scalar, substr dst)
filter a double-quoted scalar
void parse_json_in_place_ev(csubstr filename, substr src)
parse JSON in place, emitting events to the current handler
Location val_location(const char *val) const
Given a pointer to a buffer position, get the location.
FilterResult filter_scalar_plain_in_place(substr scalar, size_t cap, size_t indentation)
filter a plain scalar in place
FilterResult filter_scalar_squoted_in_place(substr scalar, size_t cap)
filter a single-quoted scalar in place
FilterResultExtending filter_scalar_dquoted_in_place(substr scalar, size_t cap)
filter a double-quoted scalar in place
void parse_in_place_ev(csubstr filename, substr src)
parse YAML in place, emitting events to the current handler
FilterResult filter_scalar_block_literal_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
filter a block-literal scalar in place
FilterResult filter_scalar_block_literal(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
filter a block-literal scalar
FilterResult filter_scalar_block_folded_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
filter a block-folded scalar in place
ParseEngine & operator=(ParseEngine &&) noexcept
FilterResult filter_scalar_block_folded(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
filter a block-folded scalar
#define RYML_LOCATIONS_SMALL_THRESHOLD
threshold at which a location search will revert from linear to binary search.
bool atou(csubstr str, T *v) noexcept
Convert a trimmed string to an unsigned integral value.
void err_parse(ErrorDataParse const &errdata, const char *msg)
trigger a parse error to its respective handler, with a non-formatted error message.
enum c4::yml::BlockChomp_ BlockChomp_e
@ CHOMP_CLIP
single newline at end (default)
@ CHOMP_KEEP
all newlines from end (+)
@ CHOMP_STRIP
no newline at end (-)
bool read_hex(csubstr s, I *v) noexcept
read a hexadecimal integer from a string.
substr decode_code_point(substr out, csubstr code_point)
decode the given code_point, writing into the output string in out.
@ npos
a null string position
@ RTOP
reading at top level
@ RSET
the (implicit) map being read is a !!set.
@ RNXT
read next val or keyval
@ RUNK
reading unknown state (when starting): must determine whether scalar, map or seq
@ RKEY
reading a scalar as key
@ RKCL
reading the key colon (ie the : after the key in the map)
@ NDOC
no document mode. a document has ended and another has not started yet.
@ QSCL
stored scalar was quoted
@ RBLCK
reading in block mode
@ USTY
reading in unknown style mode - must determine FLOW or BLCK reading an implicit map nested in an expl...
@ QMRK
reading an explicit key (? key)
@ SSCL
there's a stored scalar
@ RVAL
reading a scalar as val
@ RFLOW
reading is inside explicit flow chars: [] or {}
int ParserFlag_t
data type for ParserState_e
size_t to_chars(substr buf, escaped_scalar e)
formatting implementation to escape a scalar with x
@ UTF16BE
UTF16, Big-Endian.
@ UTF16LE
UTF16, Little-Endian.
@ NOBOM
No Byte Order Mark was found.
@ UTF32BE
UTF32, Big-Endian.
@ UTF32LE
UTF32, Little-Endian.
enum c4::yml::Encoding_ Encoding_e
(Undefined by default) Use shorter error message from checks/asserts: do not show the check condition...
#define _RYML_WITHOUT_TAB_TOKENS(...)
#define _ryml_relocate(s)
#define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without)
#define _RYML_WITH_TAB_TOKENS(...)
Options to give to the parser to control its behavior.
utilities for UTF and Byte Order Mark