1 #ifndef _C4_YML_PARSE_ENGINE_DEF_HPP_
2 #define _C4_YML_PARSE_ENGINE_DEF_HPP_
5 #include "c4/error.hpp"
11 #include "c4/yml/detail/parser_dbg.hpp"
14 #include <c4/dump.hpp>
15 #include "c4/yml/detail/print.hpp"
// Helper macros for optional tab-token support. The first group keeps its
// arguments in _RYML_WITH_TAB_TOKENS() and drops them in
// _RYML_WITHOUT_TAB_TOKENS(); the second group does the opposite.
// _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) selects one of its two arguments.
// NOTE(review): the directives separating/closing the two groups are not visible in this view.
19 #if defined(RYML_WITH_TAB_TOKENS)
20 #define _RYML_WITH_TAB_TOKENS(...) __VA_ARGS__
21 #define _RYML_WITHOUT_TAB_TOKENS(...)
22 #define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) with
24 #define _RYML_WITH_TAB_TOKENS(...)
25 #define _RYML_WITHOUT_TAB_TOKENS(...) __VA_ARGS__
26 #define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) without
// Debug helper: prints a separator, then the line number and byte offset
// stored in the event handler's current state (m_evt_handler->m_curr->pos).
// NOTE(review): the macro continues past the last visible continuation line.
31 #define _c4dbgnextline() \
33 _c4dbgq("\n-----------"); \
34 _c4dbgt("handling line={}, offset={}B", \
35 m_evt_handler->m_curr->pos.line, \
36 m_evt_handler->m_curr->pos.offset); \
41 # pragma warning(push)
42 # pragma warning(disable: 4296)
43 # pragma warning(disable: 4702)
44 #elif defined(__clang__)
45 # pragma clang diagnostic push
46 # pragma clang diagnostic ignored "-Wtype-limits"
47 # pragma clang diagnostic ignored "-Wformat-nonliteral"
48 # pragma clang diagnostic ignored "-Wold-style-cast"
49 #elif defined(__GNUC__)
50 # pragma GCC diagnostic push
51 # pragma GCC diagnostic ignored "-Wtype-limits"
52 # pragma GCC diagnostic ignored "-Wformat-nonliteral"
53 # pragma GCC diagnostic ignored "-Wold-style-cast"
55 # pragma GCC diagnostic ignored "-Wduplicated-branches"
// Tests whether s is a block token.
// Preconditions (asserted): s is not empty and its first character is one of
// '-', ':' or '?'.
// NOTE(review): the return statement is not visible in this view, so the
// exact rule applied to the following characters cannot be documented here.
66 C4_HOT C4_ALWAYS_INLINE
bool _is_blck_token(csubstr s) noexcept
68 RYML_ASSERT(s.len > 0);
69 RYML_ASSERT(s.str[0] ==
'-' || s.str[0] ==
':' || s.str[0] ==
'?');
// Tests whether s starts with a document-begin marker.
// Preconditions (asserted): s begins with '-' and does not end with a
// newline or carriage return.
// The visible part of the result requires at least 3 characters, with the
// second and third also being '-' (i.e. s starts with "---").
// NOTE(review): the return expression continues on lines not visible here.
73 inline bool _is_doc_begin_token(csubstr s)
75 RYML_ASSERT(s.begins_with(
'-'));
76 RYML_ASSERT(!s.ends_with(
"\n"));
77 RYML_ASSERT(!s.ends_with(
"\r"));
78 return (s.len >= 3 && s.str[1] ==
'-' && s.str[2] ==
'-')
// Tests whether s starts with a document-end marker.
// Preconditions (asserted): s begins with '.' and does not end with a
// newline or carriage return.
// The visible part of the result requires at least 3 characters, with the
// second and third also being '.' (i.e. s starts with "...").
// NOTE(review): the return expression continues on lines not visible here.
82 inline bool _is_doc_end_token(csubstr s)
84 RYML_ASSERT(s.begins_with(
'.'));
85 RYML_ASSERT(!s.ends_with(
"\n"));
86 RYML_ASSERT(!s.ends_with(
"\r"));
87 return (s.len >= 3 && s.str[1] ==
'.' && s.str[2] ==
'.')
// Tests whether s is a document marker token. The two visible return
// statements compare the second and third characters of s against '-' and
// against '.' respectively.
// NOTE(review): the conditions selecting between the two returns, and the
// rest of each return expression, are not visible in this view.
91 inline bool _is_doc_token(csubstr s) noexcept
119 return (s.str[1] ==
'-' && s.str[2] ==
'-')
123 return (s.str[1] ==
'.' && s.str[2] ==
'.')
// Checks whether s begins with one of the JSON literals "false", "true" or
// "null" (each guarded by a minimum-length check).
// NOTE(review): the returned values are not visible in this view; presumably
// the length of the matched literal, with 0 when there is no match - confirm.
130 inline size_t _is_special_json_scalar(csubstr s)
136 if(s.len >= 5 && s.begins_with(
"false"))
140 if(s.len >= 4 && s.begins_with(
"true"))
144 if(s.len >= 4 && s.begins_with(
"null"))
// Returns 1 when nl and following form a two-character newline pair
// ("\n\r" or "\r\n"), and 0 otherwise. The boolean result is converted to
// size_t so callers can add it directly to a position.
154 C4_ALWAYS_INLINE
size_t _extend_from_combined_newline(
char nl,
char following)
156 return (nl ==
'\n' && following ==
'\r') || (nl ==
'\r' && following ==
'\n');
// Advances rem past its first newline character: locates the first '\r' or
// '\n', keeps what lies to the right of it, and then checks whether the next
// character completes a two-character newline pair.
// NOTE(review): the handling of "no newline found", the body of the final
// `if` and the return statement are not visible in this view.
160 inline substr from_next_line(substr rem)
162 size_t nlpos = rem.first_of(
"\r\n");
165 const char nl = rem[nlpos];
166 rem = rem.right_of(nlpos);
169 if(_extend_from_combined_newline(nl, rem.front()))
// Scans r starting at index *i (asserted to hold '\n') and returns the value
// accumulated in numnl_following. *i is advanced in place as the scan
// proceeds. The loop distinguishes '\n' from the other whitespace
// characters ' ', '\t' and '\r'.
// NOTE(review): the bodies of the branches (counting, loop exit) are not
// visible in this view.
177 inline size_t _count_following_newlines(csubstr r,
size_t *C4_RESTRICT i)
179 RYML_ASSERT(r[*i] ==
'\n');
180 size_t numnl_following = 0;
182 for( ; *i < r.len; ++(*i))
184 if(r.str[*i] ==
'\n')
187 else if(r.str[*i] ==
' ' || r.str[*i] ==
'\t' || r.str[*i] ==
'\r')
192 return numnl_following;
// Indentation-aware overload: scans r starting at index *i (asserted to hold
// '\n'), advancing *i in place, and returns the value accumulated in
// numnl_following.
// Two scanning loops are visible; the second one, after each '\n', walks
// over ' ' and '\r' characters with a limit of `indentation` positions
// (stop = *i + indentation).
// NOTE(review): the condition choosing between the two loops and the branch
// bodies are not visible in this view.
197 inline size_t _count_following_newlines(csubstr r,
size_t *C4_RESTRICT i,
size_t indentation)
199 RYML_ASSERT(r[*i] ==
'\n');
200 size_t numnl_following = 0;
204 for( ; *i < r.len; ++(*i))
206 if(r.str[*i] ==
'\n')
209 else if(r.str[*i] ==
' ' || r.str[*i] ==
'\t' || r.str[*i] ==
'\r')
217 for( ; *i < r.len; ++(*i))
219 if(r.str[*i] ==
'\n')
223 size_t stop = *i + indentation;
224 for( ; *i < r.len; ++(*i))
226 if(r.str[*i] !=
' ' && r.str[*i] !=
'\r')
228 RYML_ASSERT(*i < stop);
233 else if(r.str[*i] ==
' ' || r.str[*i] ==
'\t' || r.str[*i] ==
'\r')
239 return numnl_following;
249 template<
class EventHandler>
// Member-initializer list of what appears to be the ParseEngine constructor
// taking an event handler (NOTE(review): the signature line and some
// initializers are not visible in this view).
// Stores evt_handler, starts with empty pending anchors, a cleared
// m_was_inside_qmrk flag and no newline-offset storage, then checks that
// evt_handler is non-null.
256 template<
class EventHandler>
261 , m_evt_handler(evt_handler)
262 , m_pending_anchors()
264 , m_was_inside_qmrk(false)
268 , m_newline_offsets()
269 , m_newline_offsets_size(0)
270 , m_newline_offsets_capacity(0)
271 , m_newline_offsets_buf()
273 RYML_CHECK(evt_handler);
// Member-initializer list of a constructor that builds the engine from
// another one (`that`). NOTE(review): the signature is not visible;
// presumably the move constructor, since the newline-offset pointer, size,
// capacity and buffer are taken over from `that` as-is rather than copied.
// m_was_inside_qmrk is always reset to false.
276 template<
class EventHandler>
278 : m_options(that.m_options)
279 , m_file(that.m_file)
281 , m_evt_handler(that.m_evt_handler)
282 , m_pending_anchors(that.m_pending_anchors)
283 , m_pending_tags(that.m_pending_tags)
284 , m_was_inside_qmrk(
false)
288 , m_newline_offsets(that.m_newline_offsets)
289 , m_newline_offsets_size(that.m_newline_offsets_size)
290 , m_newline_offsets_capacity(that.m_newline_offsets_capacity)
291 , m_newline_offsets_buf(that.m_newline_offsets_buf)
// Member-initializer list and body of a constructor that builds the engine
// from another one (`that`). NOTE(review): the signature is not visible;
// presumably the copy constructor, since the newline offsets are duplicated
// into storage owned by this object.
// The newline-offset members start out empty; when `that` has capacity, the
// same capacity is allocated through _resize_locations(), verified, and the
// first that.m_newline_offsets_size entries are copied over.
296 template<
class EventHandler>
298 : m_options(that.m_options)
299 , m_file(that.m_file)
301 , m_evt_handler(that.m_evt_handler)
302 , m_pending_anchors(that.m_pending_anchors)
303 , m_pending_tags(that.m_pending_tags)
304 , m_was_inside_qmrk(false)
308 , m_newline_offsets()
309 , m_newline_offsets_size()
310 , m_newline_offsets_capacity()
311 , m_newline_offsets_buf()
313 if(that.m_newline_offsets_capacity)
315 _resize_locations(that.m_newline_offsets_capacity);
316 _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity == that.m_newline_offsets_capacity);
317 memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size *
sizeof(
size_t));
318 m_newline_offsets_size = that.m_newline_offsets_size;
// Body of an assignment from another engine (`that`). NOTE(review): the
// signature is not visible; presumably the move-assignment operator, since
// the newline-offset pointer, size, capacity and buffer are taken over from
// `that` directly rather than copied element by element.
// Every visible member is assigned from its counterpart in `that`.
322 template<
class EventHandler>
326 m_options = (that.m_options);
327 m_file = (that.m_file);
328 m_buf = (that.m_buf);
329 m_evt_handler = that.m_evt_handler;
330 m_pending_anchors = that.m_pending_anchors;
331 m_pending_tags = that.m_pending_tags;
332 m_was_inside_qmrk = that.m_was_inside_qmrk;
333 m_doc_empty = that.m_doc_empty;
334 m_prev_colon = that.m_prev_colon;
335 m_encoding = that.m_encoding;
336 m_newline_offsets = (that.m_newline_offsets);
337 m_newline_offsets_size = (that.m_newline_offsets_size);
338 m_newline_offsets_capacity = (that.m_newline_offsets_capacity);
339 m_newline_offsets_buf = (that.m_newline_offsets_buf);
// Body of an assignment from another engine (`that`). NOTE(review): the
// signature is not visible; presumably the copy-assignment operator, since
// the newline offsets are copied into this object's own storage.
// Scalar members are assigned directly. The newline-offset storage is grown
// only when `that` needs more capacity than is currently available, the
// capacity is then verified against both the capacity and the size of
// `that`, and the used entries are copied with memcpy.
344 template<
class EventHandler>
350 m_options = (that.m_options);
351 m_file = (that.m_file);
352 m_buf = (that.m_buf);
353 m_evt_handler = that.m_evt_handler;
354 m_pending_anchors = that.m_pending_anchors;
355 m_pending_tags = that.m_pending_tags;
356 m_was_inside_qmrk = that.m_was_inside_qmrk;
357 m_doc_empty = that.m_doc_empty;
358 m_prev_colon = that.m_prev_colon;
359 m_encoding = that.m_encoding;
360 if(that.m_newline_offsets_capacity > m_newline_offsets_capacity)
361 _resize_locations(that.m_newline_offsets_capacity);
362 _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_capacity);
363 _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_size);
364 memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size *
sizeof(
size_t));
365 m_newline_offsets_size = that.m_newline_offsets_size;
366 m_newline_offsets_buf = that.m_newline_offsets_buf;
// Resets members to their empty/default values: pending anchors, the
// m_was_inside_qmrk flag and all newline-offset bookkeeping.
// NOTE(review): the signature and several assignments are not visible in
// this view. The newline-offset pointer is overwritten here, not freed.
371 template<
class EventHandler>
378 m_pending_anchors = {};
380 m_was_inside_qmrk =
false;
384 m_newline_offsets = {};
385 m_newline_offsets_size = {};
386 m_newline_offsets_capacity = {};
387 m_newline_offsets_buf = {};
// Releases the newline-offset storage, if any was allocated, through the
// event handler's callbacks, and clears the associated pointer, size,
// capacity and buffer members.
// NOTE(review): brace lines are not visible, so it cannot be told from this
// view which of the resets are inside the `if`.
390 template<
class EventHandler>
391 void ParseEngine<EventHandler>::_free()
393 if(m_newline_offsets)
395 _RYML_CB_FREE(m_evt_handler->m_stack.m_callbacks, m_newline_offsets,
size_t, m_newline_offsets_capacity);
396 m_newline_offsets =
nullptr;
397 m_newline_offsets_size = 0u;
398 m_newline_offsets_capacity = 0u;
399 m_newline_offsets_buf =
nullptr;
// Resets per-parse state: clears the pending anchors and the
// m_was_inside_qmrk flag, and prepares location tracking when the parser
// options request locations.
// NOTE(review): further resets on lines not visible here may exist.
406 template<
class EventHandler>
407 void ParseEngine<EventHandler>::_reset()
409 m_pending_anchors = {};
412 m_was_inside_qmrk =
false;
415 if(m_options.locations())
417 _prepare_locations();
// Fixes up strings held by the engine after the arena moved from prev_arena
// to next_arena. The local _ryml_relocate(s) macro rebases s.str to the same
// offset inside next_arena whenever s lies within prev_arena. The loops walk
// the pending tags and the pending anchors.
// NOTE(review): the loop bodies and the remainder of the macro are not
// visible in this view.
424 template<
class EventHandler>
425 void ParseEngine<EventHandler>::_relocate_arena(csubstr prev_arena, substr next_arena)
427 #define _ryml_relocate(s) \
428 if((s).is_sub(prev_arena)) \
430 (s).str = next_arena.str + ((s).str - prev_arena.str); \
434 for(
size_t i = 0; i < m_pending_tags.num_entries; ++i)
436 for(
size_t i = 0; i < m_pending_anchors.num_entries; ++i)
438 #undef _ryml_relocate
// Type-erased entry point for arena relocation: `data` is interpreted as a
// pointer to a ParseEngine and the call is forwarded to its
// _relocate_arena() member with the same arena arguments.
441 template<
class EventHandler>
442 void ParseEngine<EventHandler>::_s_relocate_arena(
void* data, csubstr prev_arena, substr next_arena)
444 ((ParseEngine*)data)->_relocate_arena(prev_arena, next_arena);
// Writes a description of the current parse position through dumpfn.
// The visible pieces are, in order:
//  - "<file>:" followed by "<line>:<col>: ",
//  - the stripped contents of the current line, cut to 80 characters with a
//    trailing "..." when longer, plus its size,
//  - a marker row: spaces up to the start of the unparsed remainder, a '^',
//    and '~' characters spanning the remainder (at most 80), with the column
//    range,
//  - the flags of the top parser state.
// NOTE(review): the conditions guarding some of these pieces (e.g. whether a
// file name is printed) are on lines not visible in this view.
450 template<
class EventHandler>
451 template<
class DumpFn>
452 void ParseEngine<EventHandler>::_fmt_msg(DumpFn &&dumpfn)
const
454 auto const *
const C4_RESTRICT st = m_evt_handler->m_curr;
455 auto const& lc = st->line_contents;
456 csubstr contents = lc.stripped;
// offs is the width of the prefix printed before the line contents; it is
// used below to align the marker row. Presumably to_chars() on an empty
// buffer yields the number of characters required - confirm.
460 size_t offs = 3u +
to_chars(substr{}, st->pos.line) +
to_chars(substr{}, st->pos.col);
463 detail::_dump(std::forward<DumpFn>(dumpfn),
"{}:", m_file);
464 offs += m_file.len + 1;
466 detail::_dump(std::forward<DumpFn>(dumpfn),
"{}:{}: ", st->pos.line, st->pos.col);
467 csubstr maybe_full_content = (contents.len < 80u ? contents : contents.first(80u));
468 csubstr maybe_ellipsis = (contents.len < 80u ? csubstr{} : csubstr(
"..."));
469 detail::_dump(std::forward<DumpFn>(dumpfn),
"{}{} (size={})\n", maybe_full_content, maybe_ellipsis, contents.len);
// columns (0-based) delimiting the unparsed remainder within the full line
471 size_t firstcol = (size_t)(lc.rem.begin() - lc.full.begin());
472 size_t lastcol = firstcol + lc.rem.len;
473 for(
size_t i = 0; i < offs + firstcol; ++i)
474 std::forward<DumpFn>(dumpfn)(
" ");
475 std::forward<DumpFn>(dumpfn)(
"^");
476 for(
size_t i = 1, e = (lc.rem.len < 80u ? lc.rem.len : 80u); i < e; ++i)
477 std::forward<DumpFn>(dumpfn)(
"~");
478 detail::_dump(std::forward<DumpFn>(dumpfn),
"{} (cols {}-{})\n", maybe_ellipsis, firstcol+1, lastcol+1);
482 std::forward<DumpFn>(dumpfn)(
"\n");
489 detail::_dump(std::forward<DumpFn>(dumpfn),
"top state: {}\n", detail::_parser_flags_to_str(flagbuf_, m_evt_handler->m_curr->flags));
// Reports a parse error: formats fmt/args into the errmsg buffer via a
// _SubstrWriter, tells the event handler to cancel the parse, and invokes
// the user error callback with the message, its length, the current
// position and the callbacks' user data.
// NOTE(review): the declarations of errmsg and len, and any further message
// decoration, are on lines not visible in this view.
497 template<
class EventHandler>
498 template<
class ...Args>
499 void ParseEngine<EventHandler>::_err(csubstr fmt, Args
const& C4_RESTRICT ...args)
const
502 detail::_SubstrWriter writer(errmsg);
503 auto dumpfn = [&writer](csubstr s){ writer.append(s); };
504 detail::_dump(dumpfn, fmt, args...);
508 m_evt_handler->cancel_parse();
509 m_evt_handler->m_stack.m_callbacks.m_error(errmsg, len, m_evt_handler->m_curr->pos, m_evt_handler->m_stack.m_callbacks.m_user_data);
// Debug output helper: formats fmt/args and writes each resulting piece to
// stdout with fwrite(). Pieces whose string pointer is null are skipped.
// NOTE(review): any surrounding conditional compilation is not visible in
// this view.
515 template<
class EventHandler>
516 template<
class ...Args>
517 void ParseEngine<EventHandler>::_dbg(csubstr fmt, Args
const& C4_RESTRICT ...args)
const
521 auto dumpfn = [](csubstr s){
if(s.str) fwrite(s.str, 1, s.len, stdout); };
522 detail::_dump(dumpfn, fmt, args...);
// Tells whether the whole buffer was consumed: true when the current offset
// has reached or passed the length of m_buf. A debug message is printed in
// the finished case.
// NOTE(review): the `if` guarding the message and the return statement are
// not visible in this view.
531 template<
class EventHandler>
532 bool ParseEngine<EventHandler>::_finished_file()
const
534 bool ret = m_evt_handler->m_curr->pos.offset >= m_buf.len;
537 _c4dbgp(
"finished file!!!");
// Tells whether the current line was fully consumed, i.e. whether the
// unparsed remainder (line_contents.rem) of the current state is empty.
542 template<
class EventHandler>
543 C4_HOT C4_ALWAYS_INLINE
bool ParseEngine<EventHandler>::_finished_line()
const
545 return m_evt_handler->m_curr->line_contents.rem.empty();
// Skips leading whitespace in the remainder of the current line, if any, by
// advancing the line position by `pos` characters.
// NOTE(review): the computation of `pos` and the condition under which the
// skip happens are on lines not visible in this view.
551 template<
class EventHandler>
552 void ParseEngine<EventHandler>::_maybe_skip_whitespace_tokens()
554 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
560 _c4dbgpf(
"skip {} whitespace characters", pos);
561 _line_progressed(pos);
// If the remainder of the current line starts with character c, skips the
// whole leading run of c by advancing the line position up to the first
// character that differs from c.
// NOTE(review): the handling of the case where the remainder consists only
// of c (first_not_of finding nothing) is on lines not visible in this view.
565 template<
class EventHandler>
566 void ParseEngine<EventHandler>::_maybe_skipchars(
char c)
568 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
569 if(rem.len && rem.str[0] == c)
571 size_t pos = rem.first_not_of(c);
574 _c4dbgpf(
"skip {}x'{}'", pos, c);
575 _line_progressed(pos);
// Variant of _maybe_skipchars() with an upper bound: if the remainder of the
// current line starts with c, skips the leading run of c, with the count
// compared against max_to_skip.
// Only compiled when RYML_NO_COVERAGE__TO_BE_DELETED is defined.
// NOTE(review): the statement executed when pos > max_to_skip (presumably
// clamping pos) and the closing preprocessor directive are not visible here.
579 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
580 template<
class EventHandler>
581 void ParseEngine<EventHandler>::_maybe_skipchars_up_to(
char c,
size_t max_to_skip)
583 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
584 if(rem.len && rem.str[0] == c)
586 size_t pos = rem.first_not_of(c);
589 if(pos > max_to_skip)
591 _c4dbgpf(
"skip {}x'{}'", pos, c);
592 _line_progressed(pos);
// Skips the leading run of characters belonging to `chars` in the remainder
// of the current line. The remainder is asserted to begin with one of them.
// The position falls back to the full remainder length, so that the whole
// remainder is skipped.
// NOTE(review): the condition for that fallback (presumably "no other
// character found") and the declaration of N are not visible in this view.
597 template<
class EventHandler>
599 void ParseEngine<EventHandler>::_skipchars(
const char (&chars)[N])
601 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.begins_with_any(chars));
602 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(chars);
604 pos = m_evt_handler->m_curr->line_contents.rem.len;
605 _c4dbgpf(
"skip {} characters", pos);
606 _line_progressed(pos);
// Consumes a comment that runs to the end of the current line. The remainder
// is asserted to begin with '#' and to lie within the full line.
// When the line does not itself start with '#', the character right before
// the comment must be a space or a tab; otherwise an error is raised.
// The entire remainder is then marked as consumed.
609 template<
class EventHandler>
610 void ParseEngine<EventHandler>::_skip_comment()
612 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.begins_with(
'#'));
613 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.is_sub(m_evt_handler->m_curr->line_contents.full));
614 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
615 csubstr full = m_evt_handler->m_curr->line_contents.full;
617 if(!full.begins_with(
'#'))
619 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.str > full.str);
// c is the character immediately preceding the '#'
620 const char c = full[(size_t)(rem.str - full.str - 1)];
621 if(C4_UNLIKELY(c !=
' ' && c !=
'\t'))
622 _RYML_CB_ERR(m_evt_handler->m_stack.m_callbacks,
"comment not preceded by whitespace");
626 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.str == full.str);
628 _c4dbgpf(
"comment was '{}'", rem);
629 _line_progressed(rem.len);
// Looks past leading spaces in the remainder of the current line and, when
// a '#' follows, advances the line position over those spaces.
// NOTE(review): the rest of the `if` body (presumably the call that skips
// the comment itself) is not visible in this view.
632 template<
class EventHandler>
633 void ParseEngine<EventHandler>::_maybe_skip_comment()
635 csubstr s = m_evt_handler->m_curr->line_contents.rem.triml(
' ');
636 if(s.begins_with(
'#'))
638 _line_progressed((
size_t)(s.str - m_evt_handler->m_curr->line_contents.rem.str));
// Checks whether a ':' follows on the current line, after skipping any
// leading spaces/tabs in the remainder. If only whitespace remains, the
// whole remainder is skipped.
// NOTE(review): the return statements and what is consumed when the colon is
// found are on lines not visible in this view.
643 template<
class EventHandler>
644 bool ParseEngine<EventHandler>::_maybe_scan_following_colon() noexcept
646 if(m_evt_handler->m_curr->line_contents.rem.len)
648 if(m_evt_handler->m_curr->line_contents.rem.str[0] ==
' ' || m_evt_handler->m_curr->line_contents.rem.str[0] ==
'\t')
650 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(
" \t");
652 pos = m_evt_handler->m_curr->line_contents.rem.len;
653 _c4dbgpf(
"skip {}x'{}'", pos,
' ');
654 _line_progressed(pos);
656 if(m_evt_handler->m_curr->line_contents.rem.len && (m_evt_handler->m_curr->line_contents.rem.str[0] ==
':'))
658 _c4dbgp(
"found ':' colon next");
// Checks whether a ',' follows on the current line, after skipping any
// leading spaces/tabs in the remainder. If only whitespace remains, the
// whole remainder is skipped.
// NOTE(review): the return statements and what is consumed when the comma is
// found are on lines not visible in this view.
666 template<
class EventHandler>
667 bool ParseEngine<EventHandler>::_maybe_scan_following_comma() noexcept
669 if(m_evt_handler->m_curr->line_contents.rem.len)
671 if(m_evt_handler->m_curr->line_contents.rem.str[0] ==
' ' || m_evt_handler->m_curr->line_contents.rem.str[0] ==
'\t')
673 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(
" \t");
675 pos = m_evt_handler->m_curr->line_contents.rem.len;
676 _c4dbgpf(
"skip {}x'{}'", pos,
' ');
677 _line_progressed(pos);
679 if(m_evt_handler->m_curr->line_contents.rem.len && (m_evt_handler->m_curr->line_contents.rem.str[0] ==
','))
681 _c4dbgp(
"found ',' comma next");
// Scans an anchor at the start of the remainder of the current line. The
// remainder is asserted to begin with '&'; the anchor name is the text
// between that '&' and the first space. The '&' and the name are consumed,
// followed by any spaces after them.
// NOTE(review): the return statement is not visible; presumably `anchor`.
692 template<
class EventHandler>
693 csubstr ParseEngine<EventHandler>::_scan_anchor()
695 csubstr s = m_evt_handler->m_curr->line_contents.rem;
696 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'&'));
697 csubstr anchor = s.range(1, s.first_of(
' '));
698 _line_progressed(1u + anchor.len);
699 _maybe_skipchars(
' ');
// Scans a reference (alias) inside a sequence. The remainder of the current
// line is asserted to begin with '*'; the reference extends up to the first
// of ',', ']', ' ' or ':' and includes the leading '*'. The scanned text is
// consumed from the line.
// NOTE(review): the return statement is not visible; presumably `ref`.
703 template<
class EventHandler>
704 csubstr ParseEngine<EventHandler>::_scan_ref_seq()
706 csubstr s = m_evt_handler->m_curr->line_contents.rem;
707 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'*'));
708 csubstr ref = s.first(s.first_of(
",] :"));
709 _line_progressed(ref.len);
// Scans a reference (alias) inside a map. The remainder of the current line
// is asserted to begin with '*'; the reference extends up to the first of
// ',', '}' or ' ' and includes the leading '*'. The scanned text is consumed
// from the line.
// NOTE(review): the return statement is not visible; presumably `ref`.
713 template<
class EventHandler>
714 csubstr ParseEngine<EventHandler>::_scan_ref_map()
716 csubstr s = m_evt_handler->m_curr->line_contents.rem;
717 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'*'));
718 csubstr ref = s.first(s.first_of(
",} "));
719 _line_progressed(ref.len);
// Scans a tag from the current line. Leading spaces are ignored and the
// result is asserted to begin with '!'. The tag text `t` is delimited
// according to its prefix:
//  - "!!..." : up to the first space, or up to the first space or comma,
//  - "!<...>": up to and including the closing '>',
//  - "!h!..." (only when RYML_NO_COVERAGE__TO_BE_DELETED is defined): up to
//    the first space,
//  - any other "!...": up to the first space, or first space or comma.
// The tag is then consumed, together with the whitespace following it.
// NOTE(review): the conditions choosing between the " ," and ' ' delimiters,
// the declaration of `t` and the return statement are not visible here.
723 template<
class EventHandler>
724 csubstr ParseEngine<EventHandler>::_scan_tag()
726 csubstr rem = m_evt_handler->m_curr->line_contents.rem.triml(
' ');
727 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.begins_with(
'!'));
729 if(rem.begins_with(
"!!"))
731 _c4dbgp(
"begins with '!!'");
733 t = rem.left_of(rem.first_of(
" ,"));
735 t = rem.left_of(rem.first_of(
' '));
737 else if(rem.begins_with(
"!<"))
739 _c4dbgp(
"begins with '!<'");
740 t = rem.left_of(rem.first_of(
'>'),
true);
742 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
743 else if(rem.begins_with(
"!h!"))
745 _c4dbgp(
"begins with '!h!'");
746 t = rem.left_of(rem.first_of(
' '));
751 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.begins_with(
'!'));
752 _c4dbgp(
"begins with '!'");
754 t = rem.left_of(rem.first_of(
" ,"));
756 t = rem.left_of(rem.first_of(
' '));
758 _line_progressed(t.len);
759 _maybe_skip_whitespace_tokens();
// Decides whether s (asserted non-empty) can start a plain scalar in flow
// context. The visible messages show two kinds of outcome: "not a scalar"
// for non-scalar tokens, and a parse error for invalid tokens that start
// with ':' or '?'.
// NOTE(review): the character tests themselves and the return statements
// are on lines not visible in this view.
766 template<
class EventHandler>
767 bool ParseEngine<EventHandler>::_is_valid_start_scalar_plain_flow(csubstr s)
769 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.empty());
785 _c4dbgpf(
"not a scalar: found non-scalar token '{}'", _c4prc(s.str[0]));
799 _c4err(
"invalid token \":{}\"", _c4prc(s.str[1]));
806 _c4dbgpf(
"not a scalar: found non-scalar token '{}{}'", s.str[0], s.str[1]);
828 _c4dbgpf(
"not a scalar: found non-scalar token '?{}'", _c4prc(s.str[1]));
834 _c4err(
"invalid token \"?{}\"", _c4prc(s.str[1]));
// Scans a plain scalar appearing as a value inside a flow sequence.
// Asserted state: not in a map or block context; in a sequence (RSEQ or
// RSEQIMAP), in flow context, and reading a value. The remainder of the
// line must not start with a space or newline.
// The scan walks the remainder character by character looking for
// terminators, with special attention to '#' and ':' (a ':' may start a
// map). When a line is exhausted without a terminator, the whole line is
// consumed and scanning continues on the next line unless the file ended.
// The result is stored in *sc (scalar and needs_filter).
// NOTE(review): the character tests, the assignment of sc->scalar and the
// return statements are on lines not visible in this view.
853 template<
class EventHandler>
854 bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_flow(ScannedScalar *C4_RESTRICT sc)
856 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RMAP));
857 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
BLCK));
858 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RSEQ|
RSEQIMAP));
859 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
FLOW));
860 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL));
862 substr s = m_evt_handler->m_curr->line_contents.rem;
863 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(
' '));
864 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(
'\n'));
869 if(!_is_valid_start_scalar_plain_flow(s))
872 _c4dbgp(
"scanning seqflow scalar...");
// remember where the scalar starts, to tell an empty scan from a real one
874 const size_t start_offset = m_evt_handler->m_curr->pos.offset;
875 bool needs_filter =
false;
878 _c4dbgpf(
"scanning scalar: curr line=[{}]~~~{}~~~", s.len, s);
879 for(
size_t i = 0; i < s.len; ++i)
881 const char c = s.str[i];
885 _c4dbgpf(
"found terminating character at {}: '{}'", i, c);
887 if(m_evt_handler->m_curr->pos.offset + i > start_offset)
893 _c4dbgp(
"at the beginning. no scalar here.");
898 _c4dbgpf(
"found terminating character at {}: '{}'", i, c);
903 _c4dbgp(
"found suspicious '#'");
906 _c4dbgpf(
"found terminating character at {}: '{}'", i, c);
912 _c4dbgp(
"found suspicious ':'");
915 const char next = s.str[i+1];
916 _c4dbgpf(
"next char is '{}'", _c4prc(next));
919 _c4dbgp(
"map starting!");
920 if(m_evt_handler->m_curr->pos.offset + i > start_offset)
922 _c4dbgp(
"scalar finished!");
928 _c4dbgp(
"at the beginning. no scalar here.");
934 _c4dbgp(
"it's a scalar indeed.");
938 else if(s.len == i+1)
940 _c4dbgp(
"':' at line end. map starting!");
948 _c4err(
"invalid character: '{}'", c);
// no terminator on this line: consume it and continue with the next one
953 _line_progressed(s.len);
954 if(!_finished_file())
956 _c4dbgp(
"next line!");
962 _c4dbgp(
"file finished!");
965 s = m_evt_handler->m_curr->line_contents.rem;
972 sc->needs_filter = needs_filter;
974 _c4prscalar(
"scanned plain scalar", sc->scalar,
true);
// Scans a plain scalar appearing as a key or value inside a flow map.
// Asserted state: not in a plain sequence (unless RSEQIMAP); not in block
// context; in a map (RMAP or RSEQIMAP), in flow context, and reading a key,
// a value or a '?' entry. The remainder must not start with a space.
// The scan walks the remainder character by character; on a terminator the
// line position is advanced up to it. One visible test accepts the current
// character as a terminator when it is the last on the line or is followed
// by ' ', ',' or '}' (and '\t' when tab tokens are enabled). Invalid
// characters raise an error. When a line is exhausted, scanning continues on
// the next line and the scalar is flagged as needing filtering.
// Returns true when the scanned scalar is non-empty.
// NOTE(review): the character tests and the assignment of sc->scalar are on
// lines not visible in this view.
979 template<
class EventHandler>
980 bool ParseEngine<EventHandler>::_scan_scalar_plain_map_flow(ScannedScalar *C4_RESTRICT sc)
982 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQ) || has_any(
RSEQIMAP));
983 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
BLCK));
984 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RMAP|
RSEQIMAP));
985 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
FLOW));
986 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RVAL|
QMRK));
988 substr s = m_evt_handler->m_curr->line_contents.rem;
989 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(
' '));
994 if(!_is_valid_start_scalar_plain_flow(s))
997 _c4dbgp(
"scanning scalar...");
999 const size_t start_offset = m_evt_handler->m_curr->pos.offset;
1000 bool needs_filter =
false;
1003 for(
size_t i = 0; i < s.len; ++i)
1005 const char c = s.str[i];
1010 _line_progressed(i);
1011 _c4dbgpf(
"found terminating character: '{}'", c);
1014 if(s.len == i+1 || s.str[i+1] ==
' ' || s.str[i+1] ==
',' || s.str[i+1] ==
'}' _RYML_WITH_TAB_TOKENS(|| s.str[i+1] ==
'\t'))
1016 _line_progressed(i);
1017 _c4dbgpf(
"found terminating character: '{}'", c);
1023 _line_progressed(i);
1024 _c4err(
"invalid character: '{}'", c);
1027 _line_progressed(i);
1031 _c4err(
"invalid character: '{}'", c);
1036 _line_progressed(i);
1037 _c4dbgpf(
"found terminating character: '{}'", c);
1045 _c4dbgp(
"next line!");
1046 _line_progressed(s.len);
1047 if(!_finished_file())
1049 _c4dbgp(
"next line!");
1055 _c4dbgp(
"file finished!");
1058 s = m_evt_handler->m_curr->line_contents.rem;
// the scalar now spans more than one line
1059 needs_filter =
true;
1065 sc->needs_filter = needs_filter;
1067 _c4dbgpf(
"scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
1069 return sc->scalar.len > 0u;
// Scans an unquoted JSON scalar inside a flow sequence.
// Asserted state: not in a map or block context; in a sequence and in flow
// context. The remainder must not start with a space.
// A special JSON literal (see _is_special_json_scalar) is taken verbatim and
// consumed. Otherwise the remainder is walked up to a terminating character
// and, when at least one character was scanned, the first i characters
// become the scalar and are consumed. needs_filter is always false here.
// NOTE(review): the character tests, the declaration of i and the return
// statements are on lines not visible in this view.
1072 template<
class EventHandler>
1073 bool ParseEngine<EventHandler>::_scan_scalar_seq_json(ScannedScalar *C4_RESTRICT sc)
1075 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RMAP));
1076 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
BLCK));
1077 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RSEQ));
1078 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
FLOW));
1080 substr s = m_evt_handler->m_curr->line_contents.rem;
1081 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(
' '));
1086 _c4dbgp(
"scanning scalar...");
1093 _c4dbgp(
"not a scalar.");
1098 const size_t len = _is_special_json_scalar(s);
1101 sc->scalar = s.first(len);
1102 sc->needs_filter =
false;
1103 _c4dbgpf(
"special json scalar: '{}'", sc->scalar);
1104 _line_progressed(len);
1111 for( ; i < s.len; ++i)
1113 const char c = s.str[i];
1120 _c4dbgpf(
"found terminating character: '{}'", c);
1123 if(!i || s.str[i-1] ==
' ')
1125 _c4dbgpf(
"found terminating character: '{}'", c);
1136 if(C4_LIKELY(i > 0))
1138 _line_progressed(i);
1139 sc->scalar = s.first(i);
1140 sc->needs_filter =
false;
1141 _c4dbgpf(
"scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
// Scans an unquoted JSON scalar inside a flow map.
// Asserted state: not in a sequence or block context; in a map and in flow
// context, reading a key or a value. The remainder must not start with a
// space.
// A special JSON literal (see _is_special_json_scalar) is taken verbatim and
// consumed. Otherwise the remainder is walked up to a terminating character
// and, when at least one character was scanned, the first i characters
// become the scalar and are consumed. needs_filter is always false here.
// NOTE(review): the character tests, the declaration of i and the return
// statements are on lines not visible in this view.
1148 template<
class EventHandler>
1149 bool ParseEngine<EventHandler>::_scan_scalar_map_json(ScannedScalar *C4_RESTRICT sc)
1151 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQ));
1152 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
BLCK));
1153 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RMAP));
1154 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
FLOW));
1155 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RVAL));
1157 substr s = m_evt_handler->m_curr->line_contents.rem;
1158 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(
' '));
1163 _c4dbgp(
"scanning scalar...");
1166 const size_t len = _is_special_json_scalar(s);
1169 sc->scalar = s.first(len);
1170 sc->needs_filter =
false;
1171 _c4dbgpf(
"special json scalar: '{}'", sc->scalar);
1172 _line_progressed(len);
1179 for( ; i < s.len; ++i)
1181 const char c = s.str[i];
1188 _c4dbgpf(
"found terminating character: '{}'", c);
1191 if(!i || s.str[i-1] ==
' ')
1193 _c4dbgpf(
"found terminating character: '{}'", c);
1204 if(C4_LIKELY(i > 0))
1206 _line_progressed(i);
1207 sc->scalar = s.first(i);
1208 sc->needs_filter =
false;
1209 _c4dbgpf(
"scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
// Tells whether s (asserted to start with '-') is a document-begin marker at
// the current position: the current line must have zero indentation, the
// parser must be at the beginning of the line, and s must satisfy
// _is_doc_begin_token().
1216 template<
class EventHandler>
1217 bool ParseEngine<EventHandler>::_is_doc_begin(csubstr s)
1219 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s[0] ==
'-');
1220 return (m_evt_handler->m_curr->line_contents.indentation == 0u && _at_line_begin() && _is_doc_begin_token(s));
// Tells whether s (asserted to start with '.') is a document-end marker at
// the current position: the current line must have zero indentation, the
// parser must be at the beginning of the line, and s must satisfy
// _is_doc_end_token().
1223 template<
class EventHandler>
1224 bool ParseEngine<EventHandler>::_is_doc_end(csubstr s)
1226 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s[0] ==
'.');
1227 return (m_evt_handler->m_curr->line_contents.indentation == 0u && _at_line_begin() && _is_doc_end_token(s));
// Scans a plain scalar in block context, possibly spanning several lines.
// `indentation` is the minimum indentation a following line needs in order
// to continue the scalar.
// Asserted state: not in flow context, not RSEQIMAP, and one of BLCK, RUNK
// or USTY set. The remainder must not start with a space.
// Visible steps:
//  - block tokens and document begin/end markers at the start are checked
//    before any scanning takes place,
//  - each line is walked looking for ':' and '#'; a '#' at the line start or
//    preceded by a space/tab starts a comment and ends the scalar,
//  - at the end of a line the next line is peeked: a smaller indentation, or
//    a document begin/end token at indentation 0, ends the scalar; otherwise
//    scanning moves to the next line and the scalar is flagged as needing
//    filtering,
//  - the scalar is the buffer range from the starting offset to the current
//    offset with trailing spaces, tabs and newlines removed.
// NOTE(review): many conditions and all return statements are on lines not
// visible in this view.
1230 template<
class EventHandler>
1231 bool ParseEngine<EventHandler>::_scan_scalar_plain_blck(ScannedScalar *C4_RESTRICT sc,
size_t indentation)
1233 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
FLOW));
1234 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQIMAP));
1235 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
BLCK|
RUNK|
USTY));
1237 substr s = m_evt_handler->m_curr->line_contents.rem;
1238 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(
' '));
1246 if(_is_blck_token(s))
1250 else if(_is_doc_begin(s))
1252 _c4dbgp(
"token is doc start");
1258 if(_is_blck_token(s))
1271 _c4dbgp(
"token is doc end");
1277 _c4dbgpf(
"plain scalar! indentation={}", indentation);
1279 const size_t start_offset = m_evt_handler->m_curr->pos.offset;
1280 const size_t start_line = m_evt_handler->m_curr->pos.line;
1282 bool needs_filter =
false;
1285 _c4dbgpf(
"plain scalar line: [{}]~~~{}~~~", s.len, s);
1286 for(
size_t i = 0; i < s.len; ++i)
1288 const char curr = s.str[i];
1293 _c4dbgpf(
"[{}]: got suspicious ':'", i);
1297 _c4dbgpf(
"followed by '{}'", i+1 == s.len ? csubstr(
"\\n") : _c4prc(s.str[i+1]));
1298 _line_progressed(i);
1300 if(C4_LIKELY(m_evt_handler->m_curr->pos.line == start_line))
1302 _c4dbgp(
"start line. scalar ends here");
1307 _c4err(
"parse error");
1313 while(j + 1 < s.len && s.str[j+1] ==
':')
1315 _c4dbgp(
"skip colon");
1318 i = j > i ? j-1 : i;
1319 _c4dbgp(
"nothing to see here");
1323 _c4dbgp(
"got suspicious '#'");
1324 if(!i || (s.str[i-1] ==
' ' || s.str[i-1] ==
'\t'))
1326 _c4dbgp(
"comment! scalar ends here");
1327 _line_progressed(i);
1332 _c4dbgp(
"nothing to see here");
// end of this line reached: look at the next line to decide whether the
// scalar continues
1337 _line_progressed(s.len);
1338 csubstr next_peeked = _peek_next_line(m_evt_handler->m_curr->pos.offset);
1339 next_peeked = next_peeked.trimr(
"\n\r");
1340 const size_t next_indentation = next_peeked.first_not_of(
' ');
1341 _c4dbgpf(
"indentation curr={} next={}", indentation, next_indentation);
1342 if(next_indentation < indentation)
1344 _c4dbgp(
"smaller indentation! scalar ended");
1347 else if(next_indentation == 0 && next_peeked.len > 0)
1349 const char first = next_peeked.str[0];
1353 next_peeked = next_peeked.trimr(
"\n\r");
1354 _c4dbgpf(
"doc begin? peeked=[{}]~~~{}{}~~~", next_peeked.len, next_peeked.len >= 3 ? next_peeked.first(3) : next_peeked, next_peeked.len > 3 ?
"..." :
"");
1355 if(_is_doc_begin_token(next_peeked))
1357 _c4dbgp(
"doc begin! scalar ended");
1362 next_peeked = next_peeked.trimr(
"\n\r");
1363 _c4dbgpf(
"doc end? peeked=[{}]~~~{}{}~~~", next_peeked.len, next_peeked.len >= 3 ? next_peeked.first(3) : next_peeked, next_peeked.len > 3 ?
"..." :
"");
1364 if(_is_doc_end_token(next_peeked))
1366 _c4dbgp(
"doc end! scalar ended");
1373 _c4dbgp(
"next line!");
1374 if(!_finished_file())
1376 _c4dbgp(
"next line!");
1382 _c4dbgp(
"file finished!");
1385 s = m_evt_handler->m_curr->line_contents.rem;
1386 needs_filter =
true;
// the scalar covers everything scanned since start_offset, minus trailing
// whitespace and newlines
1391 sc->scalar = m_buf.range(start_offset, m_evt_handler->m_curr->pos.offset).trimr(
" \n\r\t");
1392 sc->needs_filter = needs_filter;
1394 _c4dbgpf(
"scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
// Scans a plain scalar as a value of a block sequence.
// Asserted state: not in a map, not in flow context, not RSEQIMAP; in a
// sequence, in block context, and reading a value.
// Delegates to _scan_scalar_plain_blck() requiring continuation lines to be
// indented at least one column past the current state's reference
// indentation (indref + 1).
1399 template<
class EventHandler>
1400 bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_blck(ScannedScalar *C4_RESTRICT sc)
1402 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RMAP));
1403 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
FLOW));
1404 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQIMAP));
1405 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RSEQ));
1406 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
BLCK));
1407 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL));
1408 return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref + 1u);
// Scans a plain scalar as a key or value of a block map.
// Asserted state: not in a sequence, not in flow context; in a map, in block
// context, and reading a key, a value or a '?' entry.
// Delegates to _scan_scalar_plain_blck() requiring continuation lines to be
// indented at least one column past the current state's reference
// indentation (indref + 1).
1411 template<
class EventHandler>
1412 bool ParseEngine<EventHandler>::_scan_scalar_plain_map_blck(ScannedScalar *C4_RESTRICT sc)
1414 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQ));
1415 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
FLOW));
1416 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RMAP));
1417 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
BLCK));
1418 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RVAL|
QMRK));
1419 return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref + 1u);
// Scans a plain scalar while the container type is still unknown (asserted:
// RUNK or USTY). Delegates to _scan_scalar_plain_blck() using the current
// state's reference indentation as-is (no +1, unlike the seq/map variants).
1422 template<
class EventHandler>
1423 bool ParseEngine<EventHandler>::_scan_scalar_plain_unk(ScannedScalar *C4_RESTRICT sc)
1425 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RUNK|
USTY));
1426 return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref);
// Returns the line following the one containing offset `pos`, without
// changing the parser position (the function is const). When pos is npos,
// the current offset is used.
// The next line is found by skipping to just after the first newline at or
// after pos; it is then cut after its own newline, which is kept in the
// result (a two-character "\r\n"/"\n\r" pair is kept whole).
// NOTE(review): the results for "pos past the end of the buffer" and for
// "no further line" are on lines not visible here; the second debug message
// suggests an empty result.
1432 template<
class EventHandler>
1433 substr ParseEngine<EventHandler>::_peek_next_line(
size_t pos)
const
1437 pos = pos ==
npos ? m_evt_handler->m_curr->pos.offset : pos;
1438 if(pos >= m_buf.len)
1442 rem = from_next_line(m_buf.sub(pos));
1447 nlpos = rem.first_of(
"\r\n");
1449 nlpos += _extend_from_combined_newline(rem[nlpos], rem[nlpos+1]);
1450 rem = rem.left_of(nlpos,
true);
1452 _c4dbgpf(
"peek next line @ {}: (len={})'{}'", pos, rem.len, rem.trimr(
"\r\n"));
1456 _c4dbgpf(
"peek next line @ {}: (len=0)''", pos);
// Loads the line at the current offset into the current state's
// line_contents. When the offset is still inside the buffer, the line
// starting there is used.
// NOTE(review): the `else` line is not visible; presumably the second
// statement is the fallback for an offset at/after the end of the buffer,
// resetting the contents to an empty range at the end of m_buf.
1462 template<
class EventHandler>
1463 void ParseEngine<EventHandler>::_scan_line()
1465 if(C4_LIKELY(m_evt_handler->m_curr->pos.offset < m_buf.len))
1466 m_evt_handler->m_curr->line_contents.reset_with_next_line(m_buf, m_evt_handler->m_curr->pos.offset);
1468 m_evt_handler->m_curr->line_contents.reset(m_buf.last(0), m_buf.last(0));
// Marks `ahead` characters of the current line as consumed: advances the
// byte offset and the column by `ahead` and drops the same number of
// characters from the front of the unparsed remainder.
// The column is asserted not to go beyond one past the end of the stripped
// line.
1471 template<
class EventHandler>
1472 void ParseEngine<EventHandler>::_line_progressed(
size_t ahead)
1474 _c4dbgpf(
"line[{}] ({} cols) progressed by {}: col {}-->{} offset {}-->{}", m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->line_contents.full.len, ahead, m_evt_handler->m_curr->pos.col, m_evt_handler->m_curr->pos.col+ahead, m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset+ahead);
1475 m_evt_handler->m_curr->pos.offset += ahead;
1476 m_evt_handler->m_curr->pos.col += ahead;
1477 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col <= m_evt_handler->m_curr->line_contents.stripped.len+1);
1478 m_evt_handler->m_curr->line_contents.rem = m_evt_handler->m_curr->line_contents.rem.sub(ahead);
// Moves the position to the start of the next line. Must be called with the
// column just past the end of the stripped line (asserted).
// The offset is advanced by the difference between the full and the
// stripped line lengths (presumably the line-terminator characters), the
// line number is incremented and the column is reset to 1.
1481 template<
class EventHandler>
1482 void ParseEngine<EventHandler>::_line_ended()
1484 _c4dbgpf(
"line[{}] ({} cols) ended! offset {}-->{} / col {}-->{}",
1485 m_evt_handler->m_curr->pos.line,
1486 m_evt_handler->m_curr->line_contents.full.len,
1487 m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset + m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.stripped.len,
1488 m_evt_handler->m_curr->pos.col, 1);
1489 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col == m_evt_handler->m_curr->line_contents.stripped.len + 1);
1490 m_evt_handler->m_curr->pos.offset += m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.stripped.len;
1491 ++m_evt_handler->m_curr->pos.line;
1492 m_evt_handler->m_curr->pos.col = 1;
// Reverts the effect of _line_ended(): requires column 1, a line number
// above 0 and an offset large enough to step back (all asserted).
// The offset is moved back by the difference between the full and stripped
// line lengths, the line number is decremented, the column is set just past
// the end of the stripped line, and the unparsed remainder becomes an empty
// range at the restored offset.
// NOTE(review): statements between the column update and the remainder
// update are not visible in this view.
1495 template<
class EventHandler>
1496 void ParseEngine<EventHandler>::_line_ended_undo()
1498 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col == 1u);
1499 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.line > 0u);
1500 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.offset >= m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.stripped.len);
1501 const size_t delta = m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.stripped.len;
1502 _c4dbgpf(
"line[{}] undo ended! line {}-->{}, offset {}-->{}", m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->pos.line - 1, m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset - delta);
1503 m_evt_handler->m_curr->pos.offset -= delta;
1504 --m_evt_handler->m_curr->pos.line;
1505 m_evt_handler->m_curr->pos.col = m_evt_handler->m_curr->line_contents.stripped.len + 1u;
1508 m_evt_handler->m_curr->line_contents.rem = m_buf.sub(m_evt_handler->m_curr->pos.offset, 0);
// Stores `indentation` as the reference indentation (indref) of the current
// parser state and logs the new value together with the state's level.
1513 template<
class EventHandler>
1514 void ParseEngine<EventHandler>::_set_indentation(
size_t indentation)
1516 m_evt_handler->m_curr->indref = indentation;
1517 _c4dbgpf(
"state[{}]: saving indentation: {}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
// Stores the current column of the line being parsed as the reference
// indentation (indref) of the current parser state.
1520 template<
class EventHandler>
1521 void ParseEngine<EventHandler>::_save_indentation()
// the remainder must not start before the beginning of the full line
1523 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.begin() >= m_evt_handler->m_curr->line_contents.full.begin());
1524 m_evt_handler->m_curr->indref = m_evt_handler->m_curr->line_contents.current_col();
1525 _c4dbgpf(
"state[{}]: saving indentation: {}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
// Ends a block-style map. In the visible branches, a pending entry is first
// completed with empty plain scalars, then end_map() is emitted.
// NOTE(review): the condition guarding the first branch is not visible in
// this excerpt.
1531 template<
class EventHandler>
1532 void ParseEngine<EventHandler>::_end_map_blck()
1534 _c4dbgp(
"mapblck: end");
// first branch: only the value is missing
1537 _c4dbgp(
"mapblck: set missing val");
1538 _handle_annotations_before_blck_val_scalar();
1539 m_evt_handler->set_val_scalar_plain_empty();
// QMRK state: both the key and the value are missing
1541 else if(has_any(
QMRK))
1543 _c4dbgp(
"mapblck: set missing keyval");
1544 _handle_annotations_before_blck_key_scalar();
1545 m_evt_handler->set_key_scalar_plain_empty();
1546 _handle_annotations_before_blck_val_scalar();
1547 m_evt_handler->set_val_scalar_plain_empty();
1549 m_evt_handler->end_map();
// Ends a block-style sequence. In the visible branch, a missing value is
// filled with an empty plain scalar before end_seq() is emitted.
// NOTE(review): the condition guarding that branch is not visible in this
// excerpt.
1552 template<
class EventHandler>
1553 void ParseEngine<EventHandler>::_end_seq_blck()
1557 _c4dbgp(
"seqblck: set missing val");
1558 _handle_annotations_before_blck_val_scalar();
1559 m_evt_handler->set_val_scalar_plain_empty();
1561 m_evt_handler->end_seq();
// Ends the current map: checks the parser flags, then pops the handler state.
// NOTE(review): lines between the assertions are missing from this excerpt,
// so the FLOW/USTY assertions and the _pop() may sit inside a branch --
// confirm against the full file.
1564 template<
class EventHandler>
1565 void ParseEngine<EventHandler>::_end2_map()
1567 _c4dbgp(
"map: end");
// must currently be reading a map
1568 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RMAP));
1575 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
FLOW));
1576 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
USTY));
1577 m_evt_handler->_pop();
// Ends the current sequence: checks the parser flags, then pops the handler
// state.
// NOTE(review): lines between the assertions are missing from this excerpt,
// so the FLOW/USTY assertions and the _pop() may sit inside a branch --
// confirm against the full file.
1581 template<
class EventHandler>
1582 void ParseEngine<EventHandler>::_end2_seq()
1584 _c4dbgp(
"seq: end");
// must currently be reading a sequence
1585 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RSEQ));
1592 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
FLOW));
1593 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
USTY));
1594 m_evt_handler->_pop();
// Begins a document: emits begin_doc() on the handler and resets the
// reference indentation of the current state to 0.
// NOTE(review): statements before begin_doc() are not visible in this excerpt.
1598 template<
class EventHandler>
1599 void ParseEngine<EventHandler>::_begin2_doc()
1603 m_evt_handler->begin_doc();
1604 m_evt_handler->m_curr->indref = 0;
// Begins an explicit document: emits begin_doc_expl() on the handler and
// resets the reference indentation of the current state to 0.
// NOTE(review): statements before begin_doc_expl() are not visible in this
// excerpt.
1607 template<
class EventHandler>
1608 void ParseEngine<EventHandler>::_begin2_doc_expl()
1612 m_evt_handler->begin_doc_expl();
1613 m_evt_handler->m_curr->indref = 0;
// Ends the current document. A document that is empty, or that still has
// pending tags/anchors, first receives an empty plain scalar as its value.
1616 template<
class EventHandler>
1617 void ParseEngine<EventHandler>::_end2_doc()
1619 _c4dbgp(
"doc: end");
// must currently be inside a document
1620 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RDOC));
1621 if(m_doc_empty || (m_pending_tags.num_entries || m_pending_anchors.num_entries))
1623 _c4dbgp(
"doc was empty; add empty val");
// pending annotations are handled before the empty value is set
1624 _handle_annotations_before_blck_val_scalar();
1625 m_evt_handler->set_val_scalar_plain_empty();
1627 m_evt_handler->end_doc();
// Ends the current document with an explicit end (end_doc_expl()). A document
// that is empty, or that still has pending tags/anchors, first receives an
// empty plain scalar as its value.
1630 template<
class EventHandler>
1631 void ParseEngine<EventHandler>::_end2_doc_expl()
1633 _c4dbgp(
"doc: end");
1634 if(m_doc_empty || (m_pending_tags.num_entries || m_pending_anchors.num_entries))
1636 _c4dbgp(
"doc: no children; add empty val");
// pending annotations are handled before the empty value is set
1637 _handle_annotations_before_blck_val_scalar();
1638 m_evt_handler->set_val_scalar_plain_empty();
1640 m_evt_handler->end_doc_expl();
// Starts a document when one is needed.
// NOTE(review): both the condition and the call that starts the document are
// missing from this excerpt; only the debug message is visible.
1643 template<
class EventHandler>
1644 void ParseEngine<EventHandler>::_maybe_begin_doc()
1648 _c4dbgp(
"doc must be started");
// Ends a document when one is open. When no document needs finishing but
// tags/anchors are still pending, emits a whole document holding only an
// empty plain scalar, after handling those annotations.
// NOTE(review): the condition and body of the first branch are not visible in
// this excerpt.
1652 template<
class EventHandler>
1653 void ParseEngine<EventHandler>::_maybe_end_doc()
1657 _c4dbgp(
"doc must be finished");
1660 else if(m_doc_empty && (m_pending_tags.num_entries || m_pending_anchors.num_entries))
1662 _c4dbgp(
"no doc to finish, but pending annotations");
// begin_doc / empty value / end_doc: a complete one-scalar document
1663 m_evt_handler->begin_doc();
1664 _handle_annotations_before_blck_val_scalar();
1665 m_evt_handler->set_val_scalar_plain_empty();
1666 m_evt_handler->end_doc();
// Pops nested states until the current state is the document state. The
// document state is looked for at stack[0] (root is the document) or at
// stack[1] (root is the stream).
1670 template<
class EventHandler>
1671 void ParseEngine<EventHandler>::_end_doc_suddenly__pop()
1673 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
1674 if(m_evt_handler->m_stack[0].flags &
RDOC)
1676 _c4dbgp(
"root is RDOC");
// nothing to pop when already at level 0
1677 if(m_evt_handler->m_curr->level != 0)
1678 _handle_indentation_pop(&m_evt_handler->m_stack[0]);
1680 else if((m_evt_handler->m_stack.size() > 1) && (m_evt_handler->m_stack[1].flags &
RDOC))
1682 _c4dbgp(
"root is STREAM");
// nothing to pop when already at level 1
1683 if(m_evt_handler->m_curr->level != 1)
1684 _handle_indentation_pop(&m_evt_handler->m_stack[1]);
// NOTE(review): presumably the final `else` branch; its opening line is not
// visible in this excerpt
1688 _c4err(
"internal error");
// postcondition: the current state is a document state
1690 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RDOC));
// Ends the current document from a nested state: first pops back to the
// document state.
// NOTE(review): the statements after the pop are not visible in this excerpt.
1693 template<
class EventHandler>
1694 void ParseEngine<EventHandler>::_end_doc_suddenly()
1696 _c4dbgp(
"end doc suddenly");
1697 _end_doc_suddenly__pop();
// Starts a new document from a nested state: first pops back to the current
// document state.
// NOTE(review): the statements after the pop are not visible in this excerpt.
1702 template<
class EventHandler>
1703 void ParseEngine<EventHandler>::_start_doc_suddenly()
1705 _c4dbgp(
"start doc suddenly");
1706 _end_doc_suddenly__pop();
// Finishes parsing the stream: reports unterminated flow containers, pops any
// nested states, flushes pending anchors/tags into a document holding an
// empty scalar, and emits end_stream().
// NOTE(review): the conditions guarding the two errors, and the lines between
// the pop and the pending-annotation check, are not visible in this excerpt.
1711 template<
class EventHandler>
1712 void ParseEngine<EventHandler>::_end_stream()
1714 _c4dbgpf(
"end_stream, level={} node_id={}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->node_id);
1716 _c4err(
"missing terminating ]");
1718 _c4err(
"missing terminating }");
// unwind every nested state down to the bottom of the stack
1719 if(m_evt_handler->m_stack.size() > 1)
1720 _handle_indentation_pop(m_evt_handler->m_stack.begin());
1727 if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
// annotations left without a node: give them a document with an empty scalar
1731 m_evt_handler->begin_doc();
1732 _handle_annotations_before_blck_val_scalar();
1733 m_evt_handler->set_val_scalar_plain_empty();
1734 m_evt_handler->end_doc();
1738 m_evt_handler->end_stream();
// Pops parser states until the current state is `popto`.
// @param popto the state to stop at
// NOTE(review): the statements that end each seq/map inside the loop are not
// visible in this excerpt; only the debug messages are.
1742 template<
class EventHandler>
1743 void ParseEngine<EventHandler>::_handle_indentation_pop(ParserState
const* popto)
1745 _c4dbgpf(
"popping {} level{}: from level {}(@ind={}) to level {}(@ind={})", m_evt_handler->m_curr->level - popto->level, (((m_evt_handler->m_curr->level - popto->level) > 1) ?
"s" :
""), m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, popto->level, popto->indref);
1746 while(m_evt_handler->m_curr != popto)
1750 _c4dbgpf(
"popping seq at level {} (indentation={},addr={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, m_evt_handler->m_curr);
1753 else if(has_any(
RMAP))
1755 _c4dbgpf(
"popping map at level {} (indentation={},addr={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, m_evt_handler->m_curr);
1763 _c4dbgpf(
"current level is {} (indentation={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
// Called from a block sequence when the indentation changed: searches the
// state stack, from the state below the current one downwards, for a state
// whose indref equals the indentation of the current line, and pops to it.
// Raises a parse error when no valid target is found.
1766 template<
class EventHandler>
1767 void ParseEngine<EventHandler>::_handle_indentation_pop_from_block_seq()
1770 using state_type =
typename EventHandler::state;
1771 state_type
const* popto =
nullptr;
1772 auto &stack = m_evt_handler->m_stack;
// pointer arithmetic over the stack below requires contiguous storage
1773 _RYML_CB_ASSERT(stack.m_callbacks, stack.is_contiguous());
1774 _RYML_CB_ASSERT(stack.m_callbacks, m_evt_handler->m_curr >= stack.begin() && m_evt_handler->m_curr < stack.end());
1775 const size_t ind = m_evt_handler->m_curr->line_contents.indentation;
// debug dump of the whole stack
// (the guard around it and the flagbuf_ declaration are not visible here)
1780 for(state_type
const& s : stack)
1781 _dbg_printf(
"state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(flagbuf_, s.flags));
// NOTE(review): with `s >= stack.begin()`, a search that finds nothing
// decrements `s` to one before the first element; confirm the loop always
// exits earlier (the exit statement is not visible in this excerpt)
1784 for(state_type
const* s = m_evt_handler->m_curr-1; s >= stack.begin(); --s)
1786 _c4dbgpf(
"searching for state with indentation {}. curr={} (level={},node={})", ind, s->indref, s->level, s->node_id);
1787 if(s->indref == ind)
1789 _c4dbgpf(
"gotit!!! level={} node={}", s->level, s->node_id);
// the target must exist and be strictly below the current state
1794 if(!popto || popto >= m_evt_handler->m_curr || popto->level >= m_evt_handler->m_curr->level)
1796 _c4err(
"parse error: incorrect indentation?");
1798 _handle_indentation_pop(popto);
// Called from a block map when the indentation changed: searches the state
// stack, from the state below the current one downwards and excluding the
// bottom state, for the state to pop to, using the indentation of the
// current line. A line starting with a `-` block token at the same
// indentation is treated as belonging to an indentless sequence parent.
// Raises a parse error when no valid target is found.
// NOTE(review): several branch conditions and the assignments to `popto` are
// not visible in this excerpt.
1801 template<
class EventHandler>
1802 void ParseEngine<EventHandler>::_handle_indentation_pop_from_block_map()
1805 using state_type =
typename EventHandler::state;
1806 auto &stack = m_evt_handler->m_stack;
// pointer arithmetic over the stack below requires contiguous storage
1807 _RYML_CB_ASSERT(stack.m_callbacks, stack.is_contiguous());
1808 _RYML_CB_ASSERT(stack.m_callbacks, m_evt_handler->m_curr >= stack.begin() && m_evt_handler->m_curr < stack.end());
1809 const size_t ind = m_evt_handler->m_curr->line_contents.indentation;
1810 state_type
const* popto =
nullptr;
// debug dump of the whole stack
// (the guard around it and the flagbuf_ declaration are not visible here)
1815 for(state_type
const& s : stack)
1816 _dbg_printf(
"state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(flagbuf_, s.flags));
// unlike the block-seq variant, this loop stops before stack.begin()
1819 for(state_type
const* s = m_evt_handler->m_curr-1; s > stack.begin(); --s)
1821 _c4dbgpf(
"searching for state with indentation {}. current: ind={},level={},node={},flags={}", ind, s->indref, s->level, s->node_id, detail::_parser_flags_to_str(flagbuf_, s->flags));
1826 else if(s->indref == ind)
1828 _c4dbgpf(
"same indentation!!! level={} node={}", s->level, s->node_id);
1829 if(popto && has_any(
RTOP, s) && has_none(
RMAP|
RSEQ, s))
// look at the first non-space character of the remainder of the line
1836 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
1837 const size_t first = rem.first_not_of(
' ');
1838 _RYML_CB_ASSERT(stack.m_callbacks, first == ind || first ==
npos);
1839 rem = rem.right_of(first,
true);
1840 _c4dbgpf(
"indentless? rem='{}' first={}", rem, first);
1841 if(rem.begins_with(
'-') && _is_blck_token(rem))
1843 _c4dbgp(
"parent was indentless seq");
// the target must exist and be strictly below the current state
1849 if(!popto || popto >= m_evt_handler->m_curr || popto->level >= m_evt_handler->m_curr->level)
1851 _c4err(
"parse error: incorrect indentation?");
1853 _handle_indentation_pop(popto);
// Scans a single-quoted scalar starting at the current position, possibly
// spanning several lines.
// @return a ScannedScalar holding the scanned range and a flag telling
//         whether the scalar needs filtering
// Raises a parse error if the end of the file is reached before the closing
// quote.
// NOTE(review): many lines of this function (quote detection, the `pos`
// declaration, line advancing) are not visible in this excerpt.
1858 template<
class EventHandler>
1859 typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_scan_scalar_squot()
// b: buffer offset of the scalar; s: the buffer from that offset onwards
1865 size_t b = m_evt_handler->m_curr->pos.offset;
1866 substr s = m_buf.sub(b);
// branch for leading spaces before the opening quote
1867 if(s.begins_with(
' '))
1870 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.sub(b).is_super(s));
1871 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begin() >= m_buf.sub(b).begin());
// advance by the distance between `s` and the original offset
1872 _line_progressed((
size_t)(s.begin() - m_buf.sub(b).begin()));
1874 b = m_evt_handler->m_curr->pos.offset;
1875 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'\''));
// step over the opening quote
1878 _line_progressed(1);
1881 bool needs_filter =
false;
1883 size_t numlines = 1;
// scan the remainder of each line until the file ends
1885 while( ! _finished_file())
1887 const csubstr line = m_evt_handler->m_curr->line_contents.rem;
1888 bool line_is_blank =
true;
1889 _c4dbgpf(
"scanning single quoted scalar @ line[{}]: ~~~{}~~~", m_evt_handler->m_curr->pos.line, line);
1890 for(
size_t i = 0; i < line.len; ++i)
1892 const char curr = line.str[i];
// '~' stands in for "no next character" at the end of the line
1895 const char next = i+1 < line.len ? line.str[i+1] :
'~';
1903 needs_filter =
true;
1907 else if(curr !=
' ')
1909 line_is_blank =
false;
1914 needs_filter = needs_filter
1917 || (_at_line_begin() && line.begins_with(
' '));
1921 _line_progressed(line.len);
// NOTE(review): if `pos` is an unsigned type, `pos >= 0` is always true
// (its declaration is not visible in this excerpt)
1926 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, pos >= 0 && pos < m_buf.len);
1927 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf[m_evt_handler->m_curr->pos.offset + pos] ==
'\'');
// advance past the closing quote, then re-express `pos` relative to `b`
1928 _line_progressed(pos + 1);
1929 pos = m_evt_handler->m_curr->pos.offset - b - 1;
1939 _c4err(
"reached end of file while looking for closing quote");
1943 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, pos > 0);
1944 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.end() >= m_buf.begin() && s.end() <= m_buf.end());
1945 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.end() == m_buf.end() || *s.end() ==
'\'');
1946 s = s.sub(0, pos-1);
1949 _c4prscalar(
"scanned squoted scalar", s,
true);
1951 return ScannedScalar { s, needs_filter };
// Scans a double-quoted scalar starting at the current position, possibly
// spanning several lines.
// @return a ScannedScalar holding the scanned range and a flag telling
//         whether the scalar needs filtering
// Raises a parse error if the end of the file is reached before the closing
// quote.
// NOTE(review): many lines of this function (backslash detection, the `pos`
// declaration, line advancing) are not visible in this excerpt.
1956 template<
class EventHandler>
1957 typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_scan_scalar_dquot()
// b: buffer offset of the scalar; s: the buffer from that offset onwards
1963 size_t b = m_evt_handler->m_curr->pos.offset;
1964 substr s = m_buf.sub(b);
// branch for leading spaces before the opening quote
1965 if(s.begins_with(
' '))
1968 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.sub(b).is_super(s));
1969 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begin() >= m_buf.sub(b).begin());
// advance by the distance between `s` and the original offset
1970 _line_progressed((
size_t)(s.begin() - m_buf.sub(b).begin()));
1972 b = m_evt_handler->m_curr->pos.offset;
1973 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'"'));
// step over the opening quote
1976 _line_progressed(1);
1979 bool needs_filter =
false;
1981 size_t numlines = 1;
// scan the remainder of each line until the file ends
1983 while( ! _finished_file())
1985 const csubstr line = m_evt_handler->m_curr->line_contents.rem;
// applied only when building with GCC 11
// (presumably a workaround for an optimizer issue -- TODO confirm)
1986 #if defined(__GNUC__) && __GNUC__ == 11
1987 C4_DONT_OPTIMIZE(line);
1989 bool line_is_blank =
true;
1990 _c4dbgpf(
"scanning double quoted scalar @ line[{}]: line='{}'", m_evt_handler->m_curr->pos.line, line);
1991 for(
size_t i = 0; i < line.len; ++i)
1993 const char curr = line.str[i];
1995 line_is_blank =
false;
// '~' stands in for "no next character" at the end of the line
1999 const char next = i+1 < line.len ? line.str[i+1] :
'~';
2000 needs_filter =
true;
// next is an escaped quote or an escaped backslash
2001 if(next ==
'"' || next ==
'\\')
2004 else if(curr ==
'"')
2012 needs_filter = needs_filter
2015 || (_at_line_begin() && line.begins_with(
' '));
2019 _line_progressed(line.len);
// NOTE(review): if `pos` is an unsigned type, `pos >= 0` is always true
// (its declaration is not visible in this excerpt)
2024 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, pos >= 0 && pos < m_buf.len);
2025 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf[m_evt_handler->m_curr->pos.offset + pos] ==
'"');
// advance past the closing quote, then re-express `pos` relative to `b`
2026 _line_progressed(pos + 1);
2027 pos = m_evt_handler->m_curr->pos.offset - b - 1;
2037 _c4err(
"reached end of file looking for closing quote");
2041 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, pos > 0);
2042 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.end() == m_buf.end() || *s.end() ==
'"');
2043 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.end() >= m_buf.begin() && s.end() <= m_buf.end());
2044 s = s.sub(0, pos-1);
2047 _c4prscalar(
"scanned dquoted scalar", s,
true);
2049 return ScannedScalar { s, needs_filter };
// Scans a block scalar (introduced by `|` or `>`): parses the header (chomp
// indicator and optional explicit indentation digit), then gathers the
// following lines into a raw block.
// @param sb     output: receives the raw block in `scalar` and the resolved
//               indentation in `indentation`
// @param indref reference indentation for the block; must not be npos
// NOTE(review): a number of branch conditions and loop exits are not visible
// in this excerpt.
2054 template<
class EventHandler>
2055 void ParseEngine<EventHandler>::_scan_block(ScannedBlock *C4_RESTRICT sb,
size_t indref)
2057 _c4dbgpf(
"blck: indref={}", indref);
2058 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, indref !=
npos);
// skip any spaces before the block indicator
2061 csubstr s = m_evt_handler->m_curr->line_contents.rem;
2062 csubstr trimmed = s.triml(
' ');
2063 if(trimmed.str > s.str)
2065 _c4dbgp(
"skipping whitespace");
2066 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, trimmed.str >= s.str);
2067 _line_progressed(
static_cast<size_t>(trimmed.str - s.str));
2070 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'|') || s.begins_with(
'>'));
2072 _c4dbgpf(
"blck: specs=[{}]~~~{}~~~", s.len, s);
// defaults: clip chomping, indentation not yet known
2075 BlockChomp_e chomp = CHOMP_CLIP;
2076 size_t indentation =
npos;
2080 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with_any(
"|>"));
// t: the header characters after the `|` / `>` indicator
2081 csubstr t = s.sub(1);
2082 _c4dbgpf(
"blck: spec is multichar: '{}'", t);
2083 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, t.len >= 1);
2084 size_t pos = t.first_of(
"-+");
2085 _c4dbgpf(
"blck: spec chomp char at {}", pos);
2089 chomp = CHOMP_STRIP;
2090 else if(t[pos] ==
'+')
2098 digits = t.left_of(t.first_not_of(
"0123456789"));
2099 if( ! digits.empty())
// only a single, nonzero indentation digit is accepted
2101 if(C4_UNLIKELY(digits.len > 1))
2102 _c4err(
"parse error: invalid indentation");
2103 _c4dbgpf(
"blck: parse indentation digits: [{}]~~~{}~~~", digits.len, digits);
2104 if(C4_UNLIKELY( !
c4::atou(digits, &indentation)))
2105 _c4err(
"parse error: could not read indentation as decimal");
2106 if(C4_UNLIKELY( ! indentation))
2107 _c4err(
"parse error: null indentation");
// NOTE(review): the debug message adds the `indref` parameter, while the
// assignment below adds m_curr->indref -- confirm these always agree
2108 _c4dbgpf(
"blck: indentation specified: {}. add {} from curr state -> {}", indentation, m_evt_handler->m_curr->indref, indentation+indref);
2109 indentation += m_evt_handler->m_curr->indref;
2113 _c4dbgpf(
"blck: style={} chomp={} indentation={}", s.begins_with(
'>') ?
"fold" :
"literal", chomp==CHOMP_CLIP ?
"clip" : (chomp==CHOMP_STRIP ?
"strip" :
"keep"), indentation);
// consume the rest of the header line
2116 _line_progressed(s.len);
// the raw block starts empty at the current offset and grows line by line
2121 substr raw_block(m_buf.data() + m_evt_handler->m_curr->pos.offset,
size_t(0));
2122 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, raw_block.begin() == m_evt_handler->m_curr->line_contents.full.begin());
2130 size_t num_lines = 0;
2131 size_t first = m_evt_handler->m_curr->pos.line;
2132 size_t provisional_indentation =
npos;
2134 while(( ! _finished_file()))
// load the next line into `lc` (its declaration is not visible here); it is
// only stored into the current state further down
2137 lc.reset_with_next_line(m_buf, m_evt_handler->m_curr->pos.offset);
// applied only when building with GCC 12 or 13
// (presumably a workaround for an optimizer issue -- TODO confirm)
2138 #if defined(__GNUC__) && (__GNUC__ == 12 || __GNUC__ == 13)
2139 C4_DONT_OPTIMIZE(lc.rem);
2141 _c4dbgpf(
"blck: peeking at [{}]~~~{}~~~", lc.stripped.len, lc.stripped);
// case 1: the block indentation is already known
2143 if(indentation !=
npos)
2145 _c4dbgpf(
"blck: indentation={}", indentation);
// a non-blank line indented less than the block
2147 if(lc.indentation < indentation && ( ! lc.rem.trim(
" \t").empty()))
2151 _c4dbgpf(
"blck: indentation decreased ref={} thisline={}", indentation, lc.indentation);
2155 _c4err(
"indentation decreased without any scalar");
2159 else if(indentation == 0)
2161 _c4dbgpf(
"blck: noindent. lc.rem=[{}]~~~{}~~~", lc.rem.len, lc.rem);
// at zero indentation, a document token stops the block
2162 if(_is_doc_token(lc.rem))
2164 _c4dbgp(
"blck: stop. indentation=0 and doc ended");
// case 2: the block indentation is not known yet
2171 const size_t fns = lc.stripped.first_not_of(
' ');
2172 _c4dbgpf(
"blck: indentation ref not set. firstnonws={}", fns);
// a line starting with a tab is rejected here
2176 if(C4_UNLIKELY(lc.stripped.begins_with(
'\t')))
2177 _c4err(
"parse error");
2179 _c4dbgpf(
"blck: line not empty. indref={} indprov={} indentation={}", indref, provisional_indentation, lc.indentation);
2180 if(provisional_indentation ==
npos)
2182 if(lc.indentation < indref)
2184 _c4dbgpf(
"blck: block terminated indentation={} < indref={}", lc.indentation, indref);
2185 if(raw_block.len == 0)
2187 _c4dbgp(
"blck: was empty, undo next line");
2192 else if(lc.indentation == m_evt_handler->m_curr->indref)
2196 _c4dbgpf(
"blck: block terminated. reading container and indentation={}==indref={}", lc.indentation, m_evt_handler->m_curr->indref);
2200 _c4dbgpf(
"blck: set indentation ref from this line: ref={}", lc.indentation);
2201 indentation = lc.indentation;
2205 if(lc.indentation >= provisional_indentation)
2207 _c4dbgpf(
"blck: set indentation ref from provisional indentation: provisional_ref={}, thisline={}", provisional_indentation, lc.indentation);
2209 indentation = lc.indentation;
// empty or all-space line: it can only adjust the provisional indentation
2220 _c4dbgpf(
"blck: line empty or {} spaces. line_indentation={} prov_indentation={}", lc.stripped.len, lc.indentation, provisional_indentation);
2221 if(provisional_indentation !=
npos)
2223 if(lc.stripped.len >= provisional_indentation)
2225 _c4dbgpf(
"blck: increase provisional_ref {} -> {}", provisional_indentation, lc.stripped.len);
2226 provisional_indentation = lc.stripped.len;
2228 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
2229 else if(lc.indentation >= provisional_indentation && lc.indentation !=
npos)
2231 _c4dbgpf(
"blck: increase provisional_ref {} -> {}", provisional_indentation, lc.indentation);
2232 provisional_indentation = lc.indentation;
// when the line has no indentation, the result of has_any() (converted to an
// integer) is used as the provisional indentation
2238 provisional_indentation = lc.indentation ? lc.indentation : has_any(
RSEQ|
RVAL);
2239 _c4dbgpf(
"blck: initialize provisional_ref={}", provisional_indentation);
2240 if(provisional_indentation ==
npos)
2242 provisional_indentation = lc.stripped.len ? lc.stripped.len : has_any(
RSEQ|
RVAL);
2243 _c4dbgpf(
"blck: initialize provisional_ref={}", provisional_indentation);
// the provisional indentation is never below indref
2245 if(provisional_indentation < indref)
2247 provisional_indentation = indref;
2248 _c4dbgpf(
"blck: initialize provisional_ref={}", provisional_indentation);
// the line belongs to the block: make it the current line and append it whole
2254 m_evt_handler->m_curr->line_contents = lc;
2255 _c4dbgpf(
"blck: append '{}'", m_evt_handler->m_curr->line_contents.rem);
2256 raw_block.len += m_evt_handler->m_curr->line_contents.full.len;
2257 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
2261 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.line == (first + num_lines) || (raw_block.len == 0));
2262 C4_UNUSED(num_lines);
// no line fixed the indentation: fall back to the provisional value
2265 if(indentation ==
npos)
2267 _c4dbgpf(
"blck: set indentation from provisional: {}", provisional_indentation);
2268 indentation = provisional_indentation;
2274 _c4prscalar(
"scanned block", raw_block,
true);
// results go out through `sb`
2276 sb->scalar = raw_block;
2277 sb->indentation = indentation;
// Debug print for the whitespace filters: prefixes the message with the
// read and write positions of a `proc` that must be in scope.
2289 #define _c4dbgfws(fmt, ...) _c4dbgpf("filt_ws[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
// No-op variant (the preprocessor conditional choosing between the two
// definitions is not visible in this excerpt).
2291 #define _c4dbgfws(...)
// Handles a run of whitespace at the read position of `proc`: looks for the
// first non-whitespace character and, when that is a line break, skips the
// run as trailing whitespace.
// @param proc filter processor; its current character must be ' ' or '\t'
// @return bool; the return statements are not visible in this excerpt. The
//         callers treat `false` as "everything left is trailing whitespace".
2294 template<
class EventHandler>
2295 template<
class FilterProcessor>
2296 bool ParseEngine<EventHandler>::_filter_ws_handle_to_first_non_space(FilterProcessor &proc)
2298 _c4dbgfws(
"found whitespace '{}'", _c4prc(proc.curr()));
2299 _RYML_CB_ASSERT(this->callbacks(), proc.curr() ==
' ' || proc.curr() ==
'\t');
// at the very start of the source only spaces are skipped; elsewhere tabs too
2301 const size_t first_pos = proc.rpos > 0 ? proc.src.first_not_of(
" \t", proc.rpos) : proc.src.first_not_of(
' ', proc.rpos);
2302 if(first_pos !=
npos)
2304 const char first_char = proc.src[first_pos];
2305 _c4dbgfws(
"firstnonws='{}'@{}", _c4prc(first_char), first_pos);
// whitespace followed by a line break is trailing: drop it
2306 if(first_char ==
'\n' || first_char ==
'\r')
2308 _c4dbgfws(
"whitespace is trailing on line",
"");
2309 proc.skip(first_pos - proc.rpos);
2314 _c4dbgfws(
"legit whitespace. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
2318 _c4dbgfws(
"whitespace is trailing on line",
"");
// Handles whitespace at the read position; when the helper reports false,
// copies all remaining source characters to the output.
// @param proc filter processor positioned at a whitespace character
2322 template<
class EventHandler>
2323 template<
class FilterProcessor>
2324 void ParseEngine<EventHandler>::_filter_ws_copy_trailing(FilterProcessor &proc)
2326 if(!_filter_ws_handle_to_first_non_space(proc))
2328 _c4dbgfws(
"... everything else is trailing whitespace - copy {} chars", proc.src.len - proc.rpos);
2329 proc.copy(proc.src.len - proc.rpos);
// Handles whitespace at the read position; when the helper reports false,
// skips all remaining source characters.
// @param proc filter processor positioned at a whitespace character
2333 template<
class EventHandler>
2334 template<
class FilterProcessor>
2335 void ParseEngine<EventHandler>::_filter_ws_skip_trailing(FilterProcessor &proc)
2337 if(!_filter_ws_handle_to_first_non_space(proc))
2339 _c4dbgfws(
"... everything else is trailing whitespace - skip {} chars", proc.src.len - proc.rpos);
2340 proc.skip(proc.src.len - proc.rpos);
// Debug print for the plain-scalar filter: prefixes the message with the
// read and write positions of a `proc` that must be in scope.
2354 #define _c4dbgfps(fmt, ...) _c4dbgpf("filt_plain[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
// No-op variant (the preprocessor conditional choosing between the two
// definitions is not visible in this excerpt).
2356 #define _c4dbgfps(fmt, ...)
// Handles a newline while filtering a plain scalar. Going by the debug
// messages: a run of consecutive newlines is written out as newlines, and a
// single newline is converted to a space.
// @param proc        filter processor; its current character must be '\n'
// @param indentation passed on to _count_following_newlines()
// NOTE(review): the branch conditions are not visible in this excerpt.
2359 template<
class EventHandler>
2360 template<
class FilterProcessor>
2361 void ParseEngine<EventHandler>::_filter_nl_plain(FilterProcessor &C4_RESTRICT proc,
size_t indentation)
2363 _RYML_CB_ASSERT(this->callbacks(), proc.curr() ==
'\n');
2365 _c4dbgfps(
"found newline. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
// ii is passed by pointer, so the helper may update it
2366 size_t ii = proc.rpos;
2367 const size_t numnl_following = _count_following_newlines(proc.src, &ii, indentation);
// write one '\n' for each following newline
2370 proc.set(
'\n', numnl_following);
2371 _c4dbgfps(
"{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ?
"in the middle" :
"at the end", proc.rpos-ii);
// position of the first character after the newline that is not space/tab
2375 const size_t ret = proc.src.first_not_of(
" \t", proc.rpos+1);
2379 _c4dbgfps(
"single newline. convert to space. ret={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2383 _c4dbgfps(
"last newline, everything else is whitespace. ii={}/{}", ii, proc.src.len);
// Filters a plain scalar: walks the characters of `proc`, dispatching
// whitespace to _filter_ws_skip_trailing() and newlines to
// _filter_nl_plain(); the debug message marks carriage returns as ignored.
// @param proc        filter processor holding source and destination
// @param indentation indentation of the scalar; must not be npos
// @return proc.result()
// NOTE(review): the dispatch conditions and the default branch are not
// visible in this excerpt.
2390 template<
class EventHandler>
2391 template<
class FilterProcessor>
2392 auto ParseEngine<EventHandler>::_filter_plain(FilterProcessor &C4_RESTRICT proc,
size_t indentation) -> decltype(proc.result())
2394 _RYML_CB_ASSERT(this->callbacks(), indentation !=
npos);
2395 _c4dbgfps(
"before=[{}]~~~{}~~~", proc.src.len, proc.src);
2397 while(proc.has_more_chars())
2399 const char curr = proc.curr();
2400 _c4dbgfps(
"'{}', sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
2405 _c4dbgfps(
"whitespace", curr);
2406 _filter_ws_skip_trailing(proc);
2409 _c4dbgfps(
"newline", curr);
2410 _filter_nl_plain(proc, indentation);
2413 _c4dbgfps(
"carriage return, ignore", curr);
2422 _c4dbgfps(
"after[{}]=~~~{}~~~", proc.wpos, proc.sofar());
2424 return proc.result();
// Runs _filter_plain() with a FilterProcessorSrcDst built from
// (scalar, dst).
// NOTE(review): the signature line of this function is missing from this
// excerpt.
2430 template<
class EventHandler>
2433 FilterProcessorSrcDst proc(scalar, dst);
2434 return _filter_plain(proc, indentation);
// Runs _filter_plain() with a FilterProcessorInplaceEndExtending built from
// (dst, cap).
// NOTE(review): the signature line of this function is missing from this
// excerpt.
2437 template<
class EventHandler>
2440 FilterProcessorInplaceEndExtending proc(dst, cap);
2441 return _filter_plain(proc, indentation);
// Debug print for the single-quoted filter: prefixes the message with the
// read and write positions of a `proc` that must be in scope.
2452 #define _c4dbgfsq(fmt, ...) _c4dbgpf("filt_squo[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
// No-op variant (the preprocessor conditional choosing between the two
// definitions is not visible in this excerpt).
2454 #define _c4dbgfsq(fmt, ...)
// Handles a newline while filtering a single-quoted scalar. Going by the
// debug messages: a run of consecutive newlines is written out as newlines,
// and a single newline is converted to a space.
// @param proc filter processor; its current character must be '\n'
// NOTE(review): the branch conditions are not visible in this excerpt.
2457 template<
class EventHandler>
2458 template<
class FilterProcessor>
2459 void ParseEngine<EventHandler>::_filter_nl_squoted(FilterProcessor &C4_RESTRICT proc)
2461 _RYML_CB_ASSERT(this->callbacks(), proc.curr() ==
'\n');
2463 _c4dbgfsq(
"found newline. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
// ii is passed by pointer, so the helper may update it
2464 size_t ii = proc.rpos;
2465 const size_t numnl_following = _count_following_newlines(proc.src, &ii);
// write one '\n' for each following newline
2468 proc.set(
'\n', numnl_following);
2469 _c4dbgfsq(
"{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ?
"in the middle" :
"at the end", proc.rpos-ii);
// position of the first character after the newline that is not space/tab
2473 const size_t ret = proc.src.first_not_of(
" \t", proc.rpos+1);
2477 _c4dbgfsq(
"single newline. convert to space. ret={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2482 _c4dbgfsq(
"single newline. convert to space. ii={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
// Filters a single-quoted scalar: walks the characters of `proc`,
// dispatching whitespace to _filter_ws_copy_trailing() and newlines to
// _filter_nl_squoted(); the debug messages mark carriage returns as skipped
// and two consecutive single quotes as a special case. Raises "filter error"
// in a branch whose condition is not visible in this excerpt.
// @param proc filter processor holding source and destination
// @return proc.result()
2488 template<
class EventHandler>
2489 template<
class FilterProcessor>
2490 auto ParseEngine<EventHandler>::_filter_squoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result())
2492 _c4dbgfsq(
"before=[{}]~~~{}~~~", proc.src.len, proc.src);
2496 while(proc.has_more_chars())
2498 const char curr = proc.curr();
2499 _c4dbgfsq(
"'{}', sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
2504 _c4dbgfsq(
"whitespace", curr);
2505 _filter_ws_copy_trailing(proc);
2508 _c4dbgfsq(
"newline", curr);
2509 _filter_nl_squoted(proc);
2512 _c4dbgfsq(
"skip cr", curr);
2516 _c4dbgfsq(
"squote", curr);
// a quote followed by another quote
2517 if(proc.next() ==
'\'')
2519 _c4dbgfsq(
"two consecutive squotes", curr);
2525 _c4err(
"filter error");
2534 _c4dbgfsq(
": #filteredchars={} after=~~~[{}]{}~~~", proc.src.len-proc.sofar().len, proc.sofar().len, proc.sofar());
2536 return proc.result();
// Runs _filter_squoted() with a FilterProcessorSrcDst built from
// (scalar, dst).
// NOTE(review): the signature line of this function is missing from this
// excerpt.
2541 template<
class EventHandler>
2544 FilterProcessorSrcDst proc(scalar, dst);
2545 return _filter_squoted(proc);
// Runs _filter_squoted() with a FilterProcessorInplaceEndExtending built
// from (dst, cap).
// NOTE(review): the signature line of this function is missing from this
// excerpt.
2548 template<
class EventHandler>
2551 FilterProcessorInplaceEndExtending proc(dst, cap);
2552 return _filter_squoted(proc);
// Debug print for the double-quoted filter: prefixes the message with the
// read and write positions of a `proc` that must be in scope.
2563 #define _c4dbgfdq(fmt, ...) _c4dbgpf("filt_dquo[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
// No-op variant (the preprocessor conditional choosing between the two
// definitions is not visible in this excerpt).
2565 #define _c4dbgfdq(...)
// Handles a newline while filtering a double-quoted scalar. Going by the
// debug messages: a run of consecutive newlines is written out as newlines,
// and a single newline is converted to a space. A backslash found at `ii`
// and followed by a space or tab gets extra handling ("extend skip to
// backslash").
// @param proc filter processor; its current character must be '\n'
// NOTE(review): the branch conditions and the statements performing the
// skips are not visible in this excerpt.
2568 template<
class EventHandler>
2569 template<
class FilterProcessor>
2570 void ParseEngine<EventHandler>::_filter_nl_dquoted(FilterProcessor &C4_RESTRICT proc)
2572 _RYML_CB_ASSERT(this->callbacks(), proc.curr() ==
'\n');
2574 _c4dbgfdq(
"found newline. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
// ii is passed by pointer, so the helper may update it
2575 size_t ii = proc.rpos;
2576 const size_t numnl_following = _count_following_newlines(proc.src, &ii);
// write one '\n' for each following newline
2579 proc.set(
'\n', numnl_following);
2580 _c4dbgfdq(
"{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ?
"in the middle" :
"at the end", proc.rpos-ii);
// position of the first character after the newline that is not space/tab
2584 const size_t ret = proc.src.first_not_of(
" \t", proc.rpos+1);
2588 _c4dbgfdq(
"single newline. convert to space. ret={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2593 _c4dbgfdq(
"single newline. convert to space. ii={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2595 if(ii < proc.src.len && proc.src.str[ii] ==
'\\')
2597 _c4dbgfdq(
"backslash at [{}]", ii);
// '\0' stands in for "no next character" at the end of the source
2598 const char next = ii+1 < proc.src.len ? proc.src.str[ii+1] :
'\0';
2599 if(next ==
' ' || next ==
'\t')
2601 _c4dbgfdq(
"extend skip to backslash",
"");
// Handles a backslash escape while filtering a double-quoted scalar.
// Dispatches on the character after the backslash (`next`) and writes the
// translated character or byte sequence through `proc`. Raises a parse error
// for malformed \x, \u and \U escapes and for an unknown escape character.
// @param proc filter processor; `proc.next()` is the character after the
//             backslash
// NOTE(review): the first branch conditions, the `readbuf` declarations and
// the closing lines of the payload arrays are not visible in this excerpt.
2609 template<
class EventHandler>
2610 template<
class FilterProcessor>
2611 void ParseEngine<EventHandler>::_filter_dquoted_backslash(FilterProcessor &C4_RESTRICT proc)
2613 char next = proc.next();
2614 _c4dbgfdq(
"backslash, next='{}'", _c4prc(next));
// the character two positions ahead is a '\n'
// (per the debug message below, the "\r\n" case)
2617 if(proc.rpos+2 < proc.src.len && proc.src.str[proc.rpos+2] ==
'\n')
2621 _c4dbgfdq(
"[{}]: was \\r\\n, now next='\\n'", proc.rpos);
// scan forward from two positions ahead, testing for spaces and tabs, then
// skip up to `ii`
2627 size_t ii = proc.rpos + 2;
2628 for( ; ii < proc.src.len; ++ii)
2631 if(proc.src.str[ii] ==
' ' || proc.src.str[ii] ==
'\t')
2636 proc.skip(ii - proc.rpos);
// escapes that translate to the escaped character itself
2638 else if(next ==
'"' || next ==
'/' || next ==
' ' || next ==
'\t')
2641 proc.translate_esc(next);
2642 _c4dbgfdq(
"here, used '{}'", _c4prc(next));
2644 else if(next ==
'\r')
2648 else if(next ==
'n')
2650 proc.translate_esc(
'\n');
2652 else if(next ==
'r')
2654 proc.translate_esc(
'\r');
2656 else if(next ==
't')
2658 proc.translate_esc(
'\t');
2660 else if(next ==
'\\')
2662 proc.translate_esc(
'\\');
// \xHH: one byte given as 2 hex digits
2664 else if(next ==
'x')
2666 if(C4_UNLIKELY(proc.rpos + 1u + 2u >= proc.src.len))
2667 _c4err(
"\\x requires 2 hex digits. scalar pos={}", proc.rpos);
// the hex digits start two characters after the backslash
2668 csubstr codepoint = proc.src.sub(proc.rpos + 2u, 2u);
2669 _c4dbgfdq(
"utf8 ~~~{}~~~ rpos={} rem=~~~{}~~~", codepoint, proc.rpos, proc.src.sub(proc.rpos));
2670 uint8_t byteval = {};
2671 if(C4_UNLIKELY(!
read_hex(codepoint, &byteval)))
2672 _c4err(
"failed to read \\x codepoint. scalar pos={}", proc.rpos);
// presumably the last argument is the number of source characters consumed
// after the backslash ('x' + 2 digits) -- TODO confirm against FilterProcessor
2673 proc.translate_esc_bulk((
const char*)&byteval, 1u, 3u);
2674 _c4dbgfdq(
"utf8 after rpos={} rem=~~~{}~~~", proc.rpos, proc.src.sub(proc.rpos));
// \uHHHH: code point given as 4 hex digits
2676 else if(next ==
'u')
2678 if(C4_UNLIKELY(proc.rpos + 1u + 4u >= proc.src.len))
2679 _c4err(
"\\u requires 4 hex digits. scalar pos={}", proc.rpos);
2681 csubstr codepoint = proc.src.sub(proc.rpos + 2u, 4u);
2682 uint32_t codepoint_val = {};
2683 if(C4_UNLIKELY(!
read_hex(codepoint, &codepoint_val)))
2684 _c4err(
"failed to parse \\u codepoint. scalar pos={}", proc.rpos);
// decode the code point into readbuf; numbytes == 0 is treated as failure
2685 const size_t numbytes =
decode_code_point((uint8_t*)readbuf,
sizeof(readbuf), codepoint_val);
// NOTE(review): the message says "code point" but the value printed is
// proc.rpos, not codepoint_val
2686 if(C4_UNLIKELY(numbytes == 0))
2687 _c4err(
"failed to decode code point={}", proc.rpos);
2688 _RYML_CB_ASSERT(callbacks(), numbytes <= 4);
2689 proc.translate_esc_bulk(readbuf, numbytes, 5u);
// \UHHHHHHHH: code point given as 8 hex digits
2691 else if(next ==
'U')
2693 if(C4_UNLIKELY(proc.rpos + 1u + 8u >= proc.src.len))
2694 _c4err(
"\\U requires 8 hex digits. scalar pos={}", proc.rpos);
2696 csubstr codepoint = proc.src.sub(proc.rpos + 2u, 8u);
2697 uint32_t codepoint_val = {};
2698 if(C4_UNLIKELY(!
read_hex(codepoint, &codepoint_val)))
2699 _c4err(
"failed to parse \\U codepoint. scalar pos={}", proc.rpos);
2700 const size_t numbytes =
decode_code_point((uint8_t*)readbuf,
sizeof(readbuf), codepoint_val);
// NOTE(review): the message says "code point" but the value printed is
// proc.rpos, not codepoint_val
2701 if(C4_UNLIKELY(numbytes == 0))
2702 _c4err(
"failed to decode code point={}", proc.rpos);
2703 _RYML_CB_ASSERT(callbacks(), numbytes <= 4);
2704 proc.translate_esc_bulk(readbuf, numbytes, 9u);
// single-character escapes mapping to control characters
2707 else if(next ==
'0')
2709 proc.translate_esc(
'\0');
2711 else if(next ==
'b')
2713 proc.translate_esc(
'\b');
2715 else if(next ==
'f')
2717 proc.translate_esc(
'\f');
2719 else if(next ==
'a')
2721 proc.translate_esc(
'\a');
2723 else if(next ==
'v')
2725 proc.translate_esc(
'\v');
2727 else if(next ==
'e')
2729 proc.translate_esc(
'\x1b');
// \_ : bytes 0xC2 0xA0, the UTF-8 encoding of U+00A0 (non-breaking space).
// _RYML_CHCONST is given both the signed and the unsigned spelling of each
// byte (presumably chosen by the signedness of char -- TODO confirm).
2731 else if(next ==
'_')
2734 const char payload[] = {
2735 _RYML_CHCONST(-0x3e, 0xc2),
2736 _RYML_CHCONST(-0x60, 0xa0),
2738 proc.translate_esc_bulk(payload, 2, 1);
// \N : bytes 0xC2 0x85, the UTF-8 encoding of U+0085 (next line)
2740 else if(next ==
'N')
2743 const char payload[] = {
2744 _RYML_CHCONST(-0x3e, 0xc2),
2745 _RYML_CHCONST(-0x7b, 0x85),
2747 proc.translate_esc_bulk(payload, 2, 1);
// \L : bytes 0xE2 0x80 0xA8, the UTF-8 encoding of U+2028 (line separator).
// The output (3 bytes) is longer than the escape (2 characters), so the
// extending variant is called.
2749 else if(next ==
'L')
2752 const char payload[] = {
2753 _RYML_CHCONST(-0x1e, 0xe2),
2754 _RYML_CHCONST(-0x80, 0x80),
2755 _RYML_CHCONST(-0x58, 0xa8),
2757 proc.translate_esc_extending(payload, 3, 1);
// \P : bytes 0xE2 0x80 0xA9, the UTF-8 encoding of U+2029 (paragraph
// separator)
2759 else if(next ==
'P')
2762 const char payload[] = {
2763 _RYML_CHCONST(-0x1e, 0xe2),
2764 _RYML_CHCONST(-0x80, 0x80),
2765 _RYML_CHCONST(-0x57, 0xa9),
2767 proc.translate_esc_extending(payload, 3, 1);
2769 else if(next ==
'\0')
// NOTE(review): presumably the final `else` branch; its opening line is not
// visible in this excerpt
2775 _c4err(
"unknown character '{}' after '\\' pos={}", _c4prc(next), proc.rpos);
2777 _c4dbgfdq(
"backslash...sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
// Filters a double-quoted scalar: walks the characters of `proc`,
// dispatching whitespace to _filter_ws_copy_trailing(), newlines to
// _filter_nl_dquoted() and backslashes to _filter_dquoted_backslash(); the
// debug message marks carriage returns as ignored.
// @param proc filter processor holding source and destination
// @return proc.result()
// NOTE(review): the dispatch conditions and the default branch are not
// visible in this excerpt.
2781 template<
class EventHandler>
2782 template<
class FilterProcessor>
2783 auto ParseEngine<EventHandler>::_filter_dquoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result())
2785 _c4dbgfdq(
"before=[{}]~~~{}~~~", proc.src.len, proc.src);
2788 while(proc.has_more_chars())
2790 const char curr = proc.curr();
2791 _c4dbgfdq(
"'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
2797 _c4dbgfdq(
"whitespace", curr);
2798 _filter_ws_copy_trailing(proc);
2803 _c4dbgfdq(
"newline", curr);
2804 _filter_nl_dquoted(proc);
2809 _c4dbgfdq(
"carriage return, ignore", curr);
2815 _filter_dquoted_backslash(proc);
2825 _c4dbgfdq(
"after[{}]=~~~{}~~~", proc.wpos, proc.sofar());
2826 return proc.result();
// Source/destination variant of the double-quoted filter: builds a
// FilterProcessorSrcDst over (scalar, dst) and runs _filter_dquoted() on it.
// NOTE(review): the signature line is not visible in this listing; the name
// filter_scalar_dquoted is inferred from the call in _filter_scalar_dquot().
2832 template<
class EventHandler>
2835 FilterProcessorSrcDst proc(scalar, dst);
2836 return _filter_dquoted(proc);
// In-place variant of the double-quoted filter: builds a
// FilterProcessorInplaceMidExtending over (dst, cap) and runs
// _filter_dquoted() on it.
// NOTE(review): the signature line is not visible in this listing; the name
// filter_scalar_dquoted_in_place is inferred from its caller.
2839 template<
class EventHandler>
2842 FilterProcessorInplaceMidExtending proc(dst, cap);
2843 return _filter_dquoted(proc);
// Processes the trailing part of a block scalar according to `chomp`.
// Preconditions (asserted): `chomp` is CHOMP_CLIP, CHOMP_KEEP or CHOMP_STRIP,
// and the remaining input (proc.rem()) contains only ' ', '\n' and '\r'.
// The debug messages outline the steps: first handle trailing lines indented
// beyond `indentation`, then apply the CLIP / KEEP / STRIP policy.
// NOTE(review): the switch/case structure, braces and several statements are
// not visible in this listing.
2852 template<
class EventHandler>
2853 template<
class FilterProcessor>
2854 void ParseEngine<EventHandler>::_filter_chomp(FilterProcessor &C4_RESTRICT proc, BlockChomp_e chomp,
size_t indentation)
2856 _RYML_CB_ASSERT(this->callbacks(), chomp == CHOMP_CLIP || chomp == CHOMP_KEEP || chomp == CHOMP_STRIP);
2857 _RYML_CB_ASSERT(this->callbacks(), proc.rem().first_not_of(
" \n\r") ==
npos);
// debug scaffold: two alternative definitions of _c4dbgchomp (verbose / no-op);
// the preprocessor conditional selecting between them is not visible here
2861 #define _c4dbgchomp(fmt, ...) _c4dbgpf("chomp[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2863 #define _c4dbgchomp(...)
2871 _c4dbgchomp(
"found newline and larger indentation. last={}", last);
// turn `last` into an absolute read position: past the newline and the indentation
2872 last = proc.rpos + last + size_t(1) + indentation;
2873 _RYML_CB_ASSERT(this->callbacks(), last <= proc.src.len);
2875 while((proc.rpos < last) && proc.has_more_chars())
2877 const char curr = proc.curr();
2878 _c4dbgchomp(
"curr='{}'", _c4prc(curr));
2883 _c4dbgchomp(
"newline! remlen={}", proc.rem().len);
2886 csubstr at_next_line = proc.rem();
2887 if(at_next_line.begins_with(
' '))
2889 _c4dbgchomp(
"next line begins with spaces. indentation={}", indentation);
2891 size_t first_non_space = at_next_line.first_not_of(
' ');
2892 _c4dbgchomp(
"first_non_space={}", first_non_space);
2893 if(first_non_space ==
npos)
2895 _c4dbgchomp(
"{} spaces, to the end", at_next_line.len);
2896 first_non_space = at_next_line.len;
// at most `indentation` spaces are skipped; spaces beyond that are copied
2898 if(first_non_space <= indentation)
2900 _c4dbgchomp(
"skip spaces={}<=indentation={}", first_non_space, indentation);
2901 proc.skip(first_non_space);
2905 _c4dbgchomp(
"skip indentation={}<spaces={}", indentation, first_non_space);
2906 proc.skip(indentation);
2908 _c4dbgchomp(
"copy {}={}-{} spaces", first_non_space - indentation, first_non_space, indentation);
2909 proc.copy(first_non_space - indentation);
2918 _c4err(
"parse error");
// CLIP policy (per the debug messages): `had_one` tracks whether a newline
// was already seen - TODO confirm how it is updated, the lines are not shown
2930 bool had_one =
false;
2931 while(proc.has_more_chars())
2933 const char curr = proc.curr();
2934 _c4dbgchomp(
"CLIP: '{}'", _c4prc(curr));
2939 _c4dbgchomp(
"copy newline!", curr);
2947 _c4dbgchomp(
"skip!", curr);
2954 _c4dbgchomp(
"chomp=CLIP: add missing newline @{}", proc.wpos);
// KEEP policy (per the debug messages): remaining newlines are copied
2961 _c4dbgchomp(
"chomp=KEEP: copy all remaining new lines of {} characters", proc.rem().len);
2962 while(proc.has_more_chars())
2964 const char curr = proc.curr();
2965 _c4dbgchomp(
"KEEP: '{}'", _c4prc(curr));
2969 _c4dbgchomp(
"copy newline!", curr);
2974 _c4dbgchomp(
"skip!", curr);
// STRIP policy (per the debug message): the remaining characters are dropped
2983 _c4dbgchomp(
"chomp=STRIP: strip {} characters", proc.rem().len);
2995 #define _c4dbgfb(fmt, ...) _c4dbgpf("filt_block[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2997 #define _c4dbgfb(...)
// Skips the indentation at the current read position of a block scalar.
// `first` is the index of the first non-space character in proc.rem(); when
// fewer than `indentation` spaces are present only those are skipped,
// otherwise exactly `indentation` characters are skipped.
// NOTE(review): the handling of first == npos and the skip for the
// `first < indentation` case are on lines not visible in this listing.
3000 template<
class EventHandler>
3001 template<
class FilterProcessor>
3002 void ParseEngine<EventHandler>::_filter_block_indentation(FilterProcessor &C4_RESTRICT proc,
size_t indentation)
3004 csubstr rem = proc.rem();
3007 size_t first = rem.first_not_of(
' ');
3010 _c4dbgfb(
"{} spaces follow before next nonws character", first);
3011 if(first < indentation)
3013 _c4dbgfb(
"skip {}<{} spaces from indentation", first, indentation);
3018 _c4dbgfb(
"skip {} spaces from indentation", indentation);
3019 proc.skip(indentation);
// the code below is only compiled when RYML_NO_COVERAGE__TO_BE_DELETED is
// defined; per the debug message it covers the "only spaces until the end" case
3022 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
3025 _c4dbgfb(
"all spaces to the end: {} spaces", first);
3029 if(first < indentation)
3031 _c4dbgfb(
"skip everything", first);
3032 proc.skip(proc.src.len - proc.rpos);
3036 _c4dbgfb(
"skip {} spaces from indentation", indentation);
3037 proc.skip(indentation);
// Measures the block scalar's contents with trailing ' ', '\n' and '\r'
// trimmed from proc.src, and returns that length (contents.len).
// Per the debug messages, a scalar that is entirely whitespace gets special
// treatment, and with CHOMP_KEEP on a non-empty source its characters are
// walked to keep the newlines.
// NOTE(review): the all-whitespace test and the loop body are not visible here.
3045 template<
class EventHandler>
3046 template<
class FilterProcessor>
3047 size_t ParseEngine<EventHandler>::_handle_all_whitespace(FilterProcessor &C4_RESTRICT proc, BlockChomp_e chomp)
3049 csubstr contents = proc.src.trimr(
" \n\r");
3050 _c4dbgfb(
"ws: contents_len={} wslen={}", contents.len, proc.src.len-contents.len);
3053 _c4dbgfb(
"ws: all whitespace: len={}", proc.src.len);
3054 if(chomp == CHOMP_KEEP && proc.src.len)
3056 _c4dbgfb(
"ws: chomp=KEEP all {} newlines", proc.src.count(
'\n'));
3057 while(proc.has_more_chars())
3059 const char curr = proc.curr();
3071 return contents.len;
// Extends `contents_len` up to the first '\n' found in proc.src at or after
// position `contents_len`; when no newline is found, extends it to
// proc.src.len. Returns the extended length.
// Precondition (asserted): contents_len > 0.
3074 template<
class EventHandler>
3075 template<
class FilterProcessor>
3076 size_t ParseEngine<EventHandler>::_extend_to_chomp(FilterProcessor &C4_RESTRICT proc,
size_t contents_len)
3078 _c4dbgfb(
"contents_len={}", contents_len);
3080 _RYML_CB_ASSERT(this->callbacks(), contents_len > 0u);
// search starts at contents_len, i.e. in the trailing whitespace region
3084 size_t firstnewl = proc.src.first_of(
'\n', contents_len);
3085 if(firstnewl !=
npos)
3087 contents_len = firstnewl;
3088 _c4dbgfb(
"contents_len={} <--- firstnewl={}", contents_len, firstnewl);
3092 contents_len = proc.src.len;
3093 _c4dbgfb(
"contents_len={} <--- src.len={}", contents_len, proc.src.len);
3096 return contents_len;
3108 #define _c4dbgfbl(fmt, ...) _c4dbgpf("filt_block_lit[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3110 #define _c4dbgfbl(...)
// Filters a literal block scalar and returns proc.result().
// Steps visible here: (1) _handle_all_whitespace() computes the contents
// length, with an early return of proc.result(); (2) _extend_to_chomp()
// extends that length; (3) indentation is skipped at the start and again
// after each newline found while reading up to contents_len; (4)
// _filter_chomp() processes the trailing part.
// NOTE(review): the condition guarding the early return and the per-character
// copy statements are not visible in this listing.
3113 template<
class EventHandler>
3114 template<
class FilterProcessor>
3115 auto ParseEngine<EventHandler>::_filter_block_literal(FilterProcessor &C4_RESTRICT proc,
size_t indentation, BlockChomp_e chomp) -> decltype(proc.result())
3117 _c4dbgfbl(
"indentation={} before=[{}]~~~{}~~~", indentation, proc.src.len, proc.src);
3119 size_t contents_len = _handle_all_whitespace(proc, chomp);
3121 return proc.result();
3123 contents_len = _extend_to_chomp(proc, contents_len);
3125 _c4dbgfbl(
"to filter=[{}]~~~{}~~~", contents_len, proc.src.first(contents_len));
3127 _filter_block_indentation(proc, indentation);
// contents loop: bounded by contents_len, the trailing part is left for chomp
3130 while(proc.has_more_chars(contents_len))
3132 const char curr = proc.curr();
3133 _c4dbgfbl(
"'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3138 _c4dbgfbl(
"found newline. skip indentation on the next line", curr);
3140 _filter_block_indentation(proc, indentation);
3152 _c4dbgfbl(
"before chomp: #tochomp={} sofar=[{}]~~~{}~~~", proc.rem().len, proc.sofar().len, proc.sofar());
3154 _filter_chomp(proc, chomp, indentation);
3156 _c4dbgfbl(
"final=[{}]~~~{}~~~", proc.sofar().len, proc.sofar());
3158 return proc.result();
// Source/destination variant of the literal block filter: builds a
// FilterProcessorSrcDst over (scalar, dst) and runs _filter_block_literal().
// NOTE(review): the signature line is not visible in this listing; the name
// filter_scalar_block_literal is inferred from _filter_scalar_literal().
3163 template<
class EventHandler>
3166 FilterProcessorSrcDst proc(scalar, dst);
3167 return _filter_block_literal(proc, indentation, chomp);
// In-place variant of the literal block filter: builds a
// FilterProcessorInplaceEndExtending over (scalar, cap) and runs
// _filter_block_literal().
// NOTE(review): the signature line is not visible in this listing; the name
// filter_scalar_block_literal_in_place is inferred from its caller.
3170 template<
class EventHandler>
3173 FilterProcessorInplaceEndExtending proc(scalar, cap);
3174 return _filter_block_literal(proc, indentation, chomp);
3184 #define _c4dbgfbf(fmt, ...) _c4dbgpf("filt_block_folded[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3186 #define _c4dbgfbf(...)
// Handles the leading part of a folded block scalar: skips the indentation,
// then loops over characters (bounded by `len`). After a newline the
// indentation of the next line is skipped again; on a space, the width of the
// leading " \t" run is computed and _filter_block_folded_indented_block() is
// called; per the last debug message, any other character stops the loop.
// NOTE(review): the case labels and the npos test before
// `first = proc.rem().len` are not visible in this listing.
3190 template<
class EventHandler>
3191 template<
class FilterProcessor>
3192 void ParseEngine<EventHandler>::_filter_block_folded_newlines_leading(FilterProcessor &C4_RESTRICT proc,
size_t indentation,
size_t len)
3194 _filter_block_indentation(proc, indentation);
3195 while(proc.has_more_chars(len))
3197 const char curr = proc.curr();
3198 _c4dbgfbf(
"'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3202 _c4dbgfbf(
"newline.", curr);
3204 _filter_block_indentation(proc, indentation);
3212 size_t first = proc.rem().first_not_of(
" \t");
3213 _c4dbgfbf(
"space. first={}", first);
3215 first = proc.rem().len;
3216 _c4dbgfbf(
"... indentation increased to {}", first);
3217 _filter_block_folded_indented_block(proc, indentation, len, first);
3221 _c4dbgfbf(
"newl leading: not space, not newline. stop.", 0);
// Applies the folding rule to the `num_newl`-th consecutive newline and
// returns the (possibly updated) write position of the first newline.
// Per the debug messages: the first newline becomes a space and its write
// position is recorded; on the second newline that space is rewritten to
// '\n' with proc.set_at(); subsequent newlines are copied.
// NOTE(review): the branch conditions on num_newl and the skip/copy calls are
// not visible in this listing.
3227 template<
class EventHandler>
3228 template<
class FilterProcessor>
3229 size_t ParseEngine<EventHandler>::_filter_block_folded_newlines_compress(FilterProcessor &C4_RESTRICT proc,
size_t num_newl,
size_t wpos_at_first_newl)
3234 _c4dbgfbf(
"... this is the first newline. turn into space. wpos={}", proc.wpos);
3235 wpos_at_first_newl = proc.wpos;
3240 _c4dbgfbf(
"... this is the second newline. prev space (at wpos={}) must be newline", wpos_at_first_newl);
// sanity checks: the recorded position holds the space written for the first
// newline, and it is the last character written so far
3241 _RYML_CB_ASSERT(this->callbacks(), wpos_at_first_newl !=
npos);
3242 _RYML_CB_ASSERT(this->callbacks(), proc.sofar()[wpos_at_first_newl] ==
' ');
3243 _RYML_CB_ASSERT(this->callbacks(), wpos_at_first_newl + 1u == proc.wpos);
3245 proc.set_at(wpos_at_first_newl,
'\n');
3246 _RYML_CB_ASSERT(this->callbacks(), proc.sofar()[wpos_at_first_newl] ==
'\n');
3249 _c4dbgfbf(
"... subsequent newline (num_newl={}). copy", num_newl);
3253 return wpos_at_first_newl;
// Handles a run of newlines inside a folded block scalar.
// Precondition (asserted): the current character is '\n'.
// Each newline is passed to _filter_block_folded_newlines_compress() with an
// incremented counter, followed by skipping the next line's indentation. When
// a more-indented line follows (a space is found), the previously written
// space is turned back into '\n' and the indented block is processed by
// _filter_block_folded_indented_block(); per the last debug message, any
// other character stops the loop.
// NOTE(review): case labels, several conditions and the counter resets are
// only partially visible in this listing.
3256 template<
class EventHandler>
3257 template<
class FilterProcessor>
3258 void ParseEngine<EventHandler>::_filter_block_folded_newlines(FilterProcessor &C4_RESTRICT proc,
size_t indentation,
size_t len)
3260 _RYML_CB_ASSERT(this->callbacks(), proc.curr() ==
'\n');
3261 size_t num_newl = 0;
3262 size_t wpos_at_first_newl =
npos;
3263 while(proc.has_more_chars(len))
3265 const char curr = proc.curr();
3266 _c4dbgfbf(
"'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3271 _c4dbgfbf(
"newline. sofar={}", num_newl);
3307 wpos_at_first_newl = _filter_block_folded_newlines_compress(proc, ++num_newl, wpos_at_first_newl);
3308 _filter_block_indentation(proc, indentation);
3314 size_t first = proc.rem().first_not_of(
" \t");
3315 _c4dbgfbf(
"space. first={}", first);
3317 first = proc.rem().len;
3318 _c4dbgfbf(
"... indentation increased to {}", first);
3321 _c4dbgfbf(
"... prev space (at wpos={}) must be newline", wpos_at_first_newl);
3322 proc.set_at(wpos_at_first_newl,
'\n');
3326 _c4dbgfbf(
"... add missing newline", wpos_at_first_newl);
3329 _filter_block_folded_indented_block(proc, indentation, len, first);
// forget the recorded position once the indented block has been handled
3331 wpos_at_first_newl =
npos;
3338 _c4dbgfbf(
"not space, not newline. stop.", 0);
// Processes a more-indented region inside a folded block scalar.
// Precondition (asserted): the leading " \t" run of proc.rem() is exactly
// `curr_indentation` characters long, or the remainder is only " \t".
// The extra indentation is copied first; then characters are consumed up to
// `len`. After each newline the base indentation is skipped and the first
// non-space character of the next line is inspected to decide whether the
// indented block continues.
// NOTE(review): declared noexcept although it uses _RYML_CB_ASSERT - confirm
// the error callbacks cannot throw in assertion-enabled builds.
// NOTE(review): case labels and the copy/skip statements inside the loop are
// not visible in this listing.
3345 template<
class EventHandler>
3346 template<
class FilterProcessor>
3347 void ParseEngine<EventHandler>::_filter_block_folded_indented_block(FilterProcessor &C4_RESTRICT proc,
size_t indentation,
size_t len,
size_t curr_indentation) noexcept
3349 _RYML_CB_ASSERT(this->callbacks(), (proc.rem().first_not_of(
" \t") == curr_indentation) || (proc.rem().first_not_of(
" \t") ==
npos));
3350 if(curr_indentation)
3351 proc.copy(curr_indentation);
3352 while(proc.has_more_chars(len))
3354 const char curr = proc.curr();
3355 _c4dbgfbf(
"'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3361 _filter_block_indentation(proc, indentation);
3362 csubstr rem = proc.rem();
3363 const size_t first = rem.first_not_of(
' ');
3364 _c4dbgfbf(
"newline. firstns={}", first);
3367 const char c = rem[first];
3368 _c4dbgfbf(
"firstns={}='{}'", first, _c4prc(c));
3369 if(c ==
'\n' || c ==
'\r')
3375 _c4dbgfbf(
"done with indented block", first);
3379 else if(first !=
npos)
3382 _c4dbgfbf(
"copy all {} spaces", first);
// Filters a folded block scalar and returns proc.result().
// Steps visible here: (1) _handle_all_whitespace() computes the contents
// length, with an early return of proc.result(); (2) _extend_to_chomp()
// extends that length; (3) _filter_block_folded_newlines_leading() handles
// the leading lines; (4) the contents loop hands each newline run to
// _filter_block_folded_newlines(); (5) _filter_chomp() processes the
// trailing part.
// NOTE(review): the condition guarding the early return and the per-character
// copy statements are not visible in this listing.
3400 template<
class EventHandler>
3401 template<
class FilterProcessor>
3402 auto ParseEngine<EventHandler>::_filter_block_folded(FilterProcessor &C4_RESTRICT proc,
size_t indentation, BlockChomp_e chomp) -> decltype(proc.result())
3404 _c4dbgfbf(
"indentation={} before=[{}]~~~{}~~~", indentation, proc.src.len, proc.src);
3406 size_t contents_len = _handle_all_whitespace(proc, chomp);
3408 return proc.result();
3410 contents_len = _extend_to_chomp(proc, contents_len);
3412 _c4dbgfbf(
"to filter=[{}]~~~{}~~~", contents_len, proc.src.first(contents_len));
3414 _filter_block_folded_newlines_leading(proc, indentation, contents_len);
// contents loop: bounded by contents_len, the trailing part is left for chomp
3417 while(proc.has_more_chars(contents_len))
3419 const char curr = proc.curr();
3420 _c4dbgfbf(
"'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3425 _c4dbgfbf(
"found newline", curr);
3426 _filter_block_folded_newlines(proc, indentation, contents_len);
3438 _c4dbgfbf(
"before chomp: #tochomp={} sofar=[{}]~~~{}~~~", proc.rem().len, proc.sofar().len, proc.sofar());
3440 _filter_chomp(proc, chomp, indentation);
3442 _c4dbgfbf(
"final=[{}]~~~{}~~~", proc.sofar().len, proc.sofar());
3444 return proc.result();
// Source/destination variant of the folded block filter: builds a
// FilterProcessorSrcDst over (scalar, dst) and runs _filter_block_folded().
// NOTE(review): the signature line is not visible in this listing; the name
// filter_scalar_block_folded is inferred from _filter_scalar_folded().
3449 template<
class EventHandler>
3452 FilterProcessorSrcDst proc(scalar, dst);
3453 return _filter_block_folded(proc, indentation, chomp);
// In-place variant of the folded block filter: builds a
// FilterProcessorInplaceEndExtending over (scalar, cap) and runs
// _filter_block_folded().
// NOTE(review): the signature line is not visible in this listing; the name
// filter_scalar_block_folded_in_place is inferred from its caller.
3456 template<
class EventHandler>
3459 FilterProcessorInplaceEndExtending proc(scalar, cap);
3460 return _filter_block_folded(proc, indentation, chomp);
// Filters a plain scalar in place, using the scalar's own length as the
// capacity, and asserts that the result is valid.
// NOTE(review): the return statement is not visible in this listing;
// presumably the filtered string r.get() is returned.
3468 template<
class EventHandler>
3469 csubstr ParseEngine<EventHandler>::_filter_scalar_plain(substr s,
size_t indentation)
3471 _c4dbgpf(
"filtering plain scalar: s=[{}]~~~{}~~~", s.len, s);
3472 FilterResult r = this->filter_scalar_plain_in_place(s, s.len, indentation);
3473 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, r.valid());
3474 _c4dbgpf(
"filtering plain scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
// Filters a single-quoted scalar in place, using the scalar's own length as
// the capacity, and asserts that the result is valid.
// NOTE(review): the return statement is not visible in this listing;
// presumably the filtered string r.get() is returned.
3480 template<
class EventHandler>
3481 csubstr ParseEngine<EventHandler>::_filter_scalar_squot(substr s)
3483 _c4dbgpf(
"filtering squo scalar: s=[{}]~~~{}~~~", s.len, s);
3484 FilterResult r = this->filter_scalar_squoted_in_place(s, s.len);
3485 _RYML_CB_ASSERT(this->callbacks(), r.valid());
3486 _c4dbgpf(
"filtering squo scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
// Filters a double-quoted scalar. The in-place filter is tried first (the
// likely case). When its result is not valid, r.required_len() bytes are
// requested from the event handler's arena and the scalar is filtered again
// from `s` into that buffer; that second result is checked to be valid.
// NOTE(review): `&s` is passed to alloc_arena() - presumably so `s` can be
// updated if the arena relocates; confirm against the handler.
// NOTE(review): the return statements are not visible in this listing.
3493 template<
class EventHandler>
3494 csubstr ParseEngine<EventHandler>::_filter_scalar_dquot(substr s)
3496 _c4dbgpf(
"filtering dquo scalar: s=[{}]~~~{}~~~", s.len, s);
3497 FilterResultExtending r = this->filter_scalar_dquoted_in_place(s, s.len);
3498 if(C4_LIKELY(r.valid()))
3500 _c4dbgpf(
"filtering dquo scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
// fallback: not enough room in place, filter into a fresh arena buffer
3505 const size_t len = r.required_len();
3506 _c4dbgpf(
"filtering dquo scalar: not enough space: needs {}, have {}", len, s.len);
3507 substr dst = m_evt_handler->alloc_arena(len, &s);
3508 _c4dbgpf(
"filtering dquo scalar: dst.len={}", dst.len);
3509 _RYML_CB_ASSERT(this->callbacks(), dst.len == len);
3510 FilterResult rsd = this->filter_scalar_dquoted(s, dst);
3511 _c4dbgpf(
"filtering dquo scalar: ... result now needs {} was {}", rsd.required_len(), len);
3512 _RYML_CB_ASSERT(this->callbacks(), rsd.required_len() <= len);
3513 _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, rsd.valid());
3514 _c4dbgpf(
"filtering dquo scalar: success! s=[{}]~~~{}~~~", rsd.get().len, rsd.get());
// Filters a literal block scalar. The in-place filter is tried first (the
// likely case). When its result is not valid, r.required_len() bytes are
// requested from the event handler's arena and the scalar is filtered from
// `s` into that buffer; that second result is checked to be valid.
// NOTE(review): the return statements are not visible in this listing.
3521 template<
class EventHandler>
3522 csubstr ParseEngine<EventHandler>::_filter_scalar_literal(substr s,
size_t indentation, BlockChomp_e chomp)
3524 _c4dbgpf(
"filtering block literal scalar: s=[{}]~~~{}~~~", s.len, s);
3525 FilterResult r = this->filter_scalar_block_literal_in_place(s, s.len, indentation, chomp);
3526 if(C4_LIKELY(r.valid()))
3528 _c4dbgpf(
"filtering block literal scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
// fallback: not enough room in place, filter into a fresh arena buffer
3533 _c4dbgpf(
"filtering block literal scalar: not enough space: needs {}, have {}", r.required_len(), s.len);
3534 substr dst = m_evt_handler->alloc_arena(r.required_len(), &s);
3535 FilterResult rsd = this->filter_scalar_block_literal(s, dst, indentation, chomp);
3536 _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, rsd.valid());
3537 _c4dbgpf(
"filtering block literal scalar: success! s=[{}]~~~{}~~~", rsd.get().len, rsd.get());
// Filters a folded block scalar. The in-place filter is tried first (the
// likely case). When its result is not valid, r.required_len() bytes are
// requested from the event handler's arena and the scalar is filtered from
// `s` into that buffer; that second result is checked to be valid.
// NOTE(review): the return statements are not visible in this listing.
3544 template<
class EventHandler>
3545 csubstr ParseEngine<EventHandler>::_filter_scalar_folded(substr s,
size_t indentation, BlockChomp_e chomp)
3547 _c4dbgpf(
"filtering block folded scalar: s=[{}]~~~{}~~~", s.len, s);
3548 FilterResult r = this->filter_scalar_block_folded_in_place(s, s.len, indentation, chomp);
3549 if(C4_LIKELY(r.valid()))
3551 _c4dbgpf(
"filtering block folded scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
// fallback: not enough room in place, filter into a fresh arena buffer
3556 _c4dbgpf(
"filtering block folded scalar: not enough space: needs {}, have {}", r.required_len(), s.len);
3557 substr dst = m_evt_handler->alloc_arena(r.required_len(), &s);
3558 FilterResult rsd = this->filter_scalar_block_folded(s, dst, indentation, chomp);
3559 _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, rsd.valid());
3560 _c4dbgpf(
"filtering block folded scalar: success! s=[{}]~~~{}~~~", rsd.get().len, rsd.get());
// Returns the plain key scalar, filtered with _filter_scalar_plain() when
// m_options.scalar_filtering() is enabled; otherwise the key scalar is marked
// as unfiltered on the event handler.
// NOTE(review): the outer test that leads to the "doesn't need filtering"
// message and the returns of the non-filtering paths are not visible here.
3568 template<
class EventHandler>
3569 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_plain(ScannedScalar
const& C4_RESTRICT sc,
size_t indentation)
3573 if(m_options.scalar_filtering())
3575 return _filter_scalar_plain(sc.scalar, indentation);
3579 _c4dbgp(
"plain scalar left unfiltered");
3580 m_evt_handler->mark_key_scalar_unfiltered();
3585 _c4dbgp(
"plain scalar doesn't need filtering");
// Returns the plain val scalar, filtered with _filter_scalar_plain() when
// m_options.scalar_filtering() is enabled; otherwise the val scalar is marked
// as unfiltered on the event handler.
// NOTE(review): the outer test that leads to the "doesn't need filtering"
// message and the returns of the non-filtering paths are not visible here.
3590 template<
class EventHandler>
3591 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_plain(ScannedScalar
const& C4_RESTRICT sc,
size_t indentation)
3595 if(m_options.scalar_filtering())
3597 return _filter_scalar_plain(sc.scalar, indentation);
3601 _c4dbgp(
"plain scalar left unfiltered");
3602 m_evt_handler->mark_val_scalar_unfiltered();
3607 _c4dbgp(
"plain scalar doesn't need filtering");
// Returns the single-quoted key scalar, filtered with _filter_scalar_squot()
// when m_options.scalar_filtering() is enabled; otherwise the key scalar is
// marked as unfiltered on the event handler.
// NOTE(review): the outer test that leads to the "doesn't need filtering"
// message and the returns of the non-filtering paths are not visible here.
3615 template<
class EventHandler>
3616 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_squot(ScannedScalar
const& C4_RESTRICT sc)
3620 if(m_options.scalar_filtering())
3622 return _filter_scalar_squot(sc.scalar);
3626 _c4dbgp(
"squo key scalar left unfiltered");
3627 m_evt_handler->mark_key_scalar_unfiltered();
3632 _c4dbgp(
"squo key scalar doesn't need filtering");
// Returns the single-quoted val scalar, filtered with _filter_scalar_squot()
// when m_options.scalar_filtering() is enabled; otherwise the val scalar is
// marked as unfiltered on the event handler.
// NOTE(review): the outer test that leads to the "doesn't need filtering"
// message and the returns of the non-filtering paths are not visible here.
3637 template<
class EventHandler>
3638 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_squot(ScannedScalar
const& C4_RESTRICT sc)
3642 if(m_options.scalar_filtering())
3644 return _filter_scalar_squot(sc.scalar);
3648 _c4dbgp(
"squo val scalar left unfiltered");
3649 m_evt_handler->mark_val_scalar_unfiltered();
3654 _c4dbgp(
"squo val scalar doesn't need filtering");
// Returns the double-quoted key scalar, filtered with _filter_scalar_dquot()
// when m_options.scalar_filtering() is enabled; otherwise the key scalar is
// marked as unfiltered on the event handler.
// NOTE(review): the outer test that leads to the "doesn't need filtering"
// message and the returns of the non-filtering paths are not visible here.
3662 template<
class EventHandler>
3663 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_dquot(ScannedScalar
const& C4_RESTRICT sc)
3667 if(m_options.scalar_filtering())
3669 return _filter_scalar_dquot(sc.scalar);
3673 _c4dbgp(
"dquo scalar left unfiltered");
3674 m_evt_handler->mark_key_scalar_unfiltered();
3679 _c4dbgp(
"dquo scalar doesn't need filtering");
// Returns the double-quoted val scalar, filtered with _filter_scalar_dquot()
// when m_options.scalar_filtering() is enabled; otherwise the val scalar is
// marked as unfiltered on the event handler.
// NOTE(review): the outer test that leads to the "doesn't need filtering"
// message and the returns of the non-filtering paths are not visible here.
3684 template<
class EventHandler>
3685 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_dquot(ScannedScalar
const& C4_RESTRICT sc)
3689 if(m_options.scalar_filtering())
3691 return _filter_scalar_dquot(sc.scalar);
3695 _c4dbgp(
"dquo scalar left unfiltered");
3696 m_evt_handler->mark_val_scalar_unfiltered();
3701 _c4dbgp(
"dquo scalar doesn't need filtering");
// Returns the literal block key scalar, filtered with
// _filter_scalar_literal() (using the block's indentation and chomp) when
// m_options.scalar_filtering() is enabled; otherwise the key scalar is marked
// as unfiltered on the event handler.
// NOTE(review): the return of the unfiltered path is not visible here.
3709 template<
class EventHandler>
3710 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_literal(ScannedBlock
const& C4_RESTRICT sb)
3712 if(m_options.scalar_filtering())
3714 return _filter_scalar_literal(sb.scalar, sb.indentation, sb.chomp);
3718 _c4dbgp(
"literal scalar left unfiltered");
3719 m_evt_handler->mark_key_scalar_unfiltered();
// Returns the literal block val scalar, filtered with
// _filter_scalar_literal() (using the block's indentation and chomp) when
// m_options.scalar_filtering() is enabled; otherwise the val scalar is marked
// as unfiltered on the event handler.
// NOTE(review): the return of the unfiltered path is not visible here.
3724 template<
class EventHandler>
3725 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_literal(ScannedBlock
const& C4_RESTRICT sb)
3727 if(m_options.scalar_filtering())
3729 return _filter_scalar_literal(sb.scalar, sb.indentation, sb.chomp);
3733 _c4dbgp(
"literal scalar left unfiltered");
3734 m_evt_handler->mark_val_scalar_unfiltered();
// Returns the folded block key scalar, filtered with _filter_scalar_folded()
// (using the block's indentation and chomp) when
// m_options.scalar_filtering() is enabled; otherwise the key scalar is marked
// as unfiltered on the event handler.
// NOTE(review): the return of the unfiltered path is not visible here.
3742 template<
class EventHandler>
3743 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_folded(ScannedBlock
const& C4_RESTRICT sb)
3745 if(m_options.scalar_filtering())
3747 return _filter_scalar_folded(sb.scalar, sb.indentation, sb.chomp);
3751 _c4dbgp(
"folded scalar left unfiltered");
3752 m_evt_handler->mark_key_scalar_unfiltered();
// Returns the folded block val scalar, filtered with _filter_scalar_folded()
// (using the block's indentation and chomp) when
// m_options.scalar_filtering() is enabled; otherwise the val scalar is marked
// as unfiltered on the event handler.
// NOTE(review): the return of the unfiltered path is not visible here.
3757 template<
class EventHandler>
3758 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_folded(ScannedBlock
const& C4_RESTRICT sb)
3760 if(m_options.scalar_filtering())
3762 return _filter_scalar_folded(sb.scalar, sb.indentation, sb.chomp);
3766 _c4dbgp(
"folded scalar left unfiltered");
3767 m_evt_handler->mark_val_scalar_unfiltered();
// Debug-logging part of add_flags(): renders the flags being added, the
// state's current flags and the resulting flags into 64-byte stack buffers
// and prints them together with the state's level.
// NOTE(review): the statement that actually sets the flags on `s` is not
// visible in this listing (presumably s->flags |= on) - confirm.
3779 template<
class EventHandler>
3780 void ParseEngine<EventHandler>::add_flags(
ParserFlag_t on, ParserState * s)
3782 char buf1_[64], buf2_[64], buf3_[64];
3783 csubstr buf1 = detail::_parser_flags_to_str(buf1_, on);
3784 csubstr buf2 = detail::_parser_flags_to_str(buf2_, s->flags);
3785 csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags|on);
3786 _c4dbgpf(
"state[{}]: add {}: before={} after={}", s->level, buf1, buf2, buf3);
// Debug-logging part of a combined add/remove flags operation: renders the
// flags to add (`on`), the flags to remove (`off`), the current flags and the
// result ((flags|on) & ~off) and prints them with the state's level.
// NOTE(review): the signature and the statements that mutate s->flags are not
// visible in this listing; the name addrem_flags is assumed from the log
// text - confirm against the declaration.
3790 template<
class EventHandler>
3793 char buf1_[64], buf2_[64], buf3_[64], buf4_[64];
3794 csubstr buf1 = detail::_parser_flags_to_str(buf1_, on);
3795 csubstr buf2 = detail::_parser_flags_to_str(buf2_, off);
3796 csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags);
3797 csubstr buf4 = detail::_parser_flags_to_str(buf4_, ((s->flags|on)&(~off)));
3798 _c4dbgpf(
"state[{}]: add {} / rem {}: before={} after={}", s->level, buf1, buf2, buf3, buf4);
// Debug-logging part of rem_flags(): renders the flags being removed, the
// state's current flags and the resulting flags (flags & ~off) and prints
// them together with the state's level.
// NOTE(review): the statement that actually clears the flags on `s` is not
// visible in this listing (presumably s->flags &= ~off) - confirm.
3803 template<
class EventHandler>
3804 void ParseEngine<EventHandler>::rem_flags(
ParserFlag_t off, ParserState * s)
3806 char buf1_[64], buf2_[64], buf3_[64];
3807 csubstr buf1 = detail::_parser_flags_to_str(buf1_, off);
3808 csubstr buf2 = detail::_parser_flags_to_str(buf2_, s->flags);
3809 csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags&(~off));
3810 _c4dbgpf(
"state[{}]: rem {}: before={} after={}", s->level, buf1, buf2, buf3);
// Writes a textual representation of `flags` into `buf` and returns the
// written prefix, buf.first(pos). The local macro _prflag(fl) tests whether
// all bits of `fl` are set and, if so, copies the stringified flag name (#fl)
// into the buffer when it fits; a final check enforces pos <= buf.len.
// NOTE(review): the declaration of `pos`, the separator handling (see
// `gotone`), the rest of the macro body and the list of _prflag() invocations
// are not visible in this listing.
3814 inline C4_NO_INLINE csubstr detail::_parser_flags_to_str(substr buf,
ParserFlag_t flags)
3817 bool gotone =
false;
3819 #define _prflag(fl) \
3820 if((flags & fl) == (fl)) \
3824 if(pos + 1 < buf.len) \
3828 csubstr fltxt = #fl; \
3829 if(pos + fltxt.len <= buf.len) \
3830 memcpy(buf.str + pos, fltxt.str, fltxt.len); \
3860 RYML_CHECK(pos <= buf.len);
3862 return buf.first(pos);
// Returns the part of the parsed buffer starting at loc.offset.
// Precondition (asserted): loc.offset lies inside m_buf.
// NOTE(review): the signature line is not visible in this listing; the name
// location_contents is assumed - confirm against the declaration.
3872 template<
class EventHandler>
3875 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, loc.offset < m_buf.len);
3876 return m_buf.sub(loc.offset);
// Convenience overload taking a node reference: asserts the node is readable
// and forwards to location(tree, id) with the node's tree and id.
// NOTE(review): the signature line is not visible in this listing.
3879 template<
class EventHandler>
3882 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, node.readable());
3883 return location(*node.tree(), node.id());
// Overload taking a tree and node id: tries _location_from_node() starting at
// level 0 and falls back to the location of the start of the buffer.
// NOTE(review): the signature, the declaration of `loc` and the return for
// the success path are not visible in this listing.
3886 template<
class EventHandler>
3891 if(_location_from_node(tree, node, &loc, 0))
3893 return val_location(m_buf.str);
// Tries to determine the source location of `node`, writing it to *loc.
// Order of attempts visible here: the node's key (if its string is not null),
// then its val, then - for containers - _location_from_cont(). For a typed
// node at level 0, the search continues with the previous sibling, the next
// sibling and the parent, each one level deeper (level+1), which keeps that
// fallback from recursing further.
// NOTE(review): the return statements and the validity tests on
// prev/next/parent ids are not visible in this listing; presumably true is
// returned as soon as a location is found.
3896 template<
class EventHandler>
3897 bool ParseEngine<EventHandler>::_location_from_node(Tree
const& tree,
id_type node, Location *C4_RESTRICT loc,
id_type level)
const
3899 if(tree.has_key(node))
3901 csubstr k = tree.key(node);
3902 if(C4_LIKELY(k.str !=
nullptr))
// the key must point into the parsed buffer for val_location() to work
3904 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, k.is_sub(m_buf));
3905 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.is_super(k));
3906 *loc = val_location(k.str);
3911 if(tree.has_val(node))
3913 csubstr v = tree.val(node);
3914 if(C4_LIKELY(v.str !=
nullptr))
3916 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, v.is_sub(m_buf));
3917 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.is_super(v));
3918 *loc = val_location(v.str);
3923 if(tree.is_container(node))
3925 if(_location_from_cont(tree, node, loc))
// fallback through the neighbours, only from the top-level call
3929 if(tree.type(node) !=
NOTYPE && level == 0)
3933 const id_type prev = tree.prev_sibling(node);
3936 if(_location_from_node(tree, prev, loc, level+1))
3942 const id_type next = tree.next_sibling(node);
3945 if(_location_from_node(tree, next, loc, level+1))
3951 const id_type parent = tree.parent(node);
3954 if(_location_from_node(tree, parent, loc, level+1))
// Determines the location of a container node, writing it to *loc.
// Precondition (asserted): `node` is a container.
// For non-stream containers the start pointer is read from the node's val
// scalar; when the container has children and the first child has a key, the
// key's pointer is compared with that start pointer. The location is then
// computed with val_location(); the start of the buffer is also used as a
// location in a branch whose condition is not shown.
// NOTE(review): the statement executed when node_start > k.str, the branch
// structure around the two `*loc =` assignments and the return statements are
// not visible in this listing.
3963 template<
class EventHandler>
3964 bool ParseEngine<EventHandler>::_location_from_cont(Tree
const& tree,
id_type node, Location *C4_RESTRICT loc)
const
3966 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, tree.is_container(node));
3967 if(!tree.is_stream(node))
3969 const char *node_start = tree._p(node)->m_val.scalar.str;
3970 if(tree.has_children(node))
3972 id_type child = tree.first_child(node);
3973 if(tree.has_key(child))
3976 csubstr k = tree.key(child);
3977 if(k.str && node_start > k.str)
3981 *loc = val_location(node_start);
3986 *loc = val_location(m_buf.str);
// Converts a pointer into the parsed buffer into a Location.
// A null pointer yields {m_file, 0, 0, 0}. Otherwise the parser must have
// been configured with locations enabled (checked), the newline-offset table
// must describe the current buffer (asserted) and `val` must lie within
// [src.begin(), src.end()] (checked). The line is found by locating the first
// newline offset greater than the pointer's offset; the column is the
// distance from the previous newline.
// NOTE(review): the signature and parts of the search code are not visible in
// this listing. Both a linear `for` scan and a `count`/`step` search are
// visible; the condition choosing between them is not shown.
3992 template<
class EventHandler>
3995 if(C4_UNLIKELY(val ==
nullptr))
3996 return {m_file, 0, 0, 0};
3997 _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_options.locations());
4000 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.str == m_newline_offsets_buf.str);
4001 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.len == m_newline_offsets_buf.len);
4002 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_options.locations());
4003 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !_locations_dirty());
4004 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_newline_offsets !=
nullptr);
4005 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_size > 0);
4007 csubstr src = m_buf;
4008 _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, val !=
nullptr || src.str ==
nullptr);
4009 _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, (val >= src.begin() && val <= src.end()) || (src.str ==
nullptr && val ==
nullptr));
4011 using lineptr_type =
size_t const* C4_RESTRICT;
4012 lineptr_type lineptr =
nullptr;
// byte offset of `val` from the start of the buffer
4013 size_t offset = (size_t)(val - src.begin());
4017 for(lineptr_type curr = m_newline_offsets, last = m_newline_offsets + m_newline_offsets_size; curr < last; ++curr)
4032 size_t count = m_newline_offsets_size;
4035 lineptr = m_newline_offsets;
4039 it = lineptr + step;
// post-conditions of the search: lineptr is inside the table and points at
// the first newline offset beyond `offset`
4051 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, lineptr >= m_newline_offsets);
4052 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, lineptr <= m_newline_offsets + m_newline_offsets_size);
4053 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, *lineptr > offset);
4056 loc.offset = offset;
4057 loc.line = (size_t)(lineptr - m_newline_offsets);
// not on the first line: column is counted from just after the previous newline
4058 if(lineptr > m_newline_offsets)
4059 loc.col = (offset - *(lineptr-1) - 1u);
4065 template<
class EventHandler>
4066 void ParseEngine<EventHandler>::_prepare_locations()
4068 m_newline_offsets_buf = m_buf;
4069 size_t numnewlines = 1u + m_buf.count(
'\n');
4070 _resize_locations(numnewlines);
4071 m_newline_offsets_size = 0;
4072 for(
size_t i = 0; i < m_buf.len; i++)
4073 if(m_buf[i] ==
'\n')
4074 m_newline_offsets[m_newline_offsets_size++] = i;
4075 m_newline_offsets[m_newline_offsets_size++] = m_buf.len;
4076 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_size == numnewlines);
4079 template<
class EventHandler>
4080 void ParseEngine<EventHandler>::_resize_locations(
size_t numnewlines)
4082 if(numnewlines > m_newline_offsets_capacity)
4084 if(m_newline_offsets)
4085 _RYML_CB_FREE(m_evt_handler->m_stack.m_callbacks, m_newline_offsets,
size_t, m_newline_offsets_capacity);
4086 m_newline_offsets = _RYML_CB_ALLOC_HINT(m_evt_handler->m_stack.m_callbacks,
size_t, numnewlines, m_newline_offsets);
4087 m_newline_offsets_capacity = numnewlines;
4091 template<
class EventHandler>
4092 bool ParseEngine<EventHandler>::_locations_dirty()
const
4094 return !m_newline_offsets_size;
// Skips leading whitespace and a trailing comment in the remainder of the
// current line while parsing flow style. When the remainder starts with ' '
// or '\t' the whitespace is handled first; when the remainder (then) begins
// with '#', the rest of the line is consumed as a comment via
// _line_progressed().
// NOTE(review): the statement that actually skips the whitespace is not
// visible in this listing.
4102 template<
class EventHandler>
4103 void ParseEngine<EventHandler>::_handle_flow_skip_whitespace()
4106 if(m_evt_handler->m_curr->line_contents.rem.len > 0)
4108 if(m_evt_handler->m_curr->line_contents.rem.str[0] ==
' ' || m_evt_handler->m_curr->line_contents.rem.str[0] ==
'\t')
4110 _c4dbgpf(
"starts with whitespace: '{}'", _c4prc(m_evt_handler->m_curr->line_contents.rem.str[0]));
4114 if(m_evt_handler->m_curr->line_contents.rem.begins_with(
'#'))
4116 _c4dbgpf(
"it's a comment: {}", m_evt_handler->m_curr->line_contents.rem);
4117 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
4126 template<
class EventHandler>
4127 void ParseEngine<EventHandler>::_handle_colon()
4129 size_t curr = m_evt_handler->m_curr->pos.line;
4130 if(m_prev_colon !=
npos)
4132 if(curr == m_prev_colon)
4133 _c4err(
"two colons on same line");
4135 m_prev_colon = curr;
// Stores a pending annotation (its text, indentation and line) in the next
// free slot of `dst`. Raises "too many annotations" when all slots of the
// fixed-size dst->annotations array are already in use.
// NOTE(review): the increment of dst->num_entries is not visible in this
// listing - confirm it follows the three assignments.
4138 template<
class EventHandler>
4139 void ParseEngine<EventHandler>::_add_annotation(Annotation *C4_RESTRICT dst, csubstr str,
size_t indentation,
size_t line)
4141 _c4dbgpf(
"store annotation[{}]: '{}' indentation={} line={}", dst->num_entries, str, indentation, line);
// capacity check against the compile-time size of the array
4142 if(C4_UNLIKELY(dst->num_entries >= C4_COUNTOF(dst->annotations)))
4143 _c4err(
"too many annotations");
4144 dst->annotations[dst->num_entries].str = str;
4145 dst->annotations[dst->num_entries].indentation = indentation;
4146 dst->annotations[dst->num_entries].line = line;
4150 template<
class EventHandler>
4151 void ParseEngine<EventHandler>::_clear_annotations(Annotation *C4_RESTRICT dst)
4153 dst->num_entries = 0;
4156 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
4157 template<
class EventHandler>
4158 bool ParseEngine<EventHandler>::_handle_indentation_from_annotations()
4160 if(m_pending_anchors.num_entries == 1u || m_pending_tags.num_entries == 1u)
4162 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.num_entries < 2u && m_pending_tags.num_entries < 2u);
4163 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.annotations[0].line < m_evt_handler->m_curr->pos.line);
4164 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_tags.annotations[1].line < m_evt_handler->m_curr->pos.line);
4165 size_t to_skip = m_evt_handler->m_curr->indref;
4166 if(m_pending_anchors.num_entries)
4167 to_skip = m_pending_anchors.annotations[0].indentation > to_skip ? m_pending_anchors.annotations[0].indentation : to_skip;
4168 if(m_pending_tags.num_entries)
4169 to_skip = m_pending_tags.annotations[0].indentation > to_skip ? m_pending_tags.annotations[0].indentation : to_skip;
4170 _c4dbgpf(
"annotations pending, skip indentation up to {}!", to_skip);
4171 _maybe_skipchars_up_to(
' ', to_skip);
4178 template<
class EventHandler>
4179 bool ParseEngine<EventHandler>::_annotations_require_key_container()
const
4181 return m_pending_tags.num_entries > 1 || m_pending_anchors.num_entries > 1;
4184 template<
class EventHandler>
4185 void ParseEngine<EventHandler>::_check_tag(csubstr tag)
4187 if(!tag.begins_with(
"!<"))
4189 if(C4_UNLIKELY(tag.first_of(
"[]{},") !=
npos))
4190 _RYML_CB_ERR_(m_evt_handler->m_stack.m_callbacks,
"tags must not contain any of '[]{},'", m_evt_handler->m_curr->pos);
4194 if(C4_UNLIKELY(!tag.ends_with(
'>')))
4195 _RYML_CB_ERR_(m_evt_handler->m_stack.m_callbacks,
"malformed tag", m_evt_handler->m_curr->pos);
4199 template<
class EventHandler>
4200 void ParseEngine<EventHandler>::_handle_annotations_before_blck_key_scalar()
4202 _c4dbgpf(
"annotations_before_blck_key_scalar, node={}", m_evt_handler->m_curr->node_id);
4203 if(m_pending_tags.num_entries)
4205 _c4dbgpf(
"annotations_before_blck_key_scalar, #tags={}", m_pending_tags.num_entries);
4206 if(C4_LIKELY(m_pending_tags.num_entries == 1))
4208 _check_tag(m_pending_tags.annotations[0].str);
4209 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4210 _clear_annotations(&m_pending_tags);
4214 _c4err(
"too many tags");
4217 if(m_pending_anchors.num_entries)
4219 _c4dbgpf(
"annotations_before_blck_key_scalar, #anchors={}", m_pending_anchors.num_entries);
4220 if(C4_LIKELY(m_pending_anchors.num_entries == 1))
4222 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4223 _clear_annotations(&m_pending_anchors);
4227 _c4err(
"too many anchors");
4232 template<
class EventHandler>
4233 void ParseEngine<EventHandler>::_handle_annotations_before_blck_val_scalar()
4235 _c4dbgpf(
"annotations_before_blck_val_scalar, node={}", m_evt_handler->m_curr->node_id);
4236 if(m_pending_tags.num_entries)
4238 _c4dbgpf(
"annotations_before_blck_val_scalar, #tags={}", m_pending_tags.num_entries);
4239 if(C4_LIKELY(m_pending_tags.num_entries == 1))
4241 _check_tag(m_pending_tags.annotations[0].str);
4242 m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4243 _clear_annotations(&m_pending_tags);
4247 _c4err(
"too many tags");
4250 if(m_pending_anchors.num_entries)
4252 _c4dbgpf(
"annotations_before_blck_val_scalar, #anchors={}", m_pending_anchors.num_entries);
4253 if(C4_LIKELY(m_pending_anchors.num_entries == 1))
4255 m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4256 _clear_annotations(&m_pending_anchors);
4260 _c4err(
"too many anchors");
4265 template<
class EventHandler>
4266 void ParseEngine<EventHandler>::_handle_annotations_before_start_mapblck(
size_t current_line)
4268 _c4dbgpf(
"annotations_before_start_mapblck, current_line={}", current_line);
4269 if(m_pending_tags.num_entries == 2)
4271 _c4dbgp(
"2 tags, setting entry 0");
4272 _check_tag(m_pending_tags.annotations[0].str);
4273 m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4275 else if(m_pending_tags.num_entries == 1)
4277 _c4dbgpf(
"1 tag. line={}, curr={}", m_pending_tags.annotations[0].line);
4278 if(m_pending_tags.annotations[0].line < current_line)
4280 _c4dbgp(
"...tag is for the map. setting it.");
4281 _check_tag(m_pending_tags.annotations[0].str);
4282 m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4283 _clear_annotations(&m_pending_tags);
4287 if(m_pending_anchors.num_entries == 2)
4289 _c4dbgp(
"2 anchors, setting entry 0");
4290 m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4292 else if(m_pending_anchors.num_entries == 1)
4294 _c4dbgpf(
"1 anchor. line={}, curr={}", m_pending_anchors.annotations[0].line);
4295 if(m_pending_anchors.annotations[0].line < current_line)
4297 _c4dbgp(
"...anchor is for the map. setting it.");
4298 m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4299 _clear_annotations(&m_pending_anchors);
4304 template<
class EventHandler>
4305 void ParseEngine<EventHandler>::_handle_annotations_before_start_mapblck_as_key()
4307 _c4dbgp(
"annotations_before_start_mapblck_as_key");
4308 if(m_pending_tags.num_entries == 2)
4310 _check_tag(m_pending_tags.annotations[0].str);
4311 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4313 if(m_pending_anchors.num_entries == 2)
4315 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
// Applies the annotations still pending after a block map was started,
// setting them as key annotations, and then sets the indentation.
//
// At most two pending tags and two pending anchors are allowed
// (asserted). When any annotation is pending, key_indentation is first
// replaced by the result of _select_indentation_from_annotations().
//
// key_indentation: indentation to set when no annotation is pending
// key_line: forwarded to _select_indentation_from_annotations()
4319 template<
class EventHandler>
4320 void ParseEngine<EventHandler>::_handle_annotations_and_indentation_after_start_mapblck(
size_t key_indentation,
size_t key_line)
4322 _c4dbgp(
"annotations_after_start_mapblck");
4323 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries <= 2);
4324 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.num_entries <= 2);
4325 if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
4327 key_indentation = _select_indentation_from_annotations(key_indentation, key_line);
// NOTE(review): the case labels of the two switches are not visible
// here; presumably entry 0 is used when one entry is pending and
// entry 1 when two are pending -- confirm.
4328 switch(m_pending_tags.num_entries)
4331 _check_tag(m_pending_tags.annotations[0].str);
4332 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4333 _clear_annotations(&m_pending_tags);
4336 _check_tag(m_pending_tags.annotations[1].str);
4337 m_evt_handler->set_key_tag(m_pending_tags.annotations[1].str);
4338 _clear_annotations(&m_pending_tags);
4341 switch(m_pending_anchors.num_entries)
4344 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4345 _clear_annotations(&m_pending_anchors);
4348 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[1].str);
4349 _clear_annotations(&m_pending_anchors);
4353 _set_indentation(key_indentation);
// Picks the indentation to use when annotations are pending.
//
// Requires at least one pending tag or anchor (asserted). A candidate
// annotation is selected among the pending anchors and tags by
// comparing line and indentation. If the candidate sits on a line
// before val_line, val_indentation is returned; otherwise the
// candidate's own indentation is returned.
4356 template<
class EventHandler>
4357 size_t ParseEngine<EventHandler>::_select_indentation_from_annotations(
size_t val_indentation,
size_t val_line)
4359 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries || m_pending_anchors.num_entries);
// initial candidate: the first anchor if there is one, else the first tag
4361 auto const *C4_RESTRICT curr = m_pending_anchors.num_entries ? &m_pending_anchors.annotations[0] : &m_pending_tags.annotations[0];
// NOTE(review): the statements executed by the branches inside the two
// loops are not visible here; presumably an entry on a later line
// stops the scan and an entry with smaller indentation becomes the
// candidate -- confirm.
4362 for(
size_t i = 0; i < m_pending_anchors.num_entries; ++i)
4364 auto const& C4_RESTRICT ann = m_pending_anchors.annotations[i];
4365 if(ann.line > curr->line)
4367 else if(ann.indentation < curr->indentation)
4370 for(
size_t j = 0; j < m_pending_tags.num_entries; ++j)
4372 auto const& C4_RESTRICT ann = m_pending_tags.annotations[j];
4373 if(ann.line > curr->line)
4375 else if(ann.indentation < curr->indentation)
4378 return curr->line < val_line ? val_indentation : curr->indentation;
// Handles a directive: hands the directive text to the event handler
// and advances the current line.
//
// rem must be a sub-string of the current line's remainder (asserted).
// The position of the first '#' in rem is looked up. Two paths are
// visible: one passes rem whole and advances by rem.len; the other
// passes the text before the '#', with trailing spaces and tabs
// trimmed, and advances by pos.
// NOTE(review): the condition selecting between the two paths is not
// visible here; presumably it is whether a '#' was found -- confirm.
4381 template<
class EventHandler>
4382 void ParseEngine<EventHandler>::_handle_directive(csubstr rem)
4384 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.is_sub(m_evt_handler->m_curr->line_contents.rem));
4385 const size_t pos = rem.find(
'#');
4386 _c4dbgpf(
"handle_directive: pos={} rem={}", pos, rem);
4389 m_evt_handler->add_directive(rem);
4390 _line_progressed(rem.len);
4394 csubstr to_comment = rem.first(pos);
4395 csubstr trimmed = to_comment.trimr(
" \t");
4396 m_evt_handler->add_directive(trimmed);
4397 _line_progressed(pos);
// Looks for a byte order mark at the start of the current line's
// remainder and advances the line past the matched bytes.
//
// The checks are made in this order: UTF32BE, UTF32LE, UTF16BE,
// UTF16LE, UTF8. For the UTF32/UTF16 cases, besides the explicit BOM
// bytes, a pattern of NUL bytes next to a non-NUL ASCII character is
// also accepted.
// NOTE(review): the return statements are not visible here; presumably
// the result tells whether a byte order mark was handled -- confirm.
4402 template<
class EventHandler>
4403 bool ParseEngine<EventHandler>::_handle_bom()
4405 const csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// rem without its first byte; used by the little-endian checks, where
// the NUL bytes come after the first character
4408 const csubstr rest = rem.sub(1);
// true for characters in the range ('\0', '\x7f']
4410 #define _rymlisascii(c) ((c) > '\0' && (c) <= '\x7f')
4411 if(rem.begins_with({
"\x00\x00\xfe\xff", 4}) || (rem.begins_with({
"\x00\x00\x00", 3}) && rem.len >= 4u && _rymlisascii(rem.str[3])))
4413 _c4dbgp(
"byte order mark: UTF32BE");
4415 _line_progressed(4);
4418 else if(rem.begins_with(
"\xff\xfe\x00\x00") || (rest.begins_with({
"\x00\x00\x00", 3}) && rem.len >= 4u && _rymlisascii(rem.str[0])))
4420 _c4dbgp(
"byte order mark: UTF32LE");
4422 _line_progressed(4);
4425 else if(rem.begins_with(
"\xfe\xff") || (rem.begins_with(
'\x00') && rem.len >= 2u && _rymlisascii(rem.str[1])))
4427 _c4dbgp(
"byte order mark: UTF16BE");
4429 _line_progressed(2);
4432 else if(rem.begins_with(
"\xff\xfe") || (rest.begins_with(
'\x00') && rem.len >= 2u && _rymlisascii(rem.str[0])))
4434 _c4dbgp(
"byte order mark: UTF16LE");
4436 _line_progressed(2);
4439 else if(rem.begins_with(
"\xef\xbb\xbf"))
4441 _c4dbgp(
"byte order mark: UTF8");
4443 _line_progressed(3);
// Validates the encoding enc announced by a byte order mark against
// the parser state.
//
// While m_encoding is NOBOM: the mark is accepted when enc is UTF8 or
// when the current line remainder starts at the very beginning of the
// buffer; the visible error for this case is "non-UTF8 byte order mark
// can appear only at the beginning of the file".
// Once m_encoding is no longer NOBOM: a different enc is an error
// ("byte order mark can only be set once").
// NOTE(review): the statement run on acceptance is not visible here;
// presumably it stores enc in m_encoding -- confirm.
4451 template<
class EventHandler>
4452 void ParseEngine<EventHandler>::_handle_bom(
Encoding_e enc)
4454 if(m_encoding ==
NOBOM)
4456 const bool is_beginning_of_file = m_evt_handler->m_curr->line_contents.rem.str == m_buf.str;
4457 if(enc ==
UTF8 || is_beginning_of_file)
4460 _c4err(
"non-UTF8 byte order mark can appear only at the beginning of the file");
4462 else if(enc != m_encoding)
4464 _c4err(
"byte order mark can only be set once");
// Parser step for a JSON sequence.
//
// Preconditions (asserted): RSEQ and FLOW are set, RKEY is not set, and
// exactly one of RVAL / RNXT is set.
//
// After skipping flow whitespace, the first character of the remaining
// line selects the action. The debug messages name the cases:
//  - RVAL: double-quoted scalar, child seq, child map, end of the seq,
//    plain scalar;
//  - RNXT: next value expected, end of the seq.
// Any other input is reported as "parse error".
// NOTE(review): the branch conditions, labels and state-flag updates
// between the visible statements are not visible here.
4471 template<
class EventHandler>
4472 void ParseEngine<EventHandler>::_handle_seq_json()
4475 _c4dbgpf(
"handle2_seq_json: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
4477 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
4478 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RSEQ));
4479 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
FLOW));
4480 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL|
RNXT));
4481 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RVAL) != has_all(
RNXT));
4483 _handle_flow_skip_whitespace();
4484 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// ---- RVAL: a value is expected
4490 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
4491 const char first = rem.str[0];
// NOTE(review): this debug label says "mapjson" although this is the
// sequence handler; it looks like a copy-paste leftover.
4492 _c4dbgpf(
"mapjson[RVAL]: '{}'", first);
4497 _c4dbgp(
"seqjson[RVAL]: scanning double-quoted scalar");
4498 ScannedScalar sc = _scan_scalar_dquot();
4499 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
4500 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
4506 _c4dbgp(
"seqjson[RVAL]: start child seqjson");
4508 m_evt_handler->begin_seq_val_flow();
4510 _line_progressed(1);
4515 _c4dbgp(
"seqjson[RVAL]: start child mapjson");
4517 m_evt_handler->begin_map_val_flow();
4519 _line_progressed(1);
4520 goto seqjson_finish;
4524 _c4dbgp(
"seqjson[RVAL]: end!");
4526 m_evt_handler->end_seq();
4527 _line_progressed(1);
4529 goto seqjson_finish;
4535 if(_scan_scalar_seq_json(&sc))
4537 _c4dbgp(
"seqjson[RVAL]: it's a plain scalar.");
4538 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
4539 m_evt_handler->set_val_scalar_plain(maybe_filtered);
4544 _c4err(
"parse error");
// ---- RNXT: a separator or the end of the sequence is expected
4551 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT));
4552 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
4553 const char first = rem.str[0];
// NOTE(review): same "mapjson" label mismatch as above.
4554 _c4dbgpf(
"mapjson[RNXT]: '{}'", first);
4559 _c4dbgp(
"seqjson[RNXT]: expect next val");
4561 m_evt_handler->add_sibling();
4562 _line_progressed(1);
4567 _c4dbgp(
"seqjson[RNXT]: end!");
4568 m_evt_handler->end_seq();
4569 _line_progressed(1);
4570 goto seqjson_finish;
4573 _c4err(
"parse error");
// ---- repeat: when the line is finished, the file is checked for more
// input; the error below is raised from the branch where it is not
// (presumably: end of file before the closing ']' -- confirm)
4578 _c4dbgt(
"seqjson: go again", 0);
4579 if(_finished_line())
4581 if(C4_LIKELY(!_finished_file()))
4589 _c4err(
"missing terminating ]");
4595 _c4dbgp(
"seqjson: finish");
// Parser step for a JSON map.
//
// Preconditions (asserted): RMAP and FLOW are set, QMRK is not set, and
// exactly one of RKEY / RKCL / RVAL / RNXT is set.
//
// After skipping flow whitespace, the first character of the remaining
// line selects the action. The debug messages name the cases:
//  - RKEY: double-quoted key scalar, end of the map;
//  - RVAL: double-quoted scalar, child seq, child map, plain scalar;
//  - RKCL: the colon after the key;
//  - RNXT: ',' (next key-value pair) or '}' (end of the map).
// Any other input is reported as "parse error".
// NOTE(review): several branch conditions, labels and state-flag
// updates between the visible statements are not visible here.
4601 template<
class EventHandler>
4602 void ParseEngine<EventHandler>::_handle_map_json()
4605 _c4dbgpf(
"handle2_map_json: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
4607 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP));
4608 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
FLOW));
4609 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
4610 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RKCL|
RVAL|
RNXT));
4611 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(
RKEY) + has_any(
RKCL) + has_any(
RVAL) + has_any(
RNXT)));
4613 _handle_flow_skip_whitespace();
4614 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// ---- RKEY: a key is expected
4620 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
4621 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
4622 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
4623 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
4624 const char first = rem.str[0];
4625 _c4dbgpf(
"mapjson[RKEY]: '{}'", first);
4630 _c4dbgp(
"mapjson[RKEY]: scanning double-quoted scalar");
4631 ScannedScalar sc = _scan_scalar_dquot();
4632 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
4633 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
4639 _c4dbgp(
"mapjson[RKEY]: end!");
4640 m_evt_handler->end_map();
4641 _line_progressed(1);
4642 goto mapjson_finish;
4645 _c4err(
"parse error");
// ---- RVAL: a value is expected
4648 else if(has_any(
RVAL))
4650 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
4651 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
4652 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
4653 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
4654 const char first = rem.str[0];
4655 _c4dbgpf(
"mapjson[RVAL]: '{}'", first);
4660 _c4dbgp(
"mapjson[RVAL]: scanning double-quoted scalar");
4661 ScannedScalar sc = _scan_scalar_dquot();
4662 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
4663 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
4669 _c4dbgp(
"mapjson[RVAL]: start val seqjson");
4671 m_evt_handler->begin_seq_val_flow();
// the child container takes the indentation reference of its parent
4672 _set_indentation(m_evt_handler->m_parent->indref);
4674 _line_progressed(1);
4675 goto mapjson_finish;
4679 _c4dbgp(
"mapjson[RVAL]: start val mapjson");
4681 m_evt_handler->begin_map_val_flow();
4682 _set_indentation(m_evt_handler->m_parent->indref);
4684 _line_progressed(1);
4691 if(_scan_scalar_map_json(&sc))
4693 _c4dbgp(
"mapjson[RVAL]: plain scalar.");
4694 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
4695 m_evt_handler->set_val_scalar_plain(maybe_filtered);
4700 _c4err(
"parse error");
// ---- RKCL: the colon after the key is expected
4706 else if(has_any(
RKCL))
4708 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
4709 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
4710 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
4711 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
4712 const char first = rem.str[0];
4713 _c4dbgpf(
"mapjson[RKCL]: '{}'", first);
4716 _c4dbgp(
"mapjson[RKCL]: found the colon");
4718 _line_progressed(1);
4722 _c4err(
"parse error");
// ---- RNXT: a separator or the end of the map is expected
4725 else if(has_any(
RNXT))
4727 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
4728 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
4729 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
4730 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
4731 _c4dbgpf(
"mapjson[RNXT]: '{}'", rem.str[0]);
4732 if(rem.begins_with(
','))
4734 _c4dbgp(
"mapjson[RNXT]: expect next keyval");
4735 m_evt_handler->add_sibling();
4737 _line_progressed(1);
4739 else if(rem.begins_with(
'}'))
4741 _c4dbgp(
"mapjson[RNXT]: end!");
4742 m_evt_handler->end_map();
4743 _line_progressed(1);
4744 goto mapjson_finish;
4748 _c4err(
"parse error");
// ---- repeat: when the line is finished, the file is checked for more
// input; the error below is raised from the branch where it is not
// (presumably: end of file before the closing '}' -- confirm)
4753 _c4dbgt(
"mapjson: go again", 0);
4754 if(_finished_line())
4756 if(C4_LIKELY(!_finished_file()))
4764 _c4err(
"missing terminating }");
4770 _c4dbgp(
"mapjson: finish");
// Parser step for the "seqimap" state (flag RSEQIMAP): a single-pair
// map that appears as an entry of a flow sequence.
//
// Preconditions (asserted): RSEQIMAP is set, RKEY is not set, exactly
// one of RVAL / RNXT / QMRK / RKCL is set, and the handler stack holds
// at least 3 entries.
//
// After skipping flow whitespace, the first character of the remaining
// line selects the action. The debug messages name the cases:
//  - RVAL: a value scalar (single-quoted, double-quoted or plain) is
//    set and the map is ended; '[' / '{' start a child flow container;
//    ',' or ']' finish with an empty value; '&' sets a val anchor; '*'
//    sets a val reference;
//  - RNXT: ',' or ']' end the map;
//  - QMRK: same shape as RVAL, but for the key; ',' or ']' finish with
//    empty key and value;
//  - RKCL: ':' is consumed; ',' or ']' finish with an empty value.
// Any other input is reported as "parse error".
// NOTE(review): several branch conditions, labels and state-flag
// updates between the visible statements are not visible here.
4776 template<
class EventHandler>
4777 void ParseEngine<EventHandler>::_handle_seq_imap()
4780 _c4dbgpf(
"handle2_seq_imap: node_id={} level={} indref={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
4782 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RSEQIMAP));
4783 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
4784 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL|
RNXT|
QMRK|
RKCL));
4785 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == has_all(
RVAL) + has_all(
RNXT) + has_all(
QMRK) + has_all(
RKCL));
4786 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 3);
4788 _handle_flow_skip_whitespace();
4789 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// ---- RVAL: the value of the pair is expected
4795 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL));
4796 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
4797 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
4798 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
4799 const char first = rem.str[0];
4800 _c4dbgpf(
"seqimap[RVAL]: '{}'", _c4prc(first));
4804 _c4dbgp(
"seqimap[RVAL]: scanning single-quoted scalar");
4805 sc = _scan_scalar_squot();
4806 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
4807 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
// a scalar value completes the pair, so the map is ended right away
4808 m_evt_handler->end_map();
4809 goto seqimap_finish;
4811 else if(first ==
'"')
4813 _c4dbgp(
"seqimap[RVAL]: scanning double-quoted scalar");
4814 sc = _scan_scalar_dquot();
4815 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
4816 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
4817 m_evt_handler->end_map();
4818 goto seqimap_finish;
4821 else if(_scan_scalar_plain_map_flow(&sc))
4823 _c4dbgp(
"seqimap[RVAL]: it's a scalar.");
4824 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
4825 m_evt_handler->set_val_scalar_plain(maybe_filtered);
4826 m_evt_handler->end_map();
4827 goto seqimap_finish;
4829 else if(first ==
'[')
4831 _c4dbgp(
"seqimap[RVAL]: start child seqflow");
4833 m_evt_handler->begin_seq_val_flow();
4835 _set_indentation(m_evt_handler->m_parent->indref);
4836 _line_progressed(1);
4837 goto seqimap_finish;
4839 else if(first ==
'{')
4841 _c4dbgp(
"seqimap[RVAL]: start child mapflow");
4843 m_evt_handler->begin_map_val_flow();
4845 _set_indentation(m_evt_handler->m_parent->indref);
4846 _line_progressed(1);
4847 goto seqimap_finish;
// ',' and ']' are not consumed in this branch (no _line_progressed)
4849 else if(first ==
',' || first ==
']')
4851 _c4dbgp(
"seqimap[RVAL]: finish without val.");
4852 m_evt_handler->set_val_scalar_plain_empty();
4853 m_evt_handler->end_map();
4854 goto seqimap_finish;
4856 else if(first ==
'&')
4858 csubstr anchor = _scan_anchor();
4859 _c4dbgp(
"seqimap[RVAL]: anchor!");
4860 m_evt_handler->set_val_anchor(anchor);
4862 else if(first ==
'*')
4864 csubstr ref = _scan_ref_seq();
4865 _c4dbgp(
"seqimap[RVAL]: ref!");
4866 m_evt_handler->set_val_ref(ref);
4871 _c4err(
"parse error");
// ---- RNXT: only the end of the pair is acceptable
4874 else if(has_any(
RNXT))
4876 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT));
4877 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
4878 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
4879 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
4880 const char first = rem.str[0];
4881 _c4dbgpf(
"seqimap[RNXT]: '{}'", _c4prc(first));
4882 if(first ==
',' || first ==
']')
4886 _c4dbgp(
"seqimap: done");
4887 m_evt_handler->end_map();
4888 goto seqimap_finish;
4892 _c4err(
"parse error");
// ---- QMRK: the key of the pair is expected
4895 else if(has_any(
QMRK))
4897 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
QMRK));
4898 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
4899 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
4900 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
4901 const char first = rem.str[0];
4902 _c4dbgpf(
"seqimap[QMRK]: '{}'", _c4prc(first));
4906 _c4dbgp(
"seqimap[QMRK]: scanning single-quoted scalar");
4907 sc = _scan_scalar_squot();
4908 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
4909 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
4913 else if(first ==
'"')
4915 _c4dbgp(
"seqimap[QMRK]: scanning double-quoted scalar");
4916 sc = _scan_scalar_dquot();
4917 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
4918 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
4923 else if(_scan_scalar_plain_map_flow(&sc))
4925 _c4dbgp(
"seqimap[QMRK]: it's a scalar.");
4926 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
4927 m_evt_handler->set_key_scalar_plain(maybe_filtered);
4931 else if(first ==
'[')
4933 _c4dbgp(
"seqimap[QMRK]: start child seqflow");
4935 m_evt_handler->begin_seq_key_flow();
4937 _set_indentation(m_evt_handler->m_parent->indref);
4938 _line_progressed(1);
4939 goto seqimap_finish;
4941 else if(first ==
'{')
4943 _c4dbgp(
"seqimap[QMRK]: start child mapflow");
4945 m_evt_handler->begin_map_key_flow();
4947 _set_indentation(m_evt_handler->m_parent->indref);
4948 _line_progressed(1);
4949 goto seqimap_finish;
4951 else if(first ==
',' || first ==
']')
4953 _c4dbgp(
"seqimap[QMRK]: finish without key.");
4954 m_evt_handler->set_key_scalar_plain_empty();
4955 m_evt_handler->set_val_scalar_plain_empty();
4956 m_evt_handler->end_map();
4957 goto seqimap_finish;
4959 else if(first ==
'&')
4961 csubstr anchor = _scan_anchor();
4962 _c4dbgp(
"seqimap[QMRK]: anchor!");
4963 m_evt_handler->set_key_anchor(anchor);
4965 else if(first ==
'*')
4967 csubstr ref = _scan_ref_seq();
4968 _c4dbgp(
"seqimap[QMRK]: ref!");
4969 m_evt_handler->set_key_ref(ref);
4974 _c4err(
"parse error");
// ---- RKCL: the colon after the key is expected
4977 else if(has_any(
RKCL))
4979 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
4980 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
4981 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
4982 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RKCL));
4983 const char first = rem.str[0];
4984 _c4dbgpf(
"seqimap[RKCL]: '{}'", _c4prc(first));
4987 _c4dbgp(
"seqimap[RKCL]: found ':'");
4989 _line_progressed(1);
4992 else if(first ==
',' || first ==
']')
4994 _c4dbgp(
"seqimap[RKCL]: found ','. finish without val");
4995 m_evt_handler->set_val_scalar_plain_empty();
4996 m_evt_handler->end_map();
4997 goto seqimap_finish;
5001 _c4err(
"parse error");
// ---- repeat: when the line is finished, the file is checked for more
// input; the error below is raised from the branch where it is not
// (presumably: end of file inside the pair -- confirm)
5006 _c4dbgt(
"seqimap: go again", 0);
5007 if(_finished_line())
5009 if(C4_LIKELY(!_finished_file()))
5017 _c4err(
"parse error");
5023 _c4dbgp(
"seqimap: finish");
// Parser step for a flow sequence.
//
// Preconditions (asserted): RSEQ and FLOW are set, RKEY is not set,
// exactly one of RVAL / RNXT is set, and the current indentation
// reference is not npos.
//
// After skipping flow whitespace, the first character of the remaining
// line selects the action. The debug messages name the cases:
//  - RVAL: single-quoted, double-quoted or plain scalar; '[' / '{'
//    child flow container; ']' end of the seq; '*' reference; '&'
//    anchor; '!' tag; ':' single-pair map with an empty key; '?'
//    single-pair map with an explicit key;
//  - RNXT: next value expected; ']' end of the seq; ':' the previous
//    value is turned into the first key of a new flow map.
// Any other input is reported as "parse error".
// NOTE(review): several branch conditions, labels and state-flag
// updates between the visible statements are not visible here.
5029 template<
class EventHandler>
5030 void ParseEngine<EventHandler>::_handle_seq_flow()
5033 _c4dbgpf(
"handle2_seq_flow: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5035 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
5036 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RSEQ));
5037 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
FLOW));
5038 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL|
RNXT));
5039 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RVAL) != has_all(
RNXT));
5040 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref !=
npos);
5042 _handle_flow_skip_whitespace();
// nothing left on this line after skipping whitespace
5044 if(!m_evt_handler->m_curr->line_contents.rem.len)
// ---- RVAL: a value is expected
5049 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
5050 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5054 _c4dbgp(
"seqflow[RVAL]: scanning single-quoted scalar");
5055 sc = _scan_scalar_squot();
5056 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5057 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
5060 else if(first ==
'"')
5062 _c4dbgp(
"seqflow[RVAL]: scanning double-quoted scalar");
5063 sc = _scan_scalar_dquot();
5064 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5065 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5069 else if(_scan_scalar_plain_seq_flow(&sc))
5071 _c4dbgp(
"seqflow[RVAL]: it's a scalar.");
5072 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5073 m_evt_handler->set_val_scalar_plain(maybe_filtered);
5076 else if(first ==
'[')
5078 _c4dbgp(
"seqflow[RVAL]: start child seqflow");
5080 m_evt_handler->begin_seq_val_flow();
// the child container takes the indentation reference of its parent
5081 _set_indentation(m_evt_handler->m_parent->indref);
5083 _line_progressed(1);
5085 else if(first ==
'{')
5087 _c4dbgp(
"seqflow[RVAL]: start child mapflow");
5089 m_evt_handler->begin_map_val_flow();
5090 _set_indentation(m_evt_handler->m_parent->indref);
5092 _line_progressed(1);
5093 goto seqflow_finish;
5095 else if(first ==
']')
5097 _c4dbgp(
"seqflow[RVAL]: end!");
5098 _line_progressed(1);
5099 m_evt_handler->end_seq();
5100 goto seqflow_finish;
5102 else if(first ==
'*')
5104 csubstr ref = _scan_ref_seq();
5105 _c4dbgpf(
"seqflow[RVAL]: ref! [{}]~~~{}~~~", ref.len, ref);
5106 m_evt_handler->set_val_ref(ref);
5109 else if(first ==
'&')
5111 csubstr anchor = _scan_anchor();
5112 _c4dbgpf(
"seqflow[RVAL]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
5113 m_evt_handler->set_val_anchor(anchor);
// an anchor followed directly by a comma annotates an empty scalar
5114 if(_maybe_scan_following_comma())
5116 _c4dbgp(
"seqflow[RVAL]: empty scalar!");
5117 m_evt_handler->set_val_scalar_plain_empty();
5118 m_evt_handler->add_sibling();
5121 else if(first ==
'!')
5123 csubstr tag = _scan_tag();
5124 _c4dbgpf(
"seqflow[RVAL]: tag! [{}]~~~{}~~~", tag.len, tag);
5126 m_evt_handler->set_val_tag(tag);
// a tag followed directly by a comma annotates an empty scalar
5127 if(_maybe_scan_following_comma())
5129 _c4dbgp(
"seqflow[RVAL]: empty scalar!");
5130 m_evt_handler->set_val_scalar_plain_empty();
5131 m_evt_handler->add_sibling();
5134 else if(first ==
':')
5136 _c4dbgpf(
"seqflow[RVAL]: actually seqimap at node[{}], with empty key", m_evt_handler->m_curr->node_id);
5138 m_evt_handler->begin_map_val_flow();
5139 _set_indentation(m_evt_handler->m_parent->indref);
5140 m_evt_handler->set_key_scalar_plain_empty();
5142 _line_progressed(1);
5143 goto seqflow_finish;
5145 else if(first ==
'?')
5147 _c4dbgp(
"seqflow[RVAL]: start child mapflow, explicit key");
5149 m_was_inside_qmrk =
true;
5150 m_evt_handler->begin_map_val_flow();
5151 _set_indentation(m_evt_handler->m_parent->indref);
5153 _line_progressed(1);
5154 _maybe_skip_whitespace_tokens();
5155 goto seqflow_finish;
5159 _c4err(
"parse error");
// ---- RNXT: a separator or the end of the sequence is expected
5164 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT));
5165 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
5166 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5169 _c4dbgp(
"seqflow[RNXT]: expect next val");
5171 m_evt_handler->add_sibling();
5172 _line_progressed(1);
5174 else if(first ==
']')
5176 _c4dbgp(
"seqflow[RNXT]: end!");
5177 m_evt_handler->end_seq();
5178 _line_progressed(1);
5179 goto seqflow_finish;
5181 else if(first ==
':')
5183 _c4dbgpf(
"seqflow[RNXT]: actually seqimap at node[{}]", m_evt_handler->m_curr->node_id);
5184 m_evt_handler->actually_val_is_first_key_of_new_map_flow();
5185 _set_indentation(m_evt_handler->m_parent->indref);
5186 _line_progressed(1);
5188 goto seqflow_finish;
5192 _c4err(
"parse error");
// ---- repeat: when the line is finished, the file is checked for more
// input; the error below is raised from the branch where it is not
// (presumably: end of file before the closing ']' -- confirm)
5197 _c4dbgt(
"seqflow: go again", 0);
5198 if(_finished_line())
5200 if(C4_LIKELY(!_finished_file()))
5208 _c4err(
"missing terminating ]");
5214 _c4dbgp(
"seqflow: finish");
// Parser step for a flow map.
//
// Preconditions (asserted): RMAP and FLOW are set, and exactly one of
// RKEY / RKCL / RVAL / RNXT / QMRK is set.
//
// After skipping flow whitespace, the first character of the remaining
// line selects the action. The debug messages name the cases:
//  - RKEY: key scalar (single-quoted, double-quoted, plain); '?'
//    explicit key; ':' empty key; ',' empty key and value; '}' end of
//    the map; '&' key anchor; '*' key reference; '[' / '{' container
//    used as key; '!' key tag;
//  - RKCL: ':' consumed; '}' end with empty value; ',' empty value and
//    next sibling;
//  - RVAL: value scalar (single-quoted, double-quoted, plain); '[' /
//    '{' child container; '}' end with empty value; ',' empty value;
//    '*' reference; '&' anchor; '!' tag;
//  - RNXT: ',' next key-value pair; '}' end of the map;
//  - QMRK: same cases as RKEY except '?', for the explicit key.
// Any other input is reported as "parse error".
// NOTE(review): several branch conditions, labels and state-flag
// updates between the visible statements are not visible here.
5220 template<
class EventHandler>
5221 void ParseEngine<EventHandler>::_handle_map_flow()
5224 _c4dbgpf(
"handle2_map_flow: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5226 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP));
5227 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
FLOW));
5229 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(
RKEY) + has_any(
RKCL) + has_any(
RVAL) + has_any(
RNXT) + has_any(
QMRK)));
5231 _handle_flow_skip_whitespace();
5232 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// ---- RKEY: a key is expected
5238 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
5239 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
5240 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
5241 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
5242 const char first = rem.str[0];
5243 _c4dbgpf(
"mapflow[RKEY]: '{}'", first);
5247 _c4dbgp(
"mapflow[RKEY]: scanning single-quoted scalar");
5248 sc = _scan_scalar_squot();
5249 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5250 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
5253 else if(first ==
'"')
5255 _c4dbgp(
"mapflow[RKEY]: scanning double-quoted scalar");
5256 sc = _scan_scalar_dquot();
5257 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5258 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5262 else if(_scan_scalar_plain_map_flow(&sc))
5264 _c4dbgp(
"mapflow[RKEY]: plain scalar");
5265 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5266 m_evt_handler->set_key_scalar_plain(maybe_filtered);
5269 else if(first ==
'?')
5271 _c4dbgp(
"mapflow[RKEY]: explicit key");
5272 _line_progressed(1);
5274 _maybe_skip_whitespace_tokens();
5276 else if(first ==
':')
5278 _c4dbgp(
"mapflow[RKEY]: setting empty key");
5279 m_evt_handler->set_key_scalar_plain_empty();
5281 _line_progressed(1);
5282 _maybe_skip_whitespace_tokens();
5284 else if(first ==
',')
5286 _c4dbgp(
"mapflow[RKEY]: empty key+val!");
5287 m_evt_handler->set_key_scalar_plain_empty();
5288 m_evt_handler->set_val_scalar_plain_empty();
5292 else if(first ==
'}')
5294 _c4dbgp(
"mapflow[RKEY]: end!");
5295 m_evt_handler->end_map();
5296 _line_progressed(1);
5297 goto mapflow_finish;
5299 else if(first ==
'&')
5301 csubstr anchor = _scan_anchor();
5302 _c4dbgpf(
"mapflow[RKEY]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
5303 m_evt_handler->set_key_anchor(anchor);
5305 else if(first ==
'*')
5307 csubstr ref = _scan_ref_map();
5308 _c4dbgpf(
"mapflow[RKEY]: key ref! [{}]~~~{}~~~", ref.len, ref);
5309 m_evt_handler->set_key_ref(ref);
// a flow sequence used as the key
5312 else if(first ==
'[')
5317 _c4dbgp(
"mapflow[RKEY]: start child seqflow (!)");
5319 m_evt_handler->begin_seq_key_flow();
5321 _set_indentation(m_evt_handler->m_parent->indref);
5322 _line_progressed(1);
5323 goto mapflow_finish;
// a flow map used as the key
5325 else if(first ==
'{')
5330 _c4dbgp(
"mapflow[RKEY]: start child mapflow (!)");
5332 m_evt_handler->begin_map_key_flow();
5334 _set_indentation(m_evt_handler->m_parent->indref);
5335 _line_progressed(1);
5338 else if(first ==
'!')
5340 csubstr tag = _scan_tag();
5341 _c4dbgpf(
"mapflow[RKEY]: tag! [{}]~~~{}~~~", tag.len, tag);
5343 m_evt_handler->set_key_tag(tag);
5347 _c4err(
"parse error");
// ---- RKCL: the colon after the key is expected
5350 else if(has_any(
RKCL))
5352 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
5353 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
5354 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
5355 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
5356 const char first = rem.str[0];
5357 _c4dbgpf(
"mapflow[RKCL]: '{}'", first);
5360 _c4dbgp(
"mapflow[RKCL]: found the colon");
5362 _line_progressed(1);
5364 else if(first ==
'}')
5366 _c4dbgp(
"mapflow[RKCL]: end with missing val!");
5368 m_evt_handler->set_val_scalar_plain_empty();
5369 m_evt_handler->end_map();
5370 _line_progressed(1);
5371 goto mapflow_finish;
5373 else if(first ==
',')
5375 _c4dbgp(
"mapflow[RKCL]: got comma. val is missing");
5376 m_evt_handler->set_val_scalar_plain_empty();
5377 m_evt_handler->add_sibling();
5379 _line_progressed(1);
5383 _c4err(
"parse error");
// ---- RVAL: a value is expected
5386 else if(has_any(
RVAL))
5388 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
5389 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
5390 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
5391 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
5392 const char first = rem.str[0];
5393 _c4dbgpf(
"mapflow[RVAL]: '{}'", first);
5397 _c4dbgp(
"mapflow[RVAL]: scanning single-quoted scalar");
5398 sc = _scan_scalar_squot();
5399 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5400 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
5403 else if(first ==
'"')
5405 _c4dbgp(
"mapflow[RVAL]: scanning double-quoted scalar");
5406 sc = _scan_scalar_dquot();
5407 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5408 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5412 else if(_scan_scalar_plain_map_flow(&sc))
5414 _c4dbgp(
"mapflow[RVAL]: plain scalar.");
5415 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5416 m_evt_handler->set_val_scalar_plain(maybe_filtered);
5419 else if(first ==
'[')
5421 _c4dbgp(
"mapflow[RVAL]: start val seqflow");
5423 m_evt_handler->begin_seq_val_flow();
5424 _set_indentation(m_evt_handler->m_parent->indref);
5426 _line_progressed(1);
5427 goto mapflow_finish;
5429 else if(first ==
'{')
5431 _c4dbgp(
"mapflow[RVAL]: start val mapflow");
5433 m_evt_handler->begin_map_val_flow();
5434 _set_indentation(m_evt_handler->m_parent->indref);
5436 _line_progressed(1);
5439 else if(first ==
'}')
5441 _c4dbgp(
"mapflow[RVAL]: end!");
5442 m_evt_handler->set_val_scalar_plain_empty();
5443 m_evt_handler->end_map();
5444 _line_progressed(1);
5445 goto mapflow_finish;
5447 else if(first ==
',')
5449 _c4dbgp(
"mapflow[RVAL]: empty val!");
5450 m_evt_handler->set_val_scalar_plain_empty();
// NOTE(review): the two debug labels below say "key ref" / "key anchor"
// although the calls set a val ref / val anchor.
5454 else if(first ==
'*')
5456 csubstr ref = _scan_ref_map();
5457 _c4dbgpf(
"mapflow[RVAL]: key ref! [{}]~~~{}~~~", ref.len, ref);
5458 m_evt_handler->set_val_ref(ref);
5461 else if(first ==
'&')
5463 csubstr anchor = _scan_anchor();
5464 _c4dbgpf(
"mapflow[RVAL]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
5465 m_evt_handler->set_val_anchor(anchor);
5467 else if(first ==
'!')
5469 csubstr tag = _scan_tag();
5470 _c4dbgpf(
"mapflow[RVAL]: tag! [{}]~~~{}~~~", tag.len, tag);
5472 m_evt_handler->set_val_tag(tag);
5476 _c4err(
"parse error");
// ---- RNXT: a separator or the end of the map is expected
5479 else if(has_any(
RNXT))
5481 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
5482 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
5483 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
5484 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
5485 _c4dbgpf(
"mapflow[RNXT]: '{}'", rem.str[0]);
5486 if(rem.begins_with(
','))
5488 _c4dbgp(
"mapflow[RNXT]: expect next keyval");
5489 m_evt_handler->add_sibling();
5491 _line_progressed(1);
5493 else if(rem.begins_with(
'}'))
5495 _c4dbgp(
"mapflow[RNXT]: end!");
5496 m_evt_handler->end_map();
5497 _line_progressed(1);
5498 goto mapflow_finish;
5502 _c4err(
"parse error");
// ---- QMRK: the key of an explicit ('?') entry is expected
5505 else if(has_any(
QMRK))
5507 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
5508 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
5509 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
5510 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
5511 const char first = rem.str[0];
5512 _c4dbgpf(
"mapflow[QMRK]: '{}'", first);
5516 _c4dbgp(
"mapflow[QMRK]: scanning single-quoted scalar");
5517 sc = _scan_scalar_squot();
5518 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5519 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
5522 else if(first ==
'"')
5524 _c4dbgp(
"mapflow[QMRK]: scanning double-quoted scalar");
5525 sc = _scan_scalar_dquot();
5526 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5527 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5531 else if(_scan_scalar_plain_map_flow(&sc))
5533 _c4dbgp(
"mapflow[QMRK]: plain scalar");
5534 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5535 m_evt_handler->set_key_scalar_plain(maybe_filtered);
5538 else if(first ==
':')
5540 _c4dbgp(
"mapflow[QMRK]: setting empty key");
5541 m_evt_handler->set_key_scalar_plain_empty();
5543 _line_progressed(1);
5544 _maybe_skip_whitespace_tokens();
5546 else if(first ==
'}')
5548 _c4dbgp(
"mapflow[QMRK]: end!");
5549 m_evt_handler->set_key_scalar_plain_empty();
5550 m_evt_handler->set_val_scalar_plain_empty();
5551 m_evt_handler->end_map();
5552 _line_progressed(1);
5553 goto mapflow_finish;
5555 else if(first ==
',')
5557 _c4dbgp(
"mapflow[QMRK]: empty key+val!");
5558 m_evt_handler->set_key_scalar_plain_empty();
5559 m_evt_handler->set_val_scalar_plain_empty();
5562 else if(first ==
'&')
5564 csubstr anchor = _scan_anchor();
5565 _c4dbgpf(
"mapflow[QMRK]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
5566 m_evt_handler->set_key_anchor(anchor);
5568 else if(first ==
'*')
5570 csubstr ref = _scan_ref_map();
5571 _c4dbgpf(
"mapflow[QMRK]: key ref! [{}]~~~{}~~~", ref.len, ref);
5572 m_evt_handler->set_key_ref(ref);
5575 else if(first ==
'[')
5580 _c4dbgp(
"mapflow[QMRK]: start child seqflow (!)");
5582 m_evt_handler->begin_seq_key_flow();
5584 _set_indentation(m_evt_handler->m_parent->indref);
5585 _line_progressed(1);
5586 goto mapflow_finish;
5588 else if(first ==
'{')
5593 _c4dbgp(
"mapflow[QMRK]: start child mapflow (!)");
5595 m_evt_handler->begin_map_key_flow();
5596 _set_indentation(m_evt_handler->m_parent->indref);
5598 _line_progressed(1);
5601 else if(first ==
'!')
5603 csubstr tag = _scan_tag();
5604 _c4dbgpf(
"mapflow[QMRK]: tag! [{}]~~~{}~~~", tag.len, tag);
5606 m_evt_handler->set_key_tag(tag);
5610 _c4err(
"parse error");
// ---- repeat: when the line is finished, the file is checked for more
// input; the error below is raised from the branch where it is not
// (presumably: end of file before the closing '}' -- confirm)
5615 _c4dbgt(
"mapflow: go again", 0);
5616 if(_finished_line())
5618 if(C4_LIKELY(!_finished_file()))
5626 _c4err(
"missing terminating }");
5632 _c4dbgp(
"mapflow: finish");
// Handle one step of parsing while inside a block sequence (RSEQ|BLCK).
//
// Exactly one of RVAL or RNXT must be set on entry (asserted below):
//  - RVAL: dispatch on the first character of the unread part of the line
//    and emit the matching event (scalar, nested container, anchor/tag
//    annotation, reference, explicit key, ...).
//  - RNXT: look for the next '-' entry, a document marker, or the end of
//    the sequence.
// Input that matches none of the handled cases is reported with
// _c4err("parse error").
5638 template<
class EventHandler>
5639 void ParseEngine<EventHandler>::_handle_seq_block()
5642 _c4dbgpf(
"handle2_seq_block: seq_id={} node_id={} level={} indent={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
// State invariants: block sequence, with exactly one of RVAL / RNXT set.
5644 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RSEQ));
5645 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
BLCK));
5646 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL|
RNXT));
5647 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(
RVAL) + has_any(
RNXT)));
// Give _maybe_skip_comment() a chance to run, then take the unread part
// (rem) of the current line.
5649 _maybe_skip_comment();
5650 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// ---- RVAL: a value for the current sequence entry is expected ----
5656 _c4dbgpf(
"seqblck[RVAL]: col={}", m_evt_handler->m_curr->pos.col);
5657 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
// At the beginning of a line, compare the line's indentation against this
// state's indref before looking at the content.
5658 if(m_evt_handler->m_curr->at_line_beginning())
5660 _c4dbgpf(
"seqblck[RVAL]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
// indentation_ge(): consume the indentation and refresh rem.
5661 if(m_evt_handler->m_curr->indentation_ge())
5663 _c4dbgpf(
"seqblck[RVAL]: skip {} from indentation", m_evt_handler->m_curr->line_contents.indentation);
5664 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
5665 rem = m_evt_handler->m_curr->line_contents.rem;
// indentation_lt(): hand over to the block-seq indentation pop and leave.
5669 else if(m_evt_handler->m_curr->indentation_lt())
5671 _c4dbgp(
"seqblck[RVAL]: smaller indentation!");
5672 _handle_indentation_pop_from_block_seq();
5673 goto seqblck_finish;
// indentation == npos is treated as an empty line: consume all of it.
5675 else if(m_evt_handler->m_curr->line_contents.indentation ==
npos)
5677 _c4dbgp(
"seqblck[RVAL]: empty line!");
5678 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
// The following is guarded by RYML_NO_COVERAGE__TO_BE_DELETED.
// NOTE(review): the end of this conditional section is not visible here.
5682 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
5690 if(rem.str[0] ==
' ')
5692 if(_handle_indentation_from_annotations())
5694 _c4dbgp(
"seqblck[RVAL]: annotations!");
5695 rem = m_evt_handler->m_curr->line_contents.rem;
// rem must be non-empty before dispatching on its first character.
5702 _RYML_CB_ASSERT(callbacks(), rem.len);
5703 _c4dbgpf(
"seqblck[RVAL]: '{}' node_id={}", rem.str[0], m_evt_handler->m_curr->node_id);
5704 const char first = rem.str[0];
// Line and column where this token starts; passed to the annotation
// helpers and compared with indref to detect nested containers.
5705 const size_t startline = m_evt_handler->m_curr->pos.line;
5708 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
// Single-quoted scalar: set as val, unless a ':' follows, in which case a
// child block map is started and the scalar becomes its first key.
5712 _c4dbgp(
"seqblck[RVAL]: single-quoted scalar");
5713 sc = _scan_scalar_squot();
5714 if(!_maybe_scan_following_colon())
5716 _c4dbgp(
"seqblck[RVAL]: set as val");
5717 _handle_annotations_before_blck_val_scalar();
5718 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5719 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
5724 _c4dbgp(
"seqblck[RVAL]: start mapblck, set scalar as key");
5726 _handle_annotations_before_start_mapblck(startline);
5728 m_evt_handler->begin_map_val_block();
5729 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5730 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5731 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
5733 _maybe_skip_whitespace_tokens();
5734 goto seqblck_finish;
// Double-quoted scalar: same two outcomes as the single-quoted case.
5737 else if(first ==
'"')
5739 _c4dbgp(
"seqblck[RVAL]: double-quoted scalar");
5740 sc = _scan_scalar_dquot();
5741 if(!_maybe_scan_following_colon())
5743 _c4dbgp(
"seqblck[RVAL]: set as val");
5744 _handle_annotations_before_blck_val_scalar();
5745 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5746 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5751 _c4dbgp(
"seqblck[RVAL]: start mapblck, set scalar as key");
5753 _handle_annotations_before_start_mapblck(startline);
5755 m_evt_handler->begin_map_val_block();
5756 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5757 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5758 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5760 _maybe_skip_whitespace_tokens();
5761 goto seqblck_finish;
// '|': block-literal scalar, always set as val.
5767 else if(first ==
'|')
5769 _c4dbgp(
"seqblck[RVAL]: block-literal scalar");
5771 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
5772 _handle_annotations_before_blck_val_scalar();
5773 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
5774 m_evt_handler->set_val_scalar_literal(maybe_filtered);
// '>': block-folded scalar, always set as val.
5777 else if(first ==
'>')
5779 _c4dbgp(
"seqblck[RVAL]: block-folded scalar");
5781 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
5782 _handle_annotations_before_blck_val_scalar();
5783 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
5784 m_evt_handler->set_val_scalar_folded(maybe_filtered);
// Plain scalar: three outcomes depending on a following ':' and on where
// the scalar started.
5787 else if(_scan_scalar_plain_seq_blck(&sc))
5789 _c4dbgp(
"seqblck[RVAL]: plain scalar.");
// No ':' follows: the scalar is the val of this entry.
5790 if(!_maybe_scan_following_colon())
5792 _c4dbgp(
"seqblck[RVAL]: set as val");
5793 _handle_annotations_before_blck_val_scalar();
5794 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5795 m_evt_handler->set_val_scalar_plain(maybe_filtered);
// ':' follows and the scalar started to the right of indref: it is the
// first key of a child block map.
5800 if(startindent > m_evt_handler->m_curr->indref)
5802 _c4dbgp(
"seqblck[RVAL]: start mapblck, set scalar as key");
5804 _handle_annotations_before_start_mapblck(startline);
5806 m_evt_handler->begin_map_val_block();
5807 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5808 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5809 m_evt_handler->set_key_scalar_plain(maybe_filtered);
5811 _maybe_skip_whitespace_tokens();
5812 goto seqblck_finish;
// ':' follows and the scalar starts at the parent's indref: the pending
// val is empty, the (indentless) seq ends, and the scalar is set as a key
// after add_sibling().
// NOTE(review): this test uses has_any(RMAP|BLCK, parent), while the
// indentless-seq check in the RNXT part of this function uses
// has_all(RMAP|BLCK, parent) -- confirm which one is intended.
5814 else if(m_evt_handler->m_parent && m_evt_handler->m_parent->indref == startindent && has_any(
RMAP|
BLCK, m_evt_handler->m_parent))
5816 _c4dbgp(
"seqblck[RVAL]: empty val + end indentless seq + set key");
5817 m_evt_handler->set_val_scalar_plain_empty();
5818 m_evt_handler->end_seq();
5819 m_evt_handler->add_sibling();
5820 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5821 m_evt_handler->set_key_scalar_plain(maybe_filtered);
5823 _maybe_skip_whitespace_tokens();
5824 goto seqblck_finish;
5828 _c4err(
"parse error");
// '[': start a flow sequence as val; its indentation is set to the
// parent's indref + 1.
5832 else if(first ==
'[')
5834 _c4dbgp(
"seqblck[RVAL]: start child seqflow");
5836 _handle_annotations_before_blck_val_scalar();
5837 m_evt_handler->begin_seq_val_flow();
5839 _line_progressed(1);
5840 _set_indentation(m_evt_handler->m_parent->indref + 1u);
5841 goto seqblck_finish;
// '{': start a flow map as val; same indentation rule as for '['.
5843 else if(first ==
'{')
5845 _c4dbgp(
"seqblck[RVAL]: start child mapflow");
5847 _handle_annotations_before_blck_val_scalar();
5848 m_evt_handler->begin_map_val_flow();
5850 _line_progressed(1);
5851 _set_indentation(m_evt_handler->m_parent->indref + 1u);
5852 goto seqblck_finish;
// '-': at the same column as indref the previous val was empty and a new
// sibling starts; otherwise (asserted to be further right) a nested block
// sequence starts.
5854 else if(first ==
'-')
5856 if(startindent == m_evt_handler->m_curr->indref)
5858 _c4dbgp(
"seqblck[RVAL]: prev val was empty");
5859 _handle_annotations_before_blck_val_scalar();
5860 m_evt_handler->set_val_scalar_plain_empty();
5862 m_evt_handler->add_sibling();
5866 _c4dbgp(
"seqblck[RVAL]: start child seqblck");
5867 _RYML_CB_ASSERT(this->callbacks(), startindent > m_evt_handler->m_curr->indref);
5869 _handle_annotations_before_blck_val_scalar();
5870 m_evt_handler->begin_seq_val_block();
5872 _save_indentation();
5875 _line_progressed(1);
5876 _maybe_skip_whitespace_tokens();
// ':' without a preceding scalar: child block map whose first key is empty.
5878 else if(first ==
':')
5880 _c4dbgp(
"seqblck[RVAL]: start child mapblck with empty key");
5882 _handle_annotations_before_start_mapblck(startline);
5884 m_evt_handler->begin_map_val_block();
5885 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5886 m_evt_handler->set_key_scalar_plain_empty();
5888 _line_progressed(1);
5889 _maybe_skip_whitespace_tokens();
5890 goto seqblck_finish;
// '&': the anchor is not applied here; it is recorded in m_pending_anchors
// together with the column and line where it started.
5892 else if(first ==
'&')
5894 const csubstr anchor = _scan_anchor();
5895 _c4dbgpf(
"seqblck[RVAL]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
5898 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
// '*': reference. Set as val, unless a ':' follows, in which case it is
// the key of a new child block map.
5900 else if(first ==
'*')
5902 csubstr ref = _scan_ref_seq();
5903 _c4dbgpf(
"seqblck[RVAL]: ref! [{}]~~~{}~~~", ref.len, ref);
5904 if(!_maybe_scan_following_colon())
5906 _c4dbgp(
"seqblck[RVAL]: set ref as val!");
5907 _handle_annotations_before_blck_val_scalar();
5908 m_evt_handler->set_val_ref(ref);
5913 _c4dbgp(
"seqblck[RVAL]: ref is key of map");
5915 _handle_annotations_before_start_mapblck(startline);
5916 m_evt_handler->begin_map_val_block();
5917 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5918 m_evt_handler->set_key_ref(ref);
5920 _set_indentation(startindent);
5921 _maybe_skip_whitespace_tokens();
5922 goto seqblck_finish;
// '!': the tag is recorded in m_pending_tags, like anchors above.
5925 else if(first ==
'!')
5927 csubstr tag = _scan_tag();
5928 _c4dbgpf(
"seqblck[RVAL]: val tag! [{}]~~~{}~~~", tag.len, tag);
5931 _add_annotation(&m_pending_tags, tag, startindent, startline);
// '?': explicit key. Start a child block map and remember (in
// m_was_inside_qmrk) that a '?' was seen.
5933 else if(first ==
'?')
5935 _c4dbgp(
"seqblck[RVAL]: start child mapblck, explicit key");
5937 m_was_inside_qmrk =
true;
5938 m_evt_handler->begin_map_val_block();
5940 _save_indentation();
5941 _line_progressed(1);
5942 _maybe_skip_whitespace_tokens();
5943 goto seqblck_finish;
5947 _c4err(
"parse error");
// ---- RNXT: the next entry, or the end of the sequence, is expected ----
5952 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT));
5953 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
5957 _c4dbgpf(
"seqblck[RNXT]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
5958 if(C4_LIKELY(_at_line_begin()))
5960 _c4dbgp(
"seqblck[RNXT]: at line begin");
// indentation_ge(): consume indref characters plus any whitespace tokens.
5961 if(m_evt_handler->m_curr->indentation_ge())
5963 _c4dbgpf(
"seqblck[RNXT]: skip {} from indref", m_evt_handler->m_curr->indref);
5964 _line_progressed(m_evt_handler->m_curr->indref);
5965 _maybe_skip_whitespace_tokens();
5966 rem = m_evt_handler->m_curr->line_contents.rem;
// indentation_lt(): run the block-seq indentation pop. Afterwards either
// the state is still a seq in RNXT (consume the indentation and go on) or
// it is not a block seq any more (leave).
5970 else if(m_evt_handler->m_curr->indentation_lt())
5972 _c4dbgp(
"seqblck[RNXT]: smaller indentation!");
5973 _handle_indentation_pop_from_block_seq();
5976 _c4dbgp(
"seqblck[RNXT]: still seqblck!");
5977 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT));
5978 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
5979 rem = m_evt_handler->m_curr->line_contents.rem;
5985 _c4dbgp(
"seqblck[RNXT]: no longer seqblck!");
5986 goto seqblck_finish;
// Blank line (indentation == npos): consume it entirely. The trace prints
// the length of the remainder, matching its "len={}" placeholder.
5989 else if(m_evt_handler->m_curr->line_contents.indentation ==
npos)
5991 _c4dbgpf(
"seqblck[RNXT]: blank line, len={}", m_evt_handler->m_curr->line_contents.rem.len);
5992 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
5993 rem = m_evt_handler->m_curr->line_contents.rem;
// Not at the beginning of a line: only a space or tab may come next.
6000 _c4dbgp(
"seqblck[RNXT]: NOT at line begin");
6001 if(!rem.begins_with_any(
" \t"))
6003 _c4err(
"parse error");
6008 rem = m_evt_handler->m_curr->line_contents.rem;
6011 _c4dbgp(
"seqblck[RNXT]: again");
// Dispatch on the first character of what is left on the line.
6019 const char first = rem.str[0];
6020 _c4dbgpf(
"seqblck[RNXT]: '{}' node_id={}", first, m_evt_handler->m_curr->node_id);
// Treated as the next entry when indref > 0, or the line is indented, or
// rem is not a doc-begin token; otherwise a new document is started.
6023 if(m_evt_handler->m_curr->indref > 0 || m_evt_handler->m_curr->line_contents.indentation > 0 || !_is_doc_begin_token(rem))
6025 _c4dbgp(
"seqblck[RNXT]: expect next val");
6027 m_evt_handler->add_sibling();
6028 _line_progressed(1);
6029 _maybe_skip_whitespace_tokens();
6033 _c4dbgp(
"seqblck[RNXT]: start doc");
6034 _start_doc_suddenly();
6035 _line_progressed(3);
6036 _maybe_skip_whitespace_tokens();
6037 goto seqblck_finish;
// ':': only accepted when the parent state has RMAP set; then this seq was
// the '?' key of the parent map and is ended here.
6040 else if(first ==
':')
6046 auto const *C4_RESTRICT prev_state = m_evt_handler->m_parent;
6047 if(C4_LIKELY(prev_state && (prev_state->flags &
RMAP)))
6049 _c4dbgp(
"seqblck[RNXT]: actually this seq was '?' key of parent map");
6050 m_evt_handler->end_seq();
6051 goto seqblck_finish;
6055 _c4err(
"parse error");
// '.': accepted only as "..." alone or "... " followed by more content;
// ends the document.
6058 else if(first ==
'.')
6060 _c4dbgp(
"seqblck[RNXT]: maybe doc?");
6061 csubstr rs = rem.sub(1);
6062 if(rs ==
".." || rs.begins_with(
".. "))
6064 _c4dbgp(
"seqblck[RNXT]: end+start doc");
6065 _end_doc_suddenly();
6066 _line_progressed(3);
6067 _maybe_skip_whitespace_tokens();
6068 goto seqblck_finish;
6072 _c4err(
"parse error");
// Any other character: print every state on the stack, then check for the
// end of an indentless seq.
6081 for(
auto const& s : m_evt_handler->m_stack)
6083 _dbg_printf(
"state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(flagbuf_, s.flags));
// Parent is a block map with the same indref as this seq: pop to the
// parent (which must then be RMAP|BLCK) and add a sibling there.
6086 if(m_evt_handler->m_parent && has_all(
RMAP|
BLCK, m_evt_handler->m_parent) && m_evt_handler->m_curr->indref == m_evt_handler->m_parent->indref)
6088 _c4dbgpf(
"seqblck[RNXT]: end indentless seq, go to parent={}. node={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id);
6089 _RYML_CB_ASSERT(this->callbacks(), m_evt_handler->m_curr != m_evt_handler->m_parent);
6090 _handle_indentation_pop(m_evt_handler->m_parent);
6091 _RYML_CB_ASSERT(this->callbacks(), has_all(
RMAP|
BLCK));
6092 m_evt_handler->add_sibling();
6094 goto seqblck_finish;
6098 _c4err(
"parse error");
// End of one pass: check for the end of the line and, if the file is also
// finished, leave through seqblck_finish.
6104 _c4dbgt(
"seqblck: go again", 0);
6105 if(_finished_line())
6109 if(_finished_file())
6111 _c4dbgp(
"seqblck: finish!");
6113 goto seqblck_finish;
6120 _c4dbgp(
"seqblck: finish");
6126 template<
class EventHandler>
6127 void ParseEngine<EventHandler>::_handle_map_block()
6130 _c4dbgpf(
"handle2_map_block: map_id={} node_id={} level={} indref={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
6133 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP));
6134 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
BLCK));
6136 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(
RKEY) + has_any(
RKCL) + has_any(
RVAL) + has_any(
RNXT) + has_any(
QMRK)));
6138 _maybe_skip_comment();
6139 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
6145 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
6146 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
6147 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
6148 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
6152 if(m_evt_handler->m_curr->at_line_beginning())
6154 if(m_evt_handler->m_curr->indentation_eq())
6156 _c4dbgpf(
"mapblck[RKEY]: skip {} from indref", m_evt_handler->m_curr->indref);
6157 _line_progressed(m_evt_handler->m_curr->indref);
6158 rem = m_evt_handler->m_curr->line_contents.rem;
6162 else if(m_evt_handler->m_curr->indentation_lt())
6164 _c4dbgp(
"mapblck[RKEY]: smaller indentation!");
6165 _handle_indentation_pop_from_block_map();
6166 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6169 _c4dbgp(
"mapblck[RKEY]: still mapblck!");
6170 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY));
6171 rem = m_evt_handler->m_curr->line_contents.rem;
6177 _c4dbgp(
"mapblck[RKEY]: no longer mapblck!");
6178 goto mapblck_finish;
6183 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_gt());
6184 _c4err(
"invalid indentation");
6190 const char first = rem.str[0];
6191 const size_t startline = m_evt_handler->m_curr->pos.line;
6192 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
6193 _c4dbgpf(
"mapblck[RKEY]: '{}'", first);
6197 _c4dbgp(
"mapblck[RKEY]: scanning single-quoted scalar");
6198 sc = _scan_scalar_squot();
6199 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
6200 _handle_annotations_before_blck_key_scalar();
6201 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6203 if(!_maybe_scan_following_colon())
6204 _c4err(
"could not find ':' colon after key");
6205 _maybe_skip_whitespace_tokens();
6207 else if(first ==
'"')
6209 _c4dbgp(
"mapblck[RKEY]: scanning double-quoted scalar");
6210 sc = _scan_scalar_dquot();
6211 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
6212 _handle_annotations_before_blck_key_scalar();
6213 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6215 if(!_maybe_scan_following_colon())
6216 _c4err(
"could not find ':' colon after key");
6217 _maybe_skip_whitespace_tokens();
6221 else if(C4_UNLIKELY(first ==
'|'))
6223 _c4err(
"block literal keys must be enclosed in '?'");
6225 else if(C4_UNLIKELY(first ==
'>'))
6227 _c4err(
"block literal keys must be enclosed in '?'");
6229 else if(_scan_scalar_plain_map_blck(&sc))
6231 _c4dbgp(
"mapblck[RKEY]: plain scalar");
6232 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
6233 _handle_annotations_before_blck_key_scalar();
6234 m_evt_handler->set_key_scalar_plain(maybe_filtered);
6236 if(!_maybe_scan_following_colon())
6237 _c4err(
"could not find ':' colon after key");
6238 _maybe_skip_whitespace_tokens();
6240 else if(first ==
'?')
6242 _c4dbgp(
"mapblck[RKEY]: key token!");
6244 _line_progressed(1);
6245 _maybe_skip_whitespace_tokens();
6246 m_was_inside_qmrk =
true;
6249 else if(first ==
':')
6251 _c4dbgp(
"mapblck[RKEY]: setting empty key");
6252 _handle_annotations_before_blck_key_scalar();
6253 m_evt_handler->set_key_scalar_plain_empty();
6255 _line_progressed(1);
6256 _maybe_skip_whitespace_tokens();
6258 else if(first ==
'*')
6260 csubstr ref = _scan_ref_map();
6261 _c4dbgpf(
"mapblck[RKEY]: key ref! [{}]~~~{}~~~", ref.len, ref);
6262 _handle_annotations_before_blck_key_scalar();
6263 m_evt_handler->set_key_ref(ref);
6265 if(!_maybe_scan_following_colon())
6266 _c4err(
"could not find ':' colon after key");
6267 _maybe_skip_whitespace_tokens();
6269 else if(first ==
'&')
6271 csubstr anchor = _scan_anchor();
6272 _c4dbgpf(
"mapblck[RKEY]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
6273 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
6275 else if(first ==
'!')
6277 csubstr tag = _scan_tag();
6278 _c4dbgpf(
"mapblck[RKEY]: key tag! [{}]~~~{}~~~", tag.len, tag);
6279 _add_annotation(&m_pending_tags, tag, startindent, startline);
6281 else if(first ==
'[')
6286 _c4dbgp(
"mapblck[RKEY]: start child seqflow (!)");
6288 _handle_annotations_before_blck_key_scalar();
6289 m_evt_handler->begin_seq_key_flow();
6291 _line_progressed(1);
6292 _set_indentation(startindent);
6293 goto mapblck_finish;
6295 else if(first ==
'{')
6300 _c4dbgp(
"mapblck[RKEY]: start child mapflow (!)");
6302 _handle_annotations_before_blck_key_scalar();
6303 m_evt_handler->begin_map_key_flow();
6305 _line_progressed(1);
6306 _set_indentation(startindent);
6307 goto mapblck_finish;
6309 else if(first ==
'-')
6311 _c4dbgp(
"mapblck[RKEY]: maybe doc?");
6312 if(m_evt_handler->m_curr->line_contents.indentation == 0 && _is_doc_begin_token(rem))
6314 _c4dbgp(
"mapblck[RKEY]: end+start doc");
6315 _start_doc_suddenly();
6316 _line_progressed(3);
6317 _maybe_skip_whitespace_tokens();
6318 goto mapblck_finish;
6322 _c4err(
"parse error");
6325 else if(first ==
'.')
6327 _c4dbgp(
"mapblck[RKEY]: maybe end doc?");
6328 if(m_evt_handler->m_curr->line_contents.indentation == 0 && _is_doc_end_token(rem))
6330 _c4dbgp(
"mapblck[RKEY]: end doc");
6331 _end_doc_suddenly();
6332 _line_progressed(3);
6333 _maybe_skip_whitespace_tokens();
6334 goto mapblck_finish;
6338 _c4err(
"parse error");
6342 else if(first ==
'\t')
6344 _c4dbgp(
"mapblck[RKEY]: skip tabs");
6345 _maybe_skipchars(
'\t');
6349 _c4err(
"parse error");
6352 else if(has_any(
RKCL))
6354 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
6355 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
6356 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
6357 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
6361 if(m_evt_handler->m_curr->at_line_beginning())
6363 if(m_evt_handler->m_curr->indentation_eq())
6365 _c4dbgpf(
"mapblck[RKCL]: skip {} from indref", m_evt_handler->m_curr->indref);
6366 _line_progressed(m_evt_handler->m_curr->indref);
6367 rem = m_evt_handler->m_curr->line_contents.rem;
6371 else if(C4_UNLIKELY(m_evt_handler->m_curr->indentation_lt()))
6373 _c4err(
"invalid indentation");
6376 const char first = rem.str[0];
6377 _c4dbgpf(
"mapblck[RKCL]: '{}'", first);
6380 _c4dbgp(
"mapblck[RKCL]: found the colon");
6382 _line_progressed(1);
6383 _maybe_skip_whitespace_tokens();
6385 else if(first ==
'?')
6387 _c4dbgp(
"mapblck[RKCL]: got '?'. val was empty");
6388 _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_was_inside_qmrk);
6389 m_evt_handler->set_val_scalar_plain_empty();
6390 m_evt_handler->add_sibling();
6392 _line_progressed(1);
6393 _maybe_skip_whitespace_tokens();
6395 else if(first ==
'-')
6397 if(m_evt_handler->m_curr->indref == 0 || m_evt_handler->m_curr->line_contents.indentation == 0 || _is_doc_begin_token(rem))
6399 _c4dbgp(
"mapblck[RKCL]: end+start doc");
6400 _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, _is_doc_begin_token(rem));
6401 _start_doc_suddenly();
6402 _line_progressed(3);
6403 _maybe_skip_whitespace_tokens();
6404 goto mapblck_finish;
6408 _c4err(
"parse error");
6411 else if(first ==
'.')
6413 _c4dbgp(
"mapblck[RKCL]: maybe end doc?");
6414 csubstr rs = rem.sub(1);
6415 if(rs ==
".." || rs.begins_with(
".. "))
6417 _c4dbgp(
"mapblck[RKCL]: end+start doc");
6418 _end_doc_suddenly();
6419 _line_progressed(3);
6420 goto mapblck_finish;
6424 _c4err(
"parse error");
6427 else if(m_was_inside_qmrk)
6429 _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_eq());
6430 _c4dbgp(
"mapblck[RKCL]: missing :");
6431 m_evt_handler->set_val_scalar_plain_empty();
6432 m_evt_handler->add_sibling();
6433 m_was_inside_qmrk =
false;
6438 _c4err(
"parse error");
6441 else if(has_any(
RVAL))
6443 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
6444 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
6445 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
6446 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
6450 if(m_evt_handler->m_curr->at_line_beginning())
6452 _c4dbgpf(
"mapblck[RVAL]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
6453 m_evt_handler->m_curr->more_indented =
false;
6454 if(m_evt_handler->m_curr->indref ==
npos)
6456 _c4dbgpf(
"mapblck[RVAL]: setting indentation={}", m_evt_handler->m_parent->indref);
6457 _set_indentation(m_evt_handler->m_curr->line_contents.indentation);
6458 _line_progressed(m_evt_handler->m_curr->indref);
6459 rem = m_evt_handler->m_curr->line_contents.rem;
6463 else if(m_evt_handler->m_curr->indentation_eq())
6465 _c4dbgp(
"mapblck[RVAL]: skip indentation!");
6466 _line_progressed(m_evt_handler->m_curr->indref);
6467 rem = m_evt_handler->m_curr->line_contents.rem;
6495 else if(m_evt_handler->m_curr->indentation_gt())
6497 _c4dbgp(
"mapblck[RVAL]: more indented!");
6498 m_evt_handler->m_curr->more_indented =
true;
6499 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6500 rem = m_evt_handler->m_curr->line_contents.rem;
6504 else if(m_evt_handler->m_curr->indentation_lt())
6506 _c4dbgp(
"mapblck[RVAL]: smaller indentation!");
6507 _handle_indentation_pop_from_block_map();
6510 _c4dbgp(
"mapblck[RVAL]: still mapblck!");
6511 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6514 _c4dbgp(
"mapblck[RVAL]: speculatively expect next keyval");
6515 m_evt_handler->add_sibling();
6522 _c4dbgp(
"mapblck[RVAL]: no longer mapblck!");
6523 goto mapblck_finish;
6526 else if(m_evt_handler->m_curr->line_contents.indentation ==
npos)
6528 _c4dbgp(
"mapblck[RVAL]: empty line!");
6529 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
6536 const char first = rem.str[0];
6537 const size_t startline = m_evt_handler->m_curr->pos.line;
6538 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
6539 _c4dbgpf(
"mapblck[RVAL]: '{}'", first);
6543 _c4dbgp(
"mapblck[RVAL]: scanning single-quoted scalar");
6544 sc = _scan_scalar_squot();
6545 if(!_maybe_scan_following_colon())
6547 _c4dbgp(
"mapblck[RVAL]: set as val");
6548 _handle_annotations_before_blck_val_scalar();
6549 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
6550 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
6555 if(startindent != m_evt_handler->m_curr->indref)
6557 _c4dbgp(
"mapblck[RVAL]: start new block map, set scalar as key");
6558 _handle_annotations_before_start_mapblck(startline);
6561 m_evt_handler->begin_map_val_block();
6562 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6563 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
6564 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6565 _maybe_skip_whitespace_tokens();
6566 _set_indentation(m_evt_handler->m_curr->line_contents.indentation);
6572 _c4dbgp(
"mapblck[RVAL]: prev val empty+this is a key");
6573 m_evt_handler->set_val_scalar_plain_empty();
6574 m_evt_handler->add_sibling();
6575 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
6576 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6578 _maybe_skip_whitespace_tokens();
6582 else if(first ==
'"')
6584 _c4dbgp(
"mapblck[RVAL]: scanning double-quoted scalar");
6585 sc = _scan_scalar_dquot();
6586 if(!_maybe_scan_following_colon())
6588 _c4dbgp(
"mapblck[RVAL]: set as val");
6589 _handle_annotations_before_blck_val_scalar();
6590 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
6591 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
6596 if(startindent != m_evt_handler->m_curr->indref)
6598 _c4dbgp(
"mapblck[RVAL]: start new block map, set scalar as key");
6599 _handle_annotations_before_start_mapblck(startline);
6602 m_evt_handler->begin_map_val_block();
6603 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6604 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
6605 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6606 _maybe_skip_whitespace_tokens();
6607 _set_indentation(m_evt_handler->m_curr->line_contents.indentation);
6613 _c4dbgp(
"mapblck[RVAL]: prev val empty+this is a key");
6614 m_evt_handler->set_val_scalar_plain_empty();
6615 m_evt_handler->add_sibling();
6616 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
6617 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6619 _maybe_skip_whitespace_tokens();
6625 else if(first ==
'|')
6627 _c4dbgp(
"mapblck[RVAL]: scanning block-literal scalar");
6629 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6630 _handle_annotations_before_blck_val_scalar();
6631 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
6632 m_evt_handler->set_val_scalar_literal(maybe_filtered);
6635 else if(first ==
'>')
6637 _c4dbgp(
"mapblck[RVAL]: scanning block-folded scalar");
6639 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6640 _handle_annotations_before_blck_val_scalar();
6641 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
6642 m_evt_handler->set_val_scalar_folded(maybe_filtered);
6645 else if(_scan_scalar_plain_map_blck(&sc))
6647 _c4dbgp(
"mapblck[RVAL]: plain scalar.");
6648 if(!_maybe_scan_following_colon())
6650 _c4dbgp(
"mapblck[RVAL]: set as val");
6651 _handle_annotations_before_blck_val_scalar();
6652 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
6653 m_evt_handler->set_val_scalar_plain(maybe_filtered);
6658 if(startindent != m_evt_handler->m_curr->indref)
6660 _c4dbgpf(
"mapblck[RVAL]: start new block map, set scalar as key {}", m_evt_handler->m_curr->indref);
6662 _handle_annotations_before_start_mapblck(startline);
6664 m_evt_handler->begin_map_val_block();
6665 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6666 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
6667 m_evt_handler->set_key_scalar_plain(maybe_filtered);
6668 _maybe_skip_whitespace_tokens();
6669 _set_indentation(m_evt_handler->m_curr->line_contents.indentation);
6675 _c4dbgp(
"mapblck[RVAL]: prev val empty+this is a key");
6676 _handle_annotations_before_blck_val_scalar();
6677 m_evt_handler->set_val_scalar_plain_empty();
6678 m_evt_handler->add_sibling();
6679 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
6680 m_evt_handler->set_key_scalar_plain(maybe_filtered);
6682 _maybe_skip_whitespace_tokens();
6686 else if(first ==
'-')
6690 _c4dbgp(
"mapblck[RVAL]: start val seqblck");
6692 _handle_annotations_before_blck_val_scalar();
6693 m_evt_handler->begin_seq_val_block();
6695 _set_indentation(startindent);
6696 _line_progressed(1);
6697 _maybe_skip_whitespace_tokens();
6698 goto mapblck_finish;
6700 else if(m_evt_handler->m_curr->indref == 0 || m_evt_handler->m_curr->line_contents.indentation == 0 || _is_doc_begin_token(rem))
6702 _c4dbgp(
"mapblck[RVAL]: end+start doc");
6703 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, _is_doc_begin_token(rem));
6704 _start_doc_suddenly();
6705 _line_progressed(3);
6706 _maybe_skip_whitespace_tokens();
6707 goto mapblck_finish;
6711 _c4err(
"parse error");
6714 else if(first ==
'[')
6716 _c4dbgp(
"mapblck[RVAL]: start val seqflow");
6718 _handle_annotations_before_blck_val_scalar();
6719 m_evt_handler->begin_seq_val_flow();
6721 _set_indentation(m_evt_handler->m_curr->indref + 1u);
6722 _line_progressed(1);
6723 goto mapblck_finish;
6725 else if(first ==
'{')
6727 _c4dbgp(
"mapblck[RVAL]: start val mapflow");
6729 _handle_annotations_before_blck_val_scalar();
6730 m_evt_handler->begin_map_val_flow();
6732 m_evt_handler->m_curr->scalar_col = m_evt_handler->m_curr->line_contents.indentation;
6733 _set_indentation(m_evt_handler->m_curr->indref + 1u);
6734 _line_progressed(1);
6735 goto mapblck_finish;
6737 else if(first ==
'*')
6739 csubstr ref = _scan_ref_map();
6740 _c4dbgpf(
"mapblck[RVAL]: ref! [{}]~~~{}~~~", ref.len, ref);
6741 if(startindent == m_evt_handler->m_curr->indref)
6743 _c4dbgpf(
"mapblck[RVAL]: same indentation {}", startindent);
6744 m_evt_handler->set_val_ref(ref);
6749 _c4dbgpf(
"mapblck[RVAL]: larger indentation {}>{}", startindent, m_evt_handler->m_curr->indref);
6750 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, startindent > m_evt_handler->m_curr->indref);
6751 if(_maybe_scan_following_colon())
6753 _c4dbgp(
"mapblck[RVAL]: start child map, block");
6755 _handle_annotations_before_blck_val_scalar();
6756 m_evt_handler->begin_map_val_block();
6757 m_evt_handler->set_key_ref(ref);
6758 _set_indentation(startindent);
6764 _c4dbgp(
"mapblck[RVAL]: was val ref");
6765 _handle_annotations_before_blck_val_scalar();
6766 m_evt_handler->set_val_ref(ref);
6770 _maybe_skip_whitespace_tokens();
6772 else if(first ==
'&')
6774 csubstr anchor = _scan_anchor();
6775 _c4dbgpf(
"mapblck[RVAL]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
6776 if(startindent == m_evt_handler->m_curr->indref)
6778 _c4dbgp(
"mapblck[RVAL]: anchor for next key. val is missing!");
6779 m_evt_handler->set_val_scalar_plain_empty();
6780 m_evt_handler->add_sibling();
6785 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
6787 else if(first ==
'!')
6789 csubstr tag = _scan_tag();
6790 _c4dbgpf(
"mapblck[RVAL]: tag! [{}]~~~{}~~~", tag.len, tag);
6791 if(startindent == m_evt_handler->m_curr->indref)
6793 _c4dbgp(
"mapblck[RVAL]: tag for next key. val is missing!");
6794 _handle_annotations_before_blck_val_scalar();
6795 m_evt_handler->set_val_scalar_plain_empty();
6796 m_evt_handler->add_sibling();
6801 _add_annotation(&m_pending_tags, tag, startindent, startline);
6803 else if(first ==
'?')
6805 if(startindent == m_evt_handler->m_curr->indref)
6807 _c4dbgp(
"mapblck[RVAL]: got '?'. val was empty");
6808 _handle_annotations_before_blck_val_scalar();
6809 m_evt_handler->set_val_scalar_plain_empty();
6810 m_evt_handler->add_sibling();
6813 else if(startindent > m_evt_handler->m_curr->indref)
6815 _c4dbgp(
"mapblck[RVAL]: start val mapblck");
6817 _handle_annotations_before_blck_val_scalar();
6818 m_evt_handler->begin_map_val_block();
6820 _set_indentation(startindent);
6824 _c4err(
"parse error");
6826 m_was_inside_qmrk =
true;
6827 _line_progressed(1);
6828 _maybe_skip_whitespace_tokens();
6831 else if(first ==
':')
6833 if(startindent == m_evt_handler->m_curr->indref)
6835 _c4dbgp(
"mapblck[RVAL]: got ':'. val was empty, next key as well");
6836 m_evt_handler->set_val_scalar_plain_empty();
6837 m_evt_handler->add_sibling();
6838 m_evt_handler->set_key_scalar_plain_empty();
6840 else if(startindent > m_evt_handler->m_curr->indref)
6842 _c4dbgp(
"mapblck[RVAL]: start val mapblck");
6844 _handle_annotations_before_start_mapblck(startline);
6846 m_evt_handler->begin_map_val_block();
6847 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6848 m_evt_handler->set_key_scalar_plain_empty();
6849 _set_indentation(m_evt_handler->m_curr->line_contents.indentation);
6855 _c4err(
"parse error");
6857 _line_progressed(1);
6858 _maybe_skip_whitespace_tokens();
6861 else if(first ==
'.')
6863 _c4dbgp(
"mapblck[RVAL]: maybe doc?");
6864 csubstr rs = rem.sub(1);
6865 if(rs ==
".." || rs.begins_with(
".. "))
6867 _c4dbgp(
"seqblck[RVAL]: end doc expl");
6868 _end_doc_suddenly();
6869 _line_progressed(3);
6870 _maybe_skip_whitespace_tokens();
6871 goto mapblck_finish;
6875 _c4err(
"parse error");
6879 else if(first ==
'\t')
6881 _c4dbgp(
"mapblck[RVAL]: skip tabs");
6882 _maybe_skipchars(
'\t');
6886 _c4err(
"parse error");
6889 else if(has_any(
RNXT))
6891 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
6892 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
6893 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
6894 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
6898 if(m_evt_handler->m_curr->at_line_beginning())
6900 _c4dbgpf(
"mapblck[RNXT]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
6901 if(m_evt_handler->m_curr->indentation_eq())
6903 _c4dbgpf(
"mapblck[RNXT]: skip {} from indref", m_evt_handler->m_curr->indref);
6904 _line_progressed(m_evt_handler->m_curr->indref);
6905 _c4dbgp(
"mapblck[RNXT]: speculatively expect next keyval");
6906 m_evt_handler->add_sibling();
6910 else if(m_evt_handler->m_curr->indentation_lt())
6912 _c4dbgp(
"mapblck[RNXT]: smaller indentation!");
6913 _handle_indentation_pop_from_block_map();
6916 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6919 _c4dbgp(
"mapblck[RNXT]: speculatively expect next keyval");
6920 m_evt_handler->add_sibling();
6927 goto mapblck_finish;
6933 _c4dbgp(
"mapblck[RNXT]: NOT at line begin");
6934 if(!rem.begins_with_any(
" \t"))
6936 _c4err(
"parse error");
6941 rem = m_evt_handler->m_curr->line_contents.rem;
6944 _c4dbgp(
"seqblck[RNXT]: again");
6952 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.len > 0);
6953 const char first = rem.str[0];
6954 _c4dbgpf(
"mapblck[RNXT]: '{}'", _c4prc(first));
6957 if(m_evt_handler->m_curr->more_indented)
6959 _c4dbgp(
"mapblck[RNXT]: start child block map");
6960 C4_NOT_IMPLEMENTED();
6962 _line_progressed(1);
6963 _set_indentation(m_evt_handler->m_curr->scalar_col);
6964 m_evt_handler->m_curr->more_indented =
false;
6969 _c4err(
"parse error");
6972 else if(first ==
' ')
6974 _c4dbgp(
"mapblck[RNXT]: skip spaces");
6975 _maybe_skip_whitespace_tokens();
6979 _c4err(
"parse error");
6982 else if(has_any(
QMRK))
6984 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
6985 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
6986 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
6987 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
6991 if(m_evt_handler->m_curr->at_line_beginning())
6993 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.indentation !=
npos);
6994 if(m_evt_handler->m_curr->indentation_eq())
6996 _c4dbgpf(
"mapblck[QMRK]: skip {} from indref", m_evt_handler->m_curr->indref);
6997 _line_progressed(m_evt_handler->m_curr->indref);
6998 rem = m_evt_handler->m_curr->line_contents.rem;
7002 else if(m_evt_handler->m_curr->indentation_lt())
7004 _c4dbgp(
"mapblck[QMRK]: smaller indentation!");
7005 _handle_indentation_pop_from_block_map();
7006 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
7009 _c4dbgp(
"mapblck[QMRK]: still mapblck!");
7010 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
QMRK));
7011 rem = m_evt_handler->m_curr->line_contents.rem;
7017 _c4dbgp(
"mapblck[QMRK]: no longer mapblck!");
7018 goto mapblck_finish;
7024 _c4dbgp(
"mapblck[QMRK]: larger indentation !");
7025 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
7026 rem = m_evt_handler->m_curr->line_contents.rem;
7034 const char first = rem.str[0];
7035 const size_t startline = m_evt_handler->m_curr->pos.line;
7036 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
7037 _c4dbgpf(
"mapblck[QMRK]: '{}'", first);
7041 _c4dbgp(
"mapblck[QMRK]: scanning single-quoted scalar");
7042 sc = _scan_scalar_squot();
7043 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
7044 if(!_maybe_scan_following_colon())
7046 _c4dbgp(
"mapblck[QMRK]: set as key");
7047 _handle_annotations_before_blck_key_scalar();
7048 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7053 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), set scalar as key");
7055 _handle_annotations_before_start_mapblck_as_key();
7056 m_evt_handler->begin_map_key_block();
7057 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7058 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7059 _maybe_skip_whitespace_tokens();
7060 _set_indentation(startindent);
7065 else if(first ==
'"')
7067 _c4dbgp(
"mapblck[QMRK]: scanning double-quoted scalar");
7068 sc = _scan_scalar_dquot();
7069 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
7070 if(!_maybe_scan_following_colon())
7072 _c4dbgp(
"mapblck[QMRK]: set as key");
7073 _handle_annotations_before_blck_key_scalar();
7074 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7079 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), set scalar as key");
7081 _handle_annotations_before_start_mapblck_as_key();
7082 m_evt_handler->begin_map_key_block();
7083 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7084 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7085 _maybe_skip_whitespace_tokens();
7086 _set_indentation(startindent);
7091 else if(first ==
'|')
7093 _c4dbgp(
"mapblck[QMRK]: scanning block-literal scalar");
7095 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
7096 csubstr maybe_filtered = _maybe_filter_key_scalar_literal(sb);
7097 _handle_annotations_before_blck_key_scalar();
7098 m_evt_handler->set_key_scalar_literal(maybe_filtered);
7101 else if(first ==
'>')
7103 _c4dbgp(
"mapblck[QMRK]: scanning block-literal scalar");
7105 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
7106 csubstr maybe_filtered = _maybe_filter_key_scalar_folded(sb);
7107 _handle_annotations_before_blck_key_scalar();
7108 m_evt_handler->set_key_scalar_folded(maybe_filtered);
7111 else if(_scan_scalar_plain_map_blck(&sc))
7113 _c4dbgp(
"mapblck[QMRK]: plain scalar");
7114 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
7115 if(!_maybe_scan_following_colon())
7117 _c4dbgp(
"mapblck[QMRK]: set as key");
7118 _handle_annotations_before_blck_key_scalar();
7119 m_evt_handler->set_key_scalar_plain(maybe_filtered);
7124 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), set scalar as key");
7126 _handle_annotations_before_start_mapblck_as_key();
7127 m_evt_handler->begin_map_key_block();
7128 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7129 m_evt_handler->set_key_scalar_plain(maybe_filtered);
7130 _maybe_skip_whitespace_tokens();
7131 _set_indentation(startindent);
7136 else if(first ==
':')
7138 if(startindent == m_evt_handler->m_curr->indref)
7140 _c4dbgp(
"mapblck[QMRK]: empty key");
7142 _handle_annotations_before_blck_key_scalar();
7143 m_evt_handler->set_key_scalar_plain_empty();
7144 _line_progressed(1);
7145 _maybe_skip_whitespace_tokens();
7149 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), empty key");
7151 _handle_annotations_before_start_mapblck_as_key();
7152 m_evt_handler->begin_map_key_block();
7153 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7154 m_evt_handler->set_key_scalar_plain_empty();
7155 _line_progressed(1);
7156 _maybe_skip_whitespace_tokens();
7157 _set_indentation(startindent);
7162 else if(first ==
'*')
7164 csubstr ref = _scan_ref_map();
7165 _c4dbgpf(
"mapblck[QMRK]: key ref! [{}]~~~{}~~~", ref.len, ref);
7166 if(!_maybe_scan_following_colon())
7168 _c4dbgp(
"mapblck[QMRK]: set ref as key");
7169 _handle_annotations_before_blck_key_scalar();
7170 m_evt_handler->set_key_ref(ref);
7175 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), set ref as key");
7177 _handle_annotations_before_blck_key_scalar();
7178 m_evt_handler->begin_map_key_block();
7179 m_evt_handler->set_key_ref(ref);
7180 _set_indentation(startindent);
7184 _maybe_skip_whitespace_tokens();
7186 else if(first ==
'&')
7188 csubstr anchor = _scan_anchor();
7189 _c4dbgpf(
"mapblck[QMRK]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
7190 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
7192 else if(first ==
'!')
7194 csubstr tag = _scan_tag();
7195 _c4dbgpf(
"mapblck[QMRK]: key tag! [{}]~~~{}~~~", tag.len, tag);
7196 _add_annotation(&m_pending_tags, tag, startindent, startline);
7198 else if(first ==
'-')
7200 _c4dbgp(
"mapblck[QMRK]: maybe doc?");
7201 csubstr rs = rem.sub(1);
7202 if(rs ==
"--" || rs.begins_with(
"-- "))
7204 _c4dbgp(
"mapblck[QMRK]: end+start doc");
7205 _start_doc_suddenly();
7206 _line_progressed(3);
7210 _c4dbgp(
"mapblck[QMRK]: start child seqblck (!)");
7212 _handle_annotations_before_blck_key_scalar();
7213 m_evt_handler->begin_seq_key_block();
7215 _set_indentation(startindent);
7216 _line_progressed(1);
7218 _maybe_skip_whitespace_tokens();
7219 goto mapblck_finish;
7221 else if(first ==
'[')
7223 _c4dbgp(
"mapblck[QMRK]: start child seqflow (!)");
7225 m_evt_handler->begin_seq_key_flow();
7227 _set_indentation(m_evt_handler->m_parent->indref);
7228 _line_progressed(1);
7229 goto mapblck_finish;
7231 else if(first ==
'{')
7233 _c4dbgp(
"mapblck[QMRK]: start child mapblck (!)");
7235 m_evt_handler->begin_map_key_flow();
7237 _set_indentation(m_evt_handler->m_parent->indref);
7238 _line_progressed(1);
7239 goto mapblck_finish;
7241 else if(first ==
'?')
7243 _c4dbgp(
"mapblck[QMRK]: another QMRK '?'");
7244 m_evt_handler->set_key_scalar_plain_empty();
7245 m_evt_handler->set_val_scalar_plain_empty();
7246 m_evt_handler->add_sibling();
7247 _line_progressed(1);
7249 else if(first ==
'.')
7251 _c4dbgp(
"mapblck[QMRK]: maybe end doc?");
7252 csubstr rs = rem.sub(1);
7253 if(rs ==
".." || rs.begins_with(
".. "))
7255 _c4dbgp(
"mapblck[QMRK]: end+start doc");
7256 _end_doc_suddenly();
7257 _line_progressed(3);
7258 goto mapblck_finish;
7262 _c4err(
"parse error");
7267 _c4err(
"parse error");
7272 _c4dbgt(
"mapblck: again", 0);
7273 if(_finished_line())
7277 if(_finished_file())
7279 _c4dbgp(
"mapblck: file finished!");
7281 goto mapblck_finish;
7288 _c4dbgp(
"mapblck: finish");
/** Handle the text at the current position while the parser is at top level
 * and has not opened any container. Judging by the name and the "runk_json"
 * debug tags, this is the JSON counterpart of _handle_unk().
 *
 * Asserts that RTOP is set and that none of RNXT|RSEQ|RMAP is set. A comment
 * and leading spaces/tabs are skipped first. The remaining text is then
 * dispatched: '[' begins a flow sequence value, '{' begins a flow map value,
 * a byte order mark is recognized through _handle_bom(), and otherwise a
 * double-quoted or plain scalar is scanned and set as the value. Input that
 * fits none of these is reported with _c4err("parse error"). */
7294 template<
class EventHandler>
7295 void ParseEngine<EventHandler>::_handle_unk_json()
7297 _c4dbgpf(
"handle_unk_json indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
// state check: RTOP must be set; RNXT, RSEQ and RMAP must all be clear
7299 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT|
RSEQ|
RMAP));
7300 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RTOP));
// skip a comment if there is one, then read what is left of the line
7302 _maybe_skip_comment();
7303 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// measure the run of leading spaces/tabs
7307 size_t pos = rem.first_not_of(
" \t");
// no non-blank character found: the whole remainder is whitespace
7310 pos = pos !=
npos ? pos : rem.len;
7311 _c4dbgpf(
"skipping indentation of {}", pos);
// consume that whitespace and refresh the local view of the line
7312 _line_progressed(pos);
7313 rem = m_evt_handler->m_curr->line_contents.rem;
7316 _c4dbgpf(
"rem is now [{}]~~~{}~~~", rem.len, rem);
// '[' opens a flow sequence as the value
7319 if(rem.begins_with(
'['))
7321 _c4dbgp(
"it's a seq");
7322 m_evt_handler->check_trailing_doc_token();
7324 m_evt_handler->begin_seq_val_flow();
// the current column (computed from rem) becomes the indentation reference;
// the document is flagged as non-empty and the '[' is consumed
7326 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7327 m_doc_empty =
false;
7328 _line_progressed(1);
// '{' opens a flow map as the value; same bookkeeping as for '['
7330 else if(rem.begins_with(
'{'))
7332 _c4dbgp(
"it's a map");
7333 m_evt_handler->check_trailing_doc_token();
7335 m_evt_handler->begin_map_val_flow();
7337 m_doc_empty =
false;
7338 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7339 _line_progressed(1);
// a byte order mark reported by _handle_bom() is only logged here
7341 else if(_handle_bom())
7343 _c4dbgp(
"byte order mark");
// scalar path: SSCL must not be set when a scalar is about to be scanned
7347 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(
SSCL));
7348 _maybe_skip_whitespace_tokens();
7349 csubstr s = m_evt_handler->m_curr->line_contents.rem;
// startindent is the indentation of the line; in the visible code it is
// only used when filtering the plain scalar below
7352 const size_t startindent = m_evt_handler->m_curr->line_contents.indentation;
7353 const char first = s.str[0];
7357 _c4dbgp(
"runk_json: scanning double-quoted scalar");
7358 m_evt_handler->check_trailing_doc_token();
7361 m_doc_empty =
false;
7362 sc = _scan_scalar_dquot();
7363 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
// without a following ':' the scalar is the value; pending annotations are
// handled before it is set
7364 if(!_maybe_scan_following_colon())
7366 _c4dbgp(
"runk_json: set as val");
7367 _handle_annotations_before_blck_val_scalar();
7368 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
// NOTE(review): presumably the branch taken when a ':' follows the scalar,
// i.e. a top-level key is rejected here -- confirm
7372 _c4err(
"parse error");
// not double-quoted: try to scan a plain scalar
7375 else if(_scan_scalar_plain_unk(&sc))
7377 _c4dbgp(
"runk_json: got a plain scalar");
7378 m_evt_handler->check_trailing_doc_token();
7381 m_doc_empty =
false;
7382 if(!_maybe_scan_following_colon())
7384 _c4dbgp(
"runk_json: set as val");
7385 _handle_annotations_before_blck_val_scalar();
7386 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
7387 m_evt_handler->set_val_scalar_plain(maybe_filtered);
// NOTE(review): as above, presumably reached when a ':' follows -- confirm
7391 _c4err(
"parse error");
// nothing recognizable at this position
7396 _c4err(
"parse error");
/** Handle the text at the current position while the parser is at top level
 * and does not yet know what kind of node comes next.
 *
 * Asserts that RTOP is set and that none of RNXT|RSEQ|RMAP is set. After
 * skipping a comment and leading spaces/tabs, the function:
 *  - at zero indentation and at the beginning of the line, looks for a byte
 *    order mark, a document-begin token, a document-end token (first
 *    character '.') or a '%' directive;
 *  - dispatches on the first character of the remaining text: '[' and '{'
 *    begin flow containers; '-', '?' and ':' begin block containers when
 *    _is_blck_token() accepts them; '&' and '!' queue an anchor or tag
 *    annotation; '*' reads a reference; anything else is scanned as a
 *    single-quoted, double-quoted, block literal ('|'), block folded ('>')
 *    or plain scalar.
 * A reference or a quoted/plain scalar followed by ':' becomes the first key
 * of a new block map; a block literal/folded scalar followed by ':' is
 * reported as an error. */
7404 template<
class EventHandler>
7405 void ParseEngine<EventHandler>::_handle_unk()
7407 _c4dbgpf(
"handle_unk indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
// state check: RTOP must be set; RNXT, RSEQ and RMAP must all be clear
7409 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT|
RSEQ|
RMAP));
7410 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RTOP));
// skip a comment if there is one, then read what is left of the line
7412 _maybe_skip_comment();
7413 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// measure the run of leading spaces/tabs
7417 size_t pos = rem.first_not_of(
" \t");
// no non-blank character found: the whole remainder is whitespace
7420 pos = pos !=
npos ? pos : rem.len;
7421 _c4dbgpf(
"skipping {} whitespace characters", pos);
// consume that whitespace and refresh the local view of the line
7422 _line_progressed(pos);
7423 rem = m_evt_handler->m_curr->line_contents.rem;
7426 _c4dbgpf(
"rem is now [{}]~~~{}~~~", rem.len, rem);
// document-level tokens are only looked for on an unindented line that has
// not been partially consumed yet
7429 if(m_evt_handler->m_curr->line_contents.indentation == 0u && _at_line_begin())
7431 _c4dbgp(
"rtop: zero indent + at line begin");
7434 _c4dbgp(
"byte order mark!");
// re-read the line remainder after the byte order mark was handled
7435 rem = m_evt_handler->m_curr->line_contents.rem;
7439 const char first = rem.str[0];
7442 _c4dbgp(
"rtop: suspecting doc");
// document-begin token: indentation is reset to 0 and the 3-character
// token plus any following whitespace is consumed
7443 if(_is_doc_begin_token(rem))
7445 _c4dbgp(
"rtop: begin doc");
7448 _set_indentation(0);
7450 _line_progressed(3u);
7451 _maybe_skip_whitespace_tokens();
// '.' may start a document-end token
7455 else if(first ==
'.')
7457 _c4dbgp(
"rtop: suspecting doc end");
7458 if(_is_doc_end_token(rem))
7460 _c4dbgp(
"rtop: end doc");
7467 _c4dbgp(
"rtop: ignore end doc");
7470 _line_progressed(3u);
7471 _maybe_skip_whitespace_tokens();
// '%' starts a directive; it is an error when the document already has
// content and the NDOC flag is not set
7475 else if(first ==
'%')
7477 _c4dbgpf(
"directive: {}", rem);
7478 if(C4_UNLIKELY(!m_doc_empty && has_none(
NDOC)))
7479 _RYML_CB_ERR(m_evt_handler->m_stack.m_callbacks,
"need document footer before directives");
7480 _handle_directive(rem);
// generic dispatch on the first character of the remaining text
7486 char first = rem.str[0];
7490 m_evt_handler->check_trailing_doc_token();
7492 m_doc_empty =
false;
7493 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col(rem);
// common case: the flow sequence is a plain value. Otherwise the pending
// annotations require a key container, so a block map is started and the
// flow sequence becomes its first key
7494 if(C4_LIKELY( ! _annotations_require_key_container()))
7496 _c4dbgp(
"it's a seq, flow");
7497 _handle_annotations_before_blck_val_scalar();
7498 m_evt_handler->begin_seq_val_flow();
7500 _set_indentation(startindent);
7504 _c4dbgp(
"start new block map, set flow seq as key (!)");
7505 _handle_annotations_before_start_mapblck(m_evt_handler->m_curr->pos.line);
7506 m_evt_handler->begin_map_val_block();
7508 _handle_annotations_and_indentation_after_start_mapblck(startindent, m_evt_handler->m_curr->pos.line);
7509 m_evt_handler->begin_seq_key_flow();
7511 _set_indentation(startindent);
// consume the opening bracket
7513 _line_progressed(1);
// '{': flow map, handled the same way as the flow sequence above
7515 else if(first ==
'{')
7517 m_evt_handler->check_trailing_doc_token();
7519 m_doc_empty =
false;
7520 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col(rem);
7521 if(C4_LIKELY( ! _annotations_require_key_container()))
7523 _c4dbgp(
"it's a map, flow");
7524 _handle_annotations_before_blck_val_scalar();
7525 m_evt_handler->begin_map_val_flow();
7527 _set_indentation(startindent);
7531 _c4dbgp(
"start new block map, set flow map as key (!)");
7532 _handle_annotations_before_start_mapblck(m_evt_handler->m_curr->pos.line);
7533 m_evt_handler->begin_map_val_block();
7535 _handle_annotations_and_indentation_after_start_mapblck(startindent, m_evt_handler->m_curr->pos.line);
7536 m_evt_handler->begin_map_key_flow();
7538 _set_indentation(startindent);
7540 _line_progressed(1);
// '-' accepted by _is_blck_token(): start a block sequence whose indentation
// reference is the current column, then consume the '-'
7542 else if(first ==
'-' && _is_blck_token(rem))
7544 _c4dbgp(
"it's a seq, block");
7545 m_evt_handler->check_trailing_doc_token();
7547 _handle_annotations_before_blck_val_scalar();
7548 m_evt_handler->begin_seq_val_block();
7550 m_doc_empty =
false;
7551 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7552 _line_progressed(1);
7553 _maybe_skip_whitespace_tokens();
// '?' accepted by _is_blck_token(): start a block map whose first key is a
// complex key; m_was_inside_qmrk records that a '?' was seen
7555 else if(first ==
'?' && _is_blck_token(rem))
7557 _c4dbgp(
"it's a map + this key is complex");
7558 m_evt_handler->check_trailing_doc_token();
7560 _handle_annotations_before_blck_val_scalar();
7561 m_evt_handler->begin_map_val_block();
7563 m_doc_empty =
false;
7564 m_was_inside_qmrk =
true;
7565 _save_indentation();
7566 _line_progressed(1);
7567 _maybe_skip_whitespace_tokens();
// ':' accepted by _is_blck_token(): a map entry whose key is missing
7569 else if(first ==
':' && _is_blck_token(rem))
7573 _c4dbgp(
"it's a map with an empty key");
7574 const size_t startindent = m_evt_handler->m_curr->line_contents.indentation;
7575 const size_t startline = m_evt_handler->m_curr->pos.line;
7576 m_evt_handler->check_trailing_doc_token();
7578 _handle_annotations_before_start_mapblck(startline);
7580 m_evt_handler->begin_map_val_block();
7581 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7582 m_evt_handler->set_key_scalar_plain_empty();
7583 m_doc_empty =
false;
7584 _set_indentation(startindent);
// NOTE(review): alternative path for ':' -- per the debug text a value was
// already set and is really a key. The handler turns it into the first key
// of a new block map, and the indref saved beforehand is applied again.
// Confirm the condition that selects this path.
7588 _c4dbgp(
"actually prev val is a key!");
7589 size_t prev_indentation = m_evt_handler->m_curr->indref;
7590 m_evt_handler->actually_val_is_first_key_of_new_map_block();
7591 _set_indentation(prev_indentation);
// consume the ':'
7594 _line_progressed(1);
7595 _maybe_skip_whitespace_tokens();
// '&': scan an anchor and queue it, with its column and line, in
// m_pending_anchors
7597 else if(first ==
'&')
7599 csubstr anchor = _scan_anchor();
7600 _c4dbgpf(
"anchor! [{}]~~~{}~~~", anchor.len, anchor);
7601 m_evt_handler->check_trailing_doc_token();
7603 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
7604 const size_t line = m_evt_handler->m_curr->pos.line;
7605 _add_annotation(&m_pending_anchors, anchor, indentation, line);
7606 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7607 m_doc_empty =
false;
// '*': scan a reference; it is the value unless a ':' follows, in which case
// it becomes the first key of a new block map
7609 else if(first ==
'*')
7611 csubstr ref = _scan_ref_map();
7612 _c4dbgpf(
"ref! [{}]~~~{}~~~", ref.len, ref);
7613 m_evt_handler->check_trailing_doc_token();
7615 m_doc_empty =
false;
7616 if(!_maybe_scan_following_colon())
7618 _c4dbgp(
"runk: set val ref");
7619 _handle_annotations_before_blck_val_scalar();
7620 m_evt_handler->set_val_ref(ref);
7624 _c4dbgp(
"runk: start new block map, set ref as key");
7625 const size_t startindent = m_evt_handler->m_curr->line_contents.indentation;
7626 const size_t startline = m_evt_handler->m_curr->pos.line;
7627 _handle_annotations_before_start_mapblck(startline);
7628 m_evt_handler->begin_map_val_block();
7629 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7630 m_evt_handler->set_key_ref(ref);
7631 _maybe_skip_whitespace_tokens();
7632 _set_indentation(startindent);
// '!': scan a tag and queue it, with its column and line, in m_pending_tags
7636 else if(first ==
'!')
7638 csubstr tag = _scan_tag();
7639 _c4dbgpf(
"unk: val tag! [{}]~~~{}~~~", tag.len, tag);
7642 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
7643 const size_t line = m_evt_handler->m_curr->pos.line;
7644 _add_annotation(&m_pending_tags, tag, indentation, line);
// scalar path: SSCL must not be set when a scalar is about to be scanned
7648 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(
SSCL));
7649 _maybe_skip_whitespace_tokens();
7650 csubstr s = m_evt_handler->m_curr->line_contents.rem;
// position of the scalar's line, captured before scanning; used for the
// annotations and indentation of a block map the scalar may start
7653 const size_t startindent = m_evt_handler->m_curr->line_contents.indentation;
7654 const size_t startline = m_evt_handler->m_curr->pos.line;
7659 _c4dbgp(
"runk: scanning single-quoted scalar");
7660 m_evt_handler->check_trailing_doc_token();
7663 m_doc_empty =
false;
7664 sc = _scan_scalar_squot();
// no ':' after the scalar: it is the value. The scalar is filtered only
// after the colon check, with the val- or key-specific filter
7665 if(!_maybe_scan_following_colon())
7667 _c4dbgp(
"runk: set as val");
7668 _handle_annotations_before_blck_val_scalar();
7669 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
7670 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
7674 _c4dbgp(
"runk: start new block map, set scalar as key");
7675 _handle_annotations_before_start_mapblck(startline);
7677 m_evt_handler->begin_map_val_block();
7678 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7679 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
7680 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7681 _maybe_skip_whitespace_tokens();
7682 _set_indentation(startindent);
// double-quoted scalar: same val/key handling as the single-quoted case
7686 else if(first ==
'"')
7688 _c4dbgp(
"runk: scanning double-quoted scalar");
7689 m_evt_handler->check_trailing_doc_token();
7692 m_doc_empty =
false;
7693 sc = _scan_scalar_dquot();
7694 if(!_maybe_scan_following_colon())
7696 _c4dbgp(
"runk: set as val");
7697 _handle_annotations_before_blck_val_scalar();
7698 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
7699 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
7703 _c4dbgp(
"runk: start new block map, set double-quoted scalar as key");
7704 _handle_annotations_before_start_mapblck(startline);
7705 m_evt_handler->begin_map_val_block();
7707 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7708 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
7709 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7710 _maybe_skip_whitespace_tokens();
7711 _set_indentation(startindent);
// '|': block literal scalar; it can only be a value here
7715 else if(first ==
'|')
7717 _c4dbgp(
"runk: scanning block-literal scalar");
7718 m_evt_handler->check_trailing_doc_token();
7721 m_doc_empty =
false;
7723 _scan_block(&sb, startindent);
7724 if(C4_LIKELY(!_maybe_scan_following_colon()))
7726 _c4dbgp(
"runk: set as val");
7727 _handle_annotations_before_blck_val_scalar();
7728 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
7729 m_evt_handler->set_val_scalar_literal(maybe_filtered);
7733 _c4err(
"block literal keys must be enclosed in '?'");
// '>': block folded scalar; it can only be a value here
7736 else if(first ==
'>')
7738 _c4dbgp(
"runk: scanning block-folded scalar");
7739 m_evt_handler->check_trailing_doc_token();
7742 m_doc_empty =
false;
7744 _scan_block(&sb, startindent);
7745 if(C4_LIKELY(!_maybe_scan_following_colon()))
7747 _c4dbgp(
"runk: set as val");
7748 _handle_annotations_before_blck_val_scalar();
7749 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
7750 m_evt_handler->set_val_scalar_folded(maybe_filtered);
7754 _c4err(
"block folded keys must be enclosed in '?'");
// last resort: try to scan a plain scalar; same val/key handling as for the
// quoted scalars
7757 else if(_scan_scalar_plain_unk(&sc))
7759 _c4dbgp(
"runk: got a plain scalar");
7760 m_evt_handler->check_trailing_doc_token();
7763 m_doc_empty =
false;
7764 if(!_maybe_scan_following_colon())
7766 _c4dbgp(
"runk: set as val");
7767 _handle_annotations_before_blck_val_scalar();
7768 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
7769 m_evt_handler->set_val_scalar_plain(maybe_filtered);
7773 _c4dbgp(
"runk: start new block map, set scalar as key");
7774 _handle_annotations_before_start_mapblck(startline);
7776 m_evt_handler->begin_map_val_block();
7777 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7778 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
7779 m_evt_handler->set_key_scalar_plain(maybe_filtered);
7780 _maybe_skip_whitespace_tokens();
7781 _set_indentation(startindent);
7791 template<
class EventHandler>
7792 C4_COLD
void ParseEngine<EventHandler>::_handle_usty()
7794 _c4dbgpf(
"handle_usty target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
7796 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
BLCK|
FLOW));
7798 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
7801 _c4dbgp(
"usty[RNXT]: finishing!");
7806 _maybe_skip_comment();
7807 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
7811 size_t pos = rem.first_not_of(
" \t");
7814 pos = pos !=
npos ? pos : rem.len;
7815 _c4dbgpf(
"skipping indentation of {}", pos);
7816 _line_progressed(pos);
7817 rem = m_evt_handler->m_curr->line_contents.rem;
7820 _c4dbgpf(
"rem is now [{}]~~~{}~~~", rem.len, rem);
7823 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.len > 0);
7824 size_t startindent = m_evt_handler->m_curr->line_contents.indentation;
7825 char first = rem.str[0];
7828 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(
RMAP));
7829 _c4dbgpf(
"usty[RSEQ]: first='{}'", _c4prc(first));
7832 _c4dbgp(
"usty[RSEQ]: it's a flow seq. merging it");
7834 m_evt_handler->_push();
7836 _set_indentation(startindent);
7837 _line_progressed(1);
7838 _maybe_skip_whitespace_tokens();
7840 else if(first ==
'-' && _is_blck_token(rem))
7842 _c4dbgp(
"usty[RSEQ]: it's a block seq. merging it");
7844 m_evt_handler->_push();
7846 _set_indentation(startindent);
7847 _line_progressed(1);
7848 _maybe_skip_whitespace_tokens();
7852 _c4err(
"can only parse a seq into an existing seq");
7855 else if(has_any(
RMAP))
7857 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(
RSEQ));
7858 _c4dbgpf(
"usty[RMAP]: first='{}'", _c4prc(first));
7861 _c4dbgp(
"usty[RMAP]: it's a flow map. merging it");
7863 _handle_annotations_before_blck_val_scalar();
7864 m_evt_handler->_push();
7866 _set_indentation(startindent);
7867 _line_progressed(1);
7868 _maybe_skip_whitespace_tokens();
7870 else if(first ==
'?' && _is_blck_token(rem))
7872 _c4dbgp(
"usty[RMAP]: it's a block map + this key is complex");
7874 _handle_annotations_before_blck_val_scalar();
7875 m_evt_handler->_push();
7877 m_was_inside_qmrk =
true;
7878 _save_indentation();
7879 _line_progressed(1);
7880 _maybe_skip_whitespace_tokens();
7882 else if(first ==
':' && _is_blck_token(rem))
7884 _c4dbgp(
"usty[RMAP]: it's a map with an empty key");
7886 _handle_annotations_before_blck_val_scalar();
7887 m_evt_handler->_push();
7888 m_evt_handler->set_key_scalar_plain_empty();
7890 _save_indentation();
7891 _line_progressed(1);
7892 _maybe_skip_whitespace_tokens();
7894 else if(rem.begins_with(
'&'))
7896 csubstr anchor = _scan_anchor();
7897 _c4dbgpf(
"usty[RMAP]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
7898 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
7899 const size_t line = m_evt_handler->m_curr->pos.line;
7900 _add_annotation(&m_pending_anchors, anchor, indentation, line);
7901 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7903 else if(first ==
'*')
7905 csubstr ref = _scan_ref_map();
7906 _c4dbgpf(
"usty[RMAP]: ref! [{}]~~~{}~~~", ref.len, ref);
7907 if(!_maybe_scan_following_colon())
7909 _c4err(
"cannot read a VAL to a map");
7913 _c4dbgp(
"usty[RMAP]: start new block map, set ref as key");
7914 const size_t startline = m_evt_handler->m_curr->pos.line;
7916 _handle_annotations_before_start_mapblck(startline);
7917 m_evt_handler->_push();
7918 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7919 m_evt_handler->set_key_ref(ref);
7920 _maybe_skip_whitespace_tokens();
7921 _set_indentation(startindent);
7925 else if(first ==
'!')
7927 csubstr tag = _scan_tag();
7928 _c4dbgpf(
"usty[RMAP]: val tag! [{}]~~~{}~~~", tag.len, tag);
7931 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
7932 const size_t line = m_evt_handler->m_curr->pos.line;
7933 _add_annotation(&m_pending_tags, tag, indentation, line);
7935 else if(first ==
'[' || (first ==
'-' && _is_blck_token(rem)))
7937 _c4err(
"cannot parse a seq into an existing map");
7941 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(
SSCL));
7942 startindent = m_evt_handler->m_curr->line_contents.indentation;
7943 const size_t startline = m_evt_handler->m_curr->pos.line;
7945 _c4dbgpf(
"usty[RMAP]: maybe scalar. first='{}'", _c4prc(first));
7948 _c4dbgp(
"usty[RMAP]: scanning single-quoted scalar");
7949 sc = _scan_scalar_squot();
7950 if(!_maybe_scan_following_colon())
7952 _c4err(
"cannot read a VAL to a map");
7956 _c4dbgp(
"usty[RMAP]: start new block map, set scalar as key");
7958 _handle_annotations_before_start_mapblck(startline);
7959 m_evt_handler->_push();
7960 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7961 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
7962 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7963 _set_indentation(startindent);
7965 _maybe_skip_whitespace_tokens();
7968 else if(first ==
'"')
7970 _c4dbgp(
"usty[RMAP]: scanning double-quoted scalar");
7971 sc = _scan_scalar_dquot();
7972 if(!_maybe_scan_following_colon())
7974 _c4err(
"cannot read a VAL to a map");
7978 _c4dbgp(
"usty[RMAP]: start new block map, set double-quoted scalar as key");
7980 _handle_annotations_before_start_mapblck(startline);
7981 m_evt_handler->_push();
7982 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7983 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
7984 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7985 _set_indentation(startindent);
7987 _maybe_skip_whitespace_tokens();
7990 else if(first ==
'|')
7992 _c4err(
"block literal keys must be enclosed in '?'");
7994 else if(first ==
'>')
7996 _c4err(
"block literal keys must be enclosed in '?'");
7998 else if(_scan_scalar_plain_unk(&sc))
8000 _c4dbgp(
"usty[RMAP]: got a plain scalar");
8001 if(!_maybe_scan_following_colon())
8003 _c4err(
"cannot read a VAL to a map");
8007 _c4dbgp(
"usty[RMAP]: start new block map, set scalar as key");
8009 _handle_annotations_before_start_mapblck(startline);
8010 m_evt_handler->_push();
8011 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8012 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
8013 m_evt_handler->set_key_scalar_plain(maybe_filtered);
8014 _set_indentation(startindent);
8016 _maybe_skip_whitespace_tokens();
8021 _c4err(
"parse error");
8027 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(
RSEQ));
8028 _c4dbgpf(
"usty[UNK]: first='{}'", _c4prc(first));
8031 _c4dbgp(
"usty[UNK]: it's a flow seq");
8033 _handle_annotations_before_blck_val_scalar();
8034 m_evt_handler->begin_seq_val_flow();
8036 _set_indentation(startindent);
8037 _line_progressed(1);
8038 _maybe_skip_whitespace_tokens();
8040 else if(first ==
'-' && _is_blck_token(rem))
8042 _c4dbgp(
"usty[UNK]: it's a block seq");
8044 _handle_annotations_before_blck_val_scalar();
8045 m_evt_handler->begin_seq_val_block();
8047 _set_indentation(startindent);
8048 _line_progressed(1);
8049 _maybe_skip_whitespace_tokens();
8051 else if(first ==
'{')
8053 _c4dbgp(
"usty[UNK]: it's a flow map");
8055 _handle_annotations_before_blck_val_scalar();
8056 m_evt_handler->begin_map_val_flow();
8058 _set_indentation(startindent);
8059 _line_progressed(1);
8060 _maybe_skip_whitespace_tokens();
8062 else if(first ==
'?' && _is_blck_token(rem))
8064 _c4dbgp(
"usty[UNK]: it's a map + this key is complex");
8066 _handle_annotations_before_blck_val_scalar();
8067 m_evt_handler->begin_map_val_block();
8069 m_was_inside_qmrk =
true;
8070 _save_indentation();
8071 _line_progressed(1);
8072 _maybe_skip_whitespace_tokens();
8074 else if(first ==
':' && _is_blck_token(rem))
8076 _c4dbgp(
"usty[UNK]: it's a map with an empty key");
8078 _handle_annotations_before_blck_val_scalar();
8079 m_evt_handler->begin_map_val_block();
8080 m_evt_handler->set_key_scalar_plain_empty();
8082 _save_indentation();
8083 _line_progressed(1);
8084 _maybe_skip_whitespace_tokens();
8086 else if(first ==
'&')
8088 csubstr anchor = _scan_anchor();
8089 _c4dbgpf(
"usty[UNK]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
8090 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8091 const size_t line = m_evt_handler->m_curr->pos.line;
8092 _add_annotation(&m_pending_anchors, anchor, indentation, line);
8093 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
8095 else if(first ==
'*')
8097 csubstr ref = _scan_ref_map();
8098 _c4dbgpf(
"usty[UNK]: ref! [{}]~~~{}~~~", ref.len, ref);
8099 if(!_maybe_scan_following_colon())
8101 _c4dbgp(
"usty[UNK]: set val ref");
8102 _handle_annotations_before_blck_val_scalar();
8103 m_evt_handler->set_val_ref(ref);
8107 _c4dbgp(
"usty[UNK]: start new block map, set ref as key");
8108 const size_t startline = m_evt_handler->m_curr->pos.line;
8110 _handle_annotations_before_start_mapblck(startline);
8111 m_evt_handler->begin_map_val_block();
8112 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8113 m_evt_handler->set_key_ref(ref);
8114 _maybe_skip_whitespace_tokens();
8115 _set_indentation(startindent);
8119 else if(first ==
'!')
8121 csubstr tag = _scan_tag();
8122 _c4dbgpf(
"usty[UNK]: val tag! [{}]~~~{}~~~", tag.len, tag);
8125 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8126 const size_t line = m_evt_handler->m_curr->pos.line;
8127 _add_annotation(&m_pending_tags, tag, indentation, line);
8131 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(
SSCL));
8132 startindent = m_evt_handler->m_curr->line_contents.indentation;
8133 const size_t startline = m_evt_handler->m_curr->pos.line;
8136 _c4dbgpf(
"usty[UNK]: maybe scalar. first='{}'", _c4prc(first));
8139 _c4dbgp(
"usty[UNK]: scanning single-quoted scalar");
8140 sc = _scan_scalar_squot();
8141 if(!_maybe_scan_following_colon())
8143 _c4dbgp(
"usty[UNK]: set as val");
8144 _handle_annotations_before_blck_val_scalar();
8145 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
8146 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
8151 _c4dbgp(
"usty[UNK]: start new block map, set scalar as key");
8153 _handle_annotations_before_start_mapblck(startline);
8154 m_evt_handler->begin_map_val_block();
8155 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8156 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
8157 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
8158 _set_indentation(startindent);
8160 _maybe_skip_whitespace_tokens();
8163 else if(first ==
'"')
8165 _c4dbgp(
"usty[UNK]: scanning double-quoted scalar");
8166 sc = _scan_scalar_dquot();
8167 if(!_maybe_scan_following_colon())
8169 _c4dbgp(
"usty[UNK]: set as val");
8170 _handle_annotations_before_blck_val_scalar();
8171 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
8172 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
8177 _c4dbgp(
"usty[UNK]: start new block map, set double-quoted scalar as key");
8179 _handle_annotations_before_start_mapblck(startline);
8180 m_evt_handler->begin_map_val_block();
8181 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8182 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
8183 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
8184 _set_indentation(startindent);
8186 _maybe_skip_whitespace_tokens();
8189 else if(first ==
'|')
8191 _c4dbgp(
"usty[UNK]: scanning block-literal scalar");
8193 _scan_block(&sb, startindent);
8194 _c4dbgp(
"usty[UNK]: set as val");
8195 _handle_annotations_before_blck_val_scalar();
8196 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
8197 m_evt_handler->set_val_scalar_literal(maybe_filtered);
8200 else if(first ==
'>')
8202 _c4dbgp(
"usty[UNK]: scanning block-folded scalar");
8204 _scan_block(&sb, startindent);
8205 _c4dbgp(
"usty[UNK]: set as val");
8206 _handle_annotations_before_blck_val_scalar();
8207 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
8208 m_evt_handler->set_val_scalar_folded(maybe_filtered);
8211 else if(_scan_scalar_plain_unk(&sc))
8213 _c4dbgp(
"usty[UNK]: got a plain scalar");
8214 if(!_maybe_scan_following_colon())
8216 _c4dbgp(
"usty[UNK]: set as val");
8217 _handle_annotations_before_blck_val_scalar();
8218 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
8219 m_evt_handler->set_val_scalar_plain(maybe_filtered);
8224 _c4dbgp(
"usty[UNK]: start new block map, set scalar as key");
8226 _handle_annotations_before_start_mapblck(startline);
8227 m_evt_handler->begin_map_val_block();
8228 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8229 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
8230 m_evt_handler->set_key_scalar_plain(maybe_filtered);
8231 _set_indentation(startindent);
8233 _maybe_skip_whitespace_tokens();
8238 _c4err(
"parse error");
// Event-emitting parse driver, templated on the EventHandler type.
// NOTE(review): the signature line is not visible in this listing. Because
// only the RMAP and RUNK states are dispatched below, this is presumably
// parse_json_in_place_ev(csubstr filename, substr src) -- confirm.
8247 template<
class EventHandler>
// Precondition: the handler's state stack must hold at least one entry.
8250 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
// Tell the handler that parsing starts, passing the filename string, the
// _s_relocate_arena callback and this parser; then open the stream.
8254 m_evt_handler->start_parse(filename.str, &_s_relocate_arena,
this);
8255 m_evt_handler->begin_stream();
// Outer loop: repeats until _finished_file() returns true.
8256 while( ! _finished_file())
// Inner loop: repeats until _finished_line() returns true.
8259 while( ! _finished_line())
// Inside the inner loop the remainder of the current line must be non-empty.
8262 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! m_evt_handler->m_curr->line_contents.rem.empty());
// Dispatch on the parser state flags. The first branch of this chain and the
// statements run by each branch are not visible in this listing.
8267 else if(has_any(
RMAP))
8271 else if(has_any(
RUNK))
// Presumably the fallback when none of the state checks above matched:
// report an internal error.
8277 _c4err(
"internal error");
// Checks _finished_file() again; the guarded statement is not visible here.
8280 if(_finished_file())
// Tell the handler that parsing is complete.
8285 m_evt_handler->finish_parse();
// Event-emitting parse driver, templated on the EventHandler type.
// NOTE(review): the signature line is not visible in this listing. Because
// block-mode (BLCK) and unknown-style (USTY) states are dispatched below, this
// is presumably parse_in_place_ev(csubstr filename, substr src) -- confirm.
8291 template<
class EventHandler>
// Precondition: the handler's state stack must hold at least one entry.
8294 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
// Tell the handler that parsing starts, passing the filename string, the
// _s_relocate_arena callback and this parser; then open the stream.
8298 m_evt_handler->start_parse(filename.str, &_s_relocate_arena,
this);
8299 m_evt_handler->begin_stream();
// Outer loop: repeats until _finished_file() returns true.
8300 while( ! _finished_file())
// Inner loop: repeats until _finished_line() returns true.
8303 while( ! _finished_line())
// Inside the inner loop the remainder of the current line must be non-empty.
8306 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! m_evt_handler->m_curr->line_contents.rem.empty());
// The branch this assertion belongs to is not visible in this listing; at this
// point the RMAP flag is required to be set.
8317 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP));
// Block-mode branch: calls the block sequence handler or the block map
// handler. The condition choosing between the two is not visible here.
8326 else if(has_any(
BLCK))
8330 _handle_seq_block();
// The map handler is only reached with the RMAP flag set.
8334 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP));
8335 _handle_map_block();
// Further state checks; the statements run by each branch are not visible here.
8338 else if(has_any(
RUNK))
8342 else if(has_any(
USTY))
// Presumably the fallback when none of the state checks above matched:
// report an internal error.
8348 _c4err(
"internal error");
// Checks _finished_file() again; the guarded statement is not visible here.
8351 if(_finished_file())
// Tell the handler that parsing is complete.
8356 m_evt_handler->finish_parse();
8365 #undef _c4dbgnextline
8367 #if defined(_MSC_VER)
8368 # pragma warning(pop)
8369 #elif defined(__clang__)
8370 # pragma clang diagnostic pop
8371 #elif defined(__GNUC__)
8372 # pragma GCC diagnostic pop
Lightweight generic type-safe wrappers for converting individual values to/from strings.
This is the main driver of parsing logic: it scans the YAML or JSON source for tokens,...
Location location(Tree const &tree, id_type node_id) const
Get the location of a node of the last tree to be parsed by this parser.
FilterResult filter_scalar_plain(csubstr scalar, substr dst, size_t indentation)
filter a plain scalar
csubstr location_contents(Location const &loc) const
Get the string starting at a particular location, to the end of the parsed source buffer.
FilterResult filter_scalar_squoted(csubstr scalar, substr dst)
filter a single-quoted scalar
ParseEngine(EventHandler *evt_handler, ParserOptions opts={})
FilterResult filter_scalar_dquoted(csubstr scalar, substr dst)
filter a double-quoted scalar
void parse_json_in_place_ev(csubstr filename, substr src)
parse JSON in place, emitting events to the current handler
Location val_location(const char *val) const
Given a pointer to a buffer position, get the location.
FilterResult filter_scalar_plain_in_place(substr scalar, size_t cap, size_t indentation)
filter a plain scalar in place
FilterResult filter_scalar_squoted_in_place(substr scalar, size_t cap)
filter a single-quoted scalar in place
FilterResultExtending filter_scalar_dquoted_in_place(substr scalar, size_t cap)
filter a double-quoted scalar in place
void parse_in_place_ev(csubstr filename, substr src)
parse YAML in place, emitting events to the current handler
FilterResult filter_scalar_block_literal_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
filter a block-literal scalar in place
FilterResult filter_scalar_block_literal(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
filter a block-literal scalar
FilterResult filter_scalar_block_folded_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
filter a block-folded scalar in place
ParseEngine & operator=(ParseEngine &&) noexcept
FilterResult filter_scalar_block_folded(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
filter a block-folded scalar
#define RYML_ERRMSG_SIZE
size for the error message buffer
#define RYML_LOCATIONS_SMALL_THRESHOLD
threshold at which a location search will revert from linear to binary search.
bool atou(csubstr str, T *v) noexcept
Convert a trimmed string to an unsigned integral value.
@ NOTYPE
no node type or style is set
bool read_hex(csubstr s, I *v) noexcept
read a hexadecimal integer from a string.
size_t to_chars(substr buf, uint8_t v) noexcept
substr decode_code_point(substr out, csubstr code_point)
decode the given code_point, writing into the output string in out.
RYML_ID_TYPE id_type
The type of a node id in the YAML tree; to override the default type, define the macro RYML_ID_TYPE t...
@ npos
a null string position
size_t _find_last_newline_and_larger_indentation(csubstr s, size_t indentation) noexcept
@ RTOP
reading at top level
@ BLCK
reading in block mode
@ RSET
the (implicit) map being read is a !!set.
@ RNXT
read next val or keyval
@ FLOW
reading is inside explicit flow chars: [] or {}
@ RUNK
reading unknown state (when starting): must determine whether scalar, map or seq
@ RKEY
reading a scalar as key
@ RKCL
reading the key colon (i.e. the : after the key in the map)
@ NDOC
no document mode. a document has ended and another has not started yet.
@ QSCL
stored scalar was quoted
@ USTY
reading in unknown style mode - must determine FLOW or BLCK reading an implicit map nested in an expl...
@ QMRK
reading an explicit key (? key)
@ SSCL
there's a stored scalar
@ RVAL
reading a scalar as val
int ParserFlag_t
data type for ParserState_e
#define _RYML_WITHOUT_TAB_TOKENS(...)
#define _ryml_relocate(s)
#define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without)
#define _RYML_WITH_TAB_TOKENS(...)
Options to give to the parser to control its behavior.
utilities for UTF and Byte Order Mark