1 #ifndef _C4_YML_PARSE_ENGINE_DEF_HPP_
2 #define _C4_YML_PARSE_ENGINE_DEF_HPP_
5 #include "c4/error.hpp"
12 #include "c4/yml/detail/parser_dbg.hpp"
15 #include "c4/yml/detail/print.hpp"
// Helper macros that select code depending on whether RYML_WITH_TAB_TOKENS
// is defined. When it is defined:
//   _RYML_WITH_TAB_TOKENS(...)                  expands to its arguments
//   _RYML_WITHOUT_TAB_TOKENS(...)               expands to nothing
//   _RYML_WITH_OR_WITHOUT_TAB_TOKENS(a, b)      expands to a
19 #if defined(RYML_WITH_TAB_TOKENS)
20 #define _RYML_WITH_TAB_TOKENS(...) __VA_ARGS__
21 #define _RYML_WITHOUT_TAB_TOKENS(...)
22 #define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) with
// Opposite definitions, used when tab tokens are disabled: the "with" macro
// expands to nothing and the "without" macro expands to its arguments.
// NOTE(review): the #else/#endif separating the two groups are not visible
// in this excerpt.
24 #define _RYML_WITH_TAB_TOKENS(...)
25 #define _RYML_WITHOUT_TAB_TOKENS(...) __VA_ARGS__
26 #define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) without
// Debug helper macro: prints a separator line followed by the line number
// and byte offset of the current parser state (m_evt_handler->m_curr->pos).
// It expects to be expanded inside a ParseEngine member, since it refers to
// m_evt_handler directly.
31 #define _c4dbgnextline() \
33 _c4dbgq("\n-----------"); \
34 _c4dbgt("handling line={}, offset={}B", \
35 m_evt_handler->m_curr->pos.line, \
36 m_evt_handler->m_curr->pos.offset); \
// Compiler-specific warning suppression for the code in this file. Each
// branch first pushes the current diagnostic state so that it can be
// restored later (the matching pop is not visible in this excerpt).
// MSVC branch: disables warnings 4296 and 4702.
41 # pragma warning(push)
42 # pragma warning(disable: 4296)
43 # pragma warning(disable: 4702)
// clang branch: silences type-limits, format-nonliteral and old-style-cast.
44 #elif defined(__clang__)
45 # pragma clang diagnostic push
46 # pragma clang diagnostic ignored "-Wtype-limits"
47 # pragma clang diagnostic ignored "-Wformat-nonliteral"
48 # pragma clang diagnostic ignored "-Wold-style-cast"
// GCC branch: same set as clang, plus duplicated-branches.
49 #elif defined(__GNUC__)
50 # pragma GCC diagnostic push
51 # pragma GCC diagnostic ignored "-Wtype-limits"
52 # pragma GCC diagnostic ignored "-Wformat-nonliteral"
53 # pragma GCC diagnostic ignored "-Wold-style-cast"
55 # pragma GCC diagnostic ignored "-Wduplicated-branches"
// Tests whether the string starts with a block token.
// Preconditions (asserted): s is not empty and its first character is one
// of '-', ':' or '?'.
// NOTE(review): the return expression is not visible in this excerpt.
64 C4_HOT C4_ALWAYS_INLINE
bool _is_blck_token(csubstr s) noexcept
66 RYML_ASSERT(s.len > 0);
67 RYML_ASSERT(s.str[0] ==
'-' || s.str[0] ==
':' || s.str[0] ==
'?');
// Tests whether s starts with the document-begin marker "---".
// Preconditions (asserted): s begins with '-' and has no trailing "\n" or
// "\r" (callers are expected to strip the line terminator first).
71 inline bool _is_doc_begin_token(csubstr s)
73 RYML_ASSERT(s.begins_with(
'-'));
74 RYML_ASSERT(!s.ends_with(
"\n"));
75 RYML_ASSERT(!s.ends_with(
"\r"));
// The first character is already known to be '-'; check the length and the
// next two characters.
// NOTE(review): the remainder of this return expression is not visible in
// this excerpt.
76 return (s.len >= 3 && s.str[1] ==
'-' && s.str[2] ==
'-')
// Tests whether s starts with the document-end marker "...".
// Preconditions (asserted): s begins with '.' and has no trailing "\n" or
// "\r" (callers are expected to strip the line terminator first).
80 inline bool _is_doc_end_token(csubstr s)
82 RYML_ASSERT(s.begins_with(
'.'));
83 RYML_ASSERT(!s.ends_with(
"\n"));
84 RYML_ASSERT(!s.ends_with(
"\r"));
// The first character is already known to be '.'; check the length and the
// next two characters.
// NOTE(review): the remainder of this return expression is not visible in
// this excerpt.
85 return (s.len >= 3 && s.str[1] ==
'.' && s.str[2] ==
'.')
// Tests whether s is a document marker, either "---" or "...".
// NOTE(review): the guards that select between the two returns below (and
// any length check protecting s.str[1] / s.str[2]) are not visible in this
// excerpt.
89 inline bool _is_doc_token(csubstr s) noexcept
// "-" followed by "--"
109 return (s.str[1] ==
'-' && s.str[2] ==
'-')
// "." followed by ".."
113 return (s.str[1] ==
'.' && s.str[2] ==
'.')
// Checks whether s begins with one of the literals "false", "true" or
// "null".
// NOTE(review): the statements executed inside each branch, and the value
// returned, are not visible in this excerpt; presumably the length of the
// matched literal is returned, and 0 when nothing matches -- confirm.
120 inline size_t _is_special_json_scalar(csubstr s)
126 if(s.len >= 5 && s.begins_with(
"false"))
130 if(s.len >= 4 && s.begins_with(
"true"))
134 if(s.len >= 4 && s.begins_with(
"null"))
// Tells whether a newline character and the character after it form a
// two-character line terminator.
// @param nl         the newline character that was found
// @param following  the character immediately after it
// @return 1 when the pair is "\n\r" or "\r\n", otherwise 0 (the boolean
//         result is converted to size_t so callers can add it to a
//         position).
144 C4_ALWAYS_INLINE
size_t _extend_from_combined_newline(
char nl,
char following)
146 return (nl ==
'\n' && following ==
'\r') || (nl ==
'\r' && following ==
'\n');
// Advances rem past its first line terminator.
// NOTE(review): the handling of the case where no newline is found, the
// body of the final `if`, and the return statement are not visible in this
// excerpt.
150 inline substr from_next_line(substr rem)
// locate the first '\r' or '\n'
152 size_t nlpos = rem.first_of(
"\r\n");
// remember which newline character it was, then drop everything up to and
// including it
155 const char nl = rem[nlpos];
156 rem = rem.right_of(nlpos);
// a "\r\n" or "\n\r" pair counts as a single line terminator
159 if(_extend_from_combined_newline(nl, rem.front()))
// Scans forward in r starting at index *i, which must point at a '\n'
// (asserted), and returns a count of newlines.
// @param r  the string being scanned
// @param i  in/out: the scan position; it is advanced by the loop
// @return the number of newlines counted (numnl_following)
// NOTE(review): the loop branch bodies (counter increment, loop exit) are
// not visible in this excerpt.
167 inline size_t _count_following_newlines(csubstr r,
size_t *C4_RESTRICT i)
169 RYML_ASSERT(r[*i] ==
'\n');
170 size_t numnl_following = 0;
172 for( ; *i < r.len; ++(*i))
// a newline character
174 if(r.str[*i] ==
'\n')
// whitespace that may sit between consecutive newlines
177 else if(r.str[*i] ==
' ' || r.str[*i] ==
'\t' || r.str[*i] ==
'\r')
182 return numnl_following;
// Overload of _count_following_newlines() that also takes an indentation.
// @param r            the string being scanned
// @param i            in/out: the scan position; must point at a '\n' on
//                     entry (asserted)
// @param indentation  number of leading characters considered after each
//                     newline (see `stop` below)
// @return the number of newlines counted (numnl_following)
// NOTE(review): the condition choosing between the two loops below, and the
// bodies of their branches, are not visible in this excerpt.
187 inline size_t _count_following_newlines(csubstr r,
size_t *C4_RESTRICT i,
size_t indentation)
189 RYML_ASSERT(r[*i] ==
'\n');
190 size_t numnl_following = 0;
// first loop: the same character classification as the two-argument overload
194 for( ; *i < r.len; ++(*i))
196 if(r.str[*i] ==
'\n')
199 else if(r.str[*i] ==
' ' || r.str[*i] ==
'\t' || r.str[*i] ==
'\r')
// second loop: after each newline, scan up to `indentation` characters
207 for( ; *i < r.len; ++(*i))
209 if(r.str[*i] ==
'\n')
// `stop` is the position `indentation` characters after the current one
213 size_t stop = *i + indentation;
214 for( ; *i < r.len; ++(*i))
// a character other than ' ' or '\r'
216 if(r.str[*i] !=
' ' && r.str[*i] !=
'\r')
218 RYML_ASSERT(*i < stop);
223 else if(r.str[*i] ==
' ' || r.str[*i] ==
'\t' || r.str[*i] ==
'\r')
229 return numnl_following;
// NOTE(review): the definition introduced by this first template header is
// not visible in this excerpt.
239 template<
class EventHandler>
// Constructor taking an event handler (the signature line and some
// initializers are not visible in this excerpt). It stores the handler
// pointer and starts with no pending anchors and an empty newline-offset
// cache (size and capacity 0).
246 template<
class EventHandler>
251 , m_evt_handler(evt_handler)
252 , m_pending_anchors()
254 , m_newline_offsets()
255 , m_newline_offsets_size(0)
256 , m_newline_offsets_capacity(0)
257 , m_newline_offsets_buf()
// the handler is mandatory: this check fails when evt_handler is null
259 RYML_CHECK(evt_handler);
// Constructor initializing every visible member directly from `that`,
// including the m_newline_offsets pointer itself (no new allocation is made
// in the visible lines).
// NOTE(review): the signature is not visible in this excerpt; this looks
// like the move constructor, in which case `that` must give up ownership of
// m_newline_offsets afterwards -- confirm against the full source.
262 template<
class EventHandler>
264 : m_options(that.m_options)
265 , m_file(that.m_file)
267 , m_evt_handler(that.m_evt_handler)
268 , m_pending_anchors(that.m_pending_anchors)
269 , m_pending_tags(that.m_pending_tags)
270 , m_newline_offsets(that.m_newline_offsets)
271 , m_newline_offsets_size(that.m_newline_offsets_size)
272 , m_newline_offsets_capacity(that.m_newline_offsets_capacity)
273 , m_newline_offsets_buf(that.m_newline_offsets_buf)
// Constructor that duplicates `that`, giving this object its own
// newline-offset array instead of sharing the pointer.
// NOTE(review): the signature is not visible in this excerpt; this looks
// like the copy constructor.
278 template<
class EventHandler>
280 : m_options(that.m_options)
281 , m_file(that.m_file)
283 , m_evt_handler(that.m_evt_handler)
284 , m_pending_anchors(that.m_pending_anchors)
285 , m_pending_tags(that.m_pending_tags)
286 , m_newline_offsets()
287 , m_newline_offsets_size()
288 , m_newline_offsets_capacity()
289 , m_newline_offsets_buf()
// only set up the array when the source has one
291 if(that.m_newline_offsets_capacity)
293 _resize_locations(that.m_newline_offsets_capacity);
// after resizing, the capacity must match that of the source
294 _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity == that.m_newline_offsets_capacity);
// copy only the entries in use (size), not the whole capacity
295 memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size *
sizeof(
size_t));
296 m_newline_offsets_size = that.m_newline_offsets_size;
// Assignment that takes over every member of `that` by plain assignment,
// including the m_newline_offsets pointer itself.
// NOTE(review): the signature, any self-assignment guard, and any
// release/clear calls are not visible in this excerpt; this looks like the
// move assignment operator.
300 template<
class EventHandler>
304 m_options = (that.m_options);
305 m_file = (that.m_file);
306 m_buf = (that.m_buf);
307 m_evt_handler = that.m_evt_handler;
308 m_pending_anchors = that.m_pending_anchors;
309 m_pending_tags = that.m_pending_tags;
310 m_newline_offsets = (that.m_newline_offsets);
311 m_newline_offsets_size = (that.m_newline_offsets_size);
312 m_newline_offsets_capacity = (that.m_newline_offsets_capacity);
313 m_newline_offsets_buf = (that.m_newline_offsets_buf);
// Assignment that duplicates `that`: scalar members are assigned, and the
// newline offsets are copied into this object's own array.
// NOTE(review): the signature and any self-assignment guard are not visible
// in this excerpt; this looks like the copy assignment operator.
318 template<
class EventHandler>
322 m_options = (that.m_options);
323 m_file = (that.m_file);
324 m_buf = (that.m_buf);
325 m_evt_handler = that.m_evt_handler;
326 m_pending_anchors = that.m_pending_anchors;
327 m_pending_tags = that.m_pending_tags;
// grow this object's array only when it is too small for the source
328 if(that.m_newline_offsets_capacity > m_newline_offsets_capacity)
329 _resize_locations(that.m_newline_offsets_capacity);
// the destination must be able to hold the source capacity and size
330 _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_capacity);
331 _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_size);
// copy only the entries in use
332 memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size *
sizeof(
size_t));
333 m_newline_offsets_size = that.m_newline_offsets_size;
334 m_newline_offsets_buf = that.m_newline_offsets_buf;
// Resets members to their value-initialized state. Nothing is released in
// the visible lines: the m_newline_offsets pointer is simply overwritten.
// NOTE(review): the signature and some assignments are not visible in this
// excerpt.
338 template<
class EventHandler>
345 m_pending_anchors = {};
347 m_newline_offsets = {};
348 m_newline_offsets_size = {};
349 m_newline_offsets_capacity = {};
350 m_newline_offsets_buf = {};
// Releases the newline-offset array, if there is one, through the event
// handler's callbacks, and resets the associated bookkeeping.
353 template<
class EventHandler>
354 void ParseEngine<EventHandler>::_free()
356 if(m_newline_offsets)
// the array was sized by capacity, so it is released with the capacity
358 _RYML_CB_FREE(m_evt_handler->m_stack.m_callbacks, m_newline_offsets,
size_t, m_newline_offsets_capacity);
359 m_newline_offsets =
nullptr;
360 m_newline_offsets_size = 0u;
361 m_newline_offsets_capacity = 0u;
362 m_newline_offsets_buf = 0u;
// Puts the engine back into its initial per-parse state: clears pending
// anchors, prepares location tracking when enabled in the options, and
// clears the m_was_inside_qmrk flag.
// NOTE(review): some statements are not visible in this excerpt.
369 template<
class EventHandler>
370 void ParseEngine<EventHandler>::_reset()
372 m_pending_anchors = {};
// location data is only prepared when the option is on
374 if(m_options.locations())
376 _prepare_locations();
378 m_was_inside_qmrk =
false;
// Rebases strings that point into prev_arena so that they point at the same
// offset inside next_arena. The local macro _ryml_relocate(s) does this for
// one string: when s lies within prev_arena, s.str is moved to
// next_arena.str plus its former offset. The two loops walk the pending
// tags and the pending anchors.
// NOTE(review): the loop bodies and any other strings passed to the macro
// are not visible in this excerpt.
384 template<
class EventHandler>
385 void ParseEngine<EventHandler>::_relocate_arena(csubstr prev_arena, substr next_arena)
387 #define _ryml_relocate(s) \
388 if(s.is_sub(prev_arena)) \
390 s.str = next_arena.str + (s.str - prev_arena.str); \
394 for(
size_t i = 0; i < m_pending_tags.num_entries; ++i)
396 for(
size_t i = 0; i < m_pending_anchors.num_entries; ++i)
398 #undef _ryml_relocate
// Adapter taking an untyped pointer to the engine, so that arena relocation
// can be requested through a generic void* interface.
// @param data        pointer to the ParseEngine instance
// @param prev_arena  the arena before relocation
// @param next_arena  the arena after relocation
401 template<
class EventHandler>
402 void ParseEngine<EventHandler>::_s_relocate_arena(
void* data, csubstr prev_arena, substr next_arena)
// recover the typed engine and forward to the member function
404 ((ParseEngine*)data)->_relocate_arena(prev_arena, next_arena);
// Writes a description of the current parse position through dumpfn:
// an optional "file:" prefix, "line:col: ", the current line contents
// (at most 80 characters, followed by "..." when truncated), a marker row
// positioned under the remaining part of the line, and the flags of the top
// parser state.
// @param dumpfn  callable that receives each formatted piece of text
// NOTE(review): several statements (conditions, loop bodies, the flag
// buffer declaration) are not visible in this excerpt.
410 template<
class EventHandler>
411 template<
class DumpFn>
412 void ParseEngine<EventHandler>::_fmt_msg(DumpFn &&dumpfn)
const
414 auto const *
const C4_RESTRICT st = m_evt_handler->m_curr;
415 auto const& lc = st->line_contents;
416 csubstr contents = lc.stripped;
// offs is the width of the "line:col: " prefix, used to align the marker
// row: 3 separator characters plus the two numbers. presumably
// to_chars(substr{}, x) returns the number of characters needed to print
// x -- TODO confirm
420 size_t offs = 3u +
to_chars(substr{}, st->pos.line) +
to_chars(substr{}, st->pos.col);
// file-name prefix; it widens the alignment offset by its length plus ':'
423 detail::_dump(dumpfn,
"{}:", m_file);
424 offs += m_file.len + 1;
426 detail::_dump(dumpfn,
"{}:{}: ", st->pos.line, st->pos.col);
// print at most 80 characters of the line, flagging truncation with "..."
427 csubstr maybe_full_content = (contents.len < 80u ? contents : contents.first(80u));
428 csubstr maybe_ellipsis = (contents.len < 80u ? csubstr{} : csubstr(
"..."));
429 detail::_dump(dumpfn,
"{}{} (size={})\n", maybe_full_content, maybe_ellipsis, contents.len);
// columns (0-based) covered by the unparsed remainder of the line
431 size_t firstcol = (size_t)(lc.rem.begin() - lc.full.begin());
432 size_t lastcol = firstcol + lc.rem.len;
// padding up to the first column of the remainder
433 for(
size_t i = 0; i < offs + firstcol; ++i)
// marker row under the remainder, capped at 80 characters
436 for(
size_t i = 1, e = (lc.rem.len < 80u ? lc.rem.len : 80u); i < e; ++i)
// columns are reported 1-based
438 detail::_dump(dumpfn,
"{} (cols {}-{})\n", maybe_ellipsis, firstcol+1, lastcol+1);
449 detail::_dump(dumpfn,
"top state: {}\n", detail::_parser_flags_to_str(flagbuf_, m_evt_handler->m_curr->flags));
// Reports a parse error: formats the message into a buffer, cancels the
// parse on the event handler, and invokes the error callback with the
// message, the current position and the user data.
// @param fmt   format string with "{}" placeholders
// @param args  values substituted into fmt
// NOTE(review): the declarations of errmsg and len are not visible in this
// excerpt.
457 template<
class EventHandler>
458 template<
class ...Args>
459 void ParseEngine<EventHandler>::_err(csubstr fmt, Args
const& C4_RESTRICT ...args)
const
// append each formatted piece to the message buffer
462 detail::_SubstrWriter writer(errmsg);
463 auto dumpfn = [&writer](csubstr s){ writer.append(s); };
464 detail::_dump(dumpfn, fmt, args...);
// the handler is told to cancel before the error callback is invoked
468 m_evt_handler->cancel_parse();
469 m_evt_handler->m_stack.m_callbacks.m_error(errmsg, len, m_evt_handler->m_curr->pos, m_evt_handler->m_stack.m_callbacks.m_user_data);
// Debug output: formats fmt with args and writes the result to stdout.
// @param fmt   format string with "{}" placeholders
// @param args  values substituted into fmt
475 template<
class EventHandler>
476 template<
class ...Args>
477 void ParseEngine<EventHandler>::_dbg(csubstr fmt, Args
const& C4_RESTRICT ...args)
const
// pieces with a null pointer are skipped rather than passed to fwrite
481 auto dumpfn = [](csubstr s){
if(s.str) fwrite(s.str, 1, s.len, stdout); };
482 detail::_dump(dumpfn, fmt, args...);
// Tells whether the whole buffer has been consumed, i.e. whether the
// current offset has reached or passed the end of m_buf.
// NOTE(review): the return statement is not visible in this excerpt.
491 template<
class EventHandler>
492 bool ParseEngine<EventHandler>::_finished_file()
const
494 bool ret = m_evt_handler->m_curr->pos.offset >= m_buf.len;
497 _c4dbgp(
"finished file!!!");
// Tells whether the current line has been fully consumed.
// @return true when nothing remains of the current line
//         (line_contents.rem is empty)
502 template<
class EventHandler>
503 C4_HOT C4_ALWAYS_INLINE
bool ParseEngine<EventHandler>::_finished_line()
const
505 return m_evt_handler->m_curr->line_contents.rem.empty();
// Skips leading whitespace in the remainder of the current line, if any,
// by advancing the line position.
// NOTE(review): the computation of `pos` (which characters count as
// whitespace, and the guard around the skip) is not visible in this
// excerpt.
511 template<
class EventHandler>
512 void ParseEngine<EventHandler>::_maybe_skip_whitespace_tokens()
514 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
520 _c4dbgpf(
"skip {} whitespace characters", pos);
521 _line_progressed(pos);
// Skips a run of the character c at the start of the remainder of the
// current line. Does nothing when the remainder is empty or starts with a
// different character.
// @param c  the character to skip
// NOTE(review): the handling of a remainder made up entirely of c is not
// visible in this excerpt.
525 template<
class EventHandler>
526 void ParseEngine<EventHandler>::_maybe_skipchars(
char c)
528 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
529 if(rem.len && rem.str[0] == c)
// index of the first character that differs from c = length of the run
531 size_t pos = rem.first_not_of(c);
534 _c4dbgpf(
"skip {}x'{}'", pos, c);
535 _line_progressed(pos);
// Variant of _maybe_skipchars() taking an upper bound on the number of
// characters to skip. Only compiled when
// RYML_NO_COVERAGE__TO_BE_DELETED is defined.
// @param c            the character to skip
// @param max_to_skip  bound compared against the length of the run
// NOTE(review): the statement executed when pos > max_to_skip is not
// visible in this excerpt; presumably pos is clamped to max_to_skip --
// confirm.
539 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
540 template<
class EventHandler>
541 void ParseEngine<EventHandler>::_maybe_skipchars_up_to(
char c,
size_t max_to_skip)
543 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
544 if(rem.len && rem.str[0] == c)
546 size_t pos = rem.first_not_of(c);
549 if(pos > max_to_skip)
551 _c4dbgpf(
"skip {}x'{}'", pos, c);
552 _line_progressed(pos);
// Skips the leading run of characters belonging to the set `chars` in the
// remainder of the current line. The remainder must start with one of those
// characters (asserted).
// @param chars  the set of characters to skip
// NOTE(review): the template parameter list declaring N and the condition
// guarding the fallback assignment to `pos` are not visible in this
// excerpt.
557 template<
class EventHandler>
559 void ParseEngine<EventHandler>::_skipchars(
const char (&chars)[N])
561 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.begins_with_any(chars));
562 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(chars);
// fallback: skip the whole remainder
564 pos = m_evt_handler->m_curr->line_contents.rem.len;
565 _c4dbgpf(
"skip {} characters", pos);
566 _line_progressed(pos);
// Consumes a comment running to the end of the current line. The remainder
// of the line must start with '#' (asserted). Unless the comment starts the
// line, the character before '#' must be a space or a tab; otherwise the
// error "comment not preceded by whitespace" is raised.
569 template<
class EventHandler>
570 void ParseEngine<EventHandler>::_skip_comment()
572 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.begins_with(
'#'));
573 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.is_sub(m_evt_handler->m_curr->line_contents.full));
574 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
575 csubstr full = m_evt_handler->m_curr->line_contents.full;
// the comment is not at the very start of the line
577 if(!full.begins_with(
'#'))
579 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.str > full.str);
// c is the character immediately before the '#'
580 const char c = full[(size_t)(rem.str - full.str - 1)];
581 if(C4_UNLIKELY(c !=
' ' && c !=
'\t'))
582 _RYML_CB_ERR(m_evt_handler->m_stack.m_callbacks,
"comment not preceded by whitespace");
// NOTE(review): presumably this assert belongs to the else branch (line
// starts with '#'); the branch structure is not visible in this excerpt.
586 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.str == full.str);
// the comment extends to the end of the line, so consume all of rem
588 _c4dbgpf(
"comment was '{}'", rem);
589 _line_progressed(rem.len);
// If the remainder of the current line, ignoring leading spaces, starts
// with '#', advances past those leading spaces.
// NOTE(review): what follows the advance (presumably a call that skips the
// comment itself) is not visible in this excerpt.
592 template<
class EventHandler>
593 void ParseEngine<EventHandler>::_maybe_skip_comment()
595 csubstr s = m_evt_handler->m_curr->line_contents.rem.triml(
' ');
596 if(s.begins_with(
'#'))
// the pointer difference is the number of spaces that were trimmed
598 _line_progressed((
size_t)(s.str - m_evt_handler->m_curr->line_contents.rem.str));
// Looks for a ':' next on the current line, after skipping any leading
// spaces and tabs.
// NOTE(review): the statements executed when the colon is found, and the
// return statements, are not visible in this excerpt.
603 template<
class EventHandler>
604 bool ParseEngine<EventHandler>::_maybe_scan_following_colon() noexcept
606 if(m_evt_handler->m_curr->line_contents.rem.len)
// skip leading spaces and tabs
608 if(m_evt_handler->m_curr->line_contents.rem.str[0] ==
' ' || m_evt_handler->m_curr->line_contents.rem.str[0] ==
'\t')
610 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(
" \t");
// fallback: the whole remainder is skipped (the guarding condition is not
// visible in this excerpt)
612 pos = m_evt_handler->m_curr->line_contents.rem.len;
613 _c4dbgpf(
"skip {}x'{}'", pos,
' ');
614 _line_progressed(pos);
// re-read the remainder, since it may have been advanced above
616 if(m_evt_handler->m_curr->line_contents.rem.len && (m_evt_handler->m_curr->line_contents.rem.str[0] ==
':'))
618 _c4dbgp(
"found ':' colon next");
// Looks for a ',' next on the current line, after skipping any leading
// spaces and tabs.
// NOTE(review): the statements executed when the comma is found, and the
// return statements, are not visible in this excerpt.
626 template<
class EventHandler>
627 bool ParseEngine<EventHandler>::_maybe_scan_following_comma() noexcept
629 if(m_evt_handler->m_curr->line_contents.rem.len)
// skip leading spaces and tabs
631 if(m_evt_handler->m_curr->line_contents.rem.str[0] ==
' ' || m_evt_handler->m_curr->line_contents.rem.str[0] ==
'\t')
633 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(
" \t");
// fallback: the whole remainder is skipped (the guarding condition is not
// visible in this excerpt)
635 pos = m_evt_handler->m_curr->line_contents.rem.len;
636 _c4dbgpf(
"skip {}x'{}'", pos,
' ');
637 _line_progressed(pos);
// re-read the remainder, since it may have been advanced above
639 if(m_evt_handler->m_curr->line_contents.rem.len && (m_evt_handler->m_curr->line_contents.rem.str[0] ==
','))
641 _c4dbgp(
"found ',' comma next");
// Scans an anchor at the current position. The remainder of the line must
// start with '&' (asserted). The anchor name is the text between the '&'
// and the first space; the '&', the name and any following spaces are
// consumed.
// NOTE(review): the return statement is not visible in this excerpt.
652 template<
class EventHandler>
653 csubstr ParseEngine<EventHandler>::_scan_anchor()
655 csubstr s = m_evt_handler->m_curr->line_contents.rem;
656 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'&'));
// the name starts at index 1, so it excludes the '&'
657 csubstr anchor = s.range(1, s.first_of(
' '));
// consume the '&' as well as the name
658 _line_progressed(1u + anchor.len);
659 _maybe_skipchars(
' ');
// Scans a reference ("*name") at the current position; the remainder of the
// line must start with '*' (asserted). The reference ends at the first of
// ',', ']', ' ' or ':'. The scanned text starts at index 0, so it includes
// the leading '*'.
// NOTE(review): the return statement is not visible in this excerpt.
663 template<
class EventHandler>
664 csubstr ParseEngine<EventHandler>::_scan_ref_seq()
666 csubstr s = m_evt_handler->m_curr->line_contents.rem;
667 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'*'));
668 csubstr ref = s.first(s.first_of(
",] :"));
669 _line_progressed(ref.len);
// Scans a reference ("*name") at the current position; the remainder of the
// line must start with '*' (asserted). The reference ends at the first of
// ',', '}' or ' '. The scanned text starts at index 0, so it includes the
// leading '*'.
// NOTE(review): the return statement is not visible in this excerpt.
673 template<
class EventHandler>
674 csubstr ParseEngine<EventHandler>::_scan_ref_map()
676 csubstr s = m_evt_handler->m_curr->line_contents.rem;
677 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'*'));
678 csubstr ref = s.first(s.first_of(
",} "));
679 _line_progressed(ref.len);
// Scans a tag at the current position. After leading spaces are ignored,
// the text must begin with '!' (asserted). The tag extent depends on its
// form: "!!...", "!<...>", "!h!..." (only when
// RYML_NO_COVERAGE__TO_BE_DELETED is defined) or plain "!...". The tag and
// any whitespace after it are then consumed.
// NOTE(review): the declaration of `t`, the conditions choosing between the
// " ," and ' ' terminator sets, and the return statement are not visible in
// this excerpt.
683 template<
class EventHandler>
684 csubstr ParseEngine<EventHandler>::_scan_tag()
686 csubstr rem = m_evt_handler->m_curr->line_contents.rem.triml(
' ');
687 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.begins_with(
'!'));
// form "!!name"
689 if(rem.begins_with(
"!!"))
691 _c4dbgp(
"begins with '!!'");
// tag ends at the first space or comma...
693 t = rem.left_of(rem.first_of(
" ,"));
// ...or at the first space only
695 t = rem.left_of(rem.first_of(
' '));
// form "!<...>": the tag extends through the closing '>' (the `true`
// argument includes that character)
697 else if(rem.begins_with(
"!<"))
699 _c4dbgp(
"begins with '!<'");
700 t = rem.left_of(rem.first_of(
'>'),
true);
702 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
// form "!h!name"
703 else if(rem.begins_with(
"!h!"))
705 _c4dbgp(
"begins with '!h!'");
706 t = rem.left_of(rem.first_of(
' '));
// remaining form: a single leading '!'
711 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.begins_with(
'!'));
712 _c4dbgp(
"begins with '!'");
714 t = rem.left_of(rem.first_of(
" ,"));
716 t = rem.left_of(rem.first_of(
' '));
// consume the tag, then any whitespace following it
718 _line_progressed(t.len);
719 _maybe_skip_whitespace_tokens();
// Decides whether s can be the start of a plain scalar in flow context.
// s must not be empty (asserted). Judging from the messages below, some
// leading tokens cause a "not a scalar" outcome, while certain sequences
// starting with ':' or '?' are reported as errors.
// NOTE(review): the conditions and return statements are not visible in
// this excerpt.
726 template<
class EventHandler>
727 bool ParseEngine<EventHandler>::_is_valid_start_scalar_plain_flow(csubstr s)
729 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.empty());
745 _c4dbgpf(
"not a scalar: found non-scalar token '{}'", _c4prc(s.str[0]));
759 _c4err(
"invalid token \":{}\"", _c4prc(s.str[1]));
766 _c4dbgpf(
"not a scalar: found non-scalar token '{}{}'", s.str[0], s.str[1]);
788 _c4dbgpf(
"not a scalar: found non-scalar token '?{}'", _c4prc(s.str[1]));
794 _c4err(
"invalid token \"?{}\"", _c4prc(s.str[1]));
// Scans a plain (unquoted) scalar while reading a value inside a flow
// sequence. The asserted state is: not RMAP, not BLCK, and RSEQ or
// RSEQIMAP, FLOW and RVAL set; the remainder of the line must not start
// with a space or a newline.
// @param sc  out: receives the scan result; needs_filter is written in the
//            visible lines
// NOTE(review): the character-classification conditions, the returns and
// the assignment of sc->scalar are not visible in this excerpt.
813 template<
class EventHandler>
814 bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_flow(ScannedScalar *C4_RESTRICT sc)
816 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RMAP));
817 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
BLCK));
818 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RSEQ|
RSEQIMAP));
819 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
FLOW));
820 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL));
822 substr s = m_evt_handler->m_curr->line_contents.rem;
823 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(
' '));
824 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(
'\n'));
// bail out early when the text cannot start a plain scalar
829 if(!_is_valid_start_scalar_plain_flow(s))
832 _c4dbgp(
"scanning seqflow scalar...");
// remember where the scalar started, to tell whether anything was scanned
834 const size_t start_offset = m_evt_handler->m_curr->pos.offset;
835 bool needs_filter =
false;
838 _c4dbgpf(
"scanning scalar: curr line=[{}]~~~{}~~~", s.len, s);
// examine the current line one character at a time
839 for(
size_t i = 0; i < s.len; ++i)
841 const char c = s.str[i];
845 _c4dbgpf(
"found terminating character at {}: '{}'", i, c);
// true when at least one character lies between the start and here
847 if(m_evt_handler->m_curr->pos.offset + i > start_offset)
853 _c4dbgp(
"at the beginning. no scalar here.");
858 _c4dbgpf(
"found terminating character at {}: '{}'", i, c);
863 _c4dbgp(
"found suspicious '#'");
866 _c4dbgpf(
"found terminating character at {}: '{}'", i, c);
872 _c4dbgp(
"found suspicious ':'");
// NOTE(review): reads s.str[i+1]; a guard ensuring i+1 < s.len is
// presumably present but is not visible in this excerpt.
875 const char next = s.str[i+1];
876 _c4dbgpf(
"next char is '{}'", _c4prc(next));
879 _c4dbgp(
"map starting!");
880 if(m_evt_handler->m_curr->pos.offset + i > start_offset)
882 _c4dbgp(
"scalar finished!");
888 _c4dbgp(
"at the beginning. no scalar here.");
894 _c4dbgp(
"it's a scalar indeed.");
// the ':' is the last character of the line
898 else if(s.len == i+1)
900 _c4dbgp(
"':' at line end. map starting!");
908 _c4err(
"invalid character: '{}'", c);
// the whole line was consumed without finding a terminator
913 _line_progressed(s.len);
914 if(!_finished_file())
916 _c4dbgp(
"next line!");
922 _c4dbgp(
"file finished!");
// continue scanning with the remainder of the (new) current line
925 s = m_evt_handler->m_curr->line_contents.rem;
932 sc->needs_filter = needs_filter;
934 _c4prscalar(
"scanned plain scalar", sc->scalar,
true);
// Scans a plain (unquoted) scalar while reading a key or value inside a
// flow map. The asserted state is: (not RSEQ, or RSEQIMAP), not BLCK, RMAP
// or RSEQIMAP, FLOW, and one of RKEY, RVAL or QMRK; the remainder of the
// line must not start with a space.
// @param sc  out: receives the scan result; needs_filter is written in the
//            visible lines
// NOTE(review): the character-classification conditions, the returns and
// the assignment of sc->scalar are not visible in this excerpt.
939 template<
class EventHandler>
940 bool ParseEngine<EventHandler>::_scan_scalar_plain_map_flow(ScannedScalar *C4_RESTRICT sc)
942 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQ) || has_any(
RSEQIMAP));
943 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
BLCK));
944 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RMAP|
RSEQIMAP));
945 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
FLOW));
946 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RVAL|
QMRK));
948 substr s = m_evt_handler->m_curr->line_contents.rem;
949 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(
' '));
// bail out early when the text cannot start a plain scalar
954 if(!_is_valid_start_scalar_plain_flow(s))
957 _c4dbgp(
"scanning scalar...");
959 const size_t start_offset = m_evt_handler->m_curr->pos.offset;
960 bool needs_filter =
false;
// examine the current line one character at a time
963 for(
size_t i = 0; i < s.len; ++i)
965 const char c = s.str[i];
971 _c4dbgpf(
"found terminating character: '{}'", c);
// true when the current character is the last one of the line, or is
// followed by ' ', ',' or '}' (or by a tab when tab tokens are enabled)
974 if(s.len == i+1 || s.str[i+1] ==
' ' || s.str[i+1] ==
',' || s.str[i+1] ==
'}' _RYML_WITH_TAB_TOKENS(|| s.str[i+1] ==
'\t'))
977 _c4dbgpf(
"found terminating character: '{}'", c);
984 _c4err(
"invalid character: '{}'", c);
991 _c4err(
"invalid character: '{}'", c);
997 _c4dbgpf(
"found terminating character: '{}'", c);
// the whole line was consumed without finding a terminator
1005 _c4dbgp(
"next line!");
1006 _line_progressed(s.len);
1007 if(!_finished_file())
1009 _c4dbgp(
"next line!");
1015 _c4dbgp(
"file finished!");
// continue with the (new) current line; the scalar is flagged as needing
// filtering from this point on
1018 s = m_evt_handler->m_curr->line_contents.rem;
1019 needs_filter =
true;
1025 sc->needs_filter = needs_filter;
1027 _c4dbgpf(
"scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
// Scans an unquoted scalar inside a flow sequence. The asserted state is:
// not RMAP, not BLCK, RSEQ and FLOW; the remainder of the line must not
// start with a space. The literals recognized by
// _is_special_json_scalar() are tried first; otherwise the line is scanned
// up to a terminating character. Scalars found here never need filtering.
// @param sc  out: receives the scalar and needs_filter = false
// NOTE(review): the branch conditions, the declaration of `i` and the
// return statements are not visible in this excerpt.
1032 template<
class EventHandler>
1033 bool ParseEngine<EventHandler>::_scan_scalar_seq_json(ScannedScalar *C4_RESTRICT sc)
1035 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RMAP));
1036 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
BLCK));
1037 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RSEQ));
1038 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
FLOW));
1040 substr s = m_evt_handler->m_curr->line_contents.rem;
1041 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(
' '));
1046 _c4dbgp(
"scanning scalar...");
1053 _c4dbgp(
"not a scalar.");
// fast path: one of the special literals
1058 const size_t len = _is_special_json_scalar(s);
1061 sc->scalar = s.first(len);
1062 sc->needs_filter =
false;
1063 _c4dbgpf(
"special json scalar: '{}'", sc->scalar);
1064 _line_progressed(len);
// general path: scan up to a terminating character
1071 for( ; i < s.len; ++i)
1073 const char c = s.str[i];
1080 _c4dbgpf(
"found terminating character: '{}'", c);
// at the start of the text, or right after a space
1083 if(!i || s.str[i-1] ==
' ')
1085 _c4dbgpf(
"found terminating character: '{}'", c);
// a non-empty scalar was found: consume it and report it
1096 if(C4_LIKELY(i > 0))
1098 _line_progressed(i);
1099 sc->scalar = s.first(i);
1100 sc->needs_filter =
false;
1101 _c4dbgpf(
"scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
// Scans an unquoted scalar inside a flow map. The asserted state is: not
// RSEQ, not BLCK, RMAP, FLOW, and RKEY or RVAL; the remainder of the line
// must not start with a space. The literals recognized by
// _is_special_json_scalar() are tried first; otherwise the line is scanned
// up to a terminating character. Scalars found here never need filtering.
// @param sc  out: receives the scalar and needs_filter = false
// NOTE(review): the branch conditions, the declaration of `i` and the
// return statements are not visible in this excerpt.
1108 template<
class EventHandler>
1109 bool ParseEngine<EventHandler>::_scan_scalar_map_json(ScannedScalar *C4_RESTRICT sc)
1111 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQ));
1112 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
BLCK));
1113 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RMAP));
1114 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
FLOW));
1115 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RVAL));
1117 substr s = m_evt_handler->m_curr->line_contents.rem;
1118 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(
' '));
1123 _c4dbgp(
"scanning scalar...");
// fast path: one of the special literals
1126 const size_t len = _is_special_json_scalar(s);
1129 sc->scalar = s.first(len);
1130 sc->needs_filter =
false;
1131 _c4dbgpf(
"special json scalar: '{}'", sc->scalar);
1132 _line_progressed(len);
// general path: scan up to a terminating character
1139 for( ; i < s.len; ++i)
1141 const char c = s.str[i];
1148 _c4dbgpf(
"found terminating character: '{}'", c);
// at the start of the text, or right after a space
1151 if(!i || s.str[i-1] ==
' ')
1153 _c4dbgpf(
"found terminating character: '{}'", c);
// a non-empty scalar was found: consume it and report it
1164 if(C4_LIKELY(i > 0))
1166 _line_progressed(i);
1167 sc->scalar = s.first(i);
1168 sc->needs_filter =
false;
1169 _c4dbgpf(
"scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
// Tells whether s is a document-begin marker in the current parser
// position.
// @param s  text starting with '-' (asserted)
// @return true only when the current line has zero indentation, the parser
//         is at the beginning of the line, and s passes
//         _is_doc_begin_token()
1176 template<
class EventHandler>
1177 bool ParseEngine<EventHandler>::_is_doc_begin(csubstr s)
1179 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s[0] ==
'-');
1180 return (m_evt_handler->m_curr->line_contents.indentation == 0u && _at_line_begin() && _is_doc_begin_token(s));
// Tells whether s is a document-end marker in the current parser position.
// @param s  text starting with '.' (asserted)
// @return true only when the current line has zero indentation, the parser
//         is at the beginning of the line, and s passes
//         _is_doc_end_token()
1183 template<
class EventHandler>
1184 bool ParseEngine<EventHandler>::_is_doc_end(csubstr s)
1186 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s[0] ==
'.');
1187 return (m_evt_handler->m_curr->line_contents.indentation == 0u && _at_line_begin() && _is_doc_end_token(s));
// Scans a plain (unquoted) scalar in block context, possibly spanning
// several lines. The asserted state is: not FLOW, not RSEQIMAP, and one of
// BLCK, RUNK or USTY; the remainder of the line must not start with a
// space.
// @param sc           out: receives the scalar (the buffer range from the
//                     start offset to the final offset, with trailing
//                     spaces, tabs and newlines trimmed) and needs_filter
// @param indentation  indentation that following lines are compared with
//                     to decide whether they continue the scalar
// NOTE(review): many conditions, the enclosing loop and the return
// statements are not visible in this excerpt.
1190 template<
class EventHandler>
1191 bool ParseEngine<EventHandler>::_scan_scalar_plain_blck(ScannedScalar *C4_RESTRICT sc,
size_t indentation)
1193 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
FLOW));
1194 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQIMAP));
1195 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
BLCK|
RUNK|
USTY));
1197 substr s = m_evt_handler->m_curr->line_contents.rem;
1198 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(
' '));
// preliminary checks: block tokens and document markers are examined
// before any scanning starts
1206 if(_is_blck_token(s))
1210 else if(_is_doc_begin(s))
1212 _c4dbgp(
"token is doc start");
1218 if(_is_blck_token(s))
1231 _c4dbgp(
"token is doc end");
1237 _c4dbgpf(
"plain scalar! indentation={}", indentation);
// remember where the scalar started (offset and line)
1239 const size_t start_offset = m_evt_handler->m_curr->pos.offset;
1240 const size_t start_line = m_evt_handler->m_curr->pos.line;
1242 bool needs_filter =
false;
1245 _c4dbgpf(
"plain scalar line: [{}]~~~{}~~~", s.len, s);
// examine the current line one character at a time
1246 for(
size_t i = 0; i < s.len; ++i)
1248 const char curr = s.str[i];
1253 _c4dbgpf(
"[{}]: got suspicious ':'", i);
1257 _c4dbgpf(
"followed by '{}'", i+1 == s.len ? csubstr(
"\\n") : _c4prc(s.str[i+1]));
1258 _line_progressed(i);
// reaching this point on the line where the scalar started ends the
// scalar; the error below is in a branch whose condition is not visible
1260 if(C4_LIKELY(m_evt_handler->m_curr->pos.line == start_line))
1262 _c4dbgp(
"start line. scalar ends here");
1267 _c4err(
"parse error");
// step over a run of consecutive ':' characters
1273 while(j + 1 < s.len && s.str[j+1] ==
':')
1275 _c4dbgp(
"skip colon");
1278 i = j > i ? j-1 : i;
1279 _c4dbgp(
"nothing to see here");
1283 _c4dbgp(
"got suspicious '#'");
// a '#' starts a comment only at the start of the text or after a space
// or tab
1284 if(!i || (s.str[i-1] ==
' ' || s.str[i-1] ==
'\t'))
1286 _c4dbgp(
"comment! scalar ends here");
1287 _line_progressed(i);
1292 _c4dbgp(
"nothing to see here");
// the whole line belongs to the scalar; peek at the next line to decide
// whether the scalar continues there
1297 _line_progressed(s.len);
1298 csubstr next_peeked = _peek_next_line(m_evt_handler->m_curr->pos.offset);
1299 next_peeked = next_peeked.trimr(
"\n\r");
// number of leading spaces on the next line
1300 const size_t next_indentation = next_peeked.first_not_of(
' ');
1301 _c4dbgpf(
"indentation curr={} next={}", indentation, next_indentation);
1302 if(next_indentation < indentation)
1304 _c4dbgp(
"smaller indentation! scalar ended");
// an unindented, non-empty next line may be a document marker
1307 else if(next_indentation == 0 && next_peeked.len > 0)
1309 const char first = next_peeked.str[0];
1313 next_peeked = next_peeked.trimr(
"\n\r");
1314 _c4dbgpf(
"doc begin? peeked=[{}]~~~{}{}~~~", next_peeked.len, next_peeked.len >= 3 ? next_peeked.first(3) : next_peeked, next_peeked.len > 3 ?
"..." :
"");
1315 if(_is_doc_begin_token(next_peeked))
1317 _c4dbgp(
"doc begin! scalar ended");
1322 next_peeked = next_peeked.trimr(
"\n\r");
1323 _c4dbgpf(
"doc end? peeked=[{}]~~~{}{}~~~", next_peeked.len, next_peeked.len >= 3 ? next_peeked.first(3) : next_peeked, next_peeked.len > 3 ?
"..." :
"");
1324 if(_is_doc_end_token(next_peeked))
1326 _c4dbgp(
"doc end! scalar ended");
// the scalar continues on the next line
1333 _c4dbgp(
"next line!");
1334 if(!_finished_file())
1336 _c4dbgp(
"next line!");
1342 _c4dbgp(
"file finished!");
// a scalar spanning more than one line is flagged as needing filtering
1345 s = m_evt_handler->m_curr->line_contents.rem;
1346 needs_filter =
true;
// the scalar is everything scanned since start_offset, minus trailing
// whitespace and newlines
1351 sc->scalar = m_buf.range(start_offset, m_evt_handler->m_curr->pos.offset).trimr(
" \n\r\t");
1352 sc->needs_filter = needs_filter;
1354 _c4dbgpf(
"scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
// Scans a plain scalar that is a value in a block sequence. The asserted
// state is: not RMAP, not FLOW, not RSEQIMAP, and RSEQ, BLCK and RVAL set.
// @param sc  out: receives the scan result
// @return the result of _scan_scalar_plain_blck(), called with an
//         indentation one greater than the current state's reference
//         indentation (indref)
1359 template<
class EventHandler>
1360 bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_blck(ScannedScalar *C4_RESTRICT sc)
1362 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RMAP));
1363 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
FLOW));
1364 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQIMAP));
1365 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RSEQ));
1366 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
BLCK));
1367 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL));
1368 return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref + 1u);
// Scans a plain scalar that is a key or value in a block map. The asserted
// state is: not RSEQ, not FLOW, RMAP and BLCK set, and one of RKEY, RVAL or
// QMRK.
// @param sc  out: receives the scan result
// @return the result of _scan_scalar_plain_blck(), called with an
//         indentation one greater than the current state's reference
//         indentation (indref)
1371 template<
class EventHandler>
1372 bool ParseEngine<EventHandler>::_scan_scalar_plain_map_blck(ScannedScalar *C4_RESTRICT sc)
1374 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQ));
1375 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
FLOW));
1376 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RMAP));
1377 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
BLCK));
1378 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RVAL|
QMRK));
1379 return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref + 1u);
// Scans a plain scalar while the parser state has RUNK or USTY set
// (asserted).
// @param sc  out: receives the scan result
// @return the result of _scan_scalar_plain_blck(), called with the current
//         state's reference indentation (indref) unchanged -- unlike the
//         seq/map block variants, which pass indref + 1
1382 template<
class EventHandler>
1383 bool ParseEngine<EventHandler>::_scan_scalar_plain_unk(ScannedScalar *C4_RESTRICT sc)
1385 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RUNK|
USTY));
1386 return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref);
// Returns the line following the one that contains offset `pos`, without
// changing the parser position (the function is const).
// @param pos  offset into m_buf; npos means "use the current offset"
// NOTE(review): the declarations of rem and nlpos, the guards around the
// newline handling, and the return statements are not visible in this
// excerpt.
1392 template<
class EventHandler>
1393 substr ParseEngine<EventHandler>::_peek_next_line(
size_t pos)
const
// default to the current parse offset
1397 pos = pos ==
npos ? m_evt_handler->m_curr->pos.offset : pos;
// at or past the end of the buffer
1398 if(pos >= m_buf.len)
// skip to the start of the next line
1402 rem = from_next_line(m_buf.sub(pos));
// find where that line ends
1407 nlpos = rem.first_of(
"\r\n");
// treat "\r\n" / "\n\r" as one terminator. this reads rem[nlpos+1];
// presumably guarded by a bounds check not visible here -- confirm
1409 nlpos += _extend_from_combined_newline(rem[nlpos], rem[nlpos+1]);
// keep the line including the character at nlpos (the `true` argument)
1410 rem = rem.left_of(nlpos,
true);
1412 _c4dbgpf(
"peek next line @ {}: (len={})'{}'", pos, rem.len, rem.trimr(
"\r\n"));
1416 _c4dbgpf(
"peek next line @ {}: (len=0)''", pos);
// Loads the line at the current offset into the current state's
// line_contents. When the offset is at or past the end of the buffer, the
// line contents are reset to an empty range at the end of the buffer.
1422 template<
class EventHandler>
1423 void ParseEngine<EventHandler>::_scan_line()
// usual case: there is still input left
1425 if(C4_LIKELY(m_evt_handler->m_curr->pos.offset < m_buf.len))
1426 m_evt_handler->m_curr->line_contents.reset_with_next_line(m_buf, m_evt_handler->m_curr->pos.offset);
// otherwise: m_buf.last(0) is a zero-length range at the end of the buffer
1428 m_evt_handler->m_curr->line_contents.reset(m_buf.last(0), m_buf.last(0));
// Advances the parse position within the current line by `ahead`
// characters: offset and column are increased, and the remainder of the
// line is shortened accordingly.
// @param ahead  number of characters consumed
1431 template<
class EventHandler>
1432 void ParseEngine<EventHandler>::_line_progressed(
size_t ahead)
1434 _c4dbgpf(
"line[{}] ({} cols) progressed by {}: col {}-->{} offset {}-->{}", m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->line_contents.full.len, ahead, m_evt_handler->m_curr->pos.col, m_evt_handler->m_curr->pos.col+ahead, m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset+ahead);
1435 m_evt_handler->m_curr->pos.offset += ahead;
1436 m_evt_handler->m_curr->pos.col += ahead;
// columns are 1-based, so the column may be at most one past the stripped
// line length
1437 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col <= m_evt_handler->m_curr->line_contents.stripped.len+1);
1438 m_evt_handler->m_curr->line_contents.rem = m_evt_handler->m_curr->line_contents.rem.sub(ahead);
// Moves the position to the start of the next line. The current line must
// have been fully consumed (asserted: column == stripped length + 1). The
// offset is advanced by the difference between the full and the stripped
// line lengths, the line number is incremented, and the column is reset
// to 1.
1441 template<
class EventHandler>
1442 void ParseEngine<EventHandler>::_line_ended()
1444 _c4dbgpf(
"line[{}] ({} cols) ended! offset {}-->{} / col {}-->{}",
1445 m_evt_handler->m_curr->pos.line,
1446 m_evt_handler->m_curr->line_contents.full.len,
1447 m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset + m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.stripped.len,
1448 m_evt_handler->m_curr->pos.col, 1);
1449 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col == m_evt_handler->m_curr->line_contents.stripped.len + 1);
// full.len - stripped.len is the number of characters that were stripped
// from the line (presumably the line terminator -- confirm)
1450 m_evt_handler->m_curr->pos.offset += m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.stripped.len;
1451 ++m_evt_handler->m_curr->pos.line;
1452 m_evt_handler->m_curr->pos.col = 1;
// Reverts the effect of _line_ended(): moves the position back to the end
// of the previous line. Requires (asserted) that the column is 1, that the
// line number is greater than 0, and that the offset is at least as large
// as the amount about to be subtracted.
1455 template<
class EventHandler>
1456 void ParseEngine<EventHandler>::_line_ended_undo()
1458 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col == 1u);
1459 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.line > 0u);
1460 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.offset >= m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.stripped.len);
// the same amount that _line_ended() added to the offset
1461 const size_t delta = m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.stripped.len;
1462 _c4dbgpf(
"line[{}] undo ended! line {}-->{}, offset {}-->{}", m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->pos.line - 1, m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset - delta);
1463 m_evt_handler->m_curr->pos.offset -= delta;
1464 --m_evt_handler->m_curr->pos.line;
// back at the end of the line: one past the stripped length
1465 m_evt_handler->m_curr->pos.col = m_evt_handler->m_curr->line_contents.stripped.len + 1u;
// the remainder becomes an empty range located at the restored offset
1468 m_evt_handler->m_curr->line_contents.rem = m_buf.sub(m_evt_handler->m_curr->pos.offset, 0);
// Stores the given value as the reference indentation (indref) of the
// current state.
// @param indentation  the indentation to store
1473 template<
class EventHandler>
1474 void ParseEngine<EventHandler>::_set_indentation(
size_t indentation)
1476 m_evt_handler->m_curr->indref = indentation;
1477 _c4dbgpf(
"state[{}]: saving indentation: {}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
// Stores the current column of the line being parsed as the reference
// indentation (indref) of the current state. The remainder of the line must
// not start before the full line (asserted).
1480 template<
class EventHandler>
1481 void ParseEngine<EventHandler>::_save_indentation()
1483 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.begin() >= m_evt_handler->m_curr->line_contents.full.begin());
1484 m_evt_handler->m_curr->indref = m_evt_handler->m_curr->line_contents.current_col();
1485 _c4dbgpf(
"state[{}]: saving indentation: {}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
// Ends a block map. Before signalling end_map() to the event handler, any
// entry left incomplete is filled in with empty plain scalars: a missing
// value, or (in the QMRK state) a missing key and value.
// NOTE(review): the condition of the first branch is not visible in this
// excerpt.
1491 template<
class EventHandler>
1492 void ParseEngine<EventHandler>::_end_map_blck()
1494 _c4dbgp(
"mapblck: end");
// a key was given without a value: emit an empty value
1497 _c4dbgp(
"mapblck: set missing val");
1498 _handle_annotations_before_blck_val_scalar();
1499 m_evt_handler->set_val_scalar_plain({});
// in the QMRK state: emit both an empty key and an empty value
1501 else if(has_any(
QMRK))
1503 _c4dbgp(
"mapblck: set missing keyval");
1504 _handle_annotations_before_blck_key_scalar();
1505 m_evt_handler->set_key_scalar_plain({});
1506 _handle_annotations_before_blck_val_scalar();
1507 m_evt_handler->set_val_scalar_plain({});
1509 m_evt_handler->end_map();
// Ends a block sequence. A missing val is completed with an empty plain
// scalar (after handling pending annotations) before end_seq() is emitted.
1512 template<
class EventHandler>
1513 void ParseEngine<EventHandler>::_end_seq_blck()
1517 _c4dbgp(
"seqblck: set missing val");
1518 _handle_annotations_before_blck_val_scalar();
1519 m_evt_handler->set_val_scalar_plain({});
1521 m_evt_handler->end_seq();
// Ends the current map state. Requires the RMAP flag to be set; the visible
// tail asserts a non-FLOW state with USTY set and then pops the state from
// the event handler.
1524 template<
class EventHandler>
1525 void ParseEngine<EventHandler>::_end2_map()
1527 _c4dbgp(
"map: end");
1528 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RMAP));
1535 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
FLOW));
1536 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
USTY));
1537 m_evt_handler->_pop();
// Ends the current sequence state. Requires the RSEQ flag to be set; the
// visible tail asserts a non-FLOW state with USTY set and then pops the state
// from the event handler.
1541 template<
class EventHandler>
1542 void ParseEngine<EventHandler>::_end2_seq()
1544 _c4dbgp(
"seq: end");
1545 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RSEQ));
1552 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
FLOW));
1553 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
USTY));
1554 m_evt_handler->_pop();
// Begins a document through the event handler and resets the reference
// indentation of the (new) current state to 0.
1558 template<
class EventHandler>
1559 void ParseEngine<EventHandler>::_begin2_doc()
1563 m_evt_handler->begin_doc();
1564 m_evt_handler->m_curr->indref = 0;
// Begins an explicit document (begin_doc_expl) through the event handler and
// resets the reference indentation of the (new) current state to 0.
1567 template<
class EventHandler>
1568 void ParseEngine<EventHandler>::_begin2_doc_expl()
1572 m_evt_handler->begin_doc_expl();
1573 m_evt_handler->m_curr->indref = 0;
// Ends the current document. Requires the RDOC flag. An empty document gets
// an empty plain val scalar before end_doc() is emitted.
1576 template<
class EventHandler>
1577 void ParseEngine<EventHandler>::_end2_doc()
1579 _c4dbgp(
"doc: end");
1580 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RDOC));
1583 _c4dbgp(
"doc was empty; add empty val");
1584 m_evt_handler->set_val_scalar_plain({});
1586 m_evt_handler->end_doc();
// Ends the current document with an explicit end (end_doc_expl). A document
// without children gets an empty plain val scalar first.
1589 template<
class EventHandler>
1590 void ParseEngine<EventHandler>::_end2_doc_expl()
1592 _c4dbgp(
"doc: end");
1595 _c4dbgp(
"doc: no children; add empty val");
1596 m_evt_handler->set_val_scalar_plain({});
1598 m_evt_handler->end_doc_expl();
// Presumably begins a document only when one still has to be started (see the
// trace message) -- NOTE(review): confirm the triggering condition.
1601 template<
class EventHandler>
1602 void ParseEngine<EventHandler>::_maybe_begin_doc()
1606 _c4dbgp(
"doc must be started");
// Presumably ends the document only when one still has to be finished (see
// the trace message) -- NOTE(review): confirm the triggering condition.
1610 template<
class EventHandler>
1611 void ParseEngine<EventHandler>::_maybe_end_doc()
1615 _c4dbgp(
"doc must be finished");
// Pops the state stack back to the state that carries the RDOC flag. The
// document state is looked up at stack[0] (root is the document) or at
// stack[1] (root is the stream); anything else is reported as an internal
// error. On return the current state has RDOC set.
1620 template<
class EventHandler>
1621 void ParseEngine<EventHandler>::_end_doc_suddenly__pop()
1623 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
1624 if(m_evt_handler->m_stack[0].flags &
RDOC)
1626 _c4dbgp(
"root is RDOC");
// pop only if we are deeper than the document state
1627 if(m_evt_handler->m_curr->level != 0)
1628 _handle_indentation_pop(&m_evt_handler->m_stack[0]);
1630 else if((m_evt_handler->m_stack.size() > 1) && (m_evt_handler->m_stack[1].flags &
RDOC))
1632 _c4dbgp(
"root is STREAM");
// pop only if we are deeper than the document state
1633 if(m_evt_handler->m_curr->level != 1)
1634 _handle_indentation_pop(&m_evt_handler->m_stack[1]);
1638 _c4err(
"internal error");
1640 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RDOC));
// Handles a document end found while nested states are still open: first
// unwinds the stack to the document state via _end_doc_suddenly__pop().
1643 template<
class EventHandler>
1644 void ParseEngine<EventHandler>::_end_doc_suddenly()
1646 _c4dbgp(
"end doc suddenly");
1647 _end_doc_suddenly__pop();
// Handles a document start found while nested states are still open: first
// unwinds the stack to the document state via _end_doc_suddenly__pop().
1652 template<
class EventHandler>
1653 void ParseEngine<EventHandler>::_start_doc_suddenly()
1655 _c4dbgp(
"start doc suddenly");
1656 _end_doc_suddenly__pop();
// Finishes the stream. Unterminated flow containers are reported as errors,
// any nested states are popped down to the bottom of the stack, and pending
// anchors/tags that were never attached to a node are emitted on an empty
// plain scalar inside a document of their own. Finally end_stream() is
// emitted.
1661 template<
class EventHandler>
1662 void ParseEngine<EventHandler>::_end_stream()
1664 _c4dbgpf(
"end_stream, level={} node_id={}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->node_id);
1666 _c4err(
"missing terminating ]");
1668 _c4err(
"missing terminating }");
// unwind every state above the first one on the stack
1669 if(m_evt_handler->m_stack.size() > 1)
1670 _handle_indentation_pop(m_evt_handler->m_stack.begin());
// leftover annotations: give them an empty val in a dedicated document
1677 if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
1681 m_evt_handler->begin_doc();
1682 _handle_annotations_before_blck_val_scalar();
1683 m_evt_handler->set_val_scalar_plain({});
1684 m_evt_handler->end_doc();
1688 m_evt_handler->end_stream();
// Pops states until the current state is popto, closing each sequence or map
// that is left on the way.
// popto: the state in the handler's stack that must become current.
1692 template<
class EventHandler>
1693 void ParseEngine<EventHandler>::_handle_indentation_pop(ParserState
const* popto)
1695 _c4dbgpf(
"popping {} level{}: from level {}(@ind={}) to level {}(@ind={})", m_evt_handler->m_curr->level - popto->level, (((m_evt_handler->m_curr->level - popto->level) > 1) ?
"s" :
""), m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, popto->level, popto->indref);
// m_curr changes as states are closed; stop once it is the target state
1696 while(m_evt_handler->m_curr != popto)
1700 _c4dbgpf(
"popping seq at level {} (indentation={},addr={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, m_evt_handler->m_curr);
1703 else if(has_any(
RMAP))
1705 _c4dbgpf(
"popping map at level {} (indentation={},addr={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, m_evt_handler->m_curr);
1713 _c4dbgpf(
"current level is {} (indentation={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
// Called from a block sequence when the indentation of the current line
// decreased: searches the stack, from the parent of the current state
// downwards, for a state whose reference indentation equals the indentation
// of the current line, and pops to it. Reports a parse error when no valid
// target state is found.
1716 template<
class EventHandler>
1717 void ParseEngine<EventHandler>::_handle_indentation_pop_from_block_seq()
1720 using state_type =
typename EventHandler::state;
1721 state_type
const* popto =
nullptr;
1722 auto &stack = m_evt_handler->m_stack;
// the pointer walk below requires contiguous storage and m_curr inside it
1723 _RYML_CB_ASSERT(stack.m_callbacks, stack.is_contiguous());
1724 _RYML_CB_ASSERT(stack.m_callbacks, m_evt_handler->m_curr >= stack.begin() && m_evt_handler->m_curr < stack.end());
1725 const size_t ind = m_evt_handler->m_curr->line_contents.indentation;
// debug dump of the whole stack
1730 for(state_type
const& s : stack)
1731 _dbg_printf(
"state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(flagbuf_, s.flags));
// NOTE(review): with `s >= stack.begin()` as the condition, a search that
// finds nothing decrements s to one before the first element.
1734 for(state_type
const* s = m_evt_handler->m_curr-1; s >= stack.begin(); --s)
1736 _c4dbgpf(
"searching for state with indentation {}. curr={} (level={},node={})", ind, s->indref, s->level, s->node_id);
1737 if(s->indref == ind)
1739 _c4dbgpf(
"gotit!!! level={} node={}", s->level, s->node_id);
// the target must exist and must be strictly below the current state
1744 if(!popto || popto >= m_evt_handler->m_curr || popto->level >= m_evt_handler->m_curr->level)
1746 _c4err(
"parse error: incorrect indentation?");
1748 _handle_indentation_pop(popto);
// Called from a block map when the indentation of the current line
// decreased: searches the stack, from the parent of the current state
// downwards (the first stack entry is not visited), for the state to pop to.
// A state with the same indentation gets special treatment to detect a
// parent that is a sequence written without extra indentation (the line
// starts with a '-' block token). Reports a parse error when no valid target
// state is found.
1751 template<
class EventHandler>
1752 void ParseEngine<EventHandler>::_handle_indentation_pop_from_block_map()
1755 using state_type =
typename EventHandler::state;
1756 auto &stack = m_evt_handler->m_stack;
// the pointer walk below requires contiguous storage and m_curr inside it
1757 _RYML_CB_ASSERT(stack.m_callbacks, stack.is_contiguous());
1758 _RYML_CB_ASSERT(stack.m_callbacks, m_evt_handler->m_curr >= stack.begin() && m_evt_handler->m_curr < stack.end());
1759 const size_t ind = m_evt_handler->m_curr->line_contents.indentation;
1760 state_type
const* popto =
nullptr;
// debug dump of the whole stack
1765 for(state_type
const& s : stack)
1766 _dbg_printf(
"state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(flagbuf_, s.flags));
1769 for(state_type
const* s = m_evt_handler->m_curr-1; s > stack.begin(); --s)
1771 _c4dbgpf(
"searching for state with indentation {}. current: ind={},level={},node={},flags={}", ind, s->indref, s->level, s->node_id, detail::_parser_flags_to_str(flagbuf_, s->flags));
1776 else if(s->indref == ind)
1778 _c4dbgpf(
"same indentation!!! level={} node={}", s->level, s->node_id);
1779 if(popto && has_any(
RTOP, s) && has_none(
RMAP|
RSEQ, s))
// look at what follows the indentation on the current line
1786 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
1787 const size_t first = rem.first_not_of(
' ');
1788 _RYML_CB_ASSERT(stack.m_callbacks, first == ind || first ==
npos);
1789 rem = rem.right_of(first,
true);
1790 _c4dbgpf(
"indentless? rem='{}' first={}", rem, first);
1791 if(rem.begins_with(
'-') && _is_blck_token(rem))
1793 _c4dbgp(
"parent was indentless seq");
// the target must exist and must be strictly below the current state
1799 if(!popto || popto >= m_evt_handler->m_curr || popto->level >= m_evt_handler->m_curr->level)
1801 _c4err(
"parse error: incorrect indentation?");
1803 _handle_indentation_pop(popto);
// Scans a single-quoted scalar starting at the current position, possibly
// spanning several lines, and advances the parser past the closing quote.
// Returns the scanned range of the buffer together with a flag telling
// whether the scalar still needs to be filtered.
// Errors out when the end of the file is reached before the closing quote.
1808 template<
class EventHandler>
1809 typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_scan_scalar_squot()
// b: buffer offset where the scan starts; s: the buffer from there on
1815 size_t b = m_evt_handler->m_curr->pos.offset;
1816 substr s = m_buf.sub(b);
// leading spaces before the opening quote are consumed first
1817 if(s.begins_with(
' '))
1820 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.sub(b).is_super(s));
1821 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begin() >= m_buf.sub(b).begin());
1822 _line_progressed((
size_t)(s.begin() - m_buf.sub(b).begin()));
1824 b = m_evt_handler->m_curr->pos.offset;
1825 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'\''));
// step over the opening quote
1828 _line_progressed(1);
1831 bool needs_filter =
false;
1833 size_t numlines = 1;
// scan line by line until the closing quote is found or the file ends
1835 while( ! _finished_file())
1837 const csubstr line = m_evt_handler->m_curr->line_contents.rem;
1838 bool line_is_blank =
true;
1839 _c4dbgpf(
"scanning single quoted scalar @ line[{}]: ~~~{}~~~", m_evt_handler->m_curr->pos.line, line);
1840 for(
size_t i = 0; i < line.len; ++i)
1842 const char curr = line.str[i];
// '~' stands in for the next character when curr is the last one of the line
1845 const char next = i+1 < line.len ? line.str[i+1] :
'~';
1853 needs_filter =
true;
1857 else if(curr !=
' ')
1859 line_is_blank =
false;
1864 needs_filter = needs_filter
1867 || (_at_line_begin() && line.begins_with(
' '));
// no closing quote on this line: consume it entirely
1871 _line_progressed(line.len);
// closing quote found at pos within the remainder of the line
1876 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, pos >= 0 && pos < m_buf.len);
1877 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf[m_evt_handler->m_curr->pos.offset + pos] ==
'\'');
1878 _line_progressed(pos + 1);
// re-express pos relative to b
1879 pos = m_evt_handler->m_curr->pos.offset - b - 1;
1889 _c4err(
"reached end of file while looking for closing quote");
1893 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, pos > 0);
1894 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.end() >= m_buf.begin() && s.end() <= m_buf.end());
1895 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.end() == m_buf.end() || *s.end() ==
'\'');
1896 s = s.sub(0, pos-1);
1899 _c4prscalar(
"scanned squoted scalar", s,
true);
1901 return ScannedScalar { s, needs_filter };
// Scans a double-quoted scalar starting at the current position, possibly
// spanning several lines, and advances the parser past the closing quote.
// Returns the scanned range of the buffer together with a flag telling
// whether the scalar still needs to be filtered.
// Errors out when the end of the file is reached before the closing quote.
1906 template<
class EventHandler>
1907 typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_scan_scalar_dquot()
// b: buffer offset where the scan starts; s: the buffer from there on
1913 size_t b = m_evt_handler->m_curr->pos.offset;
1914 substr s = m_buf.sub(b);
// leading spaces before the opening quote are consumed first
1915 if(s.begins_with(
' '))
1918 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.sub(b).is_super(s));
1919 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begin() >= m_buf.sub(b).begin());
1920 _line_progressed((
size_t)(s.begin() - m_buf.sub(b).begin()));
1922 b = m_evt_handler->m_curr->pos.offset;
1923 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'"'));
// step over the opening quote
1926 _line_progressed(1);
1929 bool needs_filter =
false;
1931 size_t numlines = 1;
// scan line by line until the closing quote is found or the file ends
1933 while( ! _finished_file())
1935 const csubstr line = m_evt_handler->m_curr->line_contents.rem;
1936 bool line_is_blank =
true;
1937 _c4dbgpf(
"scanning double quoted scalar @ line[{}]: line='{}'", m_evt_handler->m_curr->pos.line, line);
1938 for(
size_t i = 0; i < line.len; ++i)
1940 const char curr = line.str[i];
1942 line_is_blank =
false;
// '~' stands in for the next character when curr is the last one of the line
1946 const char next = i+1 < line.len ? line.str[i+1] :
'~';
1947 needs_filter =
true;
1948 if(next ==
'"' || next ==
'\\')
1951 else if(curr ==
'"')
1959 needs_filter = needs_filter
1962 || (_at_line_begin() && line.begins_with(
' '));
// no closing quote on this line: consume it entirely
1966 _line_progressed(line.len);
// closing quote found at pos within the remainder of the line
1971 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, pos >= 0 && pos < m_buf.len);
1972 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf[m_evt_handler->m_curr->pos.offset + pos] ==
'"');
1973 _line_progressed(pos + 1);
// re-express pos relative to b
1974 pos = m_evt_handler->m_curr->pos.offset - b - 1;
1984 _c4err(
"reached end of file looking for closing quote");
1988 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, pos > 0);
// NOTE(review): this assertion reads *s.end() before the next one checks that
// s.end() lies within the buffer; the single-quote scanner checks in the
// opposite order.
1989 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.end() == m_buf.end() || *s.end() ==
'"');
1990 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.end() >= m_buf.begin() && s.end() <= m_buf.end());
1991 s = s.sub(0, pos-1);
1994 _c4prscalar(
"scanned dquoted scalar", s,
true);
1996 return ScannedScalar { s, needs_filter };
// Scans a block scalar ('|' literal or '>' folded) starting at the current
// position.
// sb:     receives the raw (unfiltered) block and the indentation that was
//         determined for it.
// indref: reference indentation of the enclosing node; must not be npos.
// The scan has three stages:
//  1. parse the header after the style character: an optional chomping
//     indicator ('-' or '+') and an optional single-digit indentation
//     indicator, which is relative to the indref of the current state;
//  2. consume the following lines while they belong to the block. When no
//     indentation was given in the header, it is deduced from the first
//     non-empty line, with a provisional value tracked across leading empty
//     lines;
//  3. fall back to the provisional indentation if none was fixed, and store
//     the result in sb.
2001 template<
class EventHandler>
2002 void ParseEngine<EventHandler>::_scan_block(ScannedBlock *C4_RESTRICT sb,
size_t indref)
2004 _c4dbgpf(
"blck: indref={}", indref);
2005 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, indref !=
npos);
// --- stage 1: header ---
2008 csubstr s = m_evt_handler->m_curr->line_contents.rem;
2009 csubstr trimmed = s.triml(
' ');
2010 if(trimmed.str > s.str)
2012 _c4dbgp(
"skipping whitespace");
2013 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, trimmed.str >= s.str);
2014 _line_progressed(
static_cast<size_t>(trimmed.str - s.str));
2017 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'|') || s.begins_with(
'>'));
2019 _c4dbgpf(
"blck: specs=[{}]~~~{}~~~", s.len, s);
// defaults: clip chomping, indentation still unknown
2022 BlockChomp_e chomp = CHOMP_CLIP;
2023 size_t indentation =
npos;
2027 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with_any(
"|>"));
// t: the header characters after the style character
2028 csubstr t = s.sub(1);
2029 _c4dbgpf(
"blck: spec is multichar: '{}'", t);
2030 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, t.len >= 1);
2031 size_t pos = t.first_of(
"-+");
2032 _c4dbgpf(
"blck: spec chomp char at {}", pos);
2036 chomp = CHOMP_STRIP;
2037 else if(t[pos] ==
'+')
// leading run of decimal digits: the explicit indentation indicator
2045 digits = t.left_of(t.first_not_of(
"0123456789"));
2046 if( ! digits.empty())
// only a single digit is accepted, and it must not be zero
2048 if(C4_UNLIKELY(digits.len > 1))
2049 _c4err(
"parse error: invalid indentation");
2050 _c4dbgpf(
"blck: parse indentation digits: [{}]~~~{}~~~", digits.len, digits);
2051 if(C4_UNLIKELY( !
c4::atou(digits, &indentation)))
2052 _c4err(
"parse error: could not read indentation as decimal");
2053 if(C4_UNLIKELY( ! indentation))
2054 _c4err(
"parse error: null indentation");
// NOTE(review): the trace prints indentation+indref (the parameter), while
// the value actually added is m_curr->indref.
2055 _c4dbgpf(
"blck: indentation specified: {}. add {} from curr state -> {}", indentation, m_evt_handler->m_curr->indref, indentation+indref);
2056 indentation += m_evt_handler->m_curr->indref;
2060 _c4dbgpf(
"blck: style={} chomp={} indentation={}", s.begins_with(
'>') ?
"fold" :
"literal", chomp==CHOMP_CLIP ?
"clip" : (chomp==CHOMP_STRIP ?
"strip" :
"keep"), indentation);
// the header is done: consume the rest of its line
2063 _line_progressed(s.len);
// --- stage 2: block lines ---
// raw_block starts empty at the current offset and grows one full line at a time
2068 substr raw_block(m_buf.data() + m_evt_handler->m_curr->pos.offset,
size_t(0));
2069 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, raw_block.begin() == m_evt_handler->m_curr->line_contents.full.begin());
2077 size_t num_lines = 0;
2078 size_t first = m_evt_handler->m_curr->pos.line;
2079 size_t provisional_indentation =
npos;
2081 while(( ! _finished_file()))
// peek at the next line through a local copy (lc); it is committed to the
// parser state only when the line is accepted into the block
2084 lc.reset_with_next_line(m_buf, m_evt_handler->m_curr->pos.offset);
2085 _c4dbgpf(
"blck: peeking at [{}]~~~{}~~~", lc.stripped.len, lc.stripped);
// case A: the block indentation is already known
2087 if(indentation !=
npos)
2089 _c4dbgpf(
"blck: indentation={}", indentation);
// a non-blank line with less indentation is not part of the block
2091 if(lc.indentation < indentation && ( ! lc.rem.trim(
" \t").empty()))
2095 _c4dbgpf(
"blck: indentation decreased ref={} thisline={}", indentation, lc.indentation);
2099 _c4err(
"indentation decreased without any scalar");
// with zero indentation, a document token terminates the block
2103 else if(indentation == 0)
2105 _c4dbgpf(
"blck: noindent. lc.rem=[{}]~~~{}~~~", lc.rem.len, lc.rem);
2106 if(_is_doc_token(lc.rem))
2108 _c4dbgp(
"blck: stop. indentation=0 and doc ended");
// case B: the block indentation is not yet known
2115 const size_t fns = lc.stripped.first_not_of(
' ');
2116 _c4dbgpf(
"blck: indentation ref not set. firstnonws={}", fns);
// a leading tab is rejected here
2120 if(C4_UNLIKELY(lc.stripped.begins_with(
'\t')))
2121 _c4err(
"parse error");
2123 _c4dbgpf(
"blck: line not empty. indref={} indprov={} indentation={}", indref, provisional_indentation, lc.indentation);
2124 if(provisional_indentation ==
npos)
2126 if(lc.indentation < indref)
2128 _c4dbgpf(
"blck: block terminated indentation={} < indref={}", lc.indentation, indref);
2129 if(raw_block.len == 0)
2131 _c4dbgp(
"blck: was empty, undo next line");
2136 else if(lc.indentation == m_evt_handler->m_curr->indref)
2140 _c4dbgpf(
"blck: block terminated. reading container and indentation={}==indref={}", lc.indentation, m_evt_handler->m_curr->indref);
2144 _c4dbgpf(
"blck: set indentation ref from this line: ref={}", lc.indentation);
2145 indentation = lc.indentation;
2149 if(lc.indentation >= provisional_indentation)
2151 _c4dbgpf(
"blck: set indentation ref from provisional indentation: provisional_ref={}, thisline={}", provisional_indentation, lc.indentation);
2153 indentation = lc.indentation;
// the line is empty or only spaces: update the provisional indentation
2164 _c4dbgpf(
"blck: line empty or {} spaces. line_indentation={} prov_indentation={}", lc.stripped.len, lc.indentation, provisional_indentation);
2165 if(provisional_indentation !=
npos)
2167 if(lc.stripped.len >= provisional_indentation)
2169 _c4dbgpf(
"blck: increase provisional_ref {} -> {}", provisional_indentation, lc.stripped.len);
2170 provisional_indentation = lc.stripped.len;
2172 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
2173 else if(lc.indentation >= provisional_indentation && lc.indentation !=
npos)
2175 _c4dbgpf(
"blck: increase provisional_ref {} -> {}", provisional_indentation, lc.indentation);
2176 provisional_indentation = lc.indentation;
// first initialization of the provisional indentation; when the line has no
// indentation the value is the boolean result of has_any(RSEQ|RVAL) (0 or 1)
2182 provisional_indentation = lc.indentation ? lc.indentation : has_any(
RSEQ|
RVAL);
2183 _c4dbgpf(
"blck: initialize provisional_ref={}", provisional_indentation);
2184 if(provisional_indentation ==
npos)
2186 provisional_indentation = lc.stripped.len ? lc.stripped.len : has_any(
RSEQ|
RVAL);
2187 _c4dbgpf(
"blck: initialize provisional_ref={}", provisional_indentation);
// the provisional indentation is never smaller than indref
2189 if(provisional_indentation < indref)
2191 provisional_indentation = indref;
2192 _c4dbgpf(
"blck: initialize provisional_ref={}", provisional_indentation);
// the line belongs to the block: commit it and extend raw_block by the
// full line length
2198 m_evt_handler->m_curr->line_contents = lc;
2199 _c4dbgpf(
"blck: append '{}'", m_evt_handler->m_curr->line_contents.rem);
2200 raw_block.len += m_evt_handler->m_curr->line_contents.full.len;
2201 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
// --- stage 3: results ---
2205 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.line == (first + num_lines) || (raw_block.len == 0));
2206 C4_UNUSED(num_lines);
2209 if(indentation ==
npos)
2211 _c4dbgpf(
"blck: set indentation from provisional: {}", provisional_indentation);
2212 indentation = provisional_indentation;
2218 _c4prscalar(
"scanned block", raw_block,
true);
2220 sb->scalar = raw_block;
2221 sb->indentation = indentation;
// Trace helper for the whitespace filters: forwards to _c4dbgpf, prefixing
// the message with the processor's read and write positions. It expects a
// variable named proc in scope. The second definition expands to nothing.
2232 #define _c4dbgfws(fmt, ...) _c4dbgpf("filt_ws[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2234 #define _c4dbgfws(...)
// Handles a run of whitespace at the processor's read position by looking
// for the first following character that is not whitespace. When that
// character is a line break, the whitespace is trailing on its line and is
// skipped. When the source ends before any such character, the run is
// trailing as well. The callers test the returned bool to detect the case in
// which the remaining source still has to be handled by them.
// Precondition: the current character is a space or a tab.
2237 template<
class EventHandler>
2238 template<
class FilterProcessor>
2239 bool ParseEngine<EventHandler>::_filter_ws_handle_to_first_non_space(FilterProcessor &proc)
2241 _c4dbgfws(
"found whitespace '{}'", _c4prc(proc.curr()));
2242 _RYML_CB_ASSERT(this->callbacks(), proc.curr() ==
' ' || proc.curr() ==
'\t');
// at the very start of the source (rpos == 0) only spaces are stepped over;
// elsewhere both spaces and tabs are
2244 const size_t first_pos = proc.rpos > 0 ? proc.src.first_not_of(
" \t", proc.rpos) : proc.src.first_not_of(
' ', proc.rpos);
2245 if(first_pos !=
npos)
2247 const char first_char = proc.src[first_pos];
2248 _c4dbgfws(
"firstnonws='{}'@{}", _c4prc(first_char), first_pos);
2249 if(first_char ==
'\n' || first_char ==
'\r')
2251 _c4dbgfws(
"whitespace is trailing on line",
"");
// drop the whitespace up to (not including) the line break
2252 proc.skip(first_pos - proc.rpos);
2257 _c4dbgfws(
"legit whitespace. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
2261 _c4dbgfws(
"whitespace is trailing on line",
"");
// Whitespace handling for filters that keep trailing whitespace at the end
// of the scalar: when _filter_ws_handle_to_first_non_space() returns false,
// everything left in the source is copied to the output.
2265 template<
class EventHandler>
2266 template<
class FilterProcessor>
2267 void ParseEngine<EventHandler>::_filter_ws_copy_trailing(FilterProcessor &proc)
2269 if(!_filter_ws_handle_to_first_non_space(proc))
2271 _c4dbgfws(
"... everything else is trailing whitespace - copy {} chars", proc.src.len - proc.rpos);
2272 proc.copy(proc.src.len - proc.rpos);
// Whitespace handling for filters that drop trailing whitespace at the end
// of the scalar: when _filter_ws_handle_to_first_non_space() returns false,
// everything left in the source is skipped.
2276 template<
class EventHandler>
2277 template<
class FilterProcessor>
2278 void ParseEngine<EventHandler>::_filter_ws_skip_trailing(FilterProcessor &proc)
2280 if(!_filter_ws_handle_to_first_non_space(proc))
2282 _c4dbgfws(
"... everything else is trailing whitespace - skip {} chars", proc.src.len - proc.rpos);
2283 proc.skip(proc.src.len - proc.rpos);
// Trace helper for the plain-scalar filter: forwards to _c4dbgpf, prefixing
// the message with the processor's read and write positions. It expects a
// variable named proc in scope. The second definition expands to nothing.
2297 #define _c4dbgfps(fmt, ...) _c4dbgpf("filt_plain[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2299 #define _c4dbgfps(fmt, ...)
// Filters a newline found inside a plain scalar. A run of consecutive
// newlines is written out as newlines (one per newline that follows the
// first); a single newline is converted to a space, unless only whitespace
// remains after it.
// indentation: passed on to _count_following_newlines().
// Precondition: the current character is '\n'.
2302 template<
class EventHandler>
2303 template<
class FilterProcessor>
2304 void ParseEngine<EventHandler>::_filter_nl_plain(FilterProcessor &C4_RESTRICT proc,
size_t indentation)
2306 _RYML_CB_ASSERT(this->callbacks(), proc.curr() ==
'\n');
2308 _c4dbgfps(
"found newline. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
// ii is updated by _count_following_newlines() (passed by address)
2309 size_t ii = proc.rpos;
2310 const size_t numnl_following = _count_following_newlines(proc.src, &ii, indentation);
2313 proc.set(
'\n', numnl_following);
2314 _c4dbgfps(
"{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ?
"in the middle" :
"at the end", proc.rpos-ii);
// position of the first non-blank character after the newline
2318 const size_t ret = proc.src.first_not_of(
" \t", proc.rpos+1);
2322 _c4dbgfps(
"single newline. convert to space. ret={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2326 _c4dbgfps(
"last newline, everything else is whitespace. ii={}/{}", ii, proc.src.len);
// Filters a plain scalar by running through all characters of the processor:
// whitespace goes through _filter_ws_skip_trailing(), newlines through
// _filter_nl_plain(), and carriage returns are ignored.
// indentation: must not be npos; forwarded to the newline filter.
// Returns the processor's result().
2333 template<
class EventHandler>
2334 template<
class FilterProcessor>
2335 auto ParseEngine<EventHandler>::_filter_plain(FilterProcessor &C4_RESTRICT proc,
size_t indentation) -> decltype(proc.result())
2337 _RYML_CB_ASSERT(this->callbacks(), indentation !=
npos);
2338 _c4dbgfps(
"before=[{}]~~~{}~~~", proc.src.len, proc.src);
2340 while(proc.has_more_chars())
2342 const char curr = proc.curr();
2343 _c4dbgfps(
"'{}', sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
2349 _filter_ws_skip_trailing(proc);
2353 _filter_nl_plain(proc, indentation);
2356 _c4dbgfps(
"carriage return, ignore", curr);
2365 _c4dbgfps(
"after[{}]=~~~{}~~~", proc.wpos, proc.sofar());
2367 return proc.result();
// Entry point that delegates the plain-scalar filtering to _filter_plain().
2373 template<
class EventHandler>
2377 return _filter_plain(proc, indentation);
// Second entry point that delegates to _filter_plain() in the same way
// (presumably with a different kind of processor -- confirm).
2380 template<
class EventHandler>
2384 return _filter_plain(proc, indentation);
// Trace helper for the single-quoted filter: forwards to _c4dbgpf, prefixing
// the message with the processor's read and write positions. It expects a
// variable named proc in scope. The second definition expands to nothing.
2395 #define _c4dbgfsq(fmt, ...) _c4dbgpf("filt_squo[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2397 #define _c4dbgfsq(fmt, ...)
// Filters a newline found inside a single-quoted scalar. A run of
// consecutive newlines is written out as newlines (one per newline that
// follows the first); a single newline is converted to a space.
// Precondition: the current character is '\n'.
2400 template<
class EventHandler>
2401 template<
class FilterProcessor>
2402 void ParseEngine<EventHandler>::_filter_nl_squoted(FilterProcessor &C4_RESTRICT proc)
2404 _RYML_CB_ASSERT(this->callbacks(), proc.curr() ==
'\n');
2406 _c4dbgfsq(
"found newline. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
// ii is updated by _count_following_newlines() (passed by address)
2407 size_t ii = proc.rpos;
2408 const size_t numnl_following = _count_following_newlines(proc.src, &ii);
2411 proc.set(
'\n', numnl_following);
2412 _c4dbgfsq(
"{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ?
"in the middle" :
"at the end", proc.rpos-ii);
// position of the first non-blank character after the newline
2416 const size_t ret = proc.src.first_not_of(
" \t", proc.rpos+1);
2420 _c4dbgfsq(
"single newline. convert to space. ret={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2425 _c4dbgfsq(
"single newline. convert to space. ii={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
// Filters a single-quoted scalar by running through all characters of the
// processor: whitespace goes through _filter_ws_copy_trailing(), newlines
// through _filter_nl_squoted(), and a quote followed by another quote is
// recognised as an escaped quote. An inconsistent input is reported as
// "filter error".
// Returns the processor's result().
2431 template<
class EventHandler>
2432 template<
class FilterProcessor>
2433 auto ParseEngine<EventHandler>::_filter_squoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result())
2435 _c4dbgfsq(
"before=[{}]~~~{}~~~", proc.src.len, proc.src);
2439 while(proc.has_more_chars())
2441 const char curr = proc.curr();
2442 _c4dbgfsq(
"'{}', sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
2448 _filter_ws_copy_trailing(proc);
2452 _filter_nl_squoted(proc);
2460 if(proc.next() ==
'\'')
2462 _c4dbgfsq(
"two consecutive squotes", curr);
2468 _c4err(
"filter error");
2477 _c4dbgfsq(
": #filteredchars={} after=~~~[{}]{}~~~", proc.src.len-proc.sofar().len, proc.sofar().len, proc.sofar());
2479 return proc.result();
// Entry point that delegates the single-quoted filtering to _filter_squoted().
2484 template<
class EventHandler>
2488 return _filter_squoted(proc);
// Second entry point that delegates to _filter_squoted() in the same way
// (presumably with a different kind of processor -- confirm).
2491 template<
class EventHandler>
2495 return _filter_squoted(proc);
// Trace helper for the double-quoted filter: forwards to _c4dbgpf, prefixing
// the message with the processor's read and write positions. It expects a
// variable named proc in scope. The second definition expands to nothing.
2506 #define _c4dbgfdq(fmt, ...) _c4dbgpf("filt_dquo[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2508 #define _c4dbgfdq(...)
// Filters a newline found inside a double-quoted scalar. A run of
// consecutive newlines is written out as newlines (one per newline that
// follows the first); a single newline is converted to a space. A backslash
// that comes right after the handled whitespace and escapes a space or a tab
// gets additional treatment.
// Precondition: the current character is '\n'.
2511 template<
class EventHandler>
2512 template<
class FilterProcessor>
2513 void ParseEngine<EventHandler>::_filter_nl_dquoted(FilterProcessor &C4_RESTRICT proc)
2515 _RYML_CB_ASSERT(this->callbacks(), proc.curr() ==
'\n');
2517 _c4dbgfdq(
"found newline. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
// ii is updated by _count_following_newlines() (passed by address)
2518 size_t ii = proc.rpos;
2519 const size_t numnl_following = _count_following_newlines(proc.src, &ii);
2522 proc.set(
'\n', numnl_following);
2523 _c4dbgfdq(
"{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ?
"in the middle" :
"at the end", proc.rpos-ii);
// position of the first non-blank character after the newline
2527 const size_t ret = proc.src.first_not_of(
" \t", proc.rpos+1);
2531 _c4dbgfdq(
"single newline. convert to space. ret={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2536 _c4dbgfdq(
"single newline. convert to space. ii={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
// a backslash at ii followed by a space or tab ('\0' stands in when the
// backslash is the last character of the source)
2538 if(ii < proc.src.len && proc.src.str[ii] ==
'\\')
2541 const char next = ii+1 < proc.src.len ? proc.src.str[ii+1] :
'\0';
2542 if(next ==
' ' || next ==
'\t')
2544 _c4dbgfdq(
"extend skip to backslash",
"");
// Translates the escape sequence that starts at the processor's current
// backslash inside a double-quoted scalar. Handled after the backslash:
//   "  /  space  tab     -> the character itself
//   n r t \ 0 b f a v e  -> the corresponding control character or backslash
//   x / u / U            -> 2 / 4 / 8 hex digits, written as one byte (x) or
//                           as the bytes produced by decode_code_point (u, U)
//   _  N  L  P           -> fixed multi-byte payloads (see each branch)
//   line break           -> the break and the following blanks are skipped
// Any other character is reported as an error.
2552 template<
class EventHandler>
2553 template<
class FilterProcessor>
2554 void ParseEngine<EventHandler>::_filter_dquoted_backslash(FilterProcessor &C4_RESTRICT proc)
// next: the character right after the backslash
2556 char next = proc.next();
2557 _c4dbgfdq(
"backslash, next='{}'", _c4prc(next));
// a "\r\n" line break after the backslash is handled through its '\n'
2560 if(proc.rpos+2 < proc.src.len && proc.src.str[proc.rpos+2] ==
'\n')
2564 _c4dbgfdq(
"[{}]: was \\r\\n, now next='\\n'", proc.rpos);
// escaped line break: scan from the character after the break, looking at
// spaces and tabs, and skip everything up to ii
2570 size_t ii = proc.rpos + 2;
2571 for( ; ii < proc.src.len; ++ii)
2574 if(proc.src.str[ii] ==
' ' || proc.src.str[ii] ==
'\t')
2579 proc.skip(ii - proc.rpos);
// escapes that stand for the escaped character itself
2581 else if(next ==
'"' || next ==
'/' || next ==
' ' || next ==
'\t')
2584 proc.translate_esc(next);
2585 _c4dbgfdq(
"here, used '{}'", _c4prc(next));
2587 else if(next ==
'\r')
2591 else if(next ==
'n')
2593 proc.translate_esc(
'\n');
2595 else if(next ==
'r')
2597 proc.translate_esc(
'\r');
2599 else if(next ==
't')
2601 proc.translate_esc(
'\t');
2603 else if(next ==
'\\')
2605 proc.translate_esc(
'\\');
// \xHH: the two hex digits are at rpos+2 and rpos+3, so rpos+3 must be
// inside the source
2607 else if(next ==
'x')
2609 if(C4_UNLIKELY(proc.rpos + 1u + 2u >= proc.src.len))
2610 _c4err(
"\\x requires 2 hex digits. scalar pos={}", proc.rpos);
2611 csubstr codepoint = proc.src.sub(proc.rpos + 2u, 2u);
2612 _c4dbgfdq(
"utf8 ~~~{}~~~ rpos={} rem=~~~{}~~~", codepoint, proc.rpos, proc.src.sub(proc.rpos));
2613 uint8_t byteval = {};
2614 if(C4_UNLIKELY(!
read_hex(codepoint, &byteval)))
2615 _c4err(
"failed to read \\x codepoint. scalar pos={}", proc.rpos);
// writes 1 byte; the last argument (3) equals the number of source characters
// after the backslash ('x' plus two digits)
2616 proc.translate_esc_bulk((
const char*)&byteval, 1u, 3u);
2617 _c4dbgfdq(
"utf8 after rpos={} rem=~~~{}~~~", proc.rpos, proc.src.sub(proc.rpos));
// \uHHHH: four hex digits at rpos+2 .. rpos+5
2619 else if(next ==
'u')
2621 if(C4_UNLIKELY(proc.rpos + 1u + 4u >= proc.src.len))
2622 _c4err(
"\\u requires 4 hex digits. scalar pos={}", proc.rpos);
2624 csubstr codepoint = proc.src.sub(proc.rpos + 2u, 4u);
2625 uint32_t codepoint_val = {};
2626 if(C4_UNLIKELY(!
read_hex(codepoint, &codepoint_val)))
2627 _c4err(
"failed to parse \\u codepoint. scalar pos={}", proc.rpos);
2628 const size_t numbytes = decode_code_point((uint8_t*)readbuf,
sizeof(readbuf), codepoint_val);
// NOTE(review): the message is labelled "code point" but prints proc.rpos.
2629 if(C4_UNLIKELY(numbytes == 0))
2630 _c4err(
"failed to decode code point={}", proc.rpos);
2631 _RYML_CB_ASSERT(callbacks(), numbytes <= 4);
2632 proc.translate_esc_bulk(readbuf, numbytes, 5u);
// \UHHHHHHHH: eight hex digits at rpos+2 .. rpos+9
2634 else if(next ==
'U')
2636 if(C4_UNLIKELY(proc.rpos + 1u + 8u >= proc.src.len))
2637 _c4err(
"\\U requires 8 hex digits. scalar pos={}", proc.rpos);
2639 csubstr codepoint = proc.src.sub(proc.rpos + 2u, 8u);
2640 uint32_t codepoint_val = {};
2641 if(C4_UNLIKELY(!
read_hex(codepoint, &codepoint_val)))
2642 _c4err(
"failed to parse \\U codepoint. scalar pos={}", proc.rpos);
2643 const size_t numbytes = decode_code_point((uint8_t*)readbuf,
sizeof(readbuf), codepoint_val);
// NOTE(review): the message is labelled "code point" but prints proc.rpos.
2644 if(C4_UNLIKELY(numbytes == 0))
2645 _c4err(
"failed to decode code point={}", proc.rpos);
2646 _RYML_CB_ASSERT(callbacks(), numbytes <= 4);
2647 proc.translate_esc_bulk(readbuf, numbytes, 9u);
// single-character escapes for control characters
2650 else if(next ==
'0')
2652 proc.translate_esc(
'\0');
2654 else if(next ==
'b')
2656 proc.translate_esc(
'\b');
2658 else if(next ==
'f')
2660 proc.translate_esc(
'\f');
2662 else if(next ==
'a')
2664 proc.translate_esc(
'\a');
2666 else if(next ==
'v')
2668 proc.translate_esc(
'\v');
2670 else if(next ==
'e')
2672 proc.translate_esc(
'\x1b');
// Fixed payloads below. Each _RYML_CHCONST pairs a negative and a positive
// spelling of one byte (presumably selected by the signedness of char --
// confirm). The two-byte payloads go through translate_esc_bulk, the
// three-byte ones through translate_esc_extending.
// \_ : bytes 0xc2 0xa0, the UTF-8 encoding of U+00A0
2674 else if(next ==
'_')
2677 const char payload[] = {
2678 _RYML_CHCONST(-0x3e, 0xc2),
2679 _RYML_CHCONST(-0x60, 0xa0),
2681 proc.translate_esc_bulk(payload, 2, 1);
// \N : bytes 0xc2 0x85, the UTF-8 encoding of U+0085
2683 else if(next ==
'N')
2686 const char payload[] = {
2687 _RYML_CHCONST(-0x3e, 0xc2),
2688 _RYML_CHCONST(-0x7b, 0x85),
2690 proc.translate_esc_bulk(payload, 2, 1);
// \L : bytes 0xe2 0x80 0xa8, the UTF-8 encoding of U+2028
2692 else if(next ==
'L')
2695 const char payload[] = {
2696 _RYML_CHCONST(-0x1e, 0xe2),
2697 _RYML_CHCONST(-0x80, 0x80),
2698 _RYML_CHCONST(-0x58, 0xa8),
2700 proc.translate_esc_extending(payload, 3, 1);
// \P : bytes 0xe2 0x80 0xa9, the UTF-8 encoding of U+2029
2702 else if(next ==
'P')
2705 const char payload[] = {
2706 _RYML_CHCONST(-0x1e, 0xe2),
2707 _RYML_CHCONST(-0x80, 0x80),
2708 _RYML_CHCONST(-0x57, 0xa9),
2710 proc.translate_esc_extending(payload, 3, 1);
2712 else if(next ==
'\0')
2718 _c4err(
"unknown character '{}' after '\\' pos={}", _c4prc(next), proc.rpos);
2720 _c4dbgfdq(
"backslash...sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
// Filters a double-quoted scalar by running through all characters of the
// processor: whitespace goes through _filter_ws_copy_trailing(), newlines
// through _filter_nl_dquoted(), carriage returns are ignored, and
// backslashes go through _filter_dquoted_backslash().
// Returns the processor's result().
2724 template<
class EventHandler>
2725 template<
class FilterProcessor>
2726 auto ParseEngine<EventHandler>::_filter_dquoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result())
2728 _c4dbgfdq(
"before=[{}]~~~{}~~~", proc.src.len, proc.src);
2731 while(proc.has_more_chars())
2733 const char curr = proc.curr();
2734 _c4dbgfdq(
"'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
2741 _filter_ws_copy_trailing(proc);
2747 _filter_nl_dquoted(proc);
2752 _c4dbgfdq(
"carriage return, ignore", curr);
2758 _filter_dquoted_backslash(proc);
2768 _c4dbgfdq(
"after[{}]=~~~{}~~~", proc.wpos, proc.sofar());
2769 return proc.result();
// Entry point that delegates the double-quoted filtering to _filter_dquoted().
2775 template<
class EventHandler>
2779 return _filter_dquoted(proc);
// Second entry point that delegates to _filter_dquoted() in the same way
// (presumably with a different kind of processor -- confirm).
2782 template<
class EventHandler>
2786 return _filter_dquoted(proc);
// Applies the chomping mode to the tail of a block scalar. On entry the
// remainder of the processor holds only spaces and line breaks.
//   CLIP:  a missing final newline is added;
//   KEEP:  all remaining newlines are copied;
//   STRIP: the remaining characters are stripped.
// Before chomping, leading lines of the tail are walked and their indentation
// is removed: up to `indentation` spaces are skipped per line and any spaces
// beyond that are copied.
2795 template<
class EventHandler>
2796 template<
class FilterProcessor>
2799 _RYML_CB_ASSERT(this->callbacks(), chomp == CHOMP_CLIP || chomp == CHOMP_KEEP || chomp == CHOMP_STRIP);
2800 _RYML_CB_ASSERT(this->callbacks(), proc.rem().first_not_of(
" \n\r") ==
npos);
// Trace helper local to this function: prefixes messages with the processor's
// read and write positions; the second definition expands to nothing.
2804 #define _c4dbgchomp(fmt, ...) _c4dbgpf("chomp[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2806 #define _c4dbgchomp(...)
2814 _c4dbgchomp(
"found newline and larger indentation. last={}", last);
// turn last into an absolute source position: one past the newline plus the
// indentation of the following line
2815 last = proc.rpos + last + size_t(1) + indentation;
2816 _RYML_CB_ASSERT(this->callbacks(), last <= proc.src.len);
2818 while((proc.rpos < last) && proc.has_more_chars())
2820 const char curr = proc.curr();
2826 _c4dbgchomp(
"newline! remlen={}", proc.rem().len);
2829 csubstr at_next_line = proc.rem();
2830 if(at_next_line.begins_with(
' '))
2832 _c4dbgchomp(
"next line begins with spaces. indentation={}", indentation);
2834 size_t first_non_space = at_next_line.first_not_of(
' ');
2835 _c4dbgchomp(
"first_non_space={}", first_non_space);
// only spaces up to the end: treat the whole remainder as spaces
2836 if(first_non_space ==
npos)
2838 _c4dbgchomp(
"{} spaces, to the end", at_next_line.len);
2839 first_non_space = at_next_line.len;
2841 if(first_non_space <= indentation)
2843 _c4dbgchomp(
"skip spaces={}<=indentation={}", first_non_space, indentation);
2844 proc.skip(first_non_space);
// more spaces than the indentation: drop the indentation, keep the excess
2848 _c4dbgchomp(
"skip indentation={}<spaces={}", indentation, first_non_space);
2849 proc.skip(indentation);
2851 _c4dbgchomp(
"copy {}={}-{} spaces", first_non_space - indentation, first_non_space, indentation);
2852 proc.copy(first_non_space - indentation);
2861 _c4err(
"parse error");
2873 bool had_one =
false;
2874 while(proc.has_more_chars())
2876 const char curr = proc.curr();
2897 _c4dbgchomp(
"chomp=CLIP: add missing newline @{}", proc.wpos);
2904 _c4dbgchomp(
"chomp=KEEP: copy all remaining new lines of {} characters", proc.rem().len);
2905 while(proc.has_more_chars())
2907 const char curr = proc.curr();
2926 _c4dbgchomp(
"chomp=STRIP: strip {} characters", proc.rem().len);
// Trace helper for the block-scalar filters: forwards to _c4dbgpf, prefixing
// the message with the processor's read and write positions. It expects a
// variable named proc in scope. The second definition expands to nothing.
2938 #define _c4dbgfb(fmt, ...) _c4dbgpf("filt_block[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2940 #define _c4dbgfb(...)
// Removes the indentation at the start of a block-scalar line: counts the
// spaces at the processor's read position and skips at most `indentation` of
// them. The section under RYML_NO_COVERAGE__TO_BE_DELETED deals with a
// remainder made only of spaces.
2943 template<
class EventHandler>
2944 template<
class FilterProcessor>
2945 void ParseEngine<EventHandler>::_filter_block_indentation(FilterProcessor &C4_RESTRICT proc,
size_t indentation)
2947 csubstr rem = proc.rem();
// first: number of spaces before the next non-space character
2950 size_t first = rem.first_not_of(
' ');
2953 _c4dbgfb(
"{} spaces follow before next nonws character", first);
2954 if(first < indentation)
2956 _c4dbgfb(
"skip {}<{} spaces from indentation", first, indentation);
2961 _c4dbgfb(
"skip {} spaces from indentation", indentation);
2962 proc.skip(indentation);
2965 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
2968 _c4dbgfb(
"all spaces to the end: {} spaces", first);
2972 if(first < indentation)
2974 _c4dbgfb(
"skip everything", first);
2975 proc.skip(proc.src.len - proc.rpos);
2979 _c4dbgfb(
"skip {} spaces from indentation", indentation);
2980 proc.skip(indentation);
2988 template<
class EventHandler>
2989 template<
class FilterProcessor>
2990 size_t ParseEngine<EventHandler>::_handle_all_whitespace(FilterProcessor &C4_RESTRICT proc, BlockChomp_e chomp)
2992 csubstr contents = proc.src.trimr(
" \n\r");
2993 _c4dbgfb(
"ws: contents_len={} wslen={}", contents.len, proc.src.len-contents.len);
2996 _c4dbgfb(
"ws: all whitespace: len={}", proc.src.len);
2997 if(chomp == CHOMP_KEEP && proc.src.len)
2999 _c4dbgfb(
"ws: chomp=KEEP all {} newlines", proc.src.count(
'\n'));
3000 while(proc.has_more_chars())
3002 const char curr = proc.curr();
3014 return contents.len;
3017 template<
class EventHandler>
3018 template<
class FilterProcessor>
3019 size_t ParseEngine<EventHandler>::_extend_to_chomp(FilterProcessor &C4_RESTRICT proc,
size_t contents_len)
3021 _c4dbgfb(
"contents_len={}", contents_len);
3023 _RYML_CB_ASSERT(this->callbacks(), contents_len > 0u);
3027 size_t firstnewl = proc.src.first_of(
'\n', contents_len);
3028 if(firstnewl !=
npos)
3030 contents_len = firstnewl;
3031 _c4dbgfb(
"contents_len={} <--- firstnewl={}", contents_len, firstnewl);
3035 contents_len = proc.src.len;
3036 _c4dbgfb(
"contents_len={} <--- src.len={}", contents_len, proc.src.len);
3039 return contents_len;
3051 #define _c4dbgfbl(fmt, ...) _c4dbgpf("filt_block_lit[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3053 #define _c4dbgfbl(...)
3056 template<
class EventHandler>
3057 template<
class FilterProcessor>
3058 auto ParseEngine<EventHandler>::_filter_block_literal(FilterProcessor &C4_RESTRICT proc,
size_t indentation, BlockChomp_e chomp) -> decltype(proc.result())
3060 _c4dbgfbl(
"indentation={} before=[{}]~~~{}~~~", indentation, proc.src.len, proc.src);
3062 size_t contents_len = _handle_all_whitespace(proc, chomp);
3064 return proc.result();
3066 contents_len = _extend_to_chomp(proc, contents_len);
3068 _c4dbgfbl(
"to filter=[{}]~~~{}~~~", contents_len, proc.src.first(contents_len));
3070 _filter_block_indentation(proc, indentation);
3073 while(proc.has_more_chars(contents_len))
3075 const char curr = proc.curr();
3076 _c4dbgfbl(
"'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3081 _c4dbgfbl(
"found newline. skip indentation on the next line", curr);
3083 _filter_block_indentation(proc, indentation);
3095 _c4dbgfbl(
"before chomp: #tochomp={} sofar=[{}]~~~{}~~~", proc.rem().len, proc.sofar().len, proc.sofar());
3097 _filter_chomp(proc, chomp, indentation);
3099 _c4dbgfbl(
"final=[{}]~~~{}~~~", proc.sofar().len, proc.sofar());
3101 return proc.result();
3106 template<
class EventHandler>
3110 return _filter_block_literal(proc, indentation, chomp);
3113 template<
class EventHandler>
3117 return _filter_block_literal(proc, indentation, chomp);
3127 #define _c4dbgfbf(fmt, ...) _c4dbgpf("filt_block_folded[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3129 #define _c4dbgfbf(...)
3133 template<
class EventHandler>
3134 template<
class FilterProcessor>
3135 void ParseEngine<EventHandler>::_filter_block_folded_newlines_leading(FilterProcessor &C4_RESTRICT proc,
size_t indentation,
size_t len)
3137 _filter_block_indentation(proc, indentation);
3138 while(proc.has_more_chars(len))
3140 const char curr = proc.curr();
3141 _c4dbgfbf(
"'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3147 _filter_block_indentation(proc, indentation);
3155 size_t first = proc.rem().first_not_of(
" \t");
3158 first = proc.rem().len;
3159 _c4dbgfbf(
"... indentation increased to {}", first);
3160 _filter_block_folded_indented_block(proc, indentation, len, first);
3164 _c4dbgfbf(
"newl leading: not space, not newline. stop.", 0);
3170 template<
class EventHandler>
3171 template<
class FilterProcessor>
3172 size_t ParseEngine<EventHandler>::_filter_block_folded_newlines_compress(FilterProcessor &C4_RESTRICT proc,
size_t num_newl,
size_t wpos_at_first_newl)
3177 _c4dbgfbf(
"... this is the first newline. turn into space. wpos={}", proc.wpos);
3178 wpos_at_first_newl = proc.wpos;
3183 _c4dbgfbf(
"... this is the second newline. prev space (at wpos={}) must be newline", wpos_at_first_newl);
3184 _RYML_CB_ASSERT(this->callbacks(), wpos_at_first_newl !=
npos);
3185 _RYML_CB_ASSERT(this->callbacks(), proc.sofar()[wpos_at_first_newl] ==
' ');
3186 _RYML_CB_ASSERT(this->callbacks(), wpos_at_first_newl + 1u == proc.wpos);
3188 proc.set_at(wpos_at_first_newl,
'\n');
3189 _RYML_CB_ASSERT(this->callbacks(), proc.sofar()[wpos_at_first_newl] ==
'\n');
3192 _c4dbgfbf(
"... subsequent newline (num_newl={}). copy", num_newl);
3196 return wpos_at_first_newl;
3199 template<
class EventHandler>
3200 template<
class FilterProcessor>
3201 void ParseEngine<EventHandler>::_filter_block_folded_newlines(FilterProcessor &C4_RESTRICT proc,
size_t indentation,
size_t len)
3203 _RYML_CB_ASSERT(this->callbacks(), proc.curr() ==
'\n');
3204 size_t num_newl = 0;
3205 size_t wpos_at_first_newl =
npos;
3206 while(proc.has_more_chars(len))
3208 const char curr = proc.curr();
3209 _c4dbgfbf(
"'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3214 _c4dbgfbf(
"newline. sofar={}", num_newl);
3250 wpos_at_first_newl = _filter_block_folded_newlines_compress(proc, ++num_newl, wpos_at_first_newl);
3251 _filter_block_indentation(proc, indentation);
3257 size_t first = proc.rem().first_not_of(
" \t");
3260 first = proc.rem().len;
3261 _c4dbgfbf(
"... indentation increased to {}", first);
3264 _c4dbgfbf(
"... prev space (at wpos={}) must be newline", wpos_at_first_newl);
3265 proc.set_at(wpos_at_first_newl,
'\n');
3269 _c4dbgfbf(
"... add missing newline", wpos_at_first_newl);
3272 _filter_block_folded_indented_block(proc, indentation, len, first);
3274 wpos_at_first_newl =
npos;
3281 _c4dbgfbf(
"not space, not newline. stop.", 0);
3288 template<
class EventHandler>
3289 template<
class FilterProcessor>
3290 void ParseEngine<EventHandler>::_filter_block_folded_indented_block(FilterProcessor &C4_RESTRICT proc,
size_t indentation,
size_t len,
size_t curr_indentation) noexcept
3292 _RYML_CB_ASSERT(this->callbacks(), (proc.rem().first_not_of(
" \t") == curr_indentation) || (proc.rem().first_not_of(
" \t") ==
npos));
3293 if(curr_indentation)
3294 proc.copy(curr_indentation);
3295 while(proc.has_more_chars(len))
3297 const char curr = proc.curr();
3298 _c4dbgfbf(
"'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3304 _filter_block_indentation(proc, indentation);
3305 csubstr rem = proc.rem();
3306 const size_t first = rem.first_not_of(
' ');
3307 _c4dbgfbf(
"newline. firstns={}", first);
3310 const char c = rem[first];
3311 _c4dbgfbf(
"firstns={}='{}'", first, _c4prc(c));
3312 if(c ==
'\n' || c ==
'\r')
3318 _c4dbgfbf(
"done with indented block", first);
3322 else if(first !=
npos)
3343 template<
class EventHandler>
3344 template<
class FilterProcessor>
3345 auto ParseEngine<EventHandler>::_filter_block_folded(FilterProcessor &C4_RESTRICT proc,
size_t indentation, BlockChomp_e chomp) -> decltype(proc.result())
3347 _c4dbgfbf(
"indentation={} before=[{}]~~~{}~~~", indentation, proc.src.len, proc.src);
3349 size_t contents_len = _handle_all_whitespace(proc, chomp);
3351 return proc.result();
3353 contents_len = _extend_to_chomp(proc, contents_len);
3355 _c4dbgfbf(
"to filter=[{}]~~~{}~~~", contents_len, proc.src.first(contents_len));
3357 _filter_block_folded_newlines_leading(proc, indentation, contents_len);
3360 while(proc.has_more_chars(contents_len))
3362 const char curr = proc.curr();
3363 _c4dbgfbf(
"'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3369 _filter_block_folded_newlines(proc, indentation, contents_len);
3381 _c4dbgfbf(
"before chomp: #tochomp={} sofar=[{}]~~~{}~~~", proc.rem().len, proc.sofar().len, proc.sofar());
3383 _filter_chomp(proc, chomp, indentation);
3385 _c4dbgfbf(
"final=[{}]~~~{}~~~", proc.sofar().len, proc.sofar());
3387 return proc.result();
3392 template<
class EventHandler>
3396 return _filter_block_folded(proc, indentation, chomp);
3399 template<
class EventHandler>
3403 return _filter_block_folded(proc, indentation, chomp);
3411 template<
class EventHandler>
3414 _c4dbgpf(
"filtering plain scalar: s=[{}]~~~{}~~~", s.len, s);
3415 FilterResult r = this->filter_scalar_plain_in_place(s, s.len, indentation);
3416 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, r.valid());
3417 _c4dbgpf(
"filtering plain scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
3423 template<
class EventHandler>
3424 csubstr ParseEngine<EventHandler>::_filter_scalar_squot(substr s)
3426 _c4dbgpf(
"filtering squo scalar: s=[{}]~~~{}~~~", s.len, s);
3427 FilterResult r = this->filter_scalar_squoted_in_place(s, s.len);
3428 _RYML_CB_ASSERT(this->callbacks(), r.valid());
3429 _c4dbgpf(
"filtering squo scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
3436 template<
class EventHandler>
3437 csubstr ParseEngine<EventHandler>::_filter_scalar_dquot(substr s)
3439 _c4dbgpf(
"filtering dquo scalar: s=[{}]~~~{}~~~", s.len, s);
3440 FilterResultExtending r = this->filter_scalar_dquoted_in_place(s, s.len);
3441 if(C4_LIKELY(r.valid()))
3443 _c4dbgpf(
"filtering dquo scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
3448 const size_t len = r.required_len();
3449 _c4dbgpf(
"filtering dquo scalar: not enough space: needs {}, have {}", len, s.len);
3450 substr dst = m_evt_handler->alloc_arena(len, &s);
3451 _c4dbgpf(
"filtering dquo scalar: dst.len={}", dst.len);
3452 _RYML_CB_ASSERT(this->callbacks(), dst.len == len);
3453 FilterResult rsd = this->filter_scalar_dquoted(s, dst);
3454 _c4dbgpf(
"filtering dquo scalar: ... result now needs {} was {}", rsd.required_len(), len);
3455 _RYML_CB_ASSERT(this->callbacks(), rsd.required_len() <= len);
3456 _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, rsd.valid());
3457 _c4dbgpf(
"filtering dquo scalar: success! s=[{}]~~~{}~~~", rsd.get().len, rsd.get());
3464 template<
class EventHandler>
3465 csubstr ParseEngine<EventHandler>::_filter_scalar_literal(substr s,
size_t indentation, BlockChomp_e chomp)
3467 _c4dbgpf(
"filtering block literal scalar: s=[{}]~~~{}~~~", s.len, s);
3468 FilterResult r = this->filter_scalar_block_literal_in_place(s, s.len, indentation, chomp);
3469 if(C4_LIKELY(r.valid()))
3471 _c4dbgpf(
"filtering block literal scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
3476 _c4dbgpf(
"filtering block literal scalar: not enough space: needs {}, have {}", r.required_len(), s.len);
3477 substr dst = m_evt_handler->alloc_arena(r.required_len(), &s);
3478 FilterResult rsd = this->filter_scalar_block_literal(s, dst, indentation, chomp);
3479 _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, rsd.valid());
3480 _c4dbgpf(
"filtering block literal scalar: success! s=[{}]~~~{}~~~", rsd.get().len, rsd.get());
3487 template<
class EventHandler>
3488 csubstr ParseEngine<EventHandler>::_filter_scalar_folded(substr s,
size_t indentation, BlockChomp_e chomp)
3490 _c4dbgpf(
"filtering block folded scalar: s=[{}]~~~{}~~~", s.len, s);
3491 FilterResult r = this->filter_scalar_block_folded_in_place(s, s.len, indentation, chomp);
3492 if(C4_LIKELY(r.valid()))
3494 _c4dbgpf(
"filtering block folded scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
3499 _c4dbgpf(
"filtering block folded scalar: not enough space: needs {}, have {}", r.required_len(), s.len);
3500 substr dst = m_evt_handler->alloc_arena(r.required_len(), &s);
3501 FilterResult rsd = this->filter_scalar_block_folded(s, dst, indentation, chomp);
3502 _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, rsd.valid());
3503 _c4dbgpf(
"filtering block folded scalar: success! s=[{}]~~~{}~~~", rsd.get().len, rsd.get());
3511 template<
class EventHandler>
3512 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_plain(ScannedScalar
const& C4_RESTRICT sc,
size_t indentation)
3516 if(m_options.scalar_filtering())
3518 return _filter_scalar_plain(sc.scalar, indentation);
3522 _c4dbgp(
"plain scalar left unfiltered");
3523 m_evt_handler->mark_key_scalar_unfiltered();
3528 _c4dbgp(
"plain scalar doesn't need filtering");
3533 template<
class EventHandler>
3534 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_plain(ScannedScalar
const& C4_RESTRICT sc,
size_t indentation)
3538 if(m_options.scalar_filtering())
3540 return _filter_scalar_plain(sc.scalar, indentation);
3544 _c4dbgp(
"plain scalar left unfiltered");
3545 m_evt_handler->mark_val_scalar_unfiltered();
3550 _c4dbgp(
"plain scalar doesn't need filtering");
3558 template<
class EventHandler>
3559 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_squot(ScannedScalar
const& C4_RESTRICT sc)
3563 if(m_options.scalar_filtering())
3565 return _filter_scalar_squot(sc.scalar);
3569 _c4dbgp(
"squo key scalar left unfiltered");
3570 m_evt_handler->mark_key_scalar_unfiltered();
3575 _c4dbgp(
"squo key scalar doesn't need filtering");
3580 template<
class EventHandler>
3581 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_squot(ScannedScalar
const& C4_RESTRICT sc)
3585 if(m_options.scalar_filtering())
3587 return _filter_scalar_squot(sc.scalar);
3591 _c4dbgp(
"squo val scalar left unfiltered");
3592 m_evt_handler->mark_val_scalar_unfiltered();
3597 _c4dbgp(
"squo val scalar doesn't need filtering");
3605 template<
class EventHandler>
3606 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_dquot(ScannedScalar
const& C4_RESTRICT sc)
3610 if(m_options.scalar_filtering())
3612 return _filter_scalar_dquot(sc.scalar);
3616 _c4dbgp(
"dquo scalar left unfiltered");
3617 m_evt_handler->mark_key_scalar_unfiltered();
3622 _c4dbgp(
"dquo scalar doesn't need filtering");
3627 template<
class EventHandler>
3628 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_dquot(ScannedScalar
const& C4_RESTRICT sc)
3632 if(m_options.scalar_filtering())
3634 return _filter_scalar_dquot(sc.scalar);
3638 _c4dbgp(
"dquo scalar left unfiltered");
3639 m_evt_handler->mark_val_scalar_unfiltered();
3644 _c4dbgp(
"dquo scalar doesn't need filtering");
3652 template<
class EventHandler>
3653 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_literal(ScannedBlock
const& C4_RESTRICT sb)
3655 if(m_options.scalar_filtering())
3657 return _filter_scalar_literal(sb.scalar, sb.indentation, sb.chomp);
3661 _c4dbgp(
"literal scalar left unfiltered");
3662 m_evt_handler->mark_key_scalar_unfiltered();
3667 template<
class EventHandler>
3668 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_literal(ScannedBlock
const& C4_RESTRICT sb)
3670 if(m_options.scalar_filtering())
3672 return _filter_scalar_literal(sb.scalar, sb.indentation, sb.chomp);
3676 _c4dbgp(
"literal scalar left unfiltered");
3677 m_evt_handler->mark_val_scalar_unfiltered();
3685 template<
class EventHandler>
3686 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_folded(ScannedBlock
const& C4_RESTRICT sb)
3688 if(m_options.scalar_filtering())
3690 return _filter_scalar_folded(sb.scalar, sb.indentation, sb.chomp);
3694 _c4dbgp(
"folded scalar left unfiltered");
3695 m_evt_handler->mark_key_scalar_unfiltered();
3700 template<
class EventHandler>
3701 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_folded(ScannedBlock
const& C4_RESTRICT sb)
3703 if(m_options.scalar_filtering())
3705 return _filter_scalar_folded(sb.scalar, sb.indentation, sb.chomp);
3709 _c4dbgp(
"folded scalar left unfiltered");
3710 m_evt_handler->mark_val_scalar_unfiltered();
3722 template<
class EventHandler>
3723 void ParseEngine<EventHandler>::add_flags(
ParserFlag_t on, ParserState * s)
3725 char buf1_[64], buf2_[64], buf3_[64];
3726 csubstr buf1 = detail::_parser_flags_to_str(buf1_, on);
3727 csubstr buf2 = detail::_parser_flags_to_str(buf2_, s->flags);
3728 csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags|on);
3729 _c4dbgpf(
"state[{}]: add {}: before={} after={}", s->level, buf1, buf2, buf3);
3733 template<
class EventHandler>
3736 char buf1_[64], buf2_[64], buf3_[64], buf4_[64];
3737 csubstr buf1 = detail::_parser_flags_to_str(buf1_, on);
3738 csubstr buf2 = detail::_parser_flags_to_str(buf2_, off);
3739 csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags);
3740 csubstr buf4 = detail::_parser_flags_to_str(buf4_, ((s->flags|on)&(~off)));
3741 _c4dbgpf(
"state[{}]: add {} / rem {}: before={} after={}", s->level, buf1, buf2, buf3, buf4);
3746 template<
class EventHandler>
3747 void ParseEngine<EventHandler>::rem_flags(
ParserFlag_t off, ParserState * s)
3749 char buf1_[64], buf2_[64], buf3_[64];
3750 csubstr buf1 = detail::_parser_flags_to_str(buf1_, off);
3751 csubstr buf2 = detail::_parser_flags_to_str(buf2_, s->flags);
3752 csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags&(~off));
3753 _c4dbgpf(
"state[{}]: rem {}: before={} after={}", s->level, buf1, buf2, buf3);
3757 inline C4_NO_INLINE csubstr detail::_parser_flags_to_str(substr buf,
ParserFlag_t flags)
3760 bool gotone =
false;
3762 #define _prflag(fl) \
3763 if((flags & fl) == (fl)) \
3767 if(pos + 1 < buf.len) \
3771 csubstr fltxt = #fl; \
3772 if(pos + fltxt.len <= buf.len) \
3773 memcpy(buf.str + pos, fltxt.str, fltxt.len); \
3803 RYML_CHECK(pos <= buf.len);
3805 return buf.first(pos);
3815 template<
class EventHandler>
3818 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, loc.
offset < m_buf.len);
3819 return m_buf.sub(loc.
offset);
3822 template<
class EventHandler>
3825 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, node.
readable());
3826 return location(*node.
tree(), node.
id());
3829 template<
class EventHandler>
3834 if(_location_from_node(tree, node, &loc, 0))
3836 return val_location(m_buf.str);
3839 template<
class EventHandler>
3844 csubstr k = tree.
key(node);
3845 if(C4_LIKELY(k.str !=
nullptr))
3847 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, k.is_sub(m_buf));
3848 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.is_super(k));
3849 *loc = val_location(k.str);
3856 csubstr v = tree.
val(node);
3857 if(C4_LIKELY(v.str !=
nullptr))
3859 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, v.is_sub(m_buf));
3860 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.is_super(v));
3861 *loc = val_location(v.str);
3868 if(_location_from_cont(tree, node, loc))
3879 if(_location_from_node(tree, prev, loc, level+1))
3888 if(_location_from_node(tree, next, loc, level+1))
3897 if(_location_from_node(tree, parent, loc, level+1))
3906 template<
class EventHandler>
3907 bool ParseEngine<EventHandler>::_location_from_cont(Tree
const& tree,
id_type node, Location *C4_RESTRICT loc)
const
3909 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, tree.is_container(node));
3910 if(!tree.is_stream(node))
3912 const char *node_start = tree._p(node)->m_val.scalar.str;
3913 if(tree.has_children(node))
3915 id_type child = tree.first_child(node);
3916 if(tree.has_key(child))
3919 csubstr k = tree.key(child);
3920 if(k.str && node_start > k.str)
3924 *loc = val_location(node_start);
3929 *loc = val_location(m_buf.str);
3935 template<
class EventHandler>
3938 if(C4_UNLIKELY(val ==
nullptr))
3939 return {m_file, 0, 0, 0};
3940 _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_options.locations());
3943 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.str == m_newline_offsets_buf.str);
3944 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.len == m_newline_offsets_buf.len);
3945 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_options.locations());
3946 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !_locations_dirty());
3947 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_newline_offsets !=
nullptr);
3948 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_size > 0);
3950 csubstr src = m_buf;
3951 _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, val !=
nullptr || src.str ==
nullptr);
3952 _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, (val >= src.begin() && val <= src.end()) || (src.str ==
nullptr && val ==
nullptr));
3954 using lineptr_type =
size_t const* C4_RESTRICT;
3955 lineptr_type lineptr =
nullptr;
3956 size_t offset = (size_t)(val - src.begin());
3960 for(lineptr_type curr = m_newline_offsets, last = m_newline_offsets + m_newline_offsets_size; curr < last; ++curr)
3975 size_t count = m_newline_offsets_size;
3978 lineptr = m_newline_offsets;
3982 it = lineptr + step;
3994 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, lineptr >= m_newline_offsets);
3995 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, lineptr <= m_newline_offsets + m_newline_offsets_size);
3996 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, *lineptr > offset);
4000 loc.
line = (size_t)(lineptr - m_newline_offsets);
4001 if(lineptr > m_newline_offsets)
4002 loc.
col = (offset - *(lineptr-1) - 1u);
4008 template<
class EventHandler>
4011 m_newline_offsets_buf = m_buf;
4012 size_t numnewlines = 1u + m_buf.count(
'\n');
4013 _resize_locations(numnewlines);
4014 m_newline_offsets_size = 0;
4015 for(
size_t i = 0; i < m_buf.len; i++)
4016 if(m_buf[i] ==
'\n')
4017 m_newline_offsets[m_newline_offsets_size++] = i;
4018 m_newline_offsets[m_newline_offsets_size++] = m_buf.len;
4019 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_size == numnewlines);
4022 template<
class EventHandler>
4023 void ParseEngine<EventHandler>::_resize_locations(
size_t numnewlines)
4025 if(numnewlines > m_newline_offsets_capacity)
4027 if(m_newline_offsets)
4028 _RYML_CB_FREE(m_evt_handler->m_stack.m_callbacks, m_newline_offsets,
size_t, m_newline_offsets_capacity);
4029 m_newline_offsets = _RYML_CB_ALLOC_HINT(m_evt_handler->m_stack.m_callbacks,
size_t, numnewlines, m_newline_offsets);
4030 m_newline_offsets_capacity = numnewlines;
4034 template<
class EventHandler>
4035 bool ParseEngine<EventHandler>::_locations_dirty()
const
4037 return !m_newline_offsets_size;
4045 template<
class EventHandler>
4046 void ParseEngine<EventHandler>::_handle_flow_skip_whitespace()
4048 if(m_evt_handler->m_curr->line_contents.rem.len > 0)
4050 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
4051 if(rem.str[0] ==
' ' || rem.str[0] ==
'\t')
4053 _c4dbgpf(
"starts with whitespace: '{}'", _c4prc(rem.str[0]));
4055 rem = m_evt_handler->m_curr->line_contents.rem;
4058 if(rem.begins_with(
'#'))
4060 _c4dbgpf(
"it's a comment: {}", m_evt_handler->m_curr->line_contents.rem);
4061 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
4070 template<
class EventHandler>
4071 void ParseEngine<EventHandler>::_add_annotation(Annotation *C4_RESTRICT dst, csubstr str,
size_t indentation,
size_t line)
4073 _c4dbgpf(
"store annotation[{}]: '{}' indentation={} line={}", dst->num_entries, str, indentation, line);
4074 if(C4_UNLIKELY(dst->num_entries >= C4_COUNTOF(dst->annotations)))
4075 _c4err(
"too many annotations");
4076 dst->annotations[dst->num_entries].str = str;
4077 dst->annotations[dst->num_entries].indentation = indentation;
4078 dst->annotations[dst->num_entries].line = line;
4082 template<
class EventHandler>
4083 void ParseEngine<EventHandler>::_clear_annotations(Annotation *C4_RESTRICT dst)
4085 dst->num_entries = 0;
4088 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
4089 template<
class EventHandler>
4090 bool ParseEngine<EventHandler>::_handle_indentation_from_annotations()
4092 if(m_pending_anchors.num_entries == 1u || m_pending_tags.num_entries == 1u)
4094 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.num_entries < 2u && m_pending_tags.num_entries < 2u);
4095 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.annotations[0].line < m_evt_handler->m_curr->pos.line);
4096 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_tags.annotations[1].line < m_evt_handler->m_curr->pos.line);
4097 size_t to_skip = m_evt_handler->m_curr->indref;
4098 if(m_pending_anchors.num_entries)
4099 to_skip = m_pending_anchors.annotations[0].indentation > to_skip ? m_pending_anchors.annotations[0].indentation : to_skip;
4100 if(m_pending_tags.num_entries)
4101 to_skip = m_pending_tags.annotations[0].indentation > to_skip ? m_pending_tags.annotations[0].indentation : to_skip;
4102 _c4dbgpf(
"annotations pending, skip indentation up to {}!", to_skip);
4103 _maybe_skipchars_up_to(
' ', to_skip);
4110 template<
class EventHandler>
4111 bool ParseEngine<EventHandler>::_annotations_require_key_container()
const
4113 return m_pending_tags.num_entries > 1 || m_pending_anchors.num_entries > 1;
4116 template<
class EventHandler>
4117 void ParseEngine<EventHandler>::_check_tag(csubstr tag)
4119 if(!tag.begins_with(
"!<"))
4121 if(C4_UNLIKELY(tag.first_of(
"[]{},") !=
npos))
4122 _RYML_CB_ERR_(m_evt_handler->m_stack.m_callbacks,
"tags must not contain any of '[]{},'", m_evt_handler->m_curr->pos);
4126 if(C4_UNLIKELY(!tag.ends_with(
'>')))
4127 _RYML_CB_ERR_(m_evt_handler->m_stack.m_callbacks,
"malformed tag", m_evt_handler->m_curr->pos);
4131 template<
class EventHandler>
4132 void ParseEngine<EventHandler>::_handle_annotations_before_blck_key_scalar()
4134 _c4dbgpf(
"annotations_before_blck_key_scalar, node={}", m_evt_handler->m_curr->node_id);
4135 if(m_pending_tags.num_entries)
4137 _c4dbgpf(
"annotations_before_blck_key_scalar, #tags={}", m_pending_tags.num_entries);
4138 if(C4_LIKELY(m_pending_tags.num_entries == 1))
4140 _check_tag(m_pending_tags.annotations[0].str);
4141 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4142 _clear_annotations(&m_pending_tags);
4146 _c4err(
"too many tags");
4149 if(m_pending_anchors.num_entries)
4151 _c4dbgpf(
"annotations_before_blck_key_scalar, #anchors={}", m_pending_anchors.num_entries);
4152 if(C4_LIKELY(m_pending_anchors.num_entries == 1))
4154 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4155 _clear_annotations(&m_pending_anchors);
4159 _c4err(
"too many anchors");
4164 template<
class EventHandler>
4165 void ParseEngine<EventHandler>::_handle_annotations_before_blck_val_scalar()
4167 _c4dbgpf(
"annotations_before_blck_val_scalar, node={}", m_evt_handler->m_curr->node_id);
4168 if(m_pending_tags.num_entries)
4170 _c4dbgpf(
"annotations_before_blck_val_scalar, #tags={}", m_pending_tags.num_entries);
4171 if(C4_LIKELY(m_pending_tags.num_entries == 1))
4173 _check_tag(m_pending_tags.annotations[0].str);
4174 m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4175 _clear_annotations(&m_pending_tags);
4179 _c4err(
"too many tags");
4182 if(m_pending_anchors.num_entries)
4184 _c4dbgpf(
"annotations_before_blck_val_scalar, #anchors={}", m_pending_anchors.num_entries);
4185 if(C4_LIKELY(m_pending_anchors.num_entries == 1))
4187 m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4188 _clear_annotations(&m_pending_anchors);
4192 _c4err(
"too many anchors");
4197 template<
class EventHandler>
4198 void ParseEngine<EventHandler>::_handle_annotations_before_start_mapblck(
size_t current_line)
4200 _c4dbgpf(
"annotations_before_start_mapblck, current_line={}", current_line);
4201 if(m_pending_tags.num_entries == 2)
4203 _c4dbgp(
"2 tags, setting entry 0");
4204 _check_tag(m_pending_tags.annotations[0].str);
4205 m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4207 else if(m_pending_tags.num_entries == 1)
4209 _c4dbgpf(
"1 tag. line={}, curr={}", m_pending_tags.annotations[0].line);
4210 if(m_pending_tags.annotations[0].line < current_line)
4212 _c4dbgp(
"...tag is for the map. setting it.");
4213 _check_tag(m_pending_tags.annotations[0].str);
4214 m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4215 _clear_annotations(&m_pending_tags);
4219 if(m_pending_anchors.num_entries == 2)
4221 _c4dbgp(
"2 anchors, setting entry 0");
4222 m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4224 else if(m_pending_anchors.num_entries == 1)
4226 _c4dbgpf(
"1 anchor. line={}, curr={}", m_pending_anchors.annotations[0].line);
4227 if(m_pending_anchors.annotations[0].line < current_line)
4229 _c4dbgp(
"...anchor is for the map. setting it.");
4230 m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4231 _clear_annotations(&m_pending_anchors);
4236 template<
class EventHandler>
4237 void ParseEngine<EventHandler>::_handle_annotations_before_start_mapblck_as_key()
4239 _c4dbgp(
"annotations_before_start_mapblck_as_key");
4240 if(m_pending_tags.num_entries == 2)
4242 _check_tag(m_pending_tags.annotations[0].str);
4243 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4245 if(m_pending_anchors.num_entries == 2)
4247 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4251 template<
class EventHandler>
4252 void ParseEngine<EventHandler>::_handle_annotations_and_indentation_after_start_mapblck(
size_t key_indentation,
size_t key_line)
4254 _c4dbgp(
"annotations_after_start_mapblck");
4255 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries <= 2);
4256 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.num_entries <= 2);
4257 if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
4259 key_indentation = _select_indentation_from_annotations(key_indentation, key_line);
4260 switch(m_pending_tags.num_entries)
4263 _check_tag(m_pending_tags.annotations[0].str);
4264 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4265 _clear_annotations(&m_pending_tags);
4268 _check_tag(m_pending_tags.annotations[1].str);
4269 m_evt_handler->set_key_tag(m_pending_tags.annotations[1].str);
4270 _clear_annotations(&m_pending_tags);
4273 switch(m_pending_anchors.num_entries)
4276 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4277 _clear_annotations(&m_pending_anchors);
4280 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[1].str);
4281 _clear_annotations(&m_pending_anchors);
4285 _set_indentation(key_indentation);
4288 template<
class EventHandler>
4289 size_t ParseEngine<EventHandler>::_select_indentation_from_annotations(
size_t val_indentation,
size_t val_line)
4291 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries || m_pending_anchors.num_entries);
4293 auto const *C4_RESTRICT curr = m_pending_anchors.num_entries ? &m_pending_anchors.annotations[0] : &m_pending_tags.annotations[0];
4294 for(
size_t i = 0; i < m_pending_anchors.num_entries; ++i)
4296 auto const& C4_RESTRICT ann = m_pending_anchors.annotations[i];
4297 if(ann.line > curr->line)
4299 else if(ann.indentation < curr->indentation)
4302 for(
size_t j = 0; j < m_pending_tags.num_entries; ++j)
4304 auto const& C4_RESTRICT ann = m_pending_tags.annotations[j];
4305 if(ann.line > curr->line)
4307 else if(ann.indentation < curr->indentation)
4310 return curr->line < val_line ? val_indentation : curr->indentation;
4313 template<
class EventHandler>
4314 void ParseEngine<EventHandler>::_handle_directive(csubstr rem)
4316 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.is_sub(m_evt_handler->m_curr->line_contents.rem));
4317 const size_t pos = rem.find(
'#');
4318 _c4dbgpf(
"handle_directive: pos={} rem={}", pos, rem);
4321 m_evt_handler->add_directive(rem);
4322 _line_progressed(rem.len);
4326 csubstr to_comment = rem.first(pos);
4327 csubstr trimmed = to_comment.trimr(
" \t");
4328 m_evt_handler->add_directive(trimmed);
4329 _line_progressed(pos);
// Handles the contents of a flow sequence in the `_json` variant of the parser.
//
// Preconditions (asserted): the state has RSEQ and FLOW, has no RKEY, and has
// exactly one of RVAL (a value is expected) or RNXT (a separator or the
// closing bracket is expected).
//
// Whitespace is skipped first, then the first remaining character of the line
// drives the dispatch. The dispatch conditions themselves are not visible in
// this view; the trace messages name each case.
//
// Fix: the two per-state trace labels previously read "mapjson[...]" although
// this is the sequence handler; they now read "seqjson[...]" like every other
// trace in this function.
4337 template<
class EventHandler>
4338 void ParseEngine<EventHandler>::_handle_seq_json()
4341 _c4dbgpf(
"handle2_seq_json: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
4343 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
4344 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RSEQ));
4345 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
FLOW));
4346 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL|
RNXT));
4347 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RVAL) != has_all(
RNXT));
4349 _handle_flow_skip_whitespace();
4350 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// ---- state RVAL: a sequence value is expected ----
4356 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
4357 const char first = rem.str[0];
4358 _c4dbgpf(
"seqjson[RVAL]: '{}'", first);
// double-quoted scalar value
4363 _c4dbgp(
"seqjson[RVAL]: scanning double-quoted scalar");
4364 ScannedScalar sc = _scan_scalar_dquot();
4365 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
4366 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
// nested sequence: open it and consume the opening character
4372 _c4dbgp(
"seqjson[RVAL]: start child seqjson");
4374 m_evt_handler->begin_seq_val_flow();
4376 _line_progressed(1);
// nested map: open it, consume the opening character and leave this handler
4381 _c4dbgp(
"seqjson[RVAL]: start child mapjson");
4383 m_evt_handler->begin_map_val_flow();
4385 _line_progressed(1);
4386 goto seqjson_finish;
// the sequence is closed while a value was expected
4390 _c4dbgp(
"seqjson[RVAL]: end!");
4392 m_evt_handler->end_seq();
4393 _line_progressed(1);
4395 goto seqjson_finish;
// anything else must scan as a plain scalar, otherwise it is an error
4401 if(_scan_scalar_seq_json(&sc))
4403 _c4dbgp(
"seqjson[RVAL]: it's a plain scalar.");
4404 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
4405 m_evt_handler->set_val_scalar_plain(maybe_filtered);
4410 _c4err(
"parse error");
// ---- state RNXT: a separator or the end of the sequence is expected ----
4417 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT));
4418 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
4419 const char first = rem.str[0];
4420 _c4dbgpf(
"seqjson[RNXT]: '{}'", first);
// separator: move on to the next sibling value
4425 _c4dbgp(
"seqjson[RNXT]: expect next val");
4427 m_evt_handler->add_sibling();
4428 _line_progressed(1);
// closing bracket: the sequence is done
4433 _c4dbgp(
"seqjson[RNXT]: end!");
4434 m_evt_handler->end_seq();
4435 _line_progressed(1);
4436 goto seqjson_finish;
4439 _c4err(
"parse error");
// ---- end of iteration ----
// When the line is exhausted and the file is not, parsing continues (the
// steps taken are not visible in this view); if the file is exhausted too,
// the sequence was never closed.
4444 _c4dbgt(
"seqjson: go again", 0);
4445 if(_finished_line())
4447 if(C4_LIKELY(!_finished_file()))
4455 _c4err(
"missing terminating ]");
4461 _c4dbgp(
"seqjson: finish");
// Handles the contents of a flow map in the `_json` variant of the parser.
//
// Preconditions (asserted): the state has RMAP and FLOW, has no QMRK, and has
// exactly one of RKEY (key expected), RKCL (colon after a key expected),
// RVAL (value expected) or RNXT (separator or closing brace expected).
//
// Whitespace is skipped first, then the first remaining character of the line
// drives the dispatch within the current state. Several dispatch conditions
// are not visible in this view; the trace messages name each case.
4467 template<
class EventHandler>
4468 void ParseEngine<EventHandler>::_handle_map_json()
4471 _c4dbgpf(
"handle2_map_json: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
4473 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP));
4474 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
FLOW));
4475 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
4476 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RKCL|
RVAL|
RNXT));
// exactly one of the four sub-states may be active
4477 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(
RKEY) + has_any(
RKCL) + has_any(
RVAL) + has_any(
RNXT)));
4479 _handle_flow_skip_whitespace();
4480 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// ---- state RKEY: a key is expected ----
4486 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
4487 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
4488 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
4489 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
4490 const char first = rem.str[0];
4491 _c4dbgpf(
"mapjson[RKEY]: '{}'", first);
// double-quoted key
4496 _c4dbgp(
"mapjson[RKEY]: scanning double-quoted scalar");
4497 ScannedScalar sc = _scan_scalar_dquot();
4498 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
4499 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
// the map is closed while a key was expected
4505 _c4dbgp(
"mapjson[RKEY]: end!");
4506 m_evt_handler->end_map();
4507 _line_progressed(1);
4508 goto mapjson_finish;
4511 _c4err(
"parse error");
// ---- state RVAL: a value is expected ----
4514 else if(has_any(
RVAL))
4516 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
4517 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
4518 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
4519 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
4520 const char first = rem.str[0];
4521 _c4dbgpf(
"mapjson[RVAL]: '{}'", first);
// double-quoted value
4526 _c4dbgp(
"mapjson[RVAL]: scanning double-quoted scalar");
4527 ScannedScalar sc = _scan_scalar_dquot();
4528 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
4529 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
// nested sequence value: the child takes the parent's reference indentation
4535 _c4dbgp(
"mapjson[RVAL]: start val seqjson");
4537 m_evt_handler->begin_seq_val_flow();
4538 _set_indentation(m_evt_handler->m_parent->indref);
4540 _line_progressed(1);
4541 goto mapjson_finish;
// nested map value: the child takes the parent's reference indentation
4545 _c4dbgp(
"mapjson[RVAL]: start val mapjson");
4547 m_evt_handler->begin_map_val_flow();
4548 _set_indentation(m_evt_handler->m_parent->indref);
4550 _line_progressed(1);
// anything else must scan as a plain scalar, otherwise it is an error
4557 if(_scan_scalar_map_json(&sc))
4559 _c4dbgp(
"mapjson[RVAL]: plain scalar.");
4560 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
4561 m_evt_handler->set_val_scalar_plain(maybe_filtered);
4566 _c4err(
"parse error");
// ---- state RKCL: the colon after a key is expected ----
4572 else if(has_any(
RKCL))
4574 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
4575 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
4576 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
4577 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
4578 const char first = rem.str[0];
4579 _c4dbgpf(
"mapjson[RKCL]: '{}'", first);
4582 _c4dbgp(
"mapjson[RKCL]: found the colon");
4584 _line_progressed(1);
4588 _c4err(
"parse error");
// ---- state RNXT: a separator or the end of the map is expected ----
4591 else if(has_any(
RNXT))
4593 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
4594 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
4595 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
4596 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
4597 _c4dbgpf(
"mapjson[RNXT]: '{}'", rem.str[0]);
// comma: another key/value pair follows
4598 if(rem.begins_with(
','))
4600 _c4dbgp(
"mapjson[RNXT]: expect next keyval");
4601 m_evt_handler->add_sibling();
4603 _line_progressed(1);
// closing brace: the map is done
4605 else if(rem.begins_with(
'}'))
4607 _c4dbgp(
"mapjson[RNXT]: end!");
4608 m_evt_handler->end_map();
4609 _line_progressed(1);
4610 goto mapjson_finish;
4614 _c4err(
"parse error");
// ---- end of iteration ----
// When the line is exhausted and the file is not, parsing continues (the
// steps taken are not visible in this view); if the file is exhausted too,
// the map was never closed.
4619 _c4dbgt(
"mapjson: go again", 0);
4620 if(_finished_line())
4622 if(C4_LIKELY(!_finished_file()))
4630 _c4err(
"missing terminating }");
4636 _c4dbgp(
"mapjson: finish");
// Handles a map that is being read in the RSEQIMAP state (per the trace
// messages elsewhere in this file, a map that shows up as an entry of a flow
// sequence).
//
// Preconditions (asserted): the state has RSEQIMAP, has no RKEY, has exactly
// one of RVAL / RNXT / QMRK / RKCL, and the handler stack holds at least 3
// entries.
//
// Whitespace is skipped first, then the first remaining character of the line
// drives the dispatch within the current state. In every state a ',' or ']'
// finishes the map via end_map() without consuming the character here.
// Some dispatch conditions are not visible in this view; the trace messages
// name each case.
4642 template<
class EventHandler>
4643 void ParseEngine<EventHandler>::_handle_seq_imap()
4646 _c4dbgpf(
"handle2_seq_imap: node_id={} level={} indref={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
4648 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RSEQIMAP));
4649 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
4650 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL|
RNXT|
QMRK|
RKCL));
// exactly one of the four sub-states may be active
4651 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == has_all(
RVAL) + has_all(
RNXT) + has_all(
QMRK) + has_all(
RKCL));
4652 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 3);
4654 _handle_flow_skip_whitespace();
4655 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// ---- state RVAL: the value of the map entry is expected ----
4661 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL));
4662 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
4663 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
4664 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
4665 const char first = rem.str[0];
4666 _c4dbgpf(
"seqimap[RVAL]: '{}'", _c4prc(first));
// single-quoted value; a scalar value immediately ends the map
4670 _c4dbgp(
"seqimap[RVAL]: scanning single-quoted scalar");
4671 sc = _scan_scalar_squot();
4672 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
4673 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
4674 m_evt_handler->end_map();
4675 goto seqimap_finish;
// double-quoted value
4677 else if(first ==
'"')
4679 _c4dbgp(
"seqimap[RVAL]: scanning double-quoted scalar");
4680 sc = _scan_scalar_dquot();
4681 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
4682 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
4683 m_evt_handler->end_map();
4684 goto seqimap_finish;
// plain scalar value
4687 else if(_scan_scalar_plain_map_flow(&sc))
4689 _c4dbgp(
"seqimap[RVAL]: it's a scalar.");
4690 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
4691 m_evt_handler->set_val_scalar_plain(maybe_filtered);
4692 m_evt_handler->end_map();
4693 goto seqimap_finish;
// the value is a nested flow sequence
4695 else if(first ==
'[')
4697 _c4dbgp(
"seqimap[RVAL]: start child seqflow");
4699 m_evt_handler->begin_seq_val_flow();
4701 _set_indentation(m_evt_handler->m_parent->indref);
4702 _line_progressed(1);
4703 goto seqimap_finish;
// the value is a nested flow map
4705 else if(first ==
'{')
4707 _c4dbgp(
"seqimap[RVAL]: start child mapflow");
4709 m_evt_handler->begin_map_val_flow();
4711 _set_indentation(m_evt_handler->m_parent->indref);
4712 _line_progressed(1);
4713 goto seqimap_finish;
// no value given: emit an empty plain scalar and end the map
4715 else if(first ==
',' || first ==
']')
4717 _c4dbgp(
"seqimap[RVAL]: finish without val.");
4718 m_evt_handler->set_val_scalar_plain({});
4719 m_evt_handler->end_map();
4720 goto seqimap_finish;
// anchor on the value
4722 else if(first ==
'&')
4724 csubstr anchor = _scan_anchor();
4725 _c4dbgp(
"seqimap[RVAL]: anchor!");
4726 m_evt_handler->set_val_anchor(anchor);
// the value is a reference
4728 else if(first ==
'*')
4730 csubstr ref = _scan_ref_seq();
4731 _c4dbgp(
"seqimap[RVAL]: ref!");
4732 m_evt_handler->set_val_ref(ref);
4737 _c4err(
"parse error");
// ---- state RNXT: only ',' or ']' may follow; either one ends the map ----
4740 else if(has_any(
RNXT))
4742 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT));
4743 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
4744 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
4745 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
4746 const char first = rem.str[0];
4747 _c4dbgpf(
"seqimap[RNXT]: '{}'", _c4prc(first));
4748 if(first ==
',' || first ==
']')
4752 _c4dbgp(
"seqimap: done");
4753 m_evt_handler->end_map();
4754 goto seqimap_finish;
4758 _c4err(
"parse error");
// ---- state QMRK: the key of the map entry is expected ----
4761 else if(has_any(
QMRK))
4763 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
QMRK));
4764 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
4765 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
4766 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
4767 const char first = rem.str[0];
4768 _c4dbgpf(
"seqimap[QMRK]: '{}'", _c4prc(first));
// single-quoted key
4772 _c4dbgp(
"seqimap[QMRK]: scanning single-quoted scalar");
4773 sc = _scan_scalar_squot();
4774 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
4775 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
// double-quoted key
4779 else if(first ==
'"')
4781 _c4dbgp(
"seqimap[QMRK]: scanning double-quoted scalar");
4782 sc = _scan_scalar_dquot();
4783 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
4784 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
// plain scalar key
4789 else if(_scan_scalar_plain_map_flow(&sc))
4791 _c4dbgp(
"seqimap[QMRK]: it's a scalar.");
4792 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
4793 m_evt_handler->set_key_scalar_plain(maybe_filtered);
// the key is a nested flow sequence
4797 else if(first ==
'[')
4799 _c4dbgp(
"seqimap[QMRK]: start child seqflow");
4801 m_evt_handler->begin_seq_key_flow();
4803 _set_indentation(m_evt_handler->m_parent->indref);
4804 _line_progressed(1);
4805 goto seqimap_finish;
// the key is a nested flow map
4807 else if(first ==
'{')
4809 _c4dbgp(
"seqimap[QMRK]: start child mapflow");
4811 m_evt_handler->begin_map_key_flow();
4813 _set_indentation(m_evt_handler->m_parent->indref);
4814 _line_progressed(1);
4815 goto seqimap_finish;
// neither key nor value given: emit both as empty plain scalars and end the map
4817 else if(first ==
',' || first ==
']')
4819 _c4dbgp(
"seqimap[QMRK]: finish without key.");
4820 m_evt_handler->set_key_scalar_plain({});
4821 m_evt_handler->set_val_scalar_plain({});
4822 m_evt_handler->end_map();
4823 goto seqimap_finish;
// anchor on the key
4825 else if(first ==
'&')
4827 csubstr anchor = _scan_anchor();
4828 _c4dbgp(
"seqimap[QMRK]: anchor!");
4829 m_evt_handler->set_key_anchor(anchor);
// the key is a reference
4831 else if(first ==
'*')
4833 csubstr ref = _scan_ref_seq();
4834 _c4dbgp(
"seqimap[QMRK]: ref!");
4835 m_evt_handler->set_key_ref(ref);
4840 _c4err(
"parse error");
// ---- state RKCL: the colon after the key is expected ----
4843 else if(has_any(
RKCL))
4845 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
4846 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
4847 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
4848 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RKCL));
4849 const char first = rem.str[0];
4850 _c4dbgpf(
"seqimap[RKCL]: '{}'", _c4prc(first));
4853 _c4dbgp(
"seqimap[RKCL]: found ':'");
4855 _line_progressed(1);
// key without a value: emit an empty plain scalar and end the map
4858 else if(first ==
',' || first ==
']')
4860 _c4dbgp(
"seqimap[RKCL]: found ','. finish without val");
4861 m_evt_handler->set_val_scalar_plain({});
4862 m_evt_handler->end_map();
4863 goto seqimap_finish;
4867 _c4err(
"parse error");
// ---- end of iteration ----
// When the line is exhausted and the file is not, parsing continues (the
// steps taken are not visible in this view); if the file is exhausted too,
// a parse error is raised.
4872 _c4dbgt(
"seqimap: go again", 0);
4873 if(_finished_line())
4875 if(C4_LIKELY(!_finished_file()))
4883 _c4err(
"parse error");
4889 _c4dbgp(
"seqimap: finish");
// Handles the contents of a flow sequence.
//
// Preconditions (asserted): the state has RSEQ and FLOW, has no RKEY, has
// exactly one of RVAL (a value is expected) or RNXT (a separator or the
// closing bracket is expected), and the current reference indentation is set
// (not npos).
//
// Whitespace is skipped first, then the first remaining character of the line
// drives the dispatch within the current state. Some dispatch conditions are
// not visible in this view; the trace messages name each case.
4895 template<
class EventHandler>
4896 void ParseEngine<EventHandler>::_handle_seq_flow()
4899 _c4dbgpf(
"handle2_seq_flow: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
4901 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
4902 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RSEQ));
4903 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
FLOW));
4904 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL|
RNXT));
4905 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RVAL) != has_all(
RNXT));
4906 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref !=
npos);
4908 _handle_flow_skip_whitespace();
4909 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// ---- state RVAL: a sequence value is expected ----
4915 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
4916 const char first = rem.str[0];
// single-quoted value
4920 _c4dbgp(
"seqflow[RVAL]: scanning single-quoted scalar");
4921 sc = _scan_scalar_squot();
4922 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
4923 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
// double-quoted value
4926 else if(first ==
'"')
4928 _c4dbgp(
"seqflow[RVAL]: scanning double-quoted scalar");
4929 sc = _scan_scalar_dquot();
4930 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
4931 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
// plain scalar value
4935 else if(_scan_scalar_plain_seq_flow(&sc))
4937 _c4dbgp(
"seqflow[RVAL]: it's a scalar.");
4938 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
4939 m_evt_handler->set_val_scalar_plain(maybe_filtered);
// nested flow sequence: the child takes the parent's reference indentation
4942 else if(first ==
'[')
4944 _c4dbgp(
"seqflow[RVAL]: start child seqflow");
4946 m_evt_handler->begin_seq_val_flow();
4947 _set_indentation(m_evt_handler->m_parent->indref);
4949 _line_progressed(1);
// nested flow map: the child takes the parent's reference indentation
4951 else if(first ==
'{')
4953 _c4dbgp(
"seqflow[RVAL]: start child mapflow");
4955 m_evt_handler->begin_map_val_flow();
4956 _set_indentation(m_evt_handler->m_parent->indref);
4958 _line_progressed(1);
4959 goto seqflow_finish;
// the sequence is closed while a value was expected
4961 else if(first ==
']')
4963 _c4dbgp(
"seqflow[RVAL]: end!");
4964 _line_progressed(1);
4965 m_evt_handler->end_seq();
4966 goto seqflow_finish;
// the value is a reference
4968 else if(first ==
'*')
4970 csubstr ref = _scan_ref_seq();
4971 _c4dbgpf(
"seqflow[RVAL]: ref! [{}]~~~{}~~~", ref.len, ref);
4972 m_evt_handler->set_val_ref(ref);
// anchor on the value; if a comma follows directly, the anchored value is an
// empty scalar and the next sibling begins
4975 else if(first ==
'&')
4977 csubstr anchor = _scan_anchor();
4978 _c4dbgpf(
"seqflow[RVAL]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
4979 m_evt_handler->set_val_anchor(anchor);
4980 if(_maybe_scan_following_comma())
4982 _c4dbgp(
"seqflow[RVAL]: empty scalar!");
4983 m_evt_handler->set_val_scalar_plain({});
4984 m_evt_handler->add_sibling();
// tag on the value; same empty-scalar handling when a comma follows directly
4987 else if(first ==
'!')
4989 csubstr tag = _scan_tag();
4990 _c4dbgpf(
"seqflow[RVAL]: tag! [{}]~~~{}~~~", tag.len, tag);
4992 m_evt_handler->set_val_tag(tag);
4993 if(_maybe_scan_following_comma())
4995 _c4dbgp(
"seqflow[RVAL]: empty scalar!");
4996 m_evt_handler->set_val_scalar_plain({});
4997 m_evt_handler->add_sibling();
// a ':' where a value was expected: start a flow map whose key is empty
5000 else if(first ==
':')
5002 _c4dbgpf(
"seqflow[RVAL]: actually seqimap at node[{}], with empty key", m_evt_handler->m_curr->node_id);
5004 m_evt_handler->begin_map_val_flow();
5005 _set_indentation(m_evt_handler->m_parent->indref);
5006 m_evt_handler->set_key_scalar_plain({});
5008 _line_progressed(1);
5009 goto seqflow_finish;
// a '?' starts a flow map with an explicit key; m_was_inside_qmrk records this
5011 else if(first ==
'?')
5013 _c4dbgp(
"seqflow[RVAL]: start child mapflow, explicit key");
5015 m_was_inside_qmrk =
true;
5016 m_evt_handler->begin_map_val_flow();
5017 _set_indentation(m_evt_handler->m_parent->indref);
5019 _line_progressed(1);
5020 _maybe_skip_whitespace_tokens();
5021 goto seqflow_finish;
5025 _c4err(
"parse error");
// ---- state RNXT: a separator or the end of the sequence is expected ----
5030 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT));
5031 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
5032 const char first = rem.str[0];
// separator: move on to the next sibling value
5035 _c4dbgp(
"seqflow[RNXT]: expect next val");
5037 m_evt_handler->add_sibling();
5038 _line_progressed(1);
// closing bracket: the sequence is done
5040 else if(first ==
']')
5042 _c4dbgp(
"seqflow[RNXT]: end!");
5043 m_evt_handler->end_seq();
5044 _line_progressed(1);
5045 goto seqflow_finish;
// a ':' after a value: the value just read becomes the first key of a new flow map
5047 else if(first ==
':')
5049 _c4dbgpf(
"seqflow[RNXT]: actually seqimap at node[{}]", m_evt_handler->m_curr->node_id);
5050 m_evt_handler->actually_val_is_first_key_of_new_map_flow();
5051 _set_indentation(m_evt_handler->m_parent->indref);
5052 _line_progressed(1);
5054 goto seqflow_finish;
5058 _c4err(
"parse error");
// ---- end of iteration ----
// When the line is exhausted and the file is not, parsing continues (the
// steps taken are not visible in this view); if the file is exhausted too,
// the sequence was never closed.
5063 _c4dbgt(
"seqflow: go again", 0);
5064 if(_finished_line())
5066 if(C4_LIKELY(!_finished_file()))
5074 _c4err(
"missing terminating ]");
5080 _c4dbgp(
"seqflow: finish");
// Handles the contents of a flow map.
//
// Preconditions (asserted): the state has RMAP and FLOW, and has exactly one
// of RKEY (key expected), RKCL (colon after a key expected), RVAL (value
// expected), RNXT (separator or closing brace expected) or QMRK (key
// expected after an explicit '?').
//
// Whitespace is skipped first, then the first remaining character of the line
// drives the dispatch within the current state. Some dispatch conditions are
// not visible in this view; the trace messages name each case.
//
// Fix: in the RVAL state the traces for a reference and for an anchor
// previously said "key ref!" / "key anchor!" although the code sets the
// *value* reference/anchor; they now say "val ref!" / "val anchor!".
5086 template<
class EventHandler>
5087 void ParseEngine<EventHandler>::_handle_map_flow()
5090 _c4dbgpf(
"handle2_map_flow: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5092 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP));
5093 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
FLOW));
// exactly one of the five sub-states may be active
5095 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(
RKEY) + has_any(
RKCL) + has_any(
RVAL) + has_any(
RNXT) + has_any(
QMRK)));
5097 _handle_flow_skip_whitespace();
5098 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// ---- state RKEY: a key is expected ----
5104 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
5105 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
5106 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
5107 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
5108 const char first = rem.str[0];
5109 _c4dbgpf(
"mapflow[RKEY]: '{}'", first);
// single-quoted key
5113 _c4dbgp(
"mapflow[RKEY]: scanning single-quoted scalar");
5114 sc = _scan_scalar_squot();
5115 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5116 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
// double-quoted key
5119 else if(first ==
'"')
5121 _c4dbgp(
"mapflow[RKEY]: scanning double-quoted scalar");
5122 sc = _scan_scalar_dquot();
5123 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5124 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
// plain scalar key
5128 else if(_scan_scalar_plain_map_flow(&sc))
5130 _c4dbgp(
"mapflow[RKEY]: plain scalar");
5131 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5132 m_evt_handler->set_key_scalar_plain(maybe_filtered);
// explicit key marker: consume it and any whitespace after it
5135 else if(first ==
'?')
5137 _c4dbgp(
"mapflow[RKEY]: explicit key");
5138 _line_progressed(1);
5140 _maybe_skip_whitespace_tokens();
// a ':' where a key was expected: the key is empty
5142 else if(first ==
':')
5144 _c4dbgp(
"mapflow[RKEY]: setting empty key");
5145 m_evt_handler->set_key_scalar_plain({});
5147 _line_progressed(1);
5148 _maybe_skip_whitespace_tokens();
// the map is closed while a key was expected
5150 else if(first ==
'}')
5152 _c4dbgp(
"mapflow[RKEY]: end!");
5153 m_evt_handler->end_map();
5154 _line_progressed(1);
5155 goto mapflow_finish;
// anchor on the key
5157 else if(first ==
'&')
5159 csubstr anchor = _scan_anchor();
5160 _c4dbgpf(
"mapflow[RKEY]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
5161 m_evt_handler->set_key_anchor(anchor);
// the key is a reference
5163 else if(first ==
'*')
5165 csubstr ref = _scan_ref_map();
5166 _c4dbgpf(
"mapflow[RKEY]: key ref! [{}]~~~{}~~~", ref.len, ref);
5167 m_evt_handler->set_key_ref(ref);
// the key is itself a flow sequence
5170 else if(first ==
'[')
5175 _c4dbgp(
"mapflow[RKEY]: start child seqflow (!)");
5177 m_evt_handler->begin_seq_key_flow();
5179 _set_indentation(m_evt_handler->m_parent->indref);
5180 _line_progressed(1);
5181 goto mapflow_finish;
// the key is itself a flow map
5183 else if(first ==
'{')
5188 _c4dbgp(
"mapflow[RKEY]: start child mapflow (!)");
5190 m_evt_handler->begin_map_key_flow();
5192 _set_indentation(m_evt_handler->m_parent->indref);
5193 _line_progressed(1);
// tag on the key
5196 else if(first ==
'!')
5198 csubstr tag = _scan_tag();
5199 _c4dbgpf(
"mapflow[RKEY]: tag! [{}]~~~{}~~~", tag.len, tag);
5201 m_evt_handler->set_key_tag(tag);
5205 _c4err(
"parse error");
// ---- state RKCL: the colon after a key is expected ----
5208 else if(has_any(
RKCL))
5210 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
5211 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
5212 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
5213 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
5214 const char first = rem.str[0];
5215 _c4dbgpf(
"mapflow[RKCL]: '{}'", first);
5218 _c4dbgp(
"mapflow[RKCL]: found the colon");
5220 _line_progressed(1);
// key without a value at the end of the map: emit an empty value and close
5222 else if(first ==
'}')
5224 _c4dbgp(
"mapflow[RKCL]: end with missing val!");
5226 m_evt_handler->set_val_scalar_plain({});
5227 m_evt_handler->end_map();
5228 _line_progressed(1);
5229 goto mapflow_finish;
// key without a value followed by a comma: emit an empty value, go to next pair
5231 else if(first ==
',')
5233 _c4dbgp(
"mapflow[RKCL]: got comma. val is missing");
5234 m_evt_handler->set_val_scalar_plain({});
5235 m_evt_handler->add_sibling();
5237 _line_progressed(1);
5241 _c4err(
"parse error");
// ---- state RVAL: a value is expected ----
5244 else if(has_any(
RVAL))
5246 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
5247 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
5248 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
5249 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
5250 const char first = rem.str[0];
5251 _c4dbgpf(
"mapflow[RVAL]: '{}'", first);
// single-quoted value
5255 _c4dbgp(
"mapflow[RVAL]: scanning single-quoted scalar");
5256 sc = _scan_scalar_squot();
5257 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5258 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
// double-quoted value
5261 else if(first ==
'"')
5263 _c4dbgp(
"mapflow[RVAL]: scanning double-quoted scalar");
5264 sc = _scan_scalar_dquot();
5265 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5266 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
// plain scalar value
5270 else if(_scan_scalar_plain_map_flow(&sc))
5272 _c4dbgp(
"mapflow[RVAL]: plain scalar.");
5273 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5274 m_evt_handler->set_val_scalar_plain(maybe_filtered);
// the value is a nested flow sequence
5277 else if(first ==
'[')
5279 _c4dbgp(
"mapflow[RVAL]: start val seqflow");
5281 m_evt_handler->begin_seq_val_flow();
5282 _set_indentation(m_evt_handler->m_parent->indref);
5284 _line_progressed(1);
5285 goto mapflow_finish;
// the value is a nested flow map
5287 else if(first ==
'{')
5289 _c4dbgp(
"mapflow[RVAL]: start val mapflow");
5291 m_evt_handler->begin_map_val_flow();
5292 _set_indentation(m_evt_handler->m_parent->indref);
5294 _line_progressed(1);
// the map is closed while a value was expected: emit an empty value and close
5297 else if(first ==
'}')
5299 _c4dbgp(
"mapflow[RVAL]: end!");
5300 m_evt_handler->set_val_scalar_plain({});
5301 m_evt_handler->end_map();
5302 _line_progressed(1);
5303 goto mapflow_finish;
// the value is a reference
5305 else if(first ==
'*')
5307 csubstr ref = _scan_ref_map();
5308 _c4dbgpf(
"mapflow[RVAL]: val ref! [{}]~~~{}~~~", ref.len, ref);
5309 m_evt_handler->set_val_ref(ref);
// anchor on the value
5312 else if(first ==
'&')
5314 csubstr anchor = _scan_anchor();
5315 _c4dbgpf(
"mapflow[RVAL]: val anchor! [{}]~~~{}~~~", anchor.len, anchor);
5316 m_evt_handler->set_val_anchor(anchor);
// tag on the value
5318 else if(first ==
'!')
5320 csubstr tag = _scan_tag();
5321 _c4dbgpf(
"mapflow[RVAL]: tag! [{}]~~~{}~~~", tag.len, tag);
5323 m_evt_handler->set_val_tag(tag);
5327 _c4err(
"parse error");
// ---- state RNXT: a separator or the end of the map is expected ----
5330 else if(has_any(
RNXT))
5332 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
5333 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
5334 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
5335 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
5336 _c4dbgpf(
"mapflow[RNXT]: '{}'", rem.str[0]);
// comma: another key/value pair follows
5337 if(rem.begins_with(
','))
5339 _c4dbgp(
"mapflow[RNXT]: expect next keyval");
5340 m_evt_handler->add_sibling();
5342 _line_progressed(1);
// closing brace: the map is done
5344 else if(rem.begins_with(
'}'))
5346 _c4dbgp(
"mapflow[RNXT]: end!");
5347 m_evt_handler->end_map();
5348 _line_progressed(1);
5349 goto mapflow_finish;
5353 _c4err(
"parse error");
// ---- state QMRK: a key is expected after an explicit '?' ----
5356 else if(has_any(
QMRK))
5358 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
5359 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
5360 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
5361 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
5362 const char first = rem.str[0];
5363 _c4dbgpf(
"mapflow[QMRK]: '{}'", first);
// single-quoted key
5367 _c4dbgp(
"mapflow[QMRK]: scanning single-quoted scalar");
5368 sc = _scan_scalar_squot();
5369 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5370 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
// double-quoted key
5373 else if(first ==
'"')
5375 _c4dbgp(
"mapflow[QMRK]: scanning double-quoted scalar");
5376 sc = _scan_scalar_dquot();
5377 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5378 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
// plain scalar key
5382 else if(_scan_scalar_plain_map_flow(&sc))
5384 _c4dbgp(
"mapflow[QMRK]: plain scalar");
5385 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5386 m_evt_handler->set_key_scalar_plain(maybe_filtered);
// a ':' where a key was expected: the key is empty
5389 else if(first ==
':')
5391 _c4dbgp(
"mapflow[QMRK]: setting empty key");
5392 m_evt_handler->set_key_scalar_plain({});
5394 _line_progressed(1);
5395 _maybe_skip_whitespace_tokens();
// the map is closed right after '?': emit empty key and empty value, then close
5397 else if(first ==
'}')
5399 _c4dbgp(
"mapflow[QMRK]: end!");
5400 m_evt_handler->set_key_scalar_plain({});
5401 m_evt_handler->set_val_scalar_plain({});
5402 m_evt_handler->end_map();
5403 _line_progressed(1);
5404 goto mapflow_finish;
// anchor on the key
5406 else if(first ==
'&')
5408 csubstr anchor = _scan_anchor();
5409 _c4dbgpf(
"mapflow[QMRK]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
5410 m_evt_handler->set_key_anchor(anchor);
// the key is a reference
5412 else if(first ==
'*')
5414 csubstr ref = _scan_ref_map();
5415 _c4dbgpf(
"mapflow[QMRK]: key ref! [{}]~~~{}~~~", ref.len, ref);
5416 m_evt_handler->set_key_ref(ref);
// the key is itself a flow sequence
5419 else if(first ==
'[')
5424 _c4dbgp(
"mapflow[QMRK]: start child seqflow (!)");
5426 m_evt_handler->begin_seq_key_flow();
5428 _set_indentation(m_evt_handler->m_parent->indref);
5429 _line_progressed(1);
5430 goto mapflow_finish;
// the key is itself a flow map
5432 else if(first ==
'{')
5437 _c4dbgp(
"mapflow[QMRK]: start child mapflow (!)");
5439 m_evt_handler->begin_map_key_flow();
5440 _set_indentation(m_evt_handler->m_parent->indref);
5442 _line_progressed(1);
// tag on the key
5445 else if(first ==
'!')
5447 csubstr tag = _scan_tag();
5448 _c4dbgpf(
"mapflow[QMRK]: tag! [{}]~~~{}~~~", tag.len, tag);
5450 m_evt_handler->set_key_tag(tag);
5454 _c4err(
"parse error");
// ---- end of iteration ----
// When the line is exhausted and the file is not, parsing continues (the
// steps taken are not visible in this view); if the file is exhausted too,
// the map was never closed.
5459 _c4dbgt(
"mapflow: go again", 0);
5460 if(_finished_line())
5462 if(C4_LIKELY(!_finished_file()))
5470 _c4err(
"missing terminating }");
5476 _c4dbgp(
"mapflow: finish");
// Handles input while the current parser state is a block sequence.
//
// Preconditions (asserted below): the current state has the RSEQ and BLCK
// flags, and exactly one of RVAL or RNXT.
//
// In the RVAL part, the first character of the remaining line selects what
// is read: a single-/double-quoted scalar, a block scalar ('|' or '>'), a
// plain scalar, a flow container ('[' or '{'), a nested block sequence ('-'),
// a block map (':' or '?'), an anchor ('&'), a reference ('*') or a tag ('!').
// In the RNXT part, indentation is checked at the line start and then a '-',
// a ':', a '.' or the end of an indentless sequence is handled.
// Anything that matches none of the handled cases is reported through
// _c4err("parse error").
5482 template<
class EventHandler>
5483 void ParseEngine<EventHandler>::_handle_seq_block()
5486 _c4dbgpf(
"handle2_seq_block: seq_id={} node_id={} level={} indent={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
// State preconditions: RSEQ and BLCK set, and exactly one of RVAL / RNXT.
5488 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RSEQ));
5489 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
BLCK));
5490 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL|
RNXT));
5491 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(
RVAL) + has_any(
RNXT)));
// Let _maybe_skip_comment() run first, then read what remains of the current line.
5493 _maybe_skip_comment();
5494 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// ---- RVAL part (see the debug tags): RNXT must not be set here ----
5500 _c4dbgpf(
"seqblck[RVAL]: col={}", m_evt_handler->m_curr->pos.col);
5501 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
// At the beginning of a line, compare the line's indentation with indref first.
5502 if(m_evt_handler->m_curr->at_line_beginning())
5504 _c4dbgpf(
"seqblck[RVAL]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
// indentation_ge(): consume the line's indentation and refresh rem.
5505 if(m_evt_handler->m_curr->indentation_ge())
5507 _c4dbgpf(
"seqblck[RVAL]: skip {} from indentation", m_evt_handler->m_curr->line_contents.indentation);
5508 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
5509 rem = m_evt_handler->m_curr->line_contents.rem;
// indentation_lt(): pop out of this block sequence and stop handling here.
5513 else if(m_evt_handler->m_curr->indentation_lt())
5515 _c4dbgp(
"seqblck[RVAL]: smaller indentation!");
5516 _handle_indentation_pop_from_block_seq();
5517 goto seqblck_finish;
// indentation == npos is handled as an empty line: consume the whole remainder.
5519 else if(m_evt_handler->m_curr->line_contents.indentation ==
npos)
5521 _c4dbgp(
"seqblck[RVAL]: empty line!");
5522 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
// Conditionally compiled: when rem starts with a space, indentation may be
// taken from pending annotations, after which rem is refreshed.
5526 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
5534 if(rem.str[0] ==
' ')
5536 if(_handle_indentation_from_annotations())
5538 _c4dbgp(
"seqblck[RVAL]: annotations!");
5539 rem = m_evt_handler->m_curr->line_contents.rem;
// There must be something left on the line to dispatch on.
5546 _RYML_CB_ASSERT(callbacks(), rem.len);
5547 _c4dbgpf(
"seqblck[RVAL]: '{}' node_id={}", rem.str[0], m_evt_handler->m_curr->node_id);
// Capture the dispatch character plus the line and column where this token
// starts; the latter two are handed to the annotation/indentation helpers.
5548 const char first = rem.str[0];
5549 const size_t startline = m_evt_handler->m_curr->pos.line;
5552 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
// Single-quoted scalar: whether a ':' follows decides value vs. map key.
5556 _c4dbgp(
"seqblck[RVAL]: single-quoted scalar");
5557 sc = _scan_scalar_squot();
5558 if(!_maybe_scan_following_colon())
5560 _c4dbgp(
"seqblck[RVAL]: set as val");
5561 _handle_annotations_before_blck_val_scalar();
5562 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5563 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
// A ':' followed: begin a block map as this entry's value and use the scalar as its key.
5568 _c4dbgp(
"seqblck[RVAL]: start mapblck, set scalar as key");
5570 _handle_annotations_before_start_mapblck(startline);
5571 m_evt_handler->begin_map_val_block();
5572 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5573 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5574 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
5576 _maybe_skip_whitespace_tokens();
5577 goto seqblck_finish;
// Double-quoted scalar: same value-vs-key decision as for single quotes.
5580 else if(first ==
'"')
5582 _c4dbgp(
"seqblck[RVAL]: double-quoted scalar");
5583 sc = _scan_scalar_dquot();
5584 if(!_maybe_scan_following_colon())
5586 _c4dbgp(
"seqblck[RVAL]: set as val");
5587 _handle_annotations_before_blck_val_scalar();
5588 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5589 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
// A ':' followed: begin a block map and use the scalar as its key.
5594 _c4dbgp(
"seqblck[RVAL]: start mapblck, set scalar as key");
5596 _handle_annotations_before_start_mapblck(startline);
5597 m_evt_handler->begin_map_val_block();
5598 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5599 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5600 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5602 _maybe_skip_whitespace_tokens();
5603 goto seqblck_finish;
// Block-literal scalar: scanned by _scan_block() with indref + 1, then set as the value.
5609 else if(first ==
'|')
5611 _c4dbgp(
"seqblck[RVAL]: block-literal scalar");
5613 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
5614 _handle_annotations_before_blck_val_scalar();
5615 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
5616 m_evt_handler->set_val_scalar_literal(maybe_filtered);
// Block-folded scalar: same scan call as above, set through the folded setter.
5619 else if(first ==
'>')
5621 _c4dbgp(
"seqblck[RVAL]: block-folded scalar");
5623 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
5624 _handle_annotations_before_blck_val_scalar();
5625 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
5626 m_evt_handler->set_val_scalar_folded(maybe_filtered);
// Plain scalar (tried before the remaining single-character tokens).
5629 else if(_scan_scalar_plain_seq_blck(&sc))
5631 _c4dbgp(
"seqblck[RVAL]: plain scalar.");
5632 if(!_maybe_scan_following_colon())
5634 _c4dbgp(
"seqblck[RVAL]: set as val");
5635 _handle_annotations_before_blck_val_scalar();
5636 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5637 m_evt_handler->set_val_scalar_plain(maybe_filtered);
// ':' followed and the scalar starts right of indref: it is the key of a new block map.
5642 if(startindent > m_evt_handler->m_curr->indref)
5644 _c4dbgp(
"seqblck[RVAL]: start mapblck, set scalar as key");
5646 _handle_annotations_before_start_mapblck(startline);
5647 m_evt_handler->begin_map_val_block();
5648 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5649 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5650 m_evt_handler->set_key_scalar_plain(maybe_filtered);
5652 _maybe_skip_whitespace_tokens();
5653 goto seqblck_finish;
// ':' followed and the scalar sits at the parent's indref: give this entry an
// empty value, end the (indentless) sequence, and set the scalar as the key
// of a new sibling.
// NOTE(review): this tests has_any(RMAP|BLCK, parent), whereas the indentless-seq
// check in the RNXT part uses has_all(RMAP|BLCK, parent) — confirm has_any is intended.
5655 else if(m_evt_handler->m_parent && m_evt_handler->m_parent->indref == startindent && has_any(
RMAP|
BLCK, m_evt_handler->m_parent))
5657 _c4dbgp(
"seqblck[RVAL]: empty val + end indentless seq + set key");
5658 m_evt_handler->set_val_scalar_plain({});
5659 m_evt_handler->end_seq();
5660 m_evt_handler->add_sibling();
5661 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5662 m_evt_handler->set_key_scalar_plain(maybe_filtered);
5664 _maybe_skip_whitespace_tokens();
5665 goto seqblck_finish;
5669 _c4err(
"parse error");
// '[' begins a flow sequence as the value; step past the bracket and set the
// indentation to the parent's indref + 1.
5673 else if(first ==
'[')
5675 _c4dbgp(
"seqblck[RVAL]: start child seqflow");
5677 m_evt_handler->begin_seq_val_flow();
5679 _line_progressed(1);
5680 _set_indentation(m_evt_handler->m_parent->indref + 1u);
5681 goto seqblck_finish;
// '{' begins a flow map as the value, handled like '[' above.
5683 else if(first ==
'{')
5685 _c4dbgp(
"seqblck[RVAL]: start child mapflow");
5687 _handle_annotations_before_blck_val_scalar();
5688 m_evt_handler->begin_map_val_flow();
5690 _line_progressed(1);
5691 _set_indentation(m_evt_handler->m_parent->indref + 1u);
5692 goto seqblck_finish;
// '-' while a value is expected.
5694 else if(first ==
'-')
// Same column as this sequence: the previous entry had no value; set it empty
// and move on to a sibling.
5696 if(startindent == m_evt_handler->m_curr->indref)
5698 _c4dbgp(
"seqblck[RVAL]: prev val was empty");
5699 _handle_annotations_before_blck_val_scalar();
5700 m_evt_handler->set_val_scalar_plain({});
5702 m_evt_handler->add_sibling();
// Otherwise the '-' must be further right (asserted): begin a nested block sequence.
5706 _c4dbgp(
"seqblck[RVAL]: start child seqblck");
5707 _RYML_CB_ASSERT(this->callbacks(), startindent > m_evt_handler->m_curr->indref);
5709 _handle_annotations_before_blck_val_scalar();
5710 m_evt_handler->begin_seq_val_block();
5712 _save_indentation();
// Step past the '-' character.
5715 _line_progressed(1);
5716 _maybe_skip_whitespace_tokens();
// ':' with no preceding scalar: begin a block map whose first key is empty.
5718 else if(first ==
':')
5720 _c4dbgp(
"seqblck[RVAL]: start child mapblck with empty key");
5722 _handle_annotations_before_start_mapblck(startline);
5723 m_evt_handler->begin_map_val_block();
5724 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5725 m_evt_handler->set_key_scalar_plain({});
5727 _line_progressed(1);
5728 _maybe_skip_whitespace_tokens();
5729 goto seqblck_finish;
// '&': scan the anchor and record it in m_pending_anchors with its start column and line.
5731 else if(first ==
'&')
5733 const csubstr anchor = _scan_anchor();
5734 _c4dbgpf(
"seqblck[RVAL]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
5737 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
// '*': scan the reference; a following ':' makes it a map key instead of the value.
5739 else if(first ==
'*')
5741 csubstr ref = _scan_ref_seq();
5742 _c4dbgpf(
"seqblck[RVAL]: ref! [{}]~~~{}~~~", ref.len, ref);
5743 if(!_maybe_scan_following_colon())
5745 _c4dbgp(
"seqblck[RVAL]: set ref as val!");
5746 _handle_annotations_before_blck_val_scalar();
5747 m_evt_handler->set_val_ref(ref);
// A ':' followed: begin a block map keyed by the reference, indented at the token's column.
5752 _c4dbgp(
"seqblck[RVAL]: ref is key of map");
5754 _handle_annotations_before_start_mapblck(startline);
5755 m_evt_handler->begin_map_val_block();
5756 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5757 m_evt_handler->set_key_ref(ref);
5759 _set_indentation(startindent);
5760 _maybe_skip_whitespace_tokens();
5761 goto seqblck_finish;
// '!': scan the tag and record it in m_pending_tags with its start column and line.
5764 else if(first ==
'!')
5766 csubstr tag = _scan_tag();
5767 _c4dbgpf(
"seqblck[RVAL]: val tag! [{}]~~~{}~~~", tag.len, tag);
5770 _add_annotation(&m_pending_tags, tag, startindent, startline);
// '?': explicit key. Set m_was_inside_qmrk, begin a block map as the value,
// save the indentation and step past the '?'.
5772 else if(first ==
'?')
5774 _c4dbgp(
"seqblck[RVAL]: start child mapblck, explicit key");
5776 m_was_inside_qmrk =
true;
5777 m_evt_handler->begin_map_val_block();
5779 _save_indentation();
5780 _line_progressed(1);
5781 _maybe_skip_whitespace_tokens();
5782 goto seqblck_finish;
5786 _c4err(
"parse error");
// ---- RNXT part: RNXT must be set and RVAL must not ----
5791 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT));
5792 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
5796 _c4dbgpf(
"seqblck[RNXT]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
// Being anywhere but at the line beginning is a parse error here.
5797 if(C4_UNLIKELY(!_at_line_begin()))
5798 _c4err(
"parse error");
// indentation_ge(): consume indref columns (not the full indentation) and refresh rem.
5799 if(m_evt_handler->m_curr->indentation_ge())
5801 _c4dbgpf(
"seqblck[RNXT]: skip {} from indref", m_evt_handler->m_curr->indref);
5802 _line_progressed(m_evt_handler->m_curr->indref);
5803 _maybe_skip_whitespace_tokens();
5804 rem = m_evt_handler->m_curr->line_contents.rem;
// indentation_lt(): pop states; per the debug messages below, either continue
// in a block sequence (consuming the indentation) or finish.
5808 else if(m_evt_handler->m_curr->indentation_lt())
5810 _c4dbgp(
"seqblck[RNXT]: smaller indentation!");
5811 _handle_indentation_pop_from_block_seq();
5814 _c4dbgp(
"seqblck[RNXT]: still seqblck!");
5815 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT));
5816 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
5817 rem = m_evt_handler->m_curr->line_contents.rem;
5823 _c4dbgp(
"seqblck[RNXT]: no longer seqblck!");
5824 goto seqblck_finish;
// indentation == npos is handled as a blank line: consume the remainder.
// NOTE(review): the debug format says "len={}" but is passed rem itself, not rem.len — confirm.
5827 else if(m_evt_handler->m_curr->line_contents.indentation ==
npos)
5829 _c4dbgpf(
"seqblck[RNXT]: blank line, len={}", m_evt_handler->m_curr->line_contents.rem);
5830 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
5831 rem = m_evt_handler->m_curr->line_contents.rem;
// Dispatch on the first remaining character.
5838 const char first = rem.str[0];
5839 _c4dbgpf(
"seqblck[RNXT]: '{}' node_id={}", first, m_evt_handler->m_curr->node_id);
// Next entry when indref > 0, or the line is indented, or rem is not a
// document-begin token: add a sibling and step past one character.
5842 if(m_evt_handler->m_curr->indref > 0 || m_evt_handler->m_curr->line_contents.indentation > 0 || !_is_doc_begin_token(rem))
5844 _c4dbgp(
"seqblck[RNXT]: expect next val");
5846 m_evt_handler->add_sibling();
5847 _line_progressed(1);
5848 _maybe_skip_whitespace_tokens();
// Otherwise a new document starts: consume the three-character token.
5852 _c4dbgp(
"seqblck[RNXT]: start doc");
5853 _start_doc_suddenly();
5854 _line_progressed(3);
5855 _maybe_skip_whitespace_tokens();
5856 goto seqblck_finish;
// ':' — valid only when the parent state has RMAP: the sequence is then ended
// (the debug message calls it the '?' key of the parent map).
5859 else if(first ==
':')
5865 auto const *C4_RESTRICT prev_state = m_evt_handler->m_parent;
5866 if(C4_LIKELY(prev_state && (prev_state->flags &
RMAP)))
5868 _c4dbgp(
"seqblck[RNXT]: actually this seq was '?' key of parent map");
5869 m_evt_handler->end_seq();
5870 goto seqblck_finish;
5874 _c4err(
"parse error");
// '.' — accepted only as "..." alone or followed by a space: end the document
// and consume the three characters.
5877 else if(first ==
'.')
5879 _c4dbgp(
"seqblck[RNXT]: maybe doc?");
5880 csubstr rs = rem.sub(1);
5881 if(rs ==
".." || rs.begins_with(
".. "))
5883 _c4dbgp(
"seqblck[RNXT]: end+start doc");
5884 _end_doc_suddenly();
5885 _line_progressed(3);
5886 _maybe_skip_whitespace_tokens();
5887 goto seqblck_finish;
5891 _c4err(
"parse error");
// Print every state on the stack through _dbg_printf.
5900 for(
auto const& s : m_evt_handler->m_stack)
5902 _dbg_printf(
"state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(flagbuf_, s.flags));
// End of an indentless sequence: the parent has RMAP and BLCK and the same
// indref as this sequence. Pop to the parent (which must then be RMAP|BLCK)
// and add a sibling there.
5905 if(m_evt_handler->m_parent && has_all(
RMAP|
BLCK, m_evt_handler->m_parent) && m_evt_handler->m_curr->indref == m_evt_handler->m_parent->indref)
5907 _c4dbgpf(
"seqblck[RNXT]: end indentless seq, go to parent={}. node={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id);
5908 _RYML_CB_ASSERT(this->callbacks(), m_evt_handler->m_curr != m_evt_handler->m_parent);
5909 _handle_indentation_pop(m_evt_handler->m_parent);
5910 _RYML_CB_ASSERT(this->callbacks(), has_all(
RMAP|
BLCK));
5911 m_evt_handler->add_sibling();
5913 goto seqblck_finish;
5917 _c4err(
"parse error");
// After a token was handled: check for end of line, and stop at end of file.
5923 _c4dbgt(
"seqblck: go again", 0);
5924 if(_finished_line())
5928 if(_finished_file())
5930 _c4dbgp(
"seqblck: finish!");
5932 goto seqblck_finish;
// Common exit point (target of the seqblck_finish jumps above).
5939 _c4dbgp(
"seqblck: finish");
5945 template<
class EventHandler>
5946 void ParseEngine<EventHandler>::_handle_map_block()
5949 _c4dbgpf(
"handle2_map_block: map_id={} node_id={} level={} indref={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5952 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP));
5953 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
BLCK));
5955 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(
RKEY) + has_any(
RKCL) + has_any(
RVAL) + has_any(
RNXT) + has_any(
QMRK)));
5957 _maybe_skip_comment();
5958 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
5964 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
5965 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
5966 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
5967 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
5971 if(m_evt_handler->m_curr->at_line_beginning())
5973 if(m_evt_handler->m_curr->indentation_eq())
5975 _c4dbgpf(
"mapblck[RKEY]: skip {} from indref", m_evt_handler->m_curr->indref);
5976 _line_progressed(m_evt_handler->m_curr->indref);
5977 rem = m_evt_handler->m_curr->line_contents.rem;
5981 else if(m_evt_handler->m_curr->indentation_lt())
5983 _c4dbgp(
"mapblck[RKEY]: smaller indentation!");
5984 _handle_indentation_pop_from_block_map();
5985 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
5988 _c4dbgp(
"mapblck[RKEY]: still mapblck!");
5989 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY));
5990 rem = m_evt_handler->m_curr->line_contents.rem;
5996 _c4dbgp(
"mapblck[RKEY]: no longer mapblck!");
5997 goto mapblck_finish;
6002 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_gt());
6003 _c4err(
"invalid indentation");
6009 const char first = rem.str[0];
6010 const size_t startline = m_evt_handler->m_curr->pos.line;
6011 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
6012 _c4dbgpf(
"mapblck[RKEY]: '{}'", first);
6016 _c4dbgp(
"mapblck[RKEY]: scanning single-quoted scalar");
6017 sc = _scan_scalar_squot();
6018 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
6019 _handle_annotations_before_blck_key_scalar();
6020 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6022 if(!_maybe_scan_following_colon())
6023 _c4err(
"could not find ':' colon after key");
6024 _maybe_skip_whitespace_tokens();
6026 else if(first ==
'"')
6028 _c4dbgp(
"mapblck[RKEY]: scanning double-quoted scalar");
6029 sc = _scan_scalar_dquot();
6030 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
6031 _handle_annotations_before_blck_key_scalar();
6032 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6034 if(!_maybe_scan_following_colon())
6035 _c4err(
"could not find ':' colon after key");
6036 _maybe_skip_whitespace_tokens();
6040 else if(C4_UNLIKELY(first ==
'|'))
6042 _c4err(
"block literal keys must be enclosed in '?'");
6044 else if(C4_UNLIKELY(first ==
'>'))
6046 _c4err(
"block literal keys must be enclosed in '?'");
6048 else if(_scan_scalar_plain_map_blck(&sc))
6050 _c4dbgp(
"mapblck[RKEY]: plain scalar");
6051 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
6052 _handle_annotations_before_blck_key_scalar();
6053 m_evt_handler->set_key_scalar_plain(maybe_filtered);
6055 if(!_maybe_scan_following_colon())
6056 _c4err(
"could not find ':' colon after key");
6057 _maybe_skip_whitespace_tokens();
6059 else if(first ==
'?')
6061 _c4dbgp(
"mapblck[RKEY]: key token!");
6063 _line_progressed(1);
6064 _maybe_skip_whitespace_tokens();
6065 m_was_inside_qmrk =
true;
6068 else if(first ==
':')
6070 _c4dbgp(
"mapblck[RKEY]: setting empty key");
6071 _handle_annotations_before_blck_key_scalar();
6072 m_evt_handler->set_key_scalar_plain({});
6074 _line_progressed(1);
6075 _maybe_skip_whitespace_tokens();
6077 else if(first ==
'*')
6079 csubstr ref = _scan_ref_map();
6080 _c4dbgpf(
"mapblck[RKEY]: key ref! [{}]~~~{}~~~", ref.len, ref);
6081 _handle_annotations_before_blck_key_scalar();
6082 m_evt_handler->set_key_ref(ref);
6084 if(!_maybe_scan_following_colon())
6085 _c4err(
"could not find ':' colon after key");
6086 _maybe_skip_whitespace_tokens();
6088 else if(first ==
'&')
6090 csubstr anchor = _scan_anchor();
6091 _c4dbgpf(
"mapblck[RKEY]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
6092 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
6094 else if(first ==
'!')
6096 csubstr tag = _scan_tag();
6097 _c4dbgpf(
"mapblck[RKEY]: key tag! [{}]~~~{}~~~", tag.len, tag);
6098 _add_annotation(&m_pending_tags, tag, startindent, startline);
6100 else if(first ==
'[')
6105 _c4dbgp(
"mapblck[RKEY]: start child seqflow (!)");
6107 _handle_annotations_before_blck_key_scalar();
6108 m_evt_handler->begin_seq_key_flow();
6110 _line_progressed(1);
6111 _set_indentation(startindent);
6112 goto mapblck_finish;
6114 else if(first ==
'{')
6119 _c4dbgp(
"mapblck[RKEY]: start child mapflow (!)");
6121 _handle_annotations_before_blck_key_scalar();
6122 m_evt_handler->begin_map_key_flow();
6124 _line_progressed(1);
6125 _set_indentation(startindent);
6126 goto mapblck_finish;
6128 else if(first ==
'-')
6130 _c4dbgp(
"mapblck[RKEY]: maybe doc?");
6131 if(m_evt_handler->m_curr->line_contents.indentation == 0 && _is_doc_begin_token(rem))
6133 _c4dbgp(
"mapblck[RKEY]: end+start doc");
6134 _start_doc_suddenly();
6135 _line_progressed(3);
6136 _maybe_skip_whitespace_tokens();
6137 goto mapblck_finish;
6141 _c4err(
"parse error");
6144 else if(first ==
'.')
6146 _c4dbgp(
"mapblck[RKEY]: maybe end doc?");
6147 if(m_evt_handler->m_curr->line_contents.indentation == 0 && _is_doc_end_token(rem))
6149 _c4dbgp(
"mapblck[RKEY]: end doc");
6150 _end_doc_suddenly();
6151 _line_progressed(3);
6152 _maybe_skip_whitespace_tokens();
6153 goto mapblck_finish;
6157 _c4err(
"parse error");
6161 else if(first ==
'\t')
6163 _c4dbgp(
"mapblck[RKEY]: skip tabs");
6164 _maybe_skipchars(
'\t');
6168 _c4err(
"parse error");
6171 else if(has_any(
RKCL))
6173 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
6174 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
6175 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
6176 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
6180 if(m_evt_handler->m_curr->at_line_beginning())
6182 if(m_evt_handler->m_curr->indentation_eq())
6184 _c4dbgpf(
"mapblck[RKCL]: skip {} from indref", m_evt_handler->m_curr->indref);
6185 _line_progressed(m_evt_handler->m_curr->indref);
6186 rem = m_evt_handler->m_curr->line_contents.rem;
6190 else if(C4_UNLIKELY(m_evt_handler->m_curr->indentation_lt()))
6192 _c4err(
"invalid indentation");
6195 const char first = rem.str[0];
6196 _c4dbgpf(
"mapblck[RKCL]: '{}'", first);
6199 _c4dbgp(
"mapblck[RKCL]: found the colon");
6201 _line_progressed(1);
6202 _maybe_skip_whitespace_tokens();
6204 else if(first ==
'?')
6206 _c4dbgp(
"mapblck[RKCL]: got '?'. val was empty");
6207 _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_was_inside_qmrk);
6208 m_evt_handler->set_val_scalar_plain({});
6209 m_evt_handler->add_sibling();
6211 _line_progressed(1);
6212 _maybe_skip_whitespace_tokens();
6214 else if(first ==
'-')
6216 if(m_evt_handler->m_curr->indref == 0 || m_evt_handler->m_curr->line_contents.indentation == 0 || _is_doc_begin_token(rem))
6218 _c4dbgp(
"mapblck[RKCL]: end+start doc");
6219 _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, _is_doc_begin_token(rem));
6220 _start_doc_suddenly();
6221 _line_progressed(3);
6222 _maybe_skip_whitespace_tokens();
6223 goto mapblck_finish;
6227 _c4err(
"parse error");
6230 else if(first ==
'.')
6232 _c4dbgp(
"mapblck[RKCL]: maybe end doc?");
6233 csubstr rs = rem.sub(1);
6234 if(rs ==
".." || rs.begins_with(
".. "))
6236 _c4dbgp(
"mapblck[RKCL]: end+start doc");
6237 _end_doc_suddenly();
6238 _line_progressed(3);
6239 goto mapblck_finish;
6243 _c4err(
"parse error");
6246 else if(m_was_inside_qmrk)
6248 _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_eq());
6249 _c4dbgp(
"mapblck[RKCL]: missing :");
6250 m_evt_handler->set_val_scalar_plain({});
6251 m_evt_handler->add_sibling();
6252 m_was_inside_qmrk =
false;
6257 _c4err(
"parse error");
6260 else if(has_any(
RVAL))
6262 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
6263 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
6264 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
6265 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
6269 if(m_evt_handler->m_curr->at_line_beginning())
6271 _c4dbgpf(
"mapblck[RVAL]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
6272 m_evt_handler->m_curr->more_indented =
false;
6273 if(m_evt_handler->m_curr->indref ==
npos)
6275 _c4dbgpf(
"mapblck[RVAL]: setting indentation={}", m_evt_handler->m_parent->indref);
6276 _set_indentation(m_evt_handler->m_curr->line_contents.indentation);
6277 _line_progressed(m_evt_handler->m_curr->indref);
6278 rem = m_evt_handler->m_curr->line_contents.rem;
6282 else if(m_evt_handler->m_curr->indentation_eq())
6284 _c4dbgp(
"mapblck[RVAL]: skip indentation!");
6285 _line_progressed(m_evt_handler->m_curr->indref);
6286 rem = m_evt_handler->m_curr->line_contents.rem;
6314 else if(m_evt_handler->m_curr->indentation_gt())
6316 _c4dbgp(
"mapblck[RVAL]: more indented!");
6317 m_evt_handler->m_curr->more_indented =
true;
6318 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6319 rem = m_evt_handler->m_curr->line_contents.rem;
6323 else if(m_evt_handler->m_curr->indentation_lt())
6325 _c4dbgp(
"mapblck[RVAL]: smaller indentation!");
6326 _handle_indentation_pop_from_block_map();
6329 _c4dbgp(
"mapblck[RVAL]: still mapblck!");
6330 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6333 _c4dbgp(
"mapblck[RVAL]: speculatively expect next keyval");
6334 m_evt_handler->add_sibling();
6341 _c4dbgp(
"mapblck[RVAL]: no longer mapblck!");
6342 goto mapblck_finish;
6345 else if(m_evt_handler->m_curr->line_contents.indentation ==
npos)
6347 _c4dbgp(
"mapblck[RVAL]: empty line!");
6348 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
6355 const char first = rem.str[0];
6356 const size_t startline = m_evt_handler->m_curr->pos.line;
6357 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
6358 _c4dbgpf(
"mapblck[RVAL]: '{}'", first);
6362 _c4dbgp(
"mapblck[RVAL]: scanning single-quoted scalar");
6363 sc = _scan_scalar_squot();
6364 if(!_maybe_scan_following_colon())
6366 _c4dbgp(
"mapblck[RVAL]: set as val");
6367 _handle_annotations_before_blck_val_scalar();
6368 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
6369 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
6374 if(startindent != m_evt_handler->m_curr->indref)
6376 _c4dbgp(
"mapblck[RVAL]: start new block map, set scalar as key");
6377 _handle_annotations_before_start_mapblck(startline);
6379 m_evt_handler->begin_map_val_block();
6380 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6381 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
6382 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6383 _maybe_skip_whitespace_tokens();
6384 _set_indentation(m_evt_handler->m_curr->line_contents.indentation);
6390 _c4dbgp(
"mapblck[RVAL]: prev val empty+this is a key");
6391 m_evt_handler->set_val_scalar_plain({});
6392 m_evt_handler->add_sibling();
6393 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
6394 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6396 _maybe_skip_whitespace_tokens();
6400 else if(first ==
'"')
6402 _c4dbgp(
"mapblck[RVAL]: scanning double-quoted scalar");
6403 sc = _scan_scalar_dquot();
6404 if(!_maybe_scan_following_colon())
6406 _c4dbgp(
"mapblck[RVAL]: set as val");
6407 _handle_annotations_before_blck_val_scalar();
6408 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
6409 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
6414 if(startindent != m_evt_handler->m_curr->indref)
6416 _c4dbgp(
"mapblck[RVAL]: start new block map, set scalar as key");
6417 _handle_annotations_before_start_mapblck(startline);
6419 m_evt_handler->begin_map_val_block();
6420 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6421 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
6422 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6423 _maybe_skip_whitespace_tokens();
6424 _set_indentation(m_evt_handler->m_curr->line_contents.indentation);
6430 _c4dbgp(
"mapblck[RVAL]: prev val empty+this is a key");
6431 m_evt_handler->set_val_scalar_plain({});
6432 m_evt_handler->add_sibling();
6433 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
6434 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6436 _maybe_skip_whitespace_tokens();
6442 else if(first ==
'|')
6444 _c4dbgp(
"mapblck[RVAL]: scanning block-literal scalar");
6446 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6447 _handle_annotations_before_blck_val_scalar();
6448 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
6449 m_evt_handler->set_val_scalar_literal(maybe_filtered);
6452 else if(first ==
'>')
6454 _c4dbgp(
"mapblck[RVAL]: scanning block-folded scalar");
6456 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6457 _handle_annotations_before_blck_val_scalar();
6458 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
6459 m_evt_handler->set_val_scalar_folded(maybe_filtered);
6462 else if(_scan_scalar_plain_map_blck(&sc))
6464 _c4dbgp(
"mapblck[RVAL]: plain scalar.");
6465 if(!_maybe_scan_following_colon())
6467 _c4dbgp(
"mapblck[RVAL]: set as val");
6468 _handle_annotations_before_blck_val_scalar();
6469 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
6470 m_evt_handler->set_val_scalar_plain(maybe_filtered);
6475 if(startindent != m_evt_handler->m_curr->indref)
6477 _c4dbgpf(
"mapblck[RVAL]: start new block map, set scalar as key {}", m_evt_handler->m_curr->indref);
6479 _handle_annotations_before_start_mapblck(startline);
6480 m_evt_handler->begin_map_val_block();
6481 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6482 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
6483 m_evt_handler->set_key_scalar_plain(maybe_filtered);
6484 _maybe_skip_whitespace_tokens();
6485 _set_indentation(m_evt_handler->m_curr->line_contents.indentation);
6491 _c4dbgp(
"mapblck[RVAL]: prev val empty+this is a key");
6492 _handle_annotations_before_blck_val_scalar();
6493 m_evt_handler->set_val_scalar_plain({});
6494 m_evt_handler->add_sibling();
6495 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
6496 m_evt_handler->set_key_scalar_plain(maybe_filtered);
6498 _maybe_skip_whitespace_tokens();
6502 else if(first ==
'-')
6506 _c4dbgp(
"mapblck[RVAL]: start val seqblck");
6508 _handle_annotations_before_blck_val_scalar();
6509 m_evt_handler->begin_seq_val_block();
6511 _set_indentation(startindent);
6512 _line_progressed(1);
6513 _maybe_skip_whitespace_tokens();
6514 goto mapblck_finish;
6516 else if(m_evt_handler->m_curr->indref == 0 || m_evt_handler->m_curr->line_contents.indentation == 0 || _is_doc_begin_token(rem))
6518 _c4dbgp(
"mapblck[RVAL]: end+start doc");
6519 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, _is_doc_begin_token(rem));
6520 _start_doc_suddenly();
6521 _line_progressed(3);
6522 _maybe_skip_whitespace_tokens();
6523 goto mapblck_finish;
6527 _c4err(
"parse error");
6530 else if(first ==
'[')
6532 _c4dbgp(
"mapblck[RVAL]: start val seqflow");
6534 _handle_annotations_before_blck_val_scalar();
6535 m_evt_handler->begin_seq_val_flow();
6537 _set_indentation(m_evt_handler->m_curr->indref + 1u);
6538 _line_progressed(1);
6539 goto mapblck_finish;
6541 else if(first ==
'{')
6543 _c4dbgp(
"mapblck[RVAL]: start val mapflow");
6545 _handle_annotations_before_blck_val_scalar();
6546 m_evt_handler->begin_map_val_flow();
6548 m_evt_handler->m_curr->scalar_col = m_evt_handler->m_curr->line_contents.indentation;
6549 _set_indentation(m_evt_handler->m_curr->indref + 1u);
6550 _line_progressed(1);
6551 goto mapblck_finish;
6553 else if(first ==
'*')
6555 csubstr ref = _scan_ref_map();
6556 _c4dbgpf(
"mapblck[RVAL]: ref! [{}]~~~{}~~~", ref.len, ref);
6557 if(startindent == m_evt_handler->m_curr->indref)
6559 _c4dbgpf(
"mapblck[RVAL]: same indentation {}", startindent);
6560 m_evt_handler->set_val_ref(ref);
6565 _c4dbgpf(
"mapblck[RVAL]: larger indentation {}>{}", startindent, m_evt_handler->m_curr->indref);
6566 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, startindent > m_evt_handler->m_curr->indref);
6567 if(_maybe_scan_following_colon())
6569 _c4dbgp(
"mapblck[RVAL]: start child map, block");
6571 _handle_annotations_before_blck_val_scalar();
6572 m_evt_handler->begin_map_val_block();
6573 m_evt_handler->set_key_ref(ref);
6574 _set_indentation(startindent);
6580 _c4dbgp(
"mapblck[RVAL]: was val ref");
6581 _handle_annotations_before_blck_val_scalar();
6582 m_evt_handler->set_val_ref(ref);
6586 _maybe_skip_whitespace_tokens();
6588 else if(first ==
'&')
6590 csubstr anchor = _scan_anchor();
6591 _c4dbgpf(
"mapblck[RVAL]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
6592 if(startindent == m_evt_handler->m_curr->indref)
6594 _c4dbgp(
"mapblck[RVAL]: anchor for next key. val is missing!");
6595 m_evt_handler->set_val_scalar_plain({});
6596 m_evt_handler->add_sibling();
6601 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
6603 else if(first ==
'!')
6605 csubstr tag = _scan_tag();
6606 _c4dbgpf(
"mapblck[RVAL]: tag! [{}]~~~{}~~~", tag.len, tag);
6607 if(startindent == m_evt_handler->m_curr->indref)
6609 _c4dbgp(
"mapblck[RVAL]: tag for next key. val is missing!");
6610 _handle_annotations_before_blck_val_scalar();
6611 m_evt_handler->set_val_scalar_plain({});
6612 m_evt_handler->add_sibling();
6617 _add_annotation(&m_pending_tags, tag, startindent, startline);
6619 else if(first ==
'?')
6621 if(startindent == m_evt_handler->m_curr->indref)
6623 _c4dbgp(
"mapblck[RVAL]: got '?'. val was empty");
6624 _handle_annotations_before_blck_val_scalar();
6625 m_evt_handler->set_val_scalar_plain({});
6626 m_evt_handler->add_sibling();
6629 else if(startindent > m_evt_handler->m_curr->indref)
6631 _c4dbgp(
"mapblck[RVAL]: start val mapblck");
6633 _handle_annotations_before_blck_val_scalar();
6634 m_evt_handler->begin_map_val_block();
6636 _set_indentation(startindent);
6640 _c4err(
"parse error");
6642 m_was_inside_qmrk =
true;
6643 _line_progressed(1);
6644 _maybe_skip_whitespace_tokens();
6647 else if(first ==
':')
6649 if(startindent == m_evt_handler->m_curr->indref)
6651 _c4dbgp(
"mapblck[RVAL]: got ':'. val was empty, next key as well");
6652 m_evt_handler->set_val_scalar_plain({});
6653 m_evt_handler->add_sibling();
6654 m_evt_handler->set_key_scalar_plain({});
6655 _line_progressed(1);
6656 _maybe_skip_whitespace_tokens();
6661 _c4err(
"parse error");
6664 else if(first ==
'.')
6666 _c4dbgp(
"mapblck[RVAL]: maybe doc?");
6667 csubstr rs = rem.sub(1);
6668 if(rs ==
".." || rs.begins_with(
".. "))
6670 _c4dbgp(
"seqblck[RVAL]: end doc expl");
6671 _end_doc_suddenly();
6672 _line_progressed(3);
6673 _maybe_skip_whitespace_tokens();
6674 goto mapblck_finish;
6678 _c4err(
"parse error");
6682 else if(first ==
'\t')
6684 _c4dbgp(
"mapblck[RVAL]: skip tabs");
6685 _maybe_skipchars(
'\t');
6689 _c4err(
"parse error");
6692 else if(has_any(
RNXT))
6694 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
6695 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
6696 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
6697 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
6701 if(m_evt_handler->m_curr->at_line_beginning())
6703 _c4dbgpf(
"mapblck[RNXT]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
6704 if(m_evt_handler->m_curr->indentation_eq())
6706 _c4dbgpf(
"mapblck[RNXT]: skip {} from indref", m_evt_handler->m_curr->indref);
6707 _line_progressed(m_evt_handler->m_curr->indref);
6708 _c4dbgp(
"mapblck[RNXT]: speculatively expect next keyval");
6709 m_evt_handler->add_sibling();
6713 else if(m_evt_handler->m_curr->indentation_lt())
6715 _c4dbgp(
"mapblck[RNXT]: smaller indentation!");
6716 _handle_indentation_pop_from_block_map();
6719 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6722 _c4dbgp(
"mapblck[RNXT]: speculatively expect next keyval");
6723 m_evt_handler->add_sibling();
6730 goto mapblck_finish;
6737 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.len > 0);
6738 const char first = rem.str[0];
6739 _c4dbgpf(
"mapblck[RNXT]: '{}'", _c4prc(first));
6742 if(m_evt_handler->m_curr->more_indented)
6744 _c4dbgp(
"mapblck[RNXT]: start child block map");
6745 C4_NOT_IMPLEMENTED();
6747 _line_progressed(1);
6748 _set_indentation(m_evt_handler->m_curr->scalar_col);
6749 m_evt_handler->m_curr->more_indented =
false;
6754 _c4err(
"parse error");
6757 else if(first ==
' ')
6759 _c4dbgp(
"mapblck[RNXT]: skip spaces");
6760 _maybe_skip_whitespace_tokens();
6764 _c4err(
"parse error");
6767 else if(has_any(
QMRK))
6769 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
6770 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
6771 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
6772 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
6776 if(m_evt_handler->m_curr->at_line_beginning())
6778 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.indentation !=
npos);
6779 if(m_evt_handler->m_curr->indentation_eq())
6781 _c4dbgpf(
"mapblck[QMRK]: skip {} from indref", m_evt_handler->m_curr->indref);
6782 _line_progressed(m_evt_handler->m_curr->indref);
6783 rem = m_evt_handler->m_curr->line_contents.rem;
6787 else if(m_evt_handler->m_curr->indentation_lt())
6789 _c4dbgp(
"mapblck[QMRK]: smaller indentation!");
6790 _handle_indentation_pop_from_block_map();
6791 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6794 _c4dbgp(
"mapblck[QMRK]: still mapblck!");
6795 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
QMRK));
6796 rem = m_evt_handler->m_curr->line_contents.rem;
6802 _c4dbgp(
"mapblck[QMRK]: no longer mapblck!");
6803 goto mapblck_finish;
6809 _c4dbgp(
"mapblck[QMRK]: larger indentation !");
6810 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6811 rem = m_evt_handler->m_curr->line_contents.rem;
6819 const char first = rem.str[0];
6820 const size_t startline = m_evt_handler->m_curr->pos.line;
6821 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
6822 _c4dbgpf(
"mapblck[QMRK]: '{}'", first);
6826 _c4dbgp(
"mapblck[QMRK]: scanning single-quoted scalar");
6827 sc = _scan_scalar_squot();
6828 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
6829 if(!_maybe_scan_following_colon())
6831 _c4dbgp(
"mapblck[QMRK]: set as key");
6832 _handle_annotations_before_blck_key_scalar();
6833 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6838 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), set scalar as key");
6840 _handle_annotations_before_start_mapblck_as_key();
6841 m_evt_handler->begin_map_key_block();
6842 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6843 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6844 _maybe_skip_whitespace_tokens();
6845 _set_indentation(startindent);
6850 else if(first ==
'"')
6852 _c4dbgp(
"mapblck[QMRK]: scanning double-quoted scalar");
6853 sc = _scan_scalar_dquot();
6854 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
6855 if(!_maybe_scan_following_colon())
6857 _c4dbgp(
"mapblck[QMRK]: set as key");
6858 _handle_annotations_before_blck_key_scalar();
6859 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6864 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), set scalar as key");
6866 _handle_annotations_before_start_mapblck_as_key();
6867 m_evt_handler->begin_map_key_block();
6868 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6869 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6870 _maybe_skip_whitespace_tokens();
6871 _set_indentation(startindent);
6876 else if(first ==
'|')
6878 _c4dbgp(
"mapblck[QMRK]: scanning block-literal scalar");
6880 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6881 csubstr maybe_filtered = _maybe_filter_key_scalar_literal(sb);
6882 _handle_annotations_before_blck_key_scalar();
6883 m_evt_handler->set_key_scalar_literal(maybe_filtered);
6886 else if(first ==
'>')
6888 _c4dbgp(
"mapblck[QMRK]: scanning block-literal scalar");
6890 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6891 csubstr maybe_filtered = _maybe_filter_key_scalar_folded(sb);
6892 _handle_annotations_before_blck_key_scalar();
6893 m_evt_handler->set_key_scalar_folded(maybe_filtered);
6896 else if(_scan_scalar_plain_map_blck(&sc))
6898 _c4dbgp(
"mapblck[QMRK]: plain scalar");
6899 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
6900 if(!_maybe_scan_following_colon())
6902 _c4dbgp(
"mapblck[QMRK]: set as key");
6903 _handle_annotations_before_blck_key_scalar();
6904 m_evt_handler->set_key_scalar_plain(maybe_filtered);
6909 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), set scalar as key");
6911 _handle_annotations_before_start_mapblck_as_key();
6912 m_evt_handler->begin_map_key_block();
6913 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6914 m_evt_handler->set_key_scalar_plain(maybe_filtered);
6915 _maybe_skip_whitespace_tokens();
6916 _set_indentation(startindent);
6921 else if(first ==
':')
6923 if(startindent == m_evt_handler->m_curr->indref)
6925 _c4dbgp(
"mapblck[QMRK]: empty key");
6927 _handle_annotations_before_blck_key_scalar();
6928 m_evt_handler->set_key_scalar_plain({});
6929 _line_progressed(1);
6930 _maybe_skip_whitespace_tokens();
6934 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), empty key");
6936 _handle_annotations_before_start_mapblck_as_key();
6937 m_evt_handler->begin_map_key_block();
6938 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6939 m_evt_handler->set_key_scalar_plain({});
6940 _line_progressed(1);
6941 _maybe_skip_whitespace_tokens();
6942 _set_indentation(startindent);
6947 else if(first ==
'*')
6949 csubstr ref = _scan_ref_map();
6950 _c4dbgpf(
"mapblck[QMRK]: key ref! [{}]~~~{}~~~", ref.len, ref);
6951 if(!_maybe_scan_following_colon())
6953 _c4dbgp(
"mapblck[QMRK]: set ref as key");
6954 _handle_annotations_before_blck_key_scalar();
6955 m_evt_handler->set_key_ref(ref);
6960 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), set ref as key");
6962 _handle_annotations_before_blck_key_scalar();
6963 m_evt_handler->begin_map_key_block();
6964 m_evt_handler->set_key_ref(ref);
6965 _set_indentation(startindent);
6969 _maybe_skip_whitespace_tokens();
6971 else if(first ==
'&')
6973 csubstr anchor = _scan_anchor();
6974 _c4dbgpf(
"mapblck[QMRK]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
6975 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
6977 else if(first ==
'!')
6979 csubstr tag = _scan_tag();
6980 _c4dbgpf(
"mapblck[QMRK]: key tag! [{}]~~~{}~~~", tag.len, tag);
6981 _add_annotation(&m_pending_tags, tag, startindent, startline);
6983 else if(first ==
'-')
6985 _c4dbgp(
"mapblck[QMRK]: maybe doc?");
6986 csubstr rs = rem.sub(1);
6987 if(rs ==
"--" || rs.begins_with(
"-- "))
6989 _c4dbgp(
"mapblck[QMRK]: end+start doc");
6990 _start_doc_suddenly();
6991 _line_progressed(3);
6995 _c4dbgp(
"mapblck[QMRK]: start child seqblck (!)");
6997 m_evt_handler->begin_seq_key_block();
6999 _set_indentation(startindent);
7000 _line_progressed(1);
7002 _maybe_skip_whitespace_tokens();
7003 goto mapblck_finish;
7005 else if(first ==
'[')
7007 _c4dbgp(
"mapblck[QMRK]: start child seqflow (!)");
7009 m_evt_handler->begin_seq_key_flow();
7011 _set_indentation(m_evt_handler->m_parent->indref);
7012 _line_progressed(1);
7013 goto mapblck_finish;
7015 else if(first ==
'{')
7017 _c4dbgp(
"mapblck[QMRK]: start child mapblck (!)");
7019 m_evt_handler->begin_map_key_flow();
7021 _set_indentation(m_evt_handler->m_parent->indref);
7022 _line_progressed(1);
7023 goto mapblck_finish;
7025 else if(first ==
'?')
7027 _c4dbgp(
"mapblck[QMRK]: another QMRK '?'");
7028 m_evt_handler->set_key_scalar_plain({});
7029 m_evt_handler->set_val_scalar_plain({});
7030 m_evt_handler->add_sibling();
7031 _line_progressed(1);
7033 else if(first ==
'.')
7035 _c4dbgp(
"mapblck[QMRK]: maybe end doc?");
7036 csubstr rs = rem.sub(1);
7037 if(rs ==
".." || rs.begins_with(
".. "))
7039 _c4dbgp(
"mapblck[QMRK]: end+start doc");
7040 _end_doc_suddenly();
7041 _line_progressed(3);
7042 goto mapblck_finish;
7046 _c4err(
"parse error");
7051 _c4err(
"parse error");
7056 _c4dbgt(
"mapblck: again", 0);
7057 if(_finished_line())
7061 if(_finished_file())
7063 _c4dbgp(
"mapblck: file finished!");
7065 goto mapblck_finish;
7072 _c4dbgp(
"mapblck: finish");
// Handles the current line while the kind of the top-level node is still
// unknown, in the JSON flavour of the parser (going by the function name and
// the runk_json debug tags).
//
// Asserted preconditions: RTOP is set; none of RNXT, RSEQ, RMAP is set.
//
// Visible behaviour, in order:
//   - calls _maybe_skip_comment() and skips leading spaces and tabs;
//   - on [ begins a flow sequence as value;
//   - on { begins a flow map as value;
//   - otherwise scans a double-quoted or a plain scalar and sets it as value.
//
// NOTE(review): braces and some condition lines are not visible in this view.
// The _c4err calls are presumably the else branches of the nearest visible
// conditions; confirm against the full file.
7078 template<
class EventHandler>
7079 void ParseEngine<EventHandler>::_handle_unk_json()
7081 _c4dbgpf(
"handle_unk_json indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
// Preconditions on the parser state flags.
7083 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT|
RSEQ|
RMAP));
7084 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RTOP));
7086 _maybe_skip_comment();
7087 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// Count the leading space/tab characters of the remainder. When nothing else
// is found (npos), the whole remainder is consumed.
7091 size_t pos = rem.first_not_of(
" \t");
7094 pos = pos !=
npos ? pos : rem.len;
7095 _c4dbgpf(
"skipping indentation of {}", pos);
7096 _line_progressed(pos);
// Re-read the remainder after advancing on the line.
7097 rem = m_evt_handler->m_curr->line_contents.rem;
7100 _c4dbgpf(
"rem is now [{}]~~~{}~~~", rem.len, rem);
// Open bracket: begin a flow sequence as value. The indentation is taken from
// current_col(rem), the document is marked non-empty and the bracket is
// consumed (1 char).
7103 if(rem.begins_with(
'['))
7105 _c4dbgp(
"it's a seq");
7106 m_evt_handler->check_trailing_doc_token();
7108 m_evt_handler->begin_seq_val_flow();
7110 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7111 m_doc_empty =
false;
7112 _line_progressed(1);
// Open brace: same steps as above, but for a flow map.
7114 else if(rem.begins_with(
'{'))
7116 _c4dbgp(
"it's a map");
7117 m_evt_handler->check_trailing_doc_token();
7119 m_evt_handler->begin_map_val_flow();
7121 m_doc_empty =
false;
7122 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7123 _line_progressed(1);
// Neither bracket nor brace: try to read a scalar. SSCL must not be set.
7127 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(
SSCL));
7128 _maybe_skip_whitespace_tokens();
7129 csubstr s = m_evt_handler->m_curr->line_contents.rem;
7132 const size_t startindent = m_evt_handler->m_curr->line_contents.indentation;
7133 const char first = s.str[0];
// NOTE(review): the condition guarding this branch is not visible here;
// presumably it tests first against the double-quote character.
7137 _c4dbgp(
"runk_json: scanning double-quoted scalar");
7138 m_evt_handler->check_trailing_doc_token();
7141 m_doc_empty =
false;
7142 sc = _scan_scalar_dquot();
7143 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
// Without a following colon the scalar is the value. The error below is
// presumably raised when a colon does follow.
7144 if(!_maybe_scan_following_colon())
7146 _c4dbgp(
"runk_json: set as val");
7147 _handle_annotations_before_blck_val_scalar();
7148 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
7152 _c4err(
"parse error");
// Plain scalar: same pattern as the double-quoted case, with the filtering
// done only once the scalar is known to be a value.
7155 else if(_scan_scalar_plain_unk(&sc))
7157 _c4dbgp(
"runk_json: got a plain scalar");
7158 m_evt_handler->check_trailing_doc_token();
7161 m_doc_empty =
false;
7162 if(!_maybe_scan_following_colon())
7164 _c4dbgp(
"runk_json: set as val");
7165 _handle_annotations_before_blck_val_scalar();
7166 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
7167 m_evt_handler->set_val_scalar_plain(maybe_filtered);
7171 _c4err(
"parse error");
// Presumably the final fallback when no scalar could be scanned.
7176 _c4err(
"parse error");
// Handles the current line while the kind of the top-level node is still
// unknown (YAML flavour; compare the runk/rtop debug tags).
//
// Asserted preconditions: RTOP is set; none of RNXT, RSEQ, RMAP is set.
//
// Visible behaviour, in order:
//   - calls _maybe_skip_comment() and skips leading spaces and tabs;
//   - at zero indentation and at line begin: looks for a document begin
//     token, a document end token, or a % directive;
//   - otherwise dispatches on the first character of the remainder:
//     flow seq/map openers, block tokens (- ? :), anchor (&), reference (*),
//     tag (!), and finally the scalar styles (single-quoted, double-quoted,
//     literal |, folded >, plain).
// For references and for single-quoted, double-quoted and plain scalars, a
// following colon turns the token into the first key of a new block map;
// without the colon it is set as the value.
//
// NOTE(review): braces and several condition lines are not visible in this
// view, so the exact nesting of the branches should be confirmed against the
// full file.
7184 template<
class EventHandler>
7185 void ParseEngine<EventHandler>::_handle_unk()
7187 _c4dbgpf(
"handle_unk indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
// Preconditions on the parser state flags.
7189 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT|
RSEQ|
RMAP));
7190 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RTOP));
7192 _maybe_skip_comment();
7193 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// Count the leading space/tab characters of the remainder. When nothing else
// is found (npos), the whole remainder is consumed.
7197 size_t pos = rem.first_not_of(
" \t");
7200 pos = pos !=
npos ? pos : rem.len;
7201 _c4dbgpf(
"skipping {} whitespace characters", pos);
7202 _line_progressed(pos);
// Re-read the remainder after advancing on the line.
7203 rem = m_evt_handler->m_curr->line_contents.rem;
7206 _c4dbgpf(
"rem is now [{}]~~~{}~~~", rem.len, rem);
// Document markers and directives are only looked for when the line has zero
// indentation and the parser is at the beginning of the line.
7209 if(m_evt_handler->m_curr->line_contents.indentation == 0u && _at_line_begin())
7211 const char first = rem.str[0];
7212 _c4dbgp(
"rtop: zero indent + at line begin");
// Document begin token: indentation is reset to 0 and the 3-character token
// is consumed.
// NOTE(review): the guard on first and the handler call(s) for this case are
// not visible here.
7215 _c4dbgp(
"rtop: suspecting doc");
7216 if(_is_doc_begin_token(rem))
7218 _c4dbgp(
"rtop: begin doc");
7221 _set_indentation(0);
7223 _line_progressed(3u);
7224 _maybe_skip_whitespace_tokens();
// Document end token: the 3-character token is consumed.
// NOTE(review): the debug strings suggest two sub-cases (end doc / ignore end
// doc) whose condition is not visible here.
7228 else if(first ==
'.')
7230 _c4dbgp(
"rtop: suspecting doc end");
7231 if(_is_doc_end_token(rem))
7233 _c4dbgp(
"rtop: end doc");
7240 _c4dbgp(
"rtop: ignore end doc");
7243 _line_progressed(3u);
7244 _maybe_skip_whitespace_tokens();
// Directive: it is an error when the document is not empty and NDOC is not
// set; otherwise the directive is passed to _handle_directive().
7248 else if(first ==
'%')
7250 _c4dbgpf(
"directive: {}", rem);
7251 if(C4_UNLIKELY(!m_doc_empty && has_none(
NDOC)))
7252 _RYML_CB_ERR(m_evt_handler->m_stack.m_callbacks,
"need document footer before directives");
7253 _handle_directive(rem);
// General dispatch on the first character of the remainder.
7259 char first = rem.str[0];
// NOTE(review): the condition for this branch is not visible; judging from
// the debug strings it is the open-bracket (flow seq) case.
// When pending annotations require a key container, a block map is started
// first and the flow seq becomes its key; otherwise the flow seq is the value.
7263 m_evt_handler->check_trailing_doc_token();
7265 m_doc_empty =
false;
7266 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col(rem);
7267 if(C4_LIKELY( ! _annotations_require_key_container()))
7269 _c4dbgp(
"it's a seq, flow");
7270 _handle_annotations_before_blck_val_scalar();
7271 m_evt_handler->begin_seq_val_flow();
7273 _set_indentation(startindent);
7277 _c4dbgp(
"start new block map, set flow seq as key (!)");
7278 _handle_annotations_before_start_mapblck(m_evt_handler->m_curr->pos.line);
7279 m_evt_handler->begin_map_val_block();
7281 _handle_annotations_and_indentation_after_start_mapblck(startindent, m_evt_handler->m_curr->pos.line);
7282 m_evt_handler->begin_seq_key_flow();
7284 _set_indentation(startindent);
// Consume the opening bracket.
7286 _line_progressed(1);
// Open brace: same structure as the flow seq case, for a flow map.
7288 else if(first ==
'{')
7290 m_evt_handler->check_trailing_doc_token();
7292 m_doc_empty =
false;
7293 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col(rem);
7294 if(C4_LIKELY( ! _annotations_require_key_container()))
7296 _c4dbgp(
"it's a map, flow");
7297 _handle_annotations_before_blck_val_scalar();
7298 m_evt_handler->begin_map_val_flow();
7300 _set_indentation(startindent);
7304 _c4dbgp(
"start new block map, set flow map as key (!)");
7305 _handle_annotations_before_start_mapblck(m_evt_handler->m_curr->pos.line);
7306 m_evt_handler->begin_map_val_block();
7308 _handle_annotations_and_indentation_after_start_mapblck(startindent, m_evt_handler->m_curr->pos.line);
7309 m_evt_handler->begin_map_key_flow();
7311 _set_indentation(startindent);
// Consume the opening brace.
7313 _line_progressed(1);
// Dash accepted by _is_blck_token(): begin a block sequence as value and
// consume the dash.
7315 else if(first ==
'-' && _is_blck_token(rem))
7317 _c4dbgp(
"it's a seq, block");
7318 m_evt_handler->check_trailing_doc_token();
7320 _handle_annotations_before_blck_val_scalar();
7321 m_evt_handler->begin_seq_val_block();
7323 m_doc_empty =
false;
7324 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7325 _line_progressed(1);
7326 _maybe_skip_whitespace_tokens();
// Question mark accepted by _is_blck_token(): begin a block map whose first
// key is introduced by the question mark; m_was_inside_qmrk records that.
7328 else if(first ==
'?' && _is_blck_token(rem))
7330 _c4dbgp(
"it's a map + this key is complex");
7331 m_evt_handler->check_trailing_doc_token();
7333 _handle_annotations_before_blck_val_scalar();
7334 m_evt_handler->begin_map_val_block();
7336 m_doc_empty =
false;
7337 m_was_inside_qmrk =
true;
7338 _save_indentation();
7339 _line_progressed(1);
7340 _maybe_skip_whitespace_tokens();
// Colon accepted by _is_blck_token(). Two outcomes are visible: a new block
// map with an empty plain key, or turning the previous value into the first
// key of a new block map (keeping the previous indentation).
// NOTE(review): the condition choosing between the two is not visible here.
7342 else if(first ==
':' && _is_blck_token(rem))
7346 _c4dbgp(
"it's a map with an empty key");
7347 m_evt_handler->check_trailing_doc_token();
7349 _handle_annotations_before_blck_val_scalar();
7350 m_evt_handler->begin_map_val_block();
7351 m_evt_handler->set_key_scalar_plain({});
7352 m_doc_empty =
false;
7353 _save_indentation();
7357 _c4dbgp(
"actually prev val is a key!");
7358 size_t prev_indentation = m_evt_handler->m_curr->indref;
7359 m_evt_handler->actually_val_is_first_key_of_new_map_block();
7360 _set_indentation(prev_indentation);
// Consume the colon.
7363 _line_progressed(1);
7364 _maybe_skip_whitespace_tokens();
// Anchor: scanned and stored as a pending annotation together with the column
// and line where it was found; the indentation is set to that column.
7366 else if(first ==
'&')
7368 csubstr anchor = _scan_anchor();
7369 _c4dbgpf(
"anchor! [{}]~~~{}~~~", anchor.len, anchor);
7370 m_evt_handler->check_trailing_doc_token();
7372 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
7373 const size_t line = m_evt_handler->m_curr->pos.line;
7374 _add_annotation(&m_pending_anchors, anchor, indentation, line);
7375 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7376 m_doc_empty =
false;
// Reference: set as value unless a colon follows, in which case a new block
// map is started and the reference becomes its first key.
7378 else if(first ==
'*')
7380 csubstr ref = _scan_ref_map();
7381 _c4dbgpf(
"ref! [{}]~~~{}~~~", ref.len, ref);
7382 m_evt_handler->check_trailing_doc_token();
7384 m_doc_empty =
false;
7385 if(!_maybe_scan_following_colon())
7387 _c4dbgp(
"runk: set val ref");
7388 _handle_annotations_before_blck_val_scalar();
7389 m_evt_handler->set_val_ref(ref);
7393 _c4dbgp(
"runk: start new block map, set ref as key");
7394 const size_t startindent = m_evt_handler->m_curr->line_contents.indentation;
7395 const size_t startline = m_evt_handler->m_curr->pos.line;
7396 _handle_annotations_before_start_mapblck(startline);
7397 m_evt_handler->begin_map_val_block();
7398 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7399 m_evt_handler->set_key_ref(ref);
7400 _maybe_skip_whitespace_tokens();
7401 _set_indentation(startindent);
// Tag: scanned and stored as a pending annotation with its column and line.
7405 else if(first ==
'!')
7407 csubstr tag = _scan_tag();
7408 _c4dbgpf(
"unk: val tag! [{}]~~~{}~~~", tag.len, tag);
7411 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
7412 const size_t line = m_evt_handler->m_curr->pos.line;
7413 _add_annotation(&m_pending_tags, tag, indentation, line);
// None of the above: try the scalar styles. SSCL must not be set. The start
// indentation and line are captured before scanning, for use when the scalar
// turns out to be a key.
7417 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(
SSCL));
7418 _maybe_skip_whitespace_tokens();
7419 csubstr s = m_evt_handler->m_curr->line_contents.rem;
7422 const size_t startindent = m_evt_handler->m_curr->line_contents.indentation;
7423 const size_t startline = m_evt_handler->m_curr->pos.line;
// NOTE(review): the condition for this branch is not visible; judging from
// the debug string it is the single-quote case.
7428 _c4dbgp(
"runk: scanning single-quoted scalar");
7429 m_evt_handler->check_trailing_doc_token();
7432 m_doc_empty =
false;
7433 sc = _scan_scalar_squot();
// The scalar is filtered as a value or as a key depending on whether a colon
// follows it.
7434 if(!_maybe_scan_following_colon())
7436 _c4dbgp(
"runk: set as val");
7437 _handle_annotations_before_blck_val_scalar();
7438 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
7439 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
7443 _c4dbgp(
"runk: start new block map, set scalar as key");
7444 _handle_annotations_before_start_mapblck(startline);
7445 m_evt_handler->begin_map_val_block();
7446 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7447 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
7448 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7449 _maybe_skip_whitespace_tokens();
7450 _set_indentation(startindent);
// Double-quoted scalar: same value-or-key pattern as the single-quoted case.
7454 else if(first ==
'"')
7456 _c4dbgp(
"runk: scanning double-quoted scalar");
7457 m_evt_handler->check_trailing_doc_token();
7460 m_doc_empty =
false;
7461 sc = _scan_scalar_dquot();
7462 if(!_maybe_scan_following_colon())
7464 _c4dbgp(
"runk: set as val");
7465 _handle_annotations_before_blck_val_scalar();
7466 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
7467 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
7471 _c4dbgp(
"runk: start new block map, set double-quoted scalar as key");
7472 _handle_annotations_before_start_mapblck(startline);
7473 m_evt_handler->begin_map_val_block();
7474 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7475 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
7476 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7477 _maybe_skip_whitespace_tokens();
7478 _set_indentation(startindent);
// Block literal scalar: only accepted as a value. A following colon is
// reported as an error, since such keys must be introduced by a question mark.
7482 else if(first ==
'|')
7484 _c4dbgp(
"runk: scanning block-literal scalar");
7485 m_evt_handler->check_trailing_doc_token();
7488 m_doc_empty =
false;
7490 _scan_block(&sb, startindent);
7491 if(C4_LIKELY(!_maybe_scan_following_colon()))
7493 _c4dbgp(
"runk: set as val");
7494 _handle_annotations_before_blck_val_scalar();
7495 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
7496 m_evt_handler->set_val_scalar_literal(maybe_filtered);
7500 _c4err(
"block literal keys must be enclosed in '?'");
// Block folded scalar: same rule as the literal case.
7503 else if(first ==
'>')
7505 _c4dbgp(
"runk: scanning block-folded scalar");
7506 m_evt_handler->check_trailing_doc_token();
7509 m_doc_empty =
false;
7511 _scan_block(&sb, startindent);
7512 if(C4_LIKELY(!_maybe_scan_following_colon()))
7514 _c4dbgp(
"runk: set as val");
7515 _handle_annotations_before_blck_val_scalar();
7516 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
7517 m_evt_handler->set_val_scalar_folded(maybe_filtered);
7521 _c4err(
"block folded keys must be enclosed in '?'");
// Plain scalar: same value-or-key pattern as the quoted cases.
7524 else if(_scan_scalar_plain_unk(&sc))
7526 _c4dbgp(
"runk: got a plain scalar");
7527 m_evt_handler->check_trailing_doc_token();
7530 m_doc_empty =
false;
7531 if(!_maybe_scan_following_colon())
7533 _c4dbgp(
"runk: set as val");
7534 _handle_annotations_before_blck_val_scalar();
7535 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
7536 m_evt_handler->set_val_scalar_plain(maybe_filtered);
7540 _c4dbgp(
"runk: start new block map, set scalar as key");
7541 _handle_annotations_before_start_mapblck(startline);
7542 m_evt_handler->begin_map_val_block();
7543 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7544 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
7545 m_evt_handler->set_key_scalar_plain(maybe_filtered);
7546 _maybe_skip_whitespace_tokens();
7547 _set_indentation(startindent);
// Handles the current line in the USTY state (see the dispatch on USTY in the
// main parse loop), i.e. when the node being parsed into may already have a
// type. Marked C4_COLD.
//
// Asserted precondition: neither BLCK nor FLOW is set.
//
// After skipping a comment and leading spaces/tabs, three cases are visible:
//   - usty[RSEQ]: only a flow seq opener or a block seq dash is accepted; the
//     existing node is reused through m_evt_handler->_push(). Anything else is
//     the error: can only parse a seq into an existing seq.
//   - usty[RMAP]: only map content is accepted (flow map, ? key, empty key,
//     anchor, tag, or a scalar/reference followed by a colon). A scalar or
//     reference without a following colon, a seq opener, and block scalars
//     are errors.
//   - usty[UNK]: no existing type; containers are begun through the regular
//     begin_* handler calls and scalars may be values or first keys.
//
// NOTE(review): braces and several condition lines are not visible in this
// view, so the exact nesting of the branches should be confirmed against the
// full file.
7557 template<
class EventHandler>
7558 C4_COLD
void ParseEngine<EventHandler>::_handle_usty()
// Two arguments are passed, so the format names both of them (same layout as
// the entry messages of _handle_unk and _handle_unk_json).
7560 _c4dbgpf(
"handle_usty indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
7562 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
BLCK|
FLOW));
7564 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
7567 _c4dbgp(
"usty[RNXT]: finishing!");
7572 _maybe_skip_comment();
7573 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// Count the leading space/tab characters of the remainder. When nothing else
// is found (npos), the whole remainder is consumed.
7577 size_t pos = rem.first_not_of(
" \t");
7580 pos = pos !=
npos ? pos : rem.len;
7581 _c4dbgpf(
"skipping indentation of {}", pos);
7582 _line_progressed(pos);
// Re-read the remainder after advancing on the line.
7583 rem = m_evt_handler->m_curr->line_contents.rem;
7586 _c4dbgpf(
"rem is now [{}]~~~{}~~~", rem.len, rem);
// The remainder must be non-empty: its first character drives the dispatch.
7589 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.len > 0);
7590 size_t startindent = m_evt_handler->m_curr->line_contents.indentation;
7591 char first = rem.str[0];
// ---- usty[RSEQ] ----
// NOTE(review): the condition opening this case is not visible here.
7594 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(
RMAP));
7595 _c4dbgpf(
"usty[RSEQ]: first='{}'", _c4prc(first));
// Flow seq (the guard on first is not visible here): reuse the existing node
// via _push() and consume the opener.
7598 _c4dbgp(
"usty[RSEQ]: it's a flow seq. merging it");
7600 m_evt_handler->_push();
7602 _set_indentation(startindent);
7603 _line_progressed(1);
7604 _maybe_skip_whitespace_tokens();
// Block seq dash: same steps as for the flow seq.
7606 else if(first ==
'-' && _is_blck_token(rem))
7608 _c4dbgp(
"usty[RSEQ]: it's a block seq. merging it");
7610 m_evt_handler->_push();
7612 _set_indentation(startindent);
7613 _line_progressed(1);
7614 _maybe_skip_whitespace_tokens();
7618 _c4err(
"can only parse a seq into an existing seq");
// ---- usty[RMAP] ----
7621 else if(has_any(
RMAP))
7623 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(
RSEQ));
7624 _c4dbgpf(
"usty[RMAP]: first='{}'", _c4prc(first));
// Flow map (the guard on first is not visible here): reuse the existing node
// via _push() and consume the opener.
7627 _c4dbgp(
"usty[RMAP]: it's a flow map. merging it");
7629 _handle_annotations_before_blck_val_scalar();
7630 m_evt_handler->_push();
7632 _set_indentation(startindent);
7633 _line_progressed(1);
7634 _maybe_skip_whitespace_tokens();
// Question mark accepted by _is_blck_token(): block map whose first key is
// introduced by the question mark; m_was_inside_qmrk records that.
7636 else if(first ==
'?' && _is_blck_token(rem))
7638 _c4dbgp(
"usty[RMAP]: it's a block map + this key is complex");
7640 _handle_annotations_before_blck_val_scalar();
7641 m_evt_handler->_push();
7643 m_was_inside_qmrk =
true;
7644 _save_indentation();
7645 _line_progressed(1);
7646 _maybe_skip_whitespace_tokens();
// Colon accepted by _is_blck_token(): block map whose first key is an empty
// plain scalar.
7648 else if(first ==
':' && _is_blck_token(rem))
7650 _c4dbgp(
"usty[RMAP]: it's a map with an empty key");
7652 _handle_annotations_before_blck_val_scalar();
7653 m_evt_handler->_push();
7654 m_evt_handler->set_key_scalar_plain({});
7656 _save_indentation();
7657 _line_progressed(1);
7658 _maybe_skip_whitespace_tokens();
// Anchor: stored as a pending annotation with its column and line; the
// indentation is set to that column.
7660 else if(rem.begins_with(
'&'))
7662 csubstr anchor = _scan_anchor();
7663 _c4dbgpf(
"usty[RMAP]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
7664 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
7665 const size_t line = m_evt_handler->m_curr->pos.line;
7666 _add_annotation(&m_pending_anchors, anchor, indentation, line);
7667 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
// Reference: must be followed by a colon (it becomes the first key); a bare
// reference would be a value, which cannot be read into a map.
7669 else if(first ==
'*')
7671 csubstr ref = _scan_ref_map();
7672 _c4dbgpf(
"usty[RMAP]: ref! [{}]~~~{}~~~", ref.len, ref);
7673 if(!_maybe_scan_following_colon())
7675 _c4err(
"cannot read a VAL to a map");
7679 _c4dbgp(
"usty[RMAP]: start new block map, set ref as key");
7680 const size_t startline = m_evt_handler->m_curr->pos.line;
7682 _handle_annotations_before_start_mapblck(startline);
7683 m_evt_handler->_push();
7684 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7685 m_evt_handler->set_key_ref(ref);
7686 _maybe_skip_whitespace_tokens();
7687 _set_indentation(startindent);
// Tag: stored as a pending annotation with its column and line.
7691 else if(first ==
'!')
7693 csubstr tag = _scan_tag();
7694 _c4dbgpf(
"usty[RMAP]: val tag! [{}]~~~{}~~~", tag.len, tag);
7697 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
7698 const size_t line = m_evt_handler->m_curr->pos.line;
7699 _add_annotation(&m_pending_tags, tag, indentation, line);
// Sequence openers are rejected when the existing node is a map.
7701 else if(first ==
'[' || (first ==
'-' && _is_blck_token(rem)))
7703 _c4err(
"cannot parse a seq into an existing map");
// None of the above: try the scalar styles, each of which must turn out to be
// a key. SSCL must not be set.
7707 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(
SSCL));
7708 startindent = m_evt_handler->m_curr->line_contents.indentation;
7709 const size_t startline = m_evt_handler->m_curr->pos.line;
7711 _c4dbgpf(
"usty[RMAP]: maybe scalar. first='{}'", _c4prc(first));
// Single-quoted scalar (the guard on first is not visible here).
7714 _c4dbgp(
"usty[RMAP]: scanning single-quoted scalar");
7715 sc = _scan_scalar_squot();
7716 if(!_maybe_scan_following_colon())
7718 _c4err(
"cannot read a VAL to a map");
7722 _c4dbgp(
"usty[RMAP]: start new block map, set scalar as key");
7724 _handle_annotations_before_start_mapblck(startline);
7725 m_evt_handler->_push();
7726 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7727 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
7728 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7729 _set_indentation(startindent);
7731 _maybe_skip_whitespace_tokens();
// Double-quoted scalar: same key-only pattern.
7734 else if(first ==
'"')
7736 _c4dbgp(
"usty[RMAP]: scanning double-quoted scalar");
7737 sc = _scan_scalar_dquot();
7738 if(!_maybe_scan_following_colon())
7740 _c4err(
"cannot read a VAL to a map");
7744 _c4dbgp(
"usty[RMAP]: start new block map, set double-quoted scalar as key");
7746 _handle_annotations_before_start_mapblck(startline);
7747 m_evt_handler->_push();
7748 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7749 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
7750 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7751 _set_indentation(startindent);
7753 _maybe_skip_whitespace_tokens();
// Block scalars cannot start a map here: as keys they must be introduced by a
// question mark. The two messages name the respective style (literal for the
// pipe, folded for the greater-than sign).
7756 else if(first ==
'|')
7758 _c4err(
"block literal keys must be enclosed in '?'");
7760 else if(first ==
'>')
7762 _c4err(
"block folded keys must be enclosed in '?'");
// Plain scalar: same key-only pattern as the quoted scalars.
7764 else if(_scan_scalar_plain_unk(&sc))
7766 _c4dbgp(
"usty[RMAP]: got a plain scalar");
7767 if(!_maybe_scan_following_colon())
7769 _c4err(
"cannot read a VAL to a map");
7773 _c4dbgp(
"usty[RMAP]: start new block map, set scalar as key");
7775 _handle_annotations_before_start_mapblck(startline);
7776 m_evt_handler->_push();
7777 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7778 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
7779 m_evt_handler->set_key_scalar_plain(maybe_filtered);
7780 _set_indentation(startindent);
7782 _maybe_skip_whitespace_tokens();
7787 _c4err(
"parse error");
// ---- usty[UNK] ----
// NOTE(review): the else opening this case is not visible here.
7793 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(
RSEQ));
7794 _c4dbgpf(
"usty[UNK]: first='{}'", _c4prc(first));
// Flow seq (the guard on first is not visible here).
7797 _c4dbgp(
"usty[UNK]: it's a flow seq");
7799 _handle_annotations_before_blck_val_scalar();
7800 m_evt_handler->begin_seq_val_flow();
7802 _set_indentation(startindent);
7803 _line_progressed(1);
7804 _maybe_skip_whitespace_tokens();
// Block seq dash.
7806 else if(first ==
'-' && _is_blck_token(rem))
7808 _c4dbgp(
"usty[UNK]: it's a block seq");
7810 _handle_annotations_before_blck_val_scalar();
7811 m_evt_handler->begin_seq_val_block();
7813 _set_indentation(startindent);
7814 _line_progressed(1);
7815 _maybe_skip_whitespace_tokens();
// Flow map.
7817 else if(first ==
'{')
7819 _c4dbgp(
"usty[UNK]: it's a flow map");
7821 _handle_annotations_before_blck_val_scalar();
7822 m_evt_handler->begin_map_val_flow();
7824 _set_indentation(startindent);
7825 _line_progressed(1);
7826 _maybe_skip_whitespace_tokens();
// Question mark accepted by _is_blck_token(): block map whose first key is
// introduced by the question mark.
7828 else if(first ==
'?' && _is_blck_token(rem))
7830 _c4dbgp(
"usty[UNK]: it's a map + this key is complex");
7832 _handle_annotations_before_blck_val_scalar();
7833 m_evt_handler->begin_map_val_block();
7835 m_was_inside_qmrk =
true;
7836 _save_indentation();
7837 _line_progressed(1);
7838 _maybe_skip_whitespace_tokens();
// Colon accepted by _is_blck_token(): block map with an empty plain key.
7840 else if(first ==
':' && _is_blck_token(rem))
7842 _c4dbgp(
"usty[UNK]: it's a map with an empty key");
7844 _handle_annotations_before_blck_val_scalar();
7845 m_evt_handler->begin_map_val_block();
7846 m_evt_handler->set_key_scalar_plain({});
7848 _save_indentation();
7849 _line_progressed(1);
7850 _maybe_skip_whitespace_tokens();
// Anchor: stored as a pending annotation with its column and line.
7852 else if(first ==
'&')
7854 csubstr anchor = _scan_anchor();
7855 _c4dbgpf(
"usty[UNK]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
7856 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
7857 const size_t line = m_evt_handler->m_curr->pos.line;
7858 _add_annotation(&m_pending_anchors, anchor, indentation, line);
7859 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
// Reference: value when no colon follows, otherwise first key of a new block
// map.
7861 else if(first ==
'*')
7863 csubstr ref = _scan_ref_map();
7864 _c4dbgpf(
"usty[UNK]: ref! [{}]~~~{}~~~", ref.len, ref);
7865 if(!_maybe_scan_following_colon())
7867 _c4dbgp(
"usty[UNK]: set val ref");
7868 _handle_annotations_before_blck_val_scalar();
7869 m_evt_handler->set_val_ref(ref);
7873 _c4dbgp(
"usty[UNK]: start new block map, set ref as key");
7874 const size_t startline = m_evt_handler->m_curr->pos.line;
7876 _handle_annotations_before_start_mapblck(startline);
7877 m_evt_handler->begin_map_val_block();
7878 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7879 m_evt_handler->set_key_ref(ref);
7880 _maybe_skip_whitespace_tokens();
7881 _set_indentation(startindent);
// Tag: stored as a pending annotation with its column and line.
7885 else if(first ==
'!')
7887 csubstr tag = _scan_tag();
7888 _c4dbgpf(
"usty[UNK]: val tag! [{}]~~~{}~~~", tag.len, tag);
7891 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
7892 const size_t line = m_evt_handler->m_curr->pos.line;
7893 _add_annotation(&m_pending_tags, tag, indentation, line);
// None of the above: try the scalar styles. SSCL must not be set.
7897 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(
SSCL));
7898 startindent = m_evt_handler->m_curr->line_contents.indentation;
7899 const size_t startline = m_evt_handler->m_curr->pos.line;
7902 _c4dbgpf(
"usty[UNK]: maybe scalar. first='{}'", _c4prc(first));
// Single-quoted scalar (the guard on first is not visible here): value when
// no colon follows, otherwise first key of a new block map.
7905 _c4dbgp(
"usty[UNK]: scanning single-quoted scalar");
7906 sc = _scan_scalar_squot();
7907 if(!_maybe_scan_following_colon())
7909 _c4dbgp(
"usty[UNK]: set as val");
7910 _handle_annotations_before_blck_val_scalar();
7911 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
7912 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
7917 _c4dbgp(
"usty[UNK]: start new block map, set scalar as key");
7919 _handle_annotations_before_start_mapblck(startline);
7920 m_evt_handler->begin_map_val_block();
7921 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7922 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
7923 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7924 _set_indentation(startindent);
7926 _maybe_skip_whitespace_tokens();
// Double-quoted scalar: same value-or-key pattern.
7929 else if(first ==
'"')
7931 _c4dbgp(
"usty[UNK]: scanning double-quoted scalar");
7932 sc = _scan_scalar_dquot();
7933 if(!_maybe_scan_following_colon())
7935 _c4dbgp(
"usty[UNK]: set as val");
7936 _handle_annotations_before_blck_val_scalar();
7937 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
7938 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
7943 _c4dbgp(
"usty[UNK]: start new block map, set double-quoted scalar as key");
7945 _handle_annotations_before_start_mapblck(startline);
7946 m_evt_handler->begin_map_val_block();
7947 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7948 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
7949 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7950 _set_indentation(startindent);
7952 _maybe_skip_whitespace_tokens();
// Block literal scalar: always set as value in this case.
7955 else if(first ==
'|')
7957 _c4dbgp(
"usty[UNK]: scanning block-literal scalar");
7959 _scan_block(&sb, startindent);
7960 _c4dbgp(
"usty[UNK]: set as val");
7961 _handle_annotations_before_blck_val_scalar();
7962 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
7963 m_evt_handler->set_val_scalar_literal(maybe_filtered);
// Block folded scalar: always set as value in this case.
7966 else if(first ==
'>')
7968 _c4dbgp(
"usty[UNK]: scanning block-folded scalar");
7970 _scan_block(&sb, startindent);
7971 _c4dbgp(
"usty[UNK]: set as val");
7972 _handle_annotations_before_blck_val_scalar();
7973 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
7974 m_evt_handler->set_val_scalar_folded(maybe_filtered);
// Plain scalar: same value-or-key pattern as the quoted scalars.
7977 else if(_scan_scalar_plain_unk(&sc))
7979 _c4dbgp(
"usty[UNK]: got a plain scalar");
7980 if(!_maybe_scan_following_colon())
7982 _c4dbgp(
"usty[UNK]: set as val");
7983 _handle_annotations_before_blck_val_scalar();
7984 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
7985 m_evt_handler->set_val_scalar_plain(maybe_filtered);
7990 _c4dbgp(
"usty[UNK]: start new block map, set scalar as key");
7992 _handle_annotations_before_start_mapblck(startline);
7993 m_evt_handler->begin_map_val_block();
7994 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7995 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
7996 m_evt_handler->set_key_scalar_plain(maybe_filtered);
7997 _set_indentation(startindent);
7999 _maybe_skip_whitespace_tokens();
8004 _c4err(
"parse error");
// Main parse loop driving the event handler.
// NOTE(review): the signature line of this definition is not visible in this
// view. Since only RMAP and RUNK dispatch conditions appear below, this is
// presumably the JSON entry point; confirm against the full file.
//
// Visible behaviour: asserts that the handler stack holds at least one entry,
// calls start_parse() with the filename, the arena relocation callback and
// this engine, begins the stream, then loops over the file and over each line,
// dispatching on the state flags until the file is finished, and finally calls
// finish_parse().
8013 template<
class EventHandler>
8016 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
8020 m_evt_handler->start_parse(filename.str, &_s_relocate_arena,
this);
8021 m_evt_handler->begin_stream();
// Outer loop: one iteration per line until the file is exhausted.
8022 while( ! _finished_file())
// Inner loop: keep dispatching while the current line has content left.
8025 while( ! _finished_line())
8028 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! m_evt_handler->m_curr->line_contents.rem.empty());
// State dispatch. NOTE(review): the first condition and the handler calls of
// each branch are not visible here.
8033 else if(has_any(
RMAP))
8037 else if(has_any(
RUNK))
// No known state flag matched.
8043 _c4err(
"internal error");
8046 if(_finished_file())
8051 m_evt_handler->finish_parse();
// Parse driver, emitting events to m_evt_handler.
// NOTE(review): the signature line, the braces and several statements are
// missing from this listing; this is presumably
// ParseEngine<EventHandler>::parse_in_place_ev(csubstr filename, substr src)
// -- confirm against the real source before relying on these notes.
8057 template<
class EventHandler>
// Precondition: the handler's state stack must hold at least one entry.
8060 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
// Announce the parse to the handler, passing the file name, the address of
// _s_relocate_arena and this engine; then open the event stream.
8064 m_evt_handler->start_parse(filename.str, &_s_relocate_arena,
this);
8065 m_evt_handler->begin_stream();
// Outer loop: runs until _finished_file() reports true.
8066 while( ! _finished_file())
// Inner loop: runs until _finished_line() reports true.
8069 while( ! _finished_line())
// Inside the inner loop the remaining contents of the current line must not be empty.
8072 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! m_evt_handler->m_curr->line_contents.rem.empty());
// Dispatch on the current parser-state flags. The branch conditions that
// precede this point are not visible in this listing; this assertion requires
// the RMAP flag to be set on the path it guards.
8083 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP));
// Branch taken when the BLCK flag is set.
8092 else if(has_any(
BLCK))
// Block sequence handling (its guarding condition is not visible here).
8096 _handle_seq_block();
// Otherwise the state must be a map: assert RMAP, then handle the block map.
8100 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP));
8101 _handle_map_block();
// Branches for the RUNK and USTY flags; the handler calls inside them are not
// visible in this listing.
8104 else if(has_any(
RUNK))
8108 else if(has_any(
USTY))
// No visible branch matched the current flags: report an internal error.
8114 _c4err(
"internal error");
// Re-check for end of file after the inner loop (the statement guarded by
// this `if` is not visible here).
8117 if(_finished_file())
// Tell the handler that parsing is complete.
8122 m_evt_handler->finish_parse();
8128 #undef _c4dbgnextline
8130 #if defined(_MSC_VER)
8131 # pragma warning(pop)
8132 #elif defined(__clang__)
8133 # pragma clang diagnostic pop
8134 #elif defined(__GNUC__)
8135 # pragma GCC diagnostic pop
Lightweight generic type-safe wrappers for converting individual values to/from strings.
Holds a pointer to an existing tree, and a node id.
Tree const * tree() const noexcept
id_type id() const noexcept
bool readable() const noexcept
because a ConstNodeRef cannot be used to write to the tree, readable() has the same meaning as !...
This is the main driver of parsing logic: it scans the YAML or JSON source for tokens,...
Location location(Tree const &tree, id_type node_id) const
Get the location of a node of the last tree to be parsed by this parser.
FilterResult filter_scalar_plain(csubstr scalar, substr dst, size_t indentation)
filter a plain scalar
csubstr location_contents(Location const &loc) const
Get the string starting at a particular location, to the end of the parsed source buffer.
FilterResult filter_scalar_squoted(csubstr scalar, substr dst)
filter a single-quoted scalar
ParseEngine(EventHandler *evt_handler, ParserOptions opts={})
FilterResult filter_scalar_dquoted(csubstr scalar, substr dst)
filter a double-quoted scalar
void parse_json_in_place_ev(csubstr filename, substr src)
parse JSON in place, emitting events to the current handler
Location val_location(const char *val) const
Given a pointer to a buffer position, get the location.
FilterResult filter_scalar_plain_in_place(substr scalar, size_t cap, size_t indentation)
filter a plain scalar in place
FilterResult filter_scalar_squoted_in_place(substr scalar, size_t cap)
filter a single-quoted scalar in place
FilterResultExtending filter_scalar_dquoted_in_place(substr scalar, size_t cap)
filter a double-quoted scalar in place
void parse_in_place_ev(csubstr filename, substr src)
parse YAML in place, emitting events to the current handler
FilterResult filter_scalar_block_literal_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
filter a block-literal scalar in place
FilterResult filter_scalar_block_literal(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
filter a block-literal scalar
FilterResult filter_scalar_block_folded_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
filter a block-folded scalar in place
ParseEngine & operator=(ParseEngine &&)
FilterResult filter_scalar_block_folded(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
filter a block-folded scalar
NodeType type(id_type node) const
id_type prev_sibling(id_type node) const
bool has_key(id_type node) const
id_type parent(id_type node) const
id_type next_sibling(id_type node) const
csubstr const & key(id_type node) const
bool has_val(id_type node) const
csubstr const & val(id_type node) const
bool is_container(id_type node) const
#define RYML_ERRMSG_SIZE
size for the error message buffer
#define RYML_LOCATIONS_SMALL_THRESHOLD
threshold at which a location search will revert from linear to binary search.
bool atou(csubstr str, T *v) noexcept
Convert a trimmed string to an unsigned integral value.
@ NOTYPE
no node type or style is set
bool read_hex(csubstr s, I *v) noexcept
read a hexadecimal integer from a string.
size_t to_chars(substr buf, uint8_t v) noexcept
RYML_ID_TYPE id_type
The type of a node id in the YAML tree; to override the default type, define the macro RYML_ID_TYPE t...
@ npos
a null string position
size_t _find_last_newline_and_larger_indentation(csubstr s, size_t indentation) noexcept
@ RTOP
reading at top level
@ BLCK
reading in block mode
@ RSET
the (implicit) map being read is a !!set.
@ RNXT
read next val or keyval
@ FLOW
reading is inside explicit flow chars: [] or {}
@ RUNK
reading unknown state (when starting): must determine whether scalar, map or seq
@ RKEY
reading a scalar as key
@ RKCL
reading the key colon (ie the : after the key in the map)
@ NDOC
no document mode. a document has ended and another has not started yet.
@ QSCL
stored scalar was quoted
@ USTY
reading in unknown style mode - must determine FLOW or BLCK reading an implicit map nested in an expl...
@ QMRK
reading an explicit key (? key)
@ SSCL
there's a stored scalar
@ RVAL
reading a scalar as val
int ParserFlag_t
data type for ParserState_e
#define _RYML_WITHOUT_TAB_TOKENS(...)
#define _ryml_relocate(s)
#define _c4dbgfsq(fmt,...)
#define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without)
#define _RYML_WITH_TAB_TOKENS(...)
#define _c4dbgfps(fmt,...)
Filters an input string into a different output string.
size_t offset
number of bytes from the beginning of the source buffer
Options to give to the parser to control its behavior.