1 #ifndef _C4_YML_PARSE_ENGINE_DEF_HPP_
2 #define _C4_YML_PARSE_ENGINE_DEF_HPP_
5 #include "c4/error.hpp"
12 #include "c4/yml/detail/parser_dbg.hpp"
15 #include "c4/yml/detail/print.hpp"
19 #if defined(RYML_WITH_TAB_TOKENS)
20 #define _RYML_WITH_TAB_TOKENS(...) __VA_ARGS__
21 #define _RYML_WITHOUT_TAB_TOKENS(...)
22 #define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) with
24 #define _RYML_WITH_TAB_TOKENS(...)
25 #define _RYML_WITHOUT_TAB_TOKENS(...) __VA_ARGS__
26 #define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) without
31 #define _c4dbgnextline() \
33 _c4dbgq("\n-----------"); \
34 _c4dbgt("handling line={}, offset={}B", \
35 m_evt_handler->m_curr->pos.line, \
36 m_evt_handler->m_curr->pos.offset); \
41 # pragma warning(push)
42 # pragma warning(disable: 4296)
43 # pragma warning(disable: 4702)
44 #elif defined(__clang__)
45 # pragma clang diagnostic push
46 # pragma clang diagnostic ignored "-Wtype-limits"
47 # pragma clang diagnostic ignored "-Wformat-nonliteral"
48 # pragma clang diagnostic ignored "-Wold-style-cast"
49 #elif defined(__GNUC__)
50 # pragma GCC diagnostic push
51 # pragma GCC diagnostic ignored "-Wtype-limits"
52 # pragma GCC diagnostic ignored "-Wformat-nonliteral"
53 # pragma GCC diagnostic ignored "-Wold-style-cast"
55 # pragma GCC diagnostic ignored "-Wduplicated-branches"
64 C4_HOT C4_ALWAYS_INLINE
bool _is_blck_token(csubstr s) noexcept
66 RYML_ASSERT(s.len > 0);
67 RYML_ASSERT(s.str[0] ==
'-' || s.str[0] ==
':' || s.str[0] ==
'?');
71 inline bool _is_doc_begin_token(csubstr s)
73 RYML_ASSERT(s.begins_with(
'-'));
74 RYML_ASSERT(!s.ends_with(
"\n"));
75 RYML_ASSERT(!s.ends_with(
"\r"));
76 return (s.len >= 3 && s.str[1] ==
'-' && s.str[2] ==
'-')
80 inline bool _is_doc_end_token(csubstr s)
82 RYML_ASSERT(s.begins_with(
'.'));
83 RYML_ASSERT(!s.ends_with(
"\n"));
84 RYML_ASSERT(!s.ends_with(
"\r"));
85 return (s.len >= 3 && s.str[1] ==
'.' && s.str[2] ==
'.')
89 inline bool _is_doc_token(csubstr s) noexcept
109 return (s.str[1] ==
'-' && s.str[2] ==
'-')
113 return (s.str[1] ==
'.' && s.str[2] ==
'.')
120 inline size_t _is_special_json_scalar(csubstr s)
126 if(s.len >= 5 && s.begins_with(
"false"))
130 if(s.len >= 4 && s.begins_with(
"true"))
134 if(s.len >= 4 && s.begins_with(
"null"))
// Tell whether `following` completes a two-character line break begun by `nl`.
// The expression is true only for the pairs "\n\r" and "\r\n"; because the
// declared return type is size_t, the result is 1 for those pairs and 0
// otherwise.
144 C4_ALWAYS_INLINE
size_t _extend_from_combined_newline(
char nl,
char following)
146 return (nl ==
'\n' && following ==
'\r') || (nl ==
'\r' && following ==
'\n');
150 inline substr from_next_line(substr rem)
152 size_t nlpos = rem.first_of(
"\r\n");
155 const char nl = rem[nlpos];
156 rem = rem.right_of(nlpos);
159 if(_extend_from_combined_newline(nl, rem.front()))
167 inline size_t _count_following_newlines(csubstr r,
size_t *C4_RESTRICT i)
169 RYML_ASSERT(r[*i] ==
'\n');
170 size_t numnl_following = 0;
172 for( ; *i < r.len; ++(*i))
174 if(r.str[*i] ==
'\n')
177 else if(r.str[*i] ==
' ' || r.str[*i] ==
'\t' || r.str[*i] ==
'\r')
182 return numnl_following;
187 inline size_t _count_following_newlines(csubstr r,
size_t *C4_RESTRICT i,
size_t indentation)
189 RYML_ASSERT(r[*i] ==
'\n');
190 size_t numnl_following = 0;
194 for( ; *i < r.len; ++(*i))
196 if(r.str[*i] ==
'\n')
199 else if(r.str[*i] ==
' ' || r.str[*i] ==
'\t' || r.str[*i] ==
'\r')
207 for( ; *i < r.len; ++(*i))
209 if(r.str[*i] ==
'\n')
213 size_t stop = *i + indentation;
214 for( ; *i < r.len; ++(*i))
216 if(r.str[*i] !=
' ' && r.str[*i] !=
'\r')
218 RYML_ASSERT(*i < stop);
223 else if(r.str[*i] ==
' ' || r.str[*i] ==
'\t' || r.str[*i] ==
'\r')
229 return numnl_following;
239 template<
class EventHandler>
246 template<
class EventHandler>
251 , m_evt_handler(evt_handler)
252 , m_pending_anchors()
254 , m_newline_offsets()
255 , m_newline_offsets_size(0)
256 , m_newline_offsets_capacity(0)
257 , m_newline_offsets_buf()
259 RYML_CHECK(evt_handler);
262 template<
class EventHandler>
264 : m_options(that.m_options)
265 , m_file(that.m_file)
267 , m_evt_handler(that.m_evt_handler)
268 , m_pending_anchors(that.m_pending_anchors)
269 , m_pending_tags(that.m_pending_tags)
270 , m_newline_offsets(that.m_newline_offsets)
271 , m_newline_offsets_size(that.m_newline_offsets_size)
272 , m_newline_offsets_capacity(that.m_newline_offsets_capacity)
273 , m_newline_offsets_buf(that.m_newline_offsets_buf)
278 template<
class EventHandler>
280 : m_options(that.m_options)
281 , m_file(that.m_file)
283 , m_evt_handler(that.m_evt_handler)
284 , m_pending_anchors(that.m_pending_anchors)
285 , m_pending_tags(that.m_pending_tags)
286 , m_newline_offsets()
287 , m_newline_offsets_size()
288 , m_newline_offsets_capacity()
289 , m_newline_offsets_buf()
291 if(that.m_newline_offsets_capacity)
293 _resize_locations(that.m_newline_offsets_capacity);
294 _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity == that.m_newline_offsets_capacity);
295 memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size *
sizeof(
size_t));
296 m_newline_offsets_size = that.m_newline_offsets_size;
300 template<
class EventHandler>
304 m_options = (that.m_options);
305 m_file = (that.m_file);
306 m_buf = (that.m_buf);
307 m_evt_handler = that.m_evt_handler;
308 m_pending_anchors = that.m_pending_anchors;
309 m_pending_tags = that.m_pending_tags;
310 m_newline_offsets = (that.m_newline_offsets);
311 m_newline_offsets_size = (that.m_newline_offsets_size);
312 m_newline_offsets_capacity = (that.m_newline_offsets_capacity);
313 m_newline_offsets_buf = (that.m_newline_offsets_buf);
318 template<
class EventHandler>
322 m_options = (that.m_options);
323 m_file = (that.m_file);
324 m_buf = (that.m_buf);
325 m_evt_handler = that.m_evt_handler;
326 m_pending_anchors = that.m_pending_anchors;
327 m_pending_tags = that.m_pending_tags;
328 if(that.m_newline_offsets_capacity > m_newline_offsets_capacity)
329 _resize_locations(that.m_newline_offsets_capacity);
330 _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_capacity);
331 _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_size);
332 memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size *
sizeof(
size_t));
333 m_newline_offsets_size = that.m_newline_offsets_size;
334 m_newline_offsets_buf = that.m_newline_offsets_buf;
338 template<
class EventHandler>
345 m_pending_anchors = {};
347 m_newline_offsets = {};
348 m_newline_offsets_size = {};
349 m_newline_offsets_capacity = {};
350 m_newline_offsets_buf = {};
// Release the storage held for the newline offsets.
// When m_newline_offsets is non-null it is handed to _RYML_CB_FREE together
// with the event handler's callbacks, the element type (size_t) and the
// current capacity; the pointer, size, capacity and buffer members are then
// put back to their null/zero values.
353 template<
class EventHandler>
354 void ParseEngine<EventHandler>::_free()
356 if(m_newline_offsets)
// give the array back through the handler's callbacks
358 _RYML_CB_FREE(m_evt_handler->m_stack.m_callbacks, m_newline_offsets,
size_t, m_newline_offsets_capacity);
// forget the released array and its bookkeeping
359 m_newline_offsets =
nullptr;
360 m_newline_offsets_size = 0u;
361 m_newline_offsets_capacity = 0u;
362 m_newline_offsets_buf = 0u;
369 template<
class EventHandler>
370 void ParseEngine<EventHandler>::_reset()
372 m_pending_anchors = {};
374 if(m_options.locations())
376 _prepare_locations();
378 m_was_inside_qmrk =
false;
384 template<
class EventHandler>
385 void ParseEngine<EventHandler>::_relocate_arena(csubstr prev_arena, substr next_arena)
387 #define _ryml_relocate(s) \
388 if(s.is_sub(prev_arena)) \
390 s.str = next_arena.str + (s.str - prev_arena.str); \
394 for(
size_t i = 0; i < m_pending_tags.num_entries; ++i)
396 for(
size_t i = 0; i < m_pending_anchors.num_entries; ++i)
398 #undef _ryml_relocate
// Adapter that receives the engine as an untyped pointer: `data` is cast to
// ParseEngine* and the (prev_arena, next_arena) pair is forwarded to the
// member function _relocate_arena().
// NOTE(review): presumably used where only a void* can be carried to a
// relocation callback -- confirm at the registration site.
401 template<
class EventHandler>
402 void ParseEngine<EventHandler>::_s_relocate_arena(
void* data, csubstr prev_arena, substr next_arena)
404 ((ParseEngine*)data)->_relocate_arena(prev_arena, next_arena);
410 template<
class EventHandler>
411 template<
class DumpFn>
412 void ParseEngine<EventHandler>::_fmt_msg(DumpFn &&dumpfn)
const
414 auto const *
const C4_RESTRICT st = m_evt_handler->m_curr;
415 auto const& lc = st->line_contents;
416 csubstr contents = lc.stripped;
420 size_t offs = 3u +
to_chars(substr{}, st->pos.line) +
to_chars(substr{}, st->pos.col);
423 detail::_dump(dumpfn,
"{}:", m_file);
424 offs += m_file.len + 1;
426 detail::_dump(dumpfn,
"{}:{}: ", st->pos.line, st->pos.col);
427 csubstr maybe_full_content = (contents.len < 80u ? contents : contents.first(80u));
428 csubstr maybe_ellipsis = (contents.len < 80u ? csubstr{} : csubstr(
"..."));
429 detail::_dump(dumpfn,
"{}{} (size={})\n", maybe_full_content, maybe_ellipsis, contents.len);
431 size_t firstcol = (size_t)(lc.rem.begin() - lc.full.begin());
432 size_t lastcol = firstcol + lc.rem.len;
433 for(
size_t i = 0; i < offs + firstcol; ++i)
436 for(
size_t i = 1, e = (lc.rem.len < 80u ? lc.rem.len : 80u); i < e; ++i)
438 detail::_dump(dumpfn,
"{} (cols {}-{})\n", maybe_ellipsis, firstcol+1, lastcol+1);
449 detail::_dump(dumpfn,
"top state: {}\n", detail::_parser_flags_to_str(flagbuf_, m_evt_handler->m_curr->flags));
457 template<
class EventHandler>
458 template<
class ...Args>
459 void ParseEngine<EventHandler>::_err(csubstr fmt, Args
const& C4_RESTRICT ...args)
const
462 detail::_SubstrWriter writer(errmsg);
463 auto dumpfn = [&writer](csubstr s){ writer.append(s); };
464 detail::_dump(dumpfn, fmt, args...);
468 m_evt_handler->cancel_parse();
469 m_evt_handler->m_stack.m_callbacks.m_error(errmsg, len, m_evt_handler->m_curr->pos, m_evt_handler->m_stack.m_callbacks.m_user_data);
475 template<
class EventHandler>
476 template<
class ...Args>
477 void ParseEngine<EventHandler>::_dbg(csubstr fmt, Args
const& C4_RESTRICT ...args)
const
481 auto dumpfn = [](csubstr s){
if(s.str) fwrite(s.str, 1, s.len, stdout); };
482 detail::_dump(dumpfn, fmt, args...);
491 template<
class EventHandler>
492 bool ParseEngine<EventHandler>::_finished_file()
const
494 bool ret = m_evt_handler->m_curr->pos.offset >= m_buf.len;
497 _c4dbgp(
"finished file!!!");
// Report whether the current line has been fully consumed: returns true when
// the `rem` (remaining) view of the current state's line contents is empty.
502 template<
class EventHandler>
503 C4_HOT C4_ALWAYS_INLINE
bool ParseEngine<EventHandler>::_finished_line()
const
505 return m_evt_handler->m_curr->line_contents.rem.empty();
511 template<
class EventHandler>
512 void ParseEngine<EventHandler>::_maybe_skip_whitespace_tokens()
514 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
520 _c4dbgpf(
"skip {} whitespace characters", pos);
521 _line_progressed(pos);
525 template<
class EventHandler>
526 void ParseEngine<EventHandler>::_maybe_skipchars(
char c)
528 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
529 if(rem.len && rem.str[0] == c)
531 size_t pos = rem.first_not_of(c);
534 _c4dbgpf(
"skip {}x'{}'", pos, c);
535 _line_progressed(pos);
539 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
540 template<
class EventHandler>
541 void ParseEngine<EventHandler>::_maybe_skipchars_up_to(
char c,
size_t max_to_skip)
543 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
544 if(rem.len && rem.str[0] == c)
546 size_t pos = rem.first_not_of(c);
549 if(pos > max_to_skip)
551 _c4dbgpf(
"skip {}x'{}'", pos, c);
552 _line_progressed(pos);
557 template<
class EventHandler>
559 void ParseEngine<EventHandler>::_skipchars(
const char (&chars)[N])
561 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.begins_with_any(chars));
562 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(chars);
564 pos = m_evt_handler->m_curr->line_contents.rem.len;
565 _c4dbgpf(
"skip {} characters", pos);
566 _line_progressed(pos);
// Consume a comment that starts at the current position and runs to the end
// of the remaining line.
// Preconditions (asserted): the remaining line begins with '#', and `rem` is
// a sub-range of the full line.
// Raises an error through the callbacks ("comment not preceded by
// whitespace") when the line does not itself start with '#' and the character
// just before the '#' is neither a space nor a tab.
569 template<
class EventHandler>
570 void ParseEngine<EventHandler>::_skip_comment()
572 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.begins_with(
'#'));
573 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.is_sub(m_evt_handler->m_curr->line_contents.full));
574 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
575 csubstr full = m_evt_handler->m_curr->line_contents.full;
// the '#' is not the first character of the line: inspect what precedes it
577 if(!full.begins_with(
'#'))
579 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.str > full.str);
// c is the character immediately before the '#'
580 const char c = full[(size_t)(rem.str - full.str - 1)];
581 if(C4_UNLIKELY(c !=
' ' && c !=
'\t'))
582 _RYML_CB_ERR(m_evt_handler->m_stack.m_callbacks,
"comment not preceded by whitespace");
// NOTE(review): presumably the alternative branch (the line itself starts
// with '#'), in which case rem must start where the line starts -- confirm.
586 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.str == full.str);
588 _c4dbgpf(
"comment was '{}'", rem);
// the whole remainder of the line is the comment: advance past all of it
589 _line_progressed(rem.len);
592 template<
class EventHandler>
593 void ParseEngine<EventHandler>::_maybe_skip_comment()
595 csubstr s = m_evt_handler->m_curr->line_contents.rem.triml(
' ');
596 if(s.begins_with(
'#'))
598 _line_progressed((
size_t)(s.str - m_evt_handler->m_curr->line_contents.rem.str));
603 template<
class EventHandler>
604 bool ParseEngine<EventHandler>::_maybe_scan_following_colon() noexcept
606 if(m_evt_handler->m_curr->line_contents.rem.len)
608 if(m_evt_handler->m_curr->line_contents.rem.str[0] ==
' ' || m_evt_handler->m_curr->line_contents.rem.str[0] ==
'\t')
610 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(
" \t");
612 pos = m_evt_handler->m_curr->line_contents.rem.len;
613 _c4dbgpf(
"skip {}x'{}'", pos,
' ');
614 _line_progressed(pos);
616 if(m_evt_handler->m_curr->line_contents.rem.len && (m_evt_handler->m_curr->line_contents.rem.str[0] ==
':'))
618 _c4dbgp(
"found ':' colon next");
626 template<
class EventHandler>
627 bool ParseEngine<EventHandler>::_maybe_scan_following_comma() noexcept
629 if(m_evt_handler->m_curr->line_contents.rem.len)
631 if(m_evt_handler->m_curr->line_contents.rem.str[0] ==
' ' || m_evt_handler->m_curr->line_contents.rem.str[0] ==
'\t')
633 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(
" \t");
635 pos = m_evt_handler->m_curr->line_contents.rem.len;
636 _c4dbgpf(
"skip {}x'{}'", pos,
' ');
637 _line_progressed(pos);
639 if(m_evt_handler->m_curr->line_contents.rem.len && (m_evt_handler->m_curr->line_contents.rem.str[0] ==
','))
641 _c4dbgp(
"found ',' comma next");
// Scan an anchor token at the current position.
// Precondition (asserted): the remaining line begins with '&'.
// The anchor name is the text after the '&' up to the position reported by
// first_of(' '). The parser then advances past the '&' plus the name, and
// _maybe_skipchars(' ') is called for the spaces that follow.
// NOTE(review): the value handed back is presumably `anchor` -- confirm
// against the function's return statement.
652 template<
class EventHandler>
653 csubstr ParseEngine<EventHandler>::_scan_anchor()
655 csubstr s = m_evt_handler->m_curr->line_contents.rem;
656 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'&'));
// index 1 skips the leading '&', so the name excludes it
657 csubstr anchor = s.range(1, s.first_of(
' '));
// 1u accounts for the '&' that is not part of `anchor`
658 _line_progressed(1u + anchor.len);
659 _maybe_skipchars(
' ');
// Scan a reference token (leading '*') using the sequence terminator set.
// Precondition (asserted): the remaining line begins with '*'.
// The token spans from the '*' up to the position reported by first_of for
// any of ',', ']', ' ' or ':'; unlike _scan_anchor, the leading marker is
// kept in the token. The parser advances by the token length.
// NOTE(review): the value handed back is presumably `ref` -- confirm against
// the function's return statement.
663 template<
class EventHandler>
664 csubstr ParseEngine<EventHandler>::_scan_ref_seq()
666 csubstr s = m_evt_handler->m_curr->line_contents.rem;
667 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'*'));
668 csubstr ref = s.first(s.first_of(
",] :"));
669 _line_progressed(ref.len);
// Scan a reference token (leading '*') using the map terminator set.
// Precondition (asserted): the remaining line begins with '*'.
// The token spans from the '*' up to the position reported by first_of for
// any of ',', '}' or ' '; note that ':' is not a terminator here, in contrast
// with _scan_ref_seq. The parser advances by the token length.
// NOTE(review): the value handed back is presumably `ref` -- confirm against
// the function's return statement.
673 template<
class EventHandler>
674 csubstr ParseEngine<EventHandler>::_scan_ref_map()
676 csubstr s = m_evt_handler->m_curr->line_contents.rem;
677 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'*'));
678 csubstr ref = s.first(s.first_of(
",} "));
679 _line_progressed(ref.len);
683 template<
class EventHandler>
684 csubstr ParseEngine<EventHandler>::_scan_tag()
686 csubstr rem = m_evt_handler->m_curr->line_contents.rem.triml(
' ');
687 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.begins_with(
'!'));
689 if(rem.begins_with(
"!!"))
691 _c4dbgp(
"begins with '!!'");
693 t = rem.left_of(rem.first_of(
" ,"));
695 t = rem.left_of(rem.first_of(
' '));
697 else if(rem.begins_with(
"!<"))
699 _c4dbgp(
"begins with '!<'");
700 t = rem.left_of(rem.first_of(
'>'),
true);
702 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
703 else if(rem.begins_with(
"!h!"))
705 _c4dbgp(
"begins with '!h!'");
706 t = rem.left_of(rem.first_of(
' '));
711 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.begins_with(
'!'));
712 _c4dbgp(
"begins with '!'");
714 t = rem.left_of(rem.first_of(
" ,"));
716 t = rem.left_of(rem.first_of(
' '));
718 _line_progressed(t.len);
719 _maybe_skip_whitespace_tokens();
726 template<
class EventHandler>
727 bool ParseEngine<EventHandler>::_is_valid_start_scalar_plain_flow(csubstr s)
729 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.empty());
745 _c4dbgpf(
"not a scalar: found non-scalar token '{}'", _c4prc(s.str[0]));
759 _c4err(
"invalid token \":{}\"", _c4prc(s.str[1]));
766 _c4dbgpf(
"not a scalar: found non-scalar token '{}{}'", s.str[0], s.str[1]);
788 _c4dbgpf(
"not a scalar: found non-scalar token '?{}'", _c4prc(s.str[1]));
794 _c4err(
"invalid token \"?{}\"", _c4prc(s.str[1]));
813 template<
class EventHandler>
814 bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_flow(ScannedScalar *C4_RESTRICT sc)
816 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RMAP));
817 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
BLCK));
818 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RSEQ|
RSEQIMAP));
819 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
FLOW));
820 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL));
822 substr s = m_evt_handler->m_curr->line_contents.rem;
823 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(
' '));
824 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(
'\n'));
829 if(!_is_valid_start_scalar_plain_flow(s))
832 _c4dbgp(
"scanning seqflow scalar...");
834 const size_t start_offset = m_evt_handler->m_curr->pos.offset;
835 bool needs_filter =
false;
838 _c4dbgpf(
"scanning scalar: curr line=[{}]~~~{}~~~", s.len, s);
839 for(
size_t i = 0; i < s.len; ++i)
841 const char c = s.str[i];
845 _c4dbgpf(
"found terminating character at {}: '{}'", i, c);
847 if(m_evt_handler->m_curr->pos.offset + i > start_offset)
853 _c4dbgp(
"at the beginning. no scalar here.");
858 _c4dbgpf(
"found terminating character at {}: '{}'", i, c);
863 _c4dbgp(
"found suspicious '#'");
866 _c4dbgpf(
"found terminating character at {}: '{}'", i, c);
872 _c4dbgp(
"found suspicious ':'");
875 const char next = s.str[i+1];
876 _c4dbgpf(
"next char is '{}'", _c4prc(next));
879 _c4dbgp(
"map starting!");
880 if(m_evt_handler->m_curr->pos.offset + i > start_offset)
882 _c4dbgp(
"scalar finished!");
888 _c4dbgp(
"at the beginning. no scalar here.");
894 _c4dbgp(
"it's a scalar indeed.");
898 else if(s.len == i+1)
900 _c4dbgp(
"':' at line end. map starting!");
908 _c4err(
"invalid character: '{}'", c);
913 _line_progressed(s.len);
914 if(!_finished_file())
916 _c4dbgp(
"next line!");
922 _c4dbgp(
"file finished!");
925 s = m_evt_handler->m_curr->line_contents.rem;
932 sc->needs_filter = needs_filter;
934 _c4prscalar(
"scanned plain scalar", sc->scalar,
true);
939 template<
class EventHandler>
940 bool ParseEngine<EventHandler>::_scan_scalar_plain_map_flow(ScannedScalar *C4_RESTRICT sc)
942 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQ) || has_any(
RSEQIMAP));
943 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
BLCK));
944 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RMAP|
RSEQIMAP));
945 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
FLOW));
946 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RVAL|
QMRK));
948 substr s = m_evt_handler->m_curr->line_contents.rem;
949 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(
' '));
954 if(!_is_valid_start_scalar_plain_flow(s))
957 _c4dbgp(
"scanning scalar...");
959 const size_t start_offset = m_evt_handler->m_curr->pos.offset;
960 bool needs_filter =
false;
963 for(
size_t i = 0; i < s.len; ++i)
965 const char c = s.str[i];
971 _c4dbgpf(
"found terminating character: '{}'", c);
974 if(s.len == i+1 || s.str[i+1] ==
' ' || s.str[i+1] ==
',' || s.str[i+1] ==
'}' _RYML_WITH_TAB_TOKENS(|| s.str[i+1] ==
'\t'))
977 _c4dbgpf(
"found terminating character: '{}'", c);
984 _c4err(
"invalid character: '{}'", c);
991 _c4err(
"invalid character: '{}'", c);
997 _c4dbgpf(
"found terminating character: '{}'", c);
1005 _c4dbgp(
"next line!");
1006 _line_progressed(s.len);
1007 if(!_finished_file())
1009 _c4dbgp(
"next line!");
1015 _c4dbgp(
"file finished!");
1018 s = m_evt_handler->m_curr->line_contents.rem;
1019 needs_filter =
true;
1025 sc->needs_filter = needs_filter;
1027 _c4dbgpf(
"scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
1032 template<
class EventHandler>
1033 bool ParseEngine<EventHandler>::_scan_scalar_seq_json(ScannedScalar *C4_RESTRICT sc)
1035 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RMAP));
1036 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
BLCK));
1037 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RSEQ));
1038 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
FLOW));
1040 substr s = m_evt_handler->m_curr->line_contents.rem;
1041 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(
' '));
1046 _c4dbgp(
"scanning scalar...");
1053 _c4dbgp(
"not a scalar.");
1058 const size_t len = _is_special_json_scalar(s);
1061 sc->scalar = s.first(len);
1062 sc->needs_filter =
false;
1063 _c4dbgpf(
"special json scalar: '{}'", sc->scalar);
1064 _line_progressed(len);
1071 for( ; i < s.len; ++i)
1073 const char c = s.str[i];
1080 _c4dbgpf(
"found terminating character: '{}'", c);
1083 if(!i || s.str[i-1] ==
' ')
1085 _c4dbgpf(
"found terminating character: '{}'", c);
1096 if(C4_LIKELY(i > 0))
1098 _line_progressed(i);
1099 sc->scalar = s.first(i);
1100 sc->needs_filter =
false;
1101 _c4dbgpf(
"scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
1108 template<
class EventHandler>
1109 bool ParseEngine<EventHandler>::_scan_scalar_map_json(ScannedScalar *C4_RESTRICT sc)
1111 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQ));
1112 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
BLCK));
1113 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RMAP));
1114 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
FLOW));
1115 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RVAL));
1117 substr s = m_evt_handler->m_curr->line_contents.rem;
1118 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(
' '));
1123 _c4dbgp(
"scanning scalar...");
1126 const size_t len = _is_special_json_scalar(s);
1129 sc->scalar = s.first(len);
1130 sc->needs_filter =
false;
1131 _c4dbgpf(
"special json scalar: '{}'", sc->scalar);
1132 _line_progressed(len);
1139 for( ; i < s.len; ++i)
1141 const char c = s.str[i];
1148 _c4dbgpf(
"found terminating character: '{}'", c);
1151 if(!i || s.str[i-1] ==
' ')
1153 _c4dbgpf(
"found terminating character: '{}'", c);
1164 if(C4_LIKELY(i > 0))
1166 _line_progressed(i);
1167 sc->scalar = s.first(i);
1168 sc->needs_filter =
false;
1169 _c4dbgpf(
"scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
// Decide whether `s` is a document-begin marker in the current parser state.
// Precondition (asserted): s[0] is '-'.
// True only when all three hold: the current line's indentation is zero,
// _at_line_begin() reports true, and _is_doc_begin_token(s) accepts the text.
1176 template<
class EventHandler>
1177 bool ParseEngine<EventHandler>::_is_doc_begin(csubstr s)
1179 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s[0] ==
'-');
1180 return (m_evt_handler->m_curr->line_contents.indentation == 0u && _at_line_begin() && _is_doc_begin_token(s));
// Decide whether `s` is a document-end marker in the current parser state.
// Precondition (asserted): s[0] is '.'.
// True only when all three hold: the current line's indentation is zero,
// _at_line_begin() reports true, and _is_doc_end_token(s) accepts the text.
1183 template<
class EventHandler>
1184 bool ParseEngine<EventHandler>::_is_doc_end(csubstr s)
1186 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s[0] ==
'.');
1187 return (m_evt_handler->m_curr->line_contents.indentation == 0u && _at_line_begin() && _is_doc_end_token(s));
1190 template<
class EventHandler>
1191 bool ParseEngine<EventHandler>::_scan_scalar_plain_blck(ScannedScalar *C4_RESTRICT sc,
size_t indentation)
1193 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
FLOW));
1194 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQIMAP));
1195 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
BLCK|
RUNK|
USTY));
1197 substr s = m_evt_handler->m_curr->line_contents.rem;
1198 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(
' '));
1206 if(_is_blck_token(s))
1210 else if(_is_doc_begin(s))
1212 _c4dbgp(
"token is doc start");
1218 if(_is_blck_token(s))
1231 _c4dbgp(
"token is doc end");
1237 _c4dbgpf(
"plain scalar! indentation={}", indentation);
1239 const size_t start_offset = m_evt_handler->m_curr->pos.offset;
1240 const size_t start_line = m_evt_handler->m_curr->pos.line;
1242 bool needs_filter =
false;
1245 _c4dbgpf(
"plain scalar line: [{}]~~~{}~~~", s.len, s);
1246 for(
size_t i = 0; i < s.len; ++i)
1248 const char curr = s.str[i];
1253 _c4dbgpf(
"[{}]: got suspicious ':'", i);
1257 _c4dbgpf(
"followed by '{}'", i+1 == s.len ? csubstr(
"\\n") : _c4prc(s.str[i+1]));
1258 _line_progressed(i);
1260 if(C4_LIKELY(m_evt_handler->m_curr->pos.line == start_line))
1262 _c4dbgp(
"start line. scalar ends here");
1267 _c4err(
"parse error");
1273 while(j + 1 < s.len && s.str[j+1] ==
':')
1275 _c4dbgp(
"skip colon");
1278 i = j > i ? j-1 : i;
1279 _c4dbgp(
"nothing to see here");
1283 _c4dbgp(
"got suspicious '#'");
1284 if(!i || (s.str[i-1] ==
' ' || s.str[i-1] ==
'\t'))
1286 _c4dbgp(
"comment! scalar ends here");
1287 _line_progressed(i);
1292 _c4dbgp(
"nothing to see here");
1297 _line_progressed(s.len);
1298 csubstr next_peeked = _peek_next_line(m_evt_handler->m_curr->pos.offset);
1299 next_peeked = next_peeked.trimr(
"\n\r");
1300 const size_t next_indentation = next_peeked.first_not_of(
' ');
1301 _c4dbgpf(
"indentation curr={} next={}", indentation, next_indentation);
1302 if(next_indentation < indentation)
1304 _c4dbgp(
"smaller indentation! scalar ended");
1307 else if(next_indentation == 0 && next_peeked.len > 0)
1309 const char first = next_peeked.str[0];
1313 next_peeked = next_peeked.trimr(
"\n\r");
1314 _c4dbgpf(
"doc begin? peeked=[{}]~~~{}{}~~~", next_peeked.len, next_peeked.len >= 3 ? next_peeked.first(3) : next_peeked, next_peeked.len > 3 ?
"..." :
"");
1315 if(_is_doc_begin_token(next_peeked))
1317 _c4dbgp(
"doc begin! scalar ended");
1322 next_peeked = next_peeked.trimr(
"\n\r");
1323 _c4dbgpf(
"doc end? peeked=[{}]~~~{}{}~~~", next_peeked.len, next_peeked.len >= 3 ? next_peeked.first(3) : next_peeked, next_peeked.len > 3 ?
"..." :
"");
1324 if(_is_doc_end_token(next_peeked))
1326 _c4dbgp(
"doc end! scalar ended");
1333 _c4dbgp(
"next line!");
1334 if(!_finished_file())
1336 _c4dbgp(
"next line!");
1342 _c4dbgp(
"file finished!");
1345 s = m_evt_handler->m_curr->line_contents.rem;
1346 needs_filter =
true;
1351 sc->scalar = m_buf.range(start_offset, m_evt_handler->m_curr->pos.offset).trimr(
" \n\r\t");
1352 sc->needs_filter = needs_filter;
1354 _c4dbgpf(
"scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
// Scan a plain scalar for the block-sequence state.
// Asserted state: has_none() for each of RMAP, FLOW and RSEQIMAP, and
// has_any() for each of RSEQ, BLCK and RVAL.
// The scan itself is delegated to _scan_scalar_plain_blck(), passing the
// current state's indref plus one as the indentation argument; that call's
// result is returned unchanged. `sc` is forwarded as-is.
1359 template<
class EventHandler>
1360 bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_blck(ScannedScalar *C4_RESTRICT sc)
1362 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RMAP));
1363 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
FLOW));
1364 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQIMAP));
1365 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RSEQ));
1366 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
BLCK));
1367 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL));
1368 return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref + 1u);
// Scan a plain scalar for the block-map state.
// Asserted state: has_none() for each of RSEQ and FLOW, has_any() for each of
// RMAP and BLCK, and has_any() for the combined mask RKEY|RVAL|QMRK.
// The scan itself is delegated to _scan_scalar_plain_blck(), passing the
// current state's indref plus one as the indentation argument; that call's
// result is returned unchanged. `sc` is forwarded as-is.
1371 template<
class EventHandler>
1372 bool ParseEngine<EventHandler>::_scan_scalar_plain_map_blck(ScannedScalar *C4_RESTRICT sc)
1374 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQ));
1375 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
FLOW));
1376 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RMAP));
1377 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
BLCK));
1378 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RVAL|
QMRK));
1379 return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref + 1u);
// Scan a plain scalar for the RUNK/USTY states.
// Asserted state: has_any() for the combined mask RUNK|USTY.
// Delegates to _scan_scalar_plain_blck() and returns its result; unlike the
// seq/map block variants, the indentation argument is the current state's
// indref itself, with no +1.
1382 template<
class EventHandler>
1383 bool ParseEngine<EventHandler>::_scan_scalar_plain_unk(ScannedScalar *C4_RESTRICT sc)
1385 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RUNK|
USTY));
1386 return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref);
1392 template<
class EventHandler>
1393 substr ParseEngine<EventHandler>::_peek_next_line(
size_t pos)
const
1397 pos = pos ==
npos ? m_evt_handler->m_curr->pos.offset : pos;
1398 if(pos >= m_buf.len)
1402 rem = from_next_line(m_buf.sub(pos));
1407 nlpos = rem.first_of(
"\r\n");
1409 nlpos += _extend_from_combined_newline(rem[nlpos], rem[nlpos+1]);
1410 rem = rem.left_of(nlpos,
true);
1412 _c4dbgpf(
"peek next line @ {}: (len={})'{}'", pos, rem.len, rem.trimr(
"\r\n"));
1416 _c4dbgpf(
"peek next line @ {}: (len=0)''", pos);
// Load the line contents of the current state from the source buffer.
// When the current offset is still inside m_buf, the line contents are reset
// from the buffer starting at that offset (reset_with_next_line).
1422 template<
class EventHandler>
1423 void ParseEngine<EventHandler>::_scan_line()
1425 if(C4_LIKELY(m_evt_handler->m_curr->pos.offset < m_buf.len))
1426 m_evt_handler->m_curr->line_contents.reset_with_next_line(m_buf, m_evt_handler->m_curr->pos.offset);
// NOTE(review): presumably the alternative branch of the check above (offset
// at or past the end of the buffer): the line contents are reset with
// m_buf.last(0) for both arguments -- confirm.
1428 m_evt_handler->m_curr->line_contents.reset(m_buf.last(0), m_buf.last(0));
// Advance the parse position within the current line by `ahead` characters.
// Both pos.offset and pos.col grow by `ahead`, and the `rem` view of the line
// is replaced by its sub-range starting at `ahead`.
// Asserted afterwards: the column does not exceed stripped.len + 1.
1431 template<
class EventHandler>
1432 void ParseEngine<EventHandler>::_line_progressed(
size_t ahead)
1434 _c4dbgpf(
"line[{}] ({} cols) progressed by {}: col {}-->{} offset {}-->{}", m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->line_contents.full.len, ahead, m_evt_handler->m_curr->pos.col, m_evt_handler->m_curr->pos.col+ahead, m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset+ahead);
1435 m_evt_handler->m_curr->pos.offset += ahead;
1436 m_evt_handler->m_curr->pos.col += ahead;
1437 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col <= m_evt_handler->m_curr->line_contents.stripped.len+1);
// drop the consumed prefix from the remaining-line view
1438 m_evt_handler->m_curr->line_contents.rem = m_evt_handler->m_curr->line_contents.rem.sub(ahead);
// Move the position bookkeeping from the end of the current line to the
// start of the next one.
// Asserted on entry: the column equals stripped.len + 1.
// The offset grows by (full.len - stripped.len), the line counter is
// incremented and the column is set back to 1.
// NOTE(review): full.len - stripped.len is presumably the length of the line
// terminator that `stripped` excludes -- confirm where line_contents is defined.
1441 template<
class EventHandler>
1442 void ParseEngine<EventHandler>::_line_ended()
1444 _c4dbgpf(
"line[{}] ({} cols) ended! offset {}-->{} / col {}-->{}",
1445 m_evt_handler->m_curr->pos.line,
1446 m_evt_handler->m_curr->line_contents.full.len,
1447 m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset + m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.stripped.len,
1448 m_evt_handler->m_curr->pos.col, 1);
1449 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col == m_evt_handler->m_curr->line_contents.stripped.len + 1);
1450 m_evt_handler->m_curr->pos.offset += m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.stripped.len;
1451 ++m_evt_handler->m_curr->pos.line;
1452 m_evt_handler->m_curr->pos.col = 1;
// Revert the position changes made by _line_ended().
// Asserted on entry: the column is 1, the line counter is above zero, and the
// offset is at least (full.len - stripped.len) so the subtraction below
// cannot wrap.
// The offset is reduced by that same delta, the line counter is decremented,
// the column is set to stripped.len + 1, and `rem` becomes a zero-length view
// of m_buf at the restored offset.
1455 template<
class EventHandler>
1456 void ParseEngine<EventHandler>::_line_ended_undo()
1458 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col == 1u);
1459 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.line > 0u);
1460 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.offset >= m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.stripped.len);
// same quantity that _line_ended() added to the offset
1461 const size_t delta = m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.stripped.len;
1462 _c4dbgpf(
"line[{}] undo ended! line {}-->{}, offset {}-->{}", m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->pos.line - 1, m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset - delta);
1463 m_evt_handler->m_curr->pos.offset -= delta;
1464 --m_evt_handler->m_curr->pos.line;
1465 m_evt_handler->m_curr->pos.col = m_evt_handler->m_curr->line_contents.stripped.len + 1u;
// nothing is left to consume on the restored line: empty view at the offset
1468 m_evt_handler->m_curr->line_contents.rem = m_buf.sub(m_evt_handler->m_curr->pos.offset, 0);
// Store `indentation` as the reference indentation (indref) of the current
// state, then emit a debug message with the state's level and the new value.
1473 template<
class EventHandler>
1474 void ParseEngine<EventHandler>::_set_indentation(
size_t indentation)
1476 m_evt_handler->m_curr->indref = indentation;
1477 _c4dbgpf(
"state[{}]: saving indentation: {}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
// Record the current column of the line contents as the reference
// indentation (indref) of the current state.
// Asserted on entry: `rem` does not begin before `full`.
// The value comes from line_contents.current_col(); a debug message with the
// state's level and the saved value follows.
1480 template<
class EventHandler>
1481 void ParseEngine<EventHandler>::_save_indentation()
1483 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.begin() >= m_evt_handler->m_curr->line_contents.full.begin());
1484 m_evt_handler->m_curr->indref = m_evt_handler->m_curr->line_contents.current_col();
1485 _c4dbgpf(
"state[{}]: saving indentation: {}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
// Closes a block map through the event handler's end_map().
// Before closing, one branch ("set missing val") emits an empty plain val
// scalar, and the QMRK branch ("set missing keyval") emits an empty plain key
// scalar followed by an empty plain val scalar. Pending annotations are
// handled before each emitted scalar.
1491 template<
class EventHandler>
1492 void ParseEngine<EventHandler>::_end_map_blck()
1494 _c4dbgp(
"mapblck: end");
1497 _c4dbgp(
"mapblck: set missing val");
1498 _handle_annotations_before_blck_val_scalar();
1499 m_evt_handler->set_val_scalar_plain({});
1501 else if(has_any(
QMRK))
1503 _c4dbgp(
"mapblck: set missing keyval");
1504 _handle_annotations_before_blck_key_scalar();
1505 m_evt_handler->set_key_scalar_plain({});
1506 _handle_annotations_before_blck_val_scalar();
1507 m_evt_handler->set_val_scalar_plain({});
1509 m_evt_handler->end_map();
// Closes a block sequence through the event handler's end_seq().
// In the "set missing val" branch, pending annotations are handled and an
// empty plain val scalar is emitted before the sequence is closed.
1512 template<
class EventHandler>
1513 void ParseEngine<EventHandler>::_end_seq_blck()
1517 _c4dbgp(
"seqblck: set missing val");
1518 _handle_annotations_before_blck_val_scalar();
1519 m_evt_handler->set_val_scalar_plain({});
1521 m_evt_handler->end_seq();
// Ends a map: asserts that the current state has RMAP set, performs further
// flag checks (no FLOW, some USTY), and pops the event handler's state with
// _pop().
// NOTE(review): the statements between the assertions are not shown here;
// confirm whether the FLOW/USTY checks are unconditional.
1524 template<
class EventHandler>
1525 void ParseEngine<EventHandler>::_end2_map()
1527 _c4dbgp(
"map: end");
1528 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RMAP));
1535 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
FLOW));
1536 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
USTY));
1537 m_evt_handler->_pop();
// Ends a sequence: asserts that the current state has RSEQ set, performs
// further flag checks (no FLOW, some USTY), and pops the event handler's state
// with _pop().
// NOTE(review): the statements between the assertions are not shown here;
// confirm whether the FLOW/USTY checks are unconditional.
1541 template<
class EventHandler>
1542 void ParseEngine<EventHandler>::_end2_seq()
1544 _c4dbgp(
"seq: end");
1545 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RSEQ));
1552 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
FLOW));
1553 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
USTY));
1554 m_evt_handler->_pop();
// Begins a document through the event handler's begin_doc() and resets the
// reference indentation of the (then current) state to 0.
1558 template<
class EventHandler>
1559 void ParseEngine<EventHandler>::_begin2_doc()
1563 m_evt_handler->begin_doc();
1564 m_evt_handler->m_curr->indref = 0;
// Begins an explicit document through the event handler's begin_doc_expl()
// and resets the reference indentation of the (then current) state to 0.
1567 template<
class EventHandler>
1568 void ParseEngine<EventHandler>::_begin2_doc_expl()
1572 m_evt_handler->begin_doc_expl();
1573 m_evt_handler->m_curr->indref = 0;
// Ends the current document through the event handler's end_doc().
// Asserts that the current state has RDOC set. In the "doc was empty" branch
// an empty plain val scalar is emitted first.
1576 template<
class EventHandler>
1577 void ParseEngine<EventHandler>::_end2_doc()
1579 _c4dbgp(
"doc: end");
1580 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RDOC));
1583 _c4dbgp(
"doc was empty; add empty val");
1584 m_evt_handler->set_val_scalar_plain({});
1586 m_evt_handler->end_doc();
// Ends the current document through the event handler's end_doc_expl().
// In the "no children" branch an empty plain val scalar is emitted first.
1589 template<
class EventHandler>
1590 void ParseEngine<EventHandler>::_end2_doc_expl()
1592 _c4dbgp(
"doc: end");
1595 _c4dbgp(
"doc: no children; add empty val");
1596 m_evt_handler->set_val_scalar_plain({});
1598 m_evt_handler->end_doc_expl();
// NOTE(review): presumably begins a document when none is open yet; only the
// trace message of that branch is shown here -- confirm against the full body.
1601 template<
class EventHandler>
1602 void ParseEngine<EventHandler>::_maybe_begin_doc()
1606 _c4dbgp(
"doc must be started");
// NOTE(review): presumably ends the document when one is open; only the trace
// message of that branch is shown here -- confirm against the full body.
1610 template<
class EventHandler>
1611 void ParseEngine<EventHandler>::_maybe_end_doc()
1615 _c4dbgp(
"doc must be finished");
// Pops the parser state back to the state that carries the RDOC flag.
// Two stack layouts are accepted:
//  - stack[0] has RDOC: pop to stack[0] unless already at level 0;
//  - stack[1] has RDOC (stack[0] being the stream): pop to stack[1] unless
//    already at level 1.
// Any other layout is reported as "internal error". On exit the current state
// is asserted to have RDOC set.
1620 template<
class EventHandler>
1621 void ParseEngine<EventHandler>::_end_doc_suddenly__pop()
1623 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
1624 if(m_evt_handler->m_stack[0].flags &
RDOC)
1626 _c4dbgp(
"root is RDOC");
1627 if(m_evt_handler->m_curr->level != 0)
1628 _handle_indentation_pop(&m_evt_handler->m_stack[0]);
1630 else if((m_evt_handler->m_stack.size() > 1) && (m_evt_handler->m_stack[1].flags &
RDOC))
1632 _c4dbgp(
"root is STREAM");
1633 if(m_evt_handler->m_curr->level != 1)
1634 _handle_indentation_pop(&m_evt_handler->m_stack[1]);
1638 _c4err(
"internal error");
1640 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RDOC));
// Ends the current document abruptly: first pops back to the document-level
// state via _end_doc_suddenly__pop().
// NOTE(review): the statements following the pop are not shown here.
1643 template<
class EventHandler>
1644 void ParseEngine<EventHandler>::_end_doc_suddenly()
1646 _c4dbgp(
"end doc suddenly");
1647 _end_doc_suddenly__pop();
// Starts a new document abruptly: first pops back to the document-level state
// via _end_doc_suddenly__pop().
// NOTE(review): the statements following the pop are not shown here.
1652 template<
class EventHandler>
1653 void ParseEngine<EventHandler>::_start_doc_suddenly()
1655 _c4dbgp(
"start doc suddenly");
1656 _end_doc_suddenly__pop();
// Finishes the stream.
// Unterminated flow containers are reported as errors ("missing terminating ]"
// / "missing terminating }"). When more than one state is on the stack, the
// states are popped down to the first stack entry. When anchors or tags are
// still pending, a document holding an empty plain val scalar is emitted so
// that the annotations have a node to attach to (presumably; the intervening
// conditions are not shown here). Finally end_stream() is signalled to the
// event handler.
1661 template<
class EventHandler>
1662 void ParseEngine<EventHandler>::_end_stream()
1664 _c4dbgpf(
"end_stream, level={} node_id={}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->node_id);
1666 _c4err(
"missing terminating ]");
1668 _c4err(
"missing terminating }");
1669 if(m_evt_handler->m_stack.size() > 1)
1670 _handle_indentation_pop(m_evt_handler->m_stack.begin());
1677 if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
1681 m_evt_handler->begin_doc();
1682 _handle_annotations_before_blck_val_scalar();
1683 m_evt_handler->set_val_scalar_plain({});
1684 m_evt_handler->end_doc();
1688 m_evt_handler->end_stream();
// Pops parser states until the current state is `popto`.
// Each iteration handles the current state according to its flags (a seq
// branch and an RMAP branch are traced as "popping seq" / "popping map").
// @param popto the state to return to; the loop runs until m_curr equals it.
1692 template<
class EventHandler>
1693 void ParseEngine<EventHandler>::_handle_indentation_pop(ParserState
const* popto)
1695 _c4dbgpf(
"popping {} level{}: from level {}(@ind={}) to level {}(@ind={})", m_evt_handler->m_curr->level - popto->level, (((m_evt_handler->m_curr->level - popto->level) > 1) ?
"s" :
""), m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, popto->level, popto->indref);
1696 while(m_evt_handler->m_curr != popto)
1700 _c4dbgpf(
"popping seq at level {} (indentation={},addr={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, m_evt_handler->m_curr);
1703 else if(has_any(
RMAP))
1705 _c4dbgpf(
"popping map at level {} (indentation={},addr={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, m_evt_handler->m_curr);
1713 _c4dbgpf(
"current level is {} (indentation={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
// Handles a decrease of indentation while in a block sequence.
// Walks the state stack downwards from the state below the current one
// (including the first stack entry) looking for a state whose reference
// indentation equals the indentation of the current line, then pops to it.
// Reports "parse error: incorrect indentation?" when no valid target is found,
// i.e. when popto is null, not below the current state, or not at a lower
// level. Requires the stack storage to be contiguous, since states are
// traversed with raw pointers.
1716 template<
class EventHandler>
1717 void ParseEngine<EventHandler>::_handle_indentation_pop_from_block_seq()
1720 using state_type =
typename EventHandler::state;
1721 state_type
const* popto =
nullptr;
1722 auto &stack = m_evt_handler->m_stack;
1723 _RYML_CB_ASSERT(stack.m_callbacks, stack.is_contiguous());
1724 _RYML_CB_ASSERT(stack.m_callbacks, m_evt_handler->m_curr >= stack.begin() && m_evt_handler->m_curr < stack.end());
1725 const size_t ind = m_evt_handler->m_curr->line_contents.indentation;
1730 for(state_type
const& s : stack)
1731 _dbg_printf(
"state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(flagbuf_, s.flags));
1734 for(state_type
const* s = m_evt_handler->m_curr-1; s >= stack.begin(); --s)
1736 _c4dbgpf(
"searching for state with indentation {}. curr={} (level={},node={})", ind, s->indref, s->level, s->node_id);
1737 if(s->indref == ind)
1739 _c4dbgpf(
"gotit!!! level={} node={}", s->level, s->node_id);
1744 if(!popto || popto >= m_evt_handler->m_curr || popto->level >= m_evt_handler->m_curr->level)
1746 _c4err(
"parse error: incorrect indentation?");
1748 _handle_indentation_pop(popto);
// Handles a decrease of indentation while in a block map.
// Walks the state stack downwards from the state below the current one
// (excluding the first stack entry: the loop condition is s > stack.begin())
// looking for the state to pop to, comparing each state's reference
// indentation with the indentation of the current line. A state with the same
// indentation gets extra treatment: a top-level state that is neither map nor
// seq is special-cased, and the rest of the line is inspected for a '-' block
// token to detect an indentless sequence parent.
// Reports "parse error: incorrect indentation?" when no valid target is found,
// then pops to the selected state.
1751 template<
class EventHandler>
1752 void ParseEngine<EventHandler>::_handle_indentation_pop_from_block_map()
1755 using state_type =
typename EventHandler::state;
1756 auto &stack = m_evt_handler->m_stack;
1757 _RYML_CB_ASSERT(stack.m_callbacks, stack.is_contiguous());
1758 _RYML_CB_ASSERT(stack.m_callbacks, m_evt_handler->m_curr >= stack.begin() && m_evt_handler->m_curr < stack.end());
1759 const size_t ind = m_evt_handler->m_curr->line_contents.indentation;
1760 state_type
const* popto =
nullptr;
1765 for(state_type
const& s : stack)
1766 _dbg_printf(
"state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(flagbuf_, s.flags));
1769 for(state_type
const* s = m_evt_handler->m_curr-1; s > stack.begin(); --s)
1771 _c4dbgpf(
"searching for state with indentation {}. current: ind={},level={},node={},flags={}", ind, s->indref, s->level, s->node_id, detail::_parser_flags_to_str(flagbuf_, s->flags));
1776 else if(s->indref == ind)
1778 _c4dbgpf(
"same indentation!!! level={} node={}", s->level, s->node_id);
1779 if(popto && has_any(
RTOP, s) && has_none(
RMAP|
RSEQ, s))
1786 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
1787 const size_t first = rem.first_not_of(
' ');
1788 _RYML_CB_ASSERT(stack.m_callbacks, first == ind || first ==
npos);
1789 rem = rem.right_of(first,
true);
1790 _c4dbgpf(
"indentless? rem='{}' first={}", rem, first);
1791 if(rem.begins_with(
'-') && _is_blck_token(rem))
1793 _c4dbgp(
"parent was indentless seq");
1799 if(!popto || popto >= m_evt_handler->m_curr || popto->level >= m_evt_handler->m_curr->level)
1801 _c4err(
"parse error: incorrect indentation?");
1803 _handle_indentation_pop(popto);
// Scans a single-quoted scalar starting at the current position.
// Leading spaces are skipped, the opening quote is consumed, and the input is
// scanned line by line until the closing quote is found. Reaching the end of
// the file first is reported as an error.
// @return the scanned scalar (presumably the text between the quotes) together
//         with a flag telling whether it still needs filtering. The flag is
//         raised, among other conditions, when a line of the scalar begins
//         with a space while at the beginning of the line.
1808 template<
class EventHandler>
1809 typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_scan_scalar_squot()
1815 size_t b = m_evt_handler->m_curr->pos.offset;
1816 substr s = m_buf.sub(b);
1817 if(s.begins_with(
' '))
1820 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.sub(b).is_super(s));
1821 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begin() >= m_buf.sub(b).begin());
1822 _line_progressed((
size_t)(s.begin() - m_buf.sub(b).begin()));
1824 b = m_evt_handler->m_curr->pos.offset;
1825 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'\''));
// consume the opening quote
1828 _line_progressed(1);
1831 bool needs_filter =
false;
1833 size_t numlines = 1;
1835 while( ! _finished_file())
1837 const csubstr line = m_evt_handler->m_curr->line_contents.rem;
1838 bool line_is_blank =
true;
1839 _c4dbgpf(
"scanning single quoted scalar @ line[{}]: ~~~{}~~~", m_evt_handler->m_curr->pos.line, line);
1840 for(
size_t i = 0; i < line.len; ++i)
1842 const char curr = line.str[i];
// '~' stands in for "no next character" at the end of the line
1845 const char next = i+1 < line.len ? line.str[i+1] :
'~';
1853 needs_filter =
true;
1857 else if(curr !=
' ')
1859 line_is_blank =
false;
1864 needs_filter = needs_filter
1867 || (_at_line_begin() && line.begins_with(
' '));
1871 _line_progressed(line.len);
1876 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, pos >= 0 && pos < m_buf.len);
1877 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf[m_evt_handler->m_curr->pos.offset + pos] ==
'\'');
1878 _line_progressed(pos + 1);
// from here on pos is relative to b, the offset of the opening quote
1879 pos = m_evt_handler->m_curr->pos.offset - b - 1;
1889 _c4err(
"reached end of file while looking for closing quote");
1893 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, pos > 0);
1894 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.end() >= m_buf.begin() && s.end() <= m_buf.end());
1895 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.end() == m_buf.end() || *s.end() ==
'\'');
1896 s = s.sub(0, pos-1);
1899 _c4prscalar(
"scanned squoted scalar", s,
true);
1901 return ScannedScalar { s, needs_filter };
// Scans a double-quoted scalar starting at the current position.
// Leading spaces are skipped, the opening quote is consumed, and the input is
// scanned line by line until the closing quote is found. Reaching the end of
// the file first is reported as an error. In the escape branch the scalar is
// marked as needing filtering, and a following '"' or '\\' is checked for
// (presumably so that an escaped quote does not end the scalar).
// @return the scanned scalar (presumably the text between the quotes) together
//         with a flag telling whether it still needs filtering.
1906 template<
class EventHandler>
1907 typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_scan_scalar_dquot()
1913 size_t b = m_evt_handler->m_curr->pos.offset;
1914 substr s = m_buf.sub(b);
1915 if(s.begins_with(
' '))
1918 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.sub(b).is_super(s));
1919 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begin() >= m_buf.sub(b).begin());
1920 _line_progressed((
size_t)(s.begin() - m_buf.sub(b).begin()));
1922 b = m_evt_handler->m_curr->pos.offset;
1923 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'"'));
// consume the opening quote
1926 _line_progressed(1);
1929 bool needs_filter =
false;
1931 size_t numlines = 1;
1933 while( ! _finished_file())
1935 const csubstr line = m_evt_handler->m_curr->line_contents.rem;
1936 bool line_is_blank =
true;
1937 _c4dbgpf(
"scanning double quoted scalar @ line[{}]: line='{}'", m_evt_handler->m_curr->pos.line, line);
1938 for(
size_t i = 0; i < line.len; ++i)
1940 const char curr = line.str[i];
1942 line_is_blank =
false;
// '~' stands in for "no next character" at the end of the line
1946 const char next = i+1 < line.len ? line.str[i+1] :
'~';
1947 needs_filter =
true;
1948 if(next ==
'"' || next ==
'\\')
1951 else if(curr ==
'"')
1959 needs_filter = needs_filter
1962 || (_at_line_begin() && line.begins_with(
' '));
1966 _line_progressed(line.len);
1971 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, pos >= 0 && pos < m_buf.len);
1972 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf[m_evt_handler->m_curr->pos.offset + pos] ==
'"');
1973 _line_progressed(pos + 1);
// from here on pos is relative to b, the offset of the opening quote
1974 pos = m_evt_handler->m_curr->pos.offset - b - 1;
1984 _c4err(
"reached end of file looking for closing quote");
1988 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, pos > 0);
1989 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.end() == m_buf.end() || *s.end() ==
'"');
1990 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.end() >= m_buf.begin() && s.end() <= m_buf.end());
1991 s = s.sub(0, pos-1);
1994 _c4prscalar(
"scanned dquoted scalar", s,
true);
1996 return ScannedScalar { s, needs_filter };
// Scans a block scalar, introduced by '|' (literal) or '>' (folded), and
// stores the raw block and its indentation into *sb.
//
// The header following the indicator may carry a chomp indicator ('-' or '+')
// and an explicit indentation of exactly one nonzero decimal digit, which is
// added to the current state's reference indentation. Without an explicit
// indentation, the block indentation is taken from the block's lines: while
// only empty (or all-space) lines have been seen a provisional indentation is
// tracked, and the first non-empty line fixes the indentation. A line that
// begins with a tab while the indentation is still undetermined is a parse
// error. If no line fixed the indentation, the provisional value is used.
//
// @param sb     output; receives the raw block (scalar) and the indentation.
// @param indref reference indentation of the enclosing node; must not be npos.
2001 template<
class EventHandler>
2002 void ParseEngine<EventHandler>::_scan_block(ScannedBlock *C4_RESTRICT sb,
size_t indref)
2004 _c4dbgpf(
"blck: indref={}", indref);
2005 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, indref !=
npos);
2008 csubstr s = m_evt_handler->m_curr->line_contents.rem;
2009 csubstr trimmed = s.triml(
' ');
2010 if(trimmed.str > s.str)
2012 _c4dbgp(
"skipping whitespace");
2013 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, trimmed.str >= s.str);
2014 _line_progressed(
static_cast<size_t>(trimmed.str - s.str));
2017 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'|') || s.begins_with(
'>'));
2019 _c4dbgpf(
"blck: specs=[{}]~~~{}~~~", s.len, s);
2022 BlockChomp_e chomp = CHOMP_CLIP;
2023 size_t indentation =
npos;
2027 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with_any(
"|>"));
// t is the header text following the block indicator
2028 csubstr t = s.sub(1);
2029 _c4dbgpf(
"blck: spec is multichar: '{}'", t);
2030 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, t.len >= 1);
2031 size_t pos = t.first_of(
"-+");
2032 _c4dbgpf(
"blck: spec chomp char at {}", pos);
2036 chomp = CHOMP_STRIP;
2037 else if(t[pos] ==
'+')
2045 digits = t.left_of(t.first_not_of(
"0123456789"));
2046 if( ! digits.empty())
2048 if(C4_UNLIKELY(digits.len > 1))
2049 _c4err(
"parse error: invalid indentation");
2050 _c4dbgpf(
"blck: parse indentation digits: [{}]~~~{}~~~", digits.len, digits);
2051 if(C4_UNLIKELY( !
c4::atou(digits, &indentation)))
2052 _c4err(
"parse error: could not read indentation as decimal");
2053 if(C4_UNLIKELY( ! indentation))
2054 _c4err(
"parse error: null indentation");
2055 _c4dbgpf(
"blck: indentation specified: {}. add {} from curr state -> {}", indentation, m_evt_handler->m_curr->indref, indentation+indref);
// NOTE(review): the trace above computes indentation+indref (the parameter),
// while the code adds m_curr->indref -- confirm that both always agree.
2056 indentation += m_evt_handler->m_curr->indref;
2060 _c4dbgpf(
"blck: style={} chomp={} indentation={}", s.begins_with(
'>') ?
"fold" :
"literal", chomp==CHOMP_CLIP ?
"clip" : (chomp==CHOMP_STRIP ?
"strip" :
"keep"), indentation);
// consume the rest of the header line
2063 _line_progressed(s.len);
// the raw block starts empty at the current offset and grows line by line
2068 substr raw_block(m_buf.data() + m_evt_handler->m_curr->pos.offset,
size_t(0));
2069 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, raw_block.begin() == m_evt_handler->m_curr->line_contents.full.begin());
2077 size_t num_lines = 0;
2078 size_t first = m_evt_handler->m_curr->pos.line;
2079 size_t provisional_indentation =
npos;
2081 while(( ! _finished_file()))
// lc peeks at the next line; it is committed to the state only once the line
// is accepted as part of the block
2084 lc.reset_with_next_line(m_buf, m_evt_handler->m_curr->pos.offset);
2085 _c4dbgpf(
"blck: peeking at [{}]~~~{}~~~", lc.stripped.len, lc.stripped);
2087 if(indentation !=
npos)
2089 _c4dbgpf(
"blck: indentation={}", indentation);
2091 if(lc.indentation < indentation && ( ! lc.rem.trim(
" \t").empty()))
2095 _c4dbgpf(
"blck: indentation decreased ref={} thisline={}", indentation, lc.indentation);
2099 _c4err(
"indentation decreased without any scalar");
2103 else if(indentation == 0)
2105 _c4dbgpf(
"blck: noindent. lc.rem=[{}]~~~{}~~~", lc.rem.len, lc.rem);
2106 if(_is_doc_token(lc.rem))
2108 _c4dbgp(
"blck: stop. indentation=0 and doc ended");
2115 const size_t fns = lc.stripped.first_not_of(
' ');
2116 _c4dbgpf(
"blck: indentation ref not set. firstnonws={}", fns);
2120 if(C4_UNLIKELY(lc.stripped.begins_with(
'\t')))
2121 _c4err(
"parse error");
2123 _c4dbgpf(
"blck: line not empty. indref={} indprov={} indentation={}", indref, provisional_indentation, lc.indentation);
2124 if(provisional_indentation ==
npos)
2126 if(lc.indentation < indref)
2128 _c4dbgpf(
"blck: block terminated indentation={} < indref={}", lc.indentation, indref);
2129 if(raw_block.len == 0)
2131 _c4dbgp(
"blck: was empty, undo next line");
2136 else if(lc.indentation == m_evt_handler->m_curr->indref)
2140 _c4dbgpf(
"blck: block terminated. reading container and indentation={}==indref={}", lc.indentation, m_evt_handler->m_curr->indref);
2144 _c4dbgpf(
"blck: set indentation ref from this line: ref={}", lc.indentation);
2145 indentation = lc.indentation;
2149 if(lc.indentation >= provisional_indentation)
2151 _c4dbgpf(
"blck: set indentation ref from provisional indentation: provisional_ref={}, thisline={}", provisional_indentation, lc.indentation);
2153 indentation = lc.indentation;
2164 _c4dbgpf(
"blck: line empty or {} spaces. line_indentation={} prov_indentation={}", lc.stripped.len, lc.indentation, provisional_indentation);
2165 if(provisional_indentation !=
npos)
2167 if(lc.stripped.len >= provisional_indentation)
2169 _c4dbgpf(
"blck: increase provisional_ref {} -> {}", provisional_indentation, lc.stripped.len);
2170 provisional_indentation = lc.stripped.len;
2172 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
2173 else if(lc.indentation >= provisional_indentation && lc.indentation !=
npos)
2175 _c4dbgpf(
"blck: increase provisional_ref {} -> {}", provisional_indentation, lc.indentation);
2176 provisional_indentation = lc.indentation;
// when the line has no indentation, the fallback is the result of
// has_any(RSEQ|RVAL) converted to size_t
2182 provisional_indentation = lc.indentation ? lc.indentation : has_any(
RSEQ|
RVAL);
2183 _c4dbgpf(
"blck: initialize provisional_ref={}", provisional_indentation);
2184 if(provisional_indentation ==
npos)
2186 provisional_indentation = lc.stripped.len ? lc.stripped.len : has_any(
RSEQ|
RVAL);
2187 _c4dbgpf(
"blck: initialize provisional_ref={}", provisional_indentation);
// the provisional indentation is never allowed below the reference indentation
2189 if(provisional_indentation < indref)
2191 provisional_indentation = indref;
2192 _c4dbgpf(
"blck: initialize provisional_ref={}", provisional_indentation);
// the peeked line belongs to the block: commit it and extend the raw block
2198 m_evt_handler->m_curr->line_contents = lc;
2199 _c4dbgpf(
"blck: append '{}'", m_evt_handler->m_curr->line_contents.rem);
2200 raw_block.len += m_evt_handler->m_curr->line_contents.full.len;
2201 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
2205 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.line == (first + num_lines) || (raw_block.len == 0));
2206 C4_UNUSED(num_lines);
2209 if(indentation ==
npos)
2211 _c4dbgpf(
"blck: set indentation from provisional: {}", provisional_indentation);
2212 indentation = provisional_indentation;
2218 _c4prscalar(
"scanned block", raw_block,
true);
2220 sb->scalar = raw_block;
2221 sb->indentation = indentation;
// Trace helper for the whitespace-filtering routines: prefixes the message
// with the processor's read and write positions. The second definition expands
// to nothing (presumably selected by a debug conditional not shown here).
2232 #define _c4dbgfws(fmt, ...) _c4dbgpf("filt_ws[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2234 #define _c4dbgfws(...)
// Handles a run of whitespace at the processor's read position, which must be
// at a ' ' or '\t'.
// Looks for the first following non-whitespace character; at the very start of
// the source (rpos == 0) only spaces count as whitespace, otherwise spaces and
// tabs do. If that character is a line break, the whitespace is trailing on
// its line and is skipped; otherwise the whitespace is legitimate content.
// @return judging from the callers, false when only whitespace remains up to
//         the end of the source, so that the caller handles the remainder.
2237 template<
class EventHandler>
2238 template<
class FilterProcessor>
2239 bool ParseEngine<EventHandler>::_filter_ws_handle_to_first_non_space(FilterProcessor &proc)
2241 _c4dbgfws(
"found whitespace '{}'", _c4prc(proc.curr()));
2242 _RYML_CB_ASSERT(this->callbacks(), proc.curr() ==
' ' || proc.curr() ==
'\t');
2244 const size_t first_pos = proc.rpos > 0 ? proc.src.first_not_of(
" \t", proc.rpos) : proc.src.first_not_of(
' ', proc.rpos);
2245 if(first_pos !=
npos)
2247 const char first_char = proc.src[first_pos];
2248 _c4dbgfws(
"firstnonws='{}'@{}", _c4prc(first_char), first_pos);
2249 if(first_char ==
'\n' || first_char ==
'\r')
2251 _c4dbgfws(
"whitespace is trailing on line",
"");
2252 proc.skip(first_pos - proc.rpos);
2257 _c4dbgfws(
"legit whitespace. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
2261 _c4dbgfws(
"whitespace is trailing on line",
"");
// Handles whitespace at the read position; when everything up to the end of
// the source is whitespace, the remaining characters are copied to the output.
2265 template<
class EventHandler>
2266 template<
class FilterProcessor>
2267 void ParseEngine<EventHandler>::_filter_ws_copy_trailing(FilterProcessor &proc)
2269 if(!_filter_ws_handle_to_first_non_space(proc))
2271 _c4dbgfws(
"... everything else is trailing whitespace - copy {} chars", proc.src.len - proc.rpos);
2272 proc.copy(proc.src.len - proc.rpos);
// Handles whitespace at the read position; when everything up to the end of
// the source is whitespace, the remaining characters are skipped (dropped).
2276 template<
class EventHandler>
2277 template<
class FilterProcessor>
2278 void ParseEngine<EventHandler>::_filter_ws_skip_trailing(FilterProcessor &proc)
2280 if(!_filter_ws_handle_to_first_non_space(proc))
2282 _c4dbgfws(
"... everything else is trailing whitespace - skip {} chars", proc.src.len - proc.rpos);
2283 proc.skip(proc.src.len - proc.rpos);
// Trace helper for plain-scalar filtering: prefixes the message with the
// processor's read and write positions. The second definition expands to
// nothing (presumably selected by a debug conditional not shown here).
2297 #define _c4dbgfps(fmt, ...) _c4dbgpf("filt_plain[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2299 #define _c4dbgfps(fmt, ...)
// Filters a newline inside a plain scalar; the read position must be at '\n'.
// The newlines that follow are counted (taking the indentation into account).
// When there are following newlines, that many '\n' are written to the output.
// A single newline is converted to a space, per the trace message, and a last
// newline followed only by whitespace is treated separately.
// @param proc        filter processor positioned at the newline.
// @param indentation indentation passed on to _count_following_newlines().
2302 template<
class EventHandler>
2303 template<
class FilterProcessor>
2304 void ParseEngine<EventHandler>::_filter_nl_plain(FilterProcessor &C4_RESTRICT proc,
size_t indentation)
2306 _RYML_CB_ASSERT(this->callbacks(), proc.curr() ==
'\n');
2308 _c4dbgfps(
"found newline. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
2309 size_t ii = proc.rpos;
2310 const size_t numnl_following = _count_following_newlines(proc.src, &ii, indentation);
2313 proc.set(
'\n', numnl_following);
2314 _c4dbgfps(
"{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ?
"in the middle" :
"at the end", proc.rpos-ii);
2318 const size_t ret = proc.src.first_not_of(
" \t", proc.rpos+1);
2322 _c4dbgfps(
"single newline. convert to space. ret={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2326 _c4dbgfps(
"last newline, everything else is whitespace. ii={}/{}", ii, proc.src.len);
// Filters a plain scalar using the given filter processor.
// Iterates over the source characters and dispatches on the current one:
// whitespace goes to _filter_ws_skip_trailing(), newlines go to
// _filter_nl_plain(), and carriage returns are ignored (per the trace).
// @param proc        filter processor holding the source and the output.
// @param indentation indentation of the scalar; must not be npos.
// @return proc.result()
2333 template<
class EventHandler>
2334 template<
class FilterProcessor>
2335 auto ParseEngine<EventHandler>::_filter_plain(FilterProcessor &C4_RESTRICT proc,
size_t indentation) -> decltype(proc.result())
2337 _RYML_CB_ASSERT(this->callbacks(), indentation !=
npos);
2338 _c4dbgfps(
"before=[{}]~~~{}~~~", proc.src.len, proc.src);
2340 while(proc.has_more_chars())
2342 const char curr = proc.curr();
2343 _c4dbgfps(
"'{}', sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
2349 _filter_ws_skip_trailing(proc);
2353 _filter_nl_plain(proc, indentation);
2356 _c4dbgfps(
"carriage return, ignore", curr);
2365 _c4dbgfps(
"after[{}]=~~~{}~~~", proc.wpos, proc.sofar());
2367 return proc.result();
// Forwarding entry point for plain-scalar filtering: returns the result of
// _filter_plain(proc, indentation).
// NOTE(review): the signature and the construction of proc are not shown here.
2373 template<
class EventHandler>
2377 return _filter_plain(proc, indentation);
// Second forwarding entry point for plain-scalar filtering: returns the result
// of _filter_plain(proc, indentation).
// NOTE(review): the signature and the construction of proc are not shown here.
2380 template<
class EventHandler>
2384 return _filter_plain(proc, indentation);
// Trace helper for single-quoted scalar filtering: prefixes the message with
// the processor's read and write positions. The second definition expands to
// nothing (presumably selected by a debug conditional not shown here).
2395 #define _c4dbgfsq(fmt, ...) _c4dbgpf("filt_squo[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2397 #define _c4dbgfsq(fmt, ...)
// Filters a newline inside a single-quoted scalar; the read position must be
// at '\n'. The newlines that follow are counted. When there are following
// newlines, that many '\n' are written to the output. A single newline is
// converted to a space, per the trace messages.
2400 template<
class EventHandler>
2401 template<
class FilterProcessor>
2402 void ParseEngine<EventHandler>::_filter_nl_squoted(FilterProcessor &C4_RESTRICT proc)
2404 _RYML_CB_ASSERT(this->callbacks(), proc.curr() ==
'\n');
2406 _c4dbgfsq(
"found newline. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
2407 size_t ii = proc.rpos;
2408 const size_t numnl_following = _count_following_newlines(proc.src, &ii);
2411 proc.set(
'\n', numnl_following);
2412 _c4dbgfsq(
"{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ?
"in the middle" :
"at the end", proc.rpos-ii);
2416 const size_t ret = proc.src.first_not_of(
" \t", proc.rpos+1);
2420 _c4dbgfsq(
"single newline. convert to space. ret={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2425 _c4dbgfsq(
"single newline. convert to space. ii={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
// Filters a single-quoted scalar using the given filter processor.
// Iterates over the source characters and dispatches on the current one:
// whitespace goes to _filter_ws_copy_trailing(), newlines go to
// _filter_nl_squoted(), and a quote followed by another quote is handled as
// "two consecutive squotes" (the YAML escape for one quote). A "filter error"
// is raised in a branch whose condition is not shown here.
// @return proc.result()
2431 template<
class EventHandler>
2432 template<
class FilterProcessor>
2433 auto ParseEngine<EventHandler>::_filter_squoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result())
2435 _c4dbgfsq(
"before=[{}]~~~{}~~~", proc.src.len, proc.src);
2439 while(proc.has_more_chars())
2441 const char curr = proc.curr();
2442 _c4dbgfsq(
"'{}', sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
2448 _filter_ws_copy_trailing(proc);
2452 _filter_nl_squoted(proc);
2460 if(proc.next() ==
'\'')
2462 _c4dbgfsq(
"two consecutive squotes", curr);
2468 _c4err(
"filter error");
2477 _c4dbgfsq(
": #filteredchars={} after=~~~[{}]{}~~~", proc.src.len-proc.sofar().len, proc.sofar().len, proc.sofar());
2479 return proc.result();
// Forwarding entry point for single-quoted scalar filtering: returns the
// result of _filter_squoted(proc).
// NOTE(review): the signature and the construction of proc are not shown here.
2484 template<
class EventHandler>
2488 return _filter_squoted(proc);
// Second forwarding entry point for single-quoted scalar filtering: returns
// the result of _filter_squoted(proc).
// NOTE(review): the signature and the construction of proc are not shown here.
2491 template<
class EventHandler>
2495 return _filter_squoted(proc);
// Trace helper for double-quoted scalar filtering: prefixes the message with
// the processor's read and write positions. The second definition expands to
// nothing (presumably selected by a debug conditional not shown here).
2506 #define _c4dbgfdq(fmt, ...) _c4dbgpf("filt_dquo[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2508 #define _c4dbgfdq(...)
// Filters a newline inside a double-quoted scalar; the read position must be
// at '\n'. The newlines that follow are counted. When there are following
// newlines, that many '\n' are written to the output. A single newline is
// converted to a space, per the trace messages. When the text after the
// newline(s) starts with a backslash followed by a space or tab, the skip is
// extended up to the backslash, per the "extend skip to backslash" trace.
2511 template<
class EventHandler>
2512 template<
class FilterProcessor>
2513 void ParseEngine<EventHandler>::_filter_nl_dquoted(FilterProcessor &C4_RESTRICT proc)
2515 _RYML_CB_ASSERT(this->callbacks(), proc.curr() ==
'\n');
2517 _c4dbgfdq(
"found newline. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
2518 size_t ii = proc.rpos;
2519 const size_t numnl_following = _count_following_newlines(proc.src, &ii);
2522 proc.set(
'\n', numnl_following);
2523 _c4dbgfdq(
"{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ?
"in the middle" :
"at the end", proc.rpos-ii);
2527 const size_t ret = proc.src.first_not_of(
" \t", proc.rpos+1);
2531 _c4dbgfdq(
"single newline. convert to space. ret={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2536 _c4dbgfdq(
"single newline. convert to space. ii={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2538 if(ii < proc.src.len && proc.src.str[ii] ==
'\\')
// '\0' stands in for "no next character" at the end of the source
2541 const char next = ii+1 < proc.src.len ? proc.src.str[ii+1] :
'\0';
2542 if(next ==
' ' || next ==
'\t')
2544 _c4dbgfdq(
"extend skip to backslash",
"");
// Handles a backslash escape inside a double-quoted scalar, dispatching on the
// character that follows the backslash:
//  - '"', '/', ' ', tab: the character itself is emitted;
//  - n, r, t, \, 0, b, f, a, v, e: the matching control character is emitted;
//  - x, u, U: 2, 4 or 8 hex digits are read; \x emits the single byte, while
//    \u and \U decode the code point into readbuf (at most 4 bytes, per the
//    assertion) and emit those bytes;
//  - _, N, L, P: a fixed multi-byte payload is emitted. The byte values are the
//    UTF-8 encodings of U+00A0, U+0085, U+2028 and U+2029 respectively;
//  - an escaped line break is handled by skipping ahead over following
//    whitespace (judging from the loop over ' ' / '\t').
// Any other character is reported as an error ("unknown character").
2552 template<
class EventHandler>
2553 template<
class FilterProcessor>
2554 void ParseEngine<EventHandler>::_filter_dquoted_backslash(FilterProcessor &C4_RESTRICT proc)
2556 char next = proc.next();
2557 _c4dbgfdq(
"backslash, next='{}'", _c4prc(next));
2560 if(proc.rpos+2 < proc.src.len && proc.src.str[proc.rpos+2] ==
'\n')
2564 _c4dbgfdq(
"[{}]: was \\r\\n, now next='\\n'", proc.rpos);
// start scanning after the backslash and the character following it
2570 size_t ii = proc.rpos + 2;
2571 for( ; ii < proc.src.len; ++ii)
2574 if(proc.src.str[ii] ==
' ' || proc.src.str[ii] ==
'\t')
2579 proc.skip(ii - proc.rpos);
2581 else if(next ==
'"' || next ==
'/' || next ==
' ' || next ==
'\t')
2584 proc.translate_esc(next);
2585 _c4dbgfdq(
"here, used '{}'", _c4prc(next));
2587 else if(next ==
'\r')
2591 else if(next ==
'n')
2593 proc.translate_esc(
'\n');
2595 else if(next ==
'r')
2597 proc.translate_esc(
'\r');
2599 else if(next ==
't')
2601 proc.translate_esc(
'\t');
2603 else if(next ==
'\\')
2605 proc.translate_esc(
'\\');
2607 else if(next ==
'x')
// rpos is at the backslash and rpos+1 at 'x'; both digits (rpos+2, rpos+3)
// must lie inside the source
2609 if(C4_UNLIKELY(proc.rpos + 1u + 2u >= proc.src.len))
2610 _c4err(
"\\x requires 2 hex digits. scalar pos={}", proc.rpos);
2611 csubstr codepoint = proc.src.sub(proc.rpos + 2u, 2u);
2612 _c4dbgfdq(
"utf8 ~~~{}~~~ rpos={} rem=~~~{}~~~", codepoint, proc.rpos, proc.src.sub(proc.rpos));
2613 uint8_t byteval = {};
2614 if(C4_UNLIKELY(!
read_hex(codepoint, &byteval)))
2615 _c4err(
"failed to read \\x codepoint. scalar pos={}", proc.rpos);
2616 proc.translate_esc_bulk((
const char*)&byteval, 1u, 3u);
2617 _c4dbgfdq(
"utf8 after rpos={} rem=~~~{}~~~", proc.rpos, proc.src.sub(proc.rpos));
2619 else if(next ==
'u')
// the four digits occupy rpos+2 .. rpos+5
2621 if(C4_UNLIKELY(proc.rpos + 1u + 4u >= proc.src.len))
2622 _c4err(
"\\u requires 4 hex digits. scalar pos={}", proc.rpos);
2624 csubstr codepoint = proc.src.sub(proc.rpos + 2u, 4u);
2625 uint32_t codepoint_val = {};
2626 if(C4_UNLIKELY(!
read_hex(codepoint, &codepoint_val)))
2627 _c4err(
"failed to parse \\u codepoint. scalar pos={}", proc.rpos);
2628 const size_t numbytes = decode_code_point((uint8_t*)readbuf,
sizeof(readbuf), codepoint_val);
// NOTE(review): the message below says "code point" but is given proc.rpos
2629 if(C4_UNLIKELY(numbytes == 0))
2630 _c4err(
"failed to decode code point={}", proc.rpos);
2631 _RYML_CB_ASSERT(callbacks(), numbytes <= 4);
2632 proc.translate_esc_bulk(readbuf, numbytes, 5u);
2634 else if(next ==
'U')
// the eight digits occupy rpos+2 .. rpos+9
2636 if(C4_UNLIKELY(proc.rpos + 1u + 8u >= proc.src.len))
2637 _c4err(
"\\U requires 8 hex digits. scalar pos={}", proc.rpos);
2639 csubstr codepoint = proc.src.sub(proc.rpos + 2u, 8u);
2640 uint32_t codepoint_val = {};
2641 if(C4_UNLIKELY(!
read_hex(codepoint, &codepoint_val)))
2642 _c4err(
"failed to parse \\U codepoint. scalar pos={}", proc.rpos);
2643 const size_t numbytes = decode_code_point((uint8_t*)readbuf,
sizeof(readbuf), codepoint_val);
// NOTE(review): the message below says "code point" but is given proc.rpos
2644 if(C4_UNLIKELY(numbytes == 0))
2645 _c4err(
"failed to decode code point={}", proc.rpos);
2646 _RYML_CB_ASSERT(callbacks(), numbytes <= 4);
2647 proc.translate_esc_bulk(readbuf, numbytes, 9u);
2650 else if(next ==
'0')
2652 proc.translate_esc(
'\0');
2654 else if(next ==
'b')
2656 proc.translate_esc(
'\b');
2658 else if(next ==
'f')
2660 proc.translate_esc(
'\f');
2662 else if(next ==
'a')
2664 proc.translate_esc(
'\a');
2666 else if(next ==
'v')
2668 proc.translate_esc(
'\v');
2670 else if(next ==
'e')
2672 proc.translate_esc(
'\x1b');
2674 else if(next ==
'_')
// bytes C2 A0: UTF-8 encoding of U+00A0
2677 const char payload[] = {
2678 _RYML_CHCONST(-0x3e, 0xc2),
2679 _RYML_CHCONST(-0x60, 0xa0),
2681 proc.translate_esc_bulk(payload, 2, 1);
2683 else if(next ==
'N')
// bytes C2 85: UTF-8 encoding of U+0085
2686 const char payload[] = {
2687 _RYML_CHCONST(-0x3e, 0xc2),
2688 _RYML_CHCONST(-0x7b, 0x85),
2690 proc.translate_esc_bulk(payload, 2, 1);
2692 else if(next ==
'L')
// bytes E2 80 A8: UTF-8 encoding of U+2028; three output bytes replace two
// input characters, hence the "extending" variant
2695 const char payload[] = {
2696 _RYML_CHCONST(-0x1e, 0xe2),
2697 _RYML_CHCONST(-0x80, 0x80),
2698 _RYML_CHCONST(-0x58, 0xa8),
2700 proc.translate_esc_extending(payload, 3, 1);
2702 else if(next ==
'P')
// bytes E2 80 A9: UTF-8 encoding of U+2029
2705 const char payload[] = {
2706 _RYML_CHCONST(-0x1e, 0xe2),
2707 _RYML_CHCONST(-0x80, 0x80),
2708 _RYML_CHCONST(-0x57, 0xa9),
2710 proc.translate_esc_extending(payload, 3, 1);
2712 else if(next ==
'\0')
2718 _c4err(
"unknown character '{}' after '\\' pos={}", _c4prc(next), proc.rpos);
2720 _c4dbgfdq(
"backslash...sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
// Filters a double-quoted scalar using the given filter processor.
// Iterates over the source characters and dispatches on the current one:
// whitespace goes to _filter_ws_copy_trailing(), newlines go to
// _filter_nl_dquoted(), carriage returns are ignored (per the trace), and
// backslashes go to _filter_dquoted_backslash().
// @return proc.result()
2724 template<
class EventHandler>
2725 template<
class FilterProcessor>
2726 auto ParseEngine<EventHandler>::_filter_dquoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result())
2728 _c4dbgfdq(
"before=[{}]~~~{}~~~", proc.src.len, proc.src);
2731 while(proc.has_more_chars())
2733 const char curr = proc.curr();
2734 _c4dbgfdq(
"'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
2741 _filter_ws_copy_trailing(proc);
2747 _filter_nl_dquoted(proc);
2752 _c4dbgfdq(
"carriage return, ignore", curr);
2758 _filter_dquoted_backslash(proc);
2768 _c4dbgfdq(
"after[{}]=~~~{}~~~", proc.wpos, proc.sofar());
2769 return proc.result();
// Forwarding entry point for double-quoted scalar filtering: returns the
// result of _filter_dquoted(proc).
// NOTE(review): the signature and the construction of proc are not shown here.
2775 template<
class EventHandler>
2779 return _filter_dquoted(proc);
// Second forwarding entry point for double-quoted scalar filtering: returns
// the result of _filter_dquoted(proc).
// NOTE(review): the signature and the construction of proc are not shown here.
2782 template<
class EventHandler>
2786 return _filter_dquoted(proc);
// Applies the chomp rule to the trailing part of a block scalar.
// On entry the chomp mode must be CLIP, KEEP or STRIP, and the remainder of
// the source must consist only of spaces and line breaks.
// A first loop runs up to a computed position `last`, covering lines whose
// indentation exceeds the block indentation: after a newline, up to
// `indentation` leading spaces are skipped and any spaces beyond that are
// copied. The remaining characters are then treated according to the chomp
// mode, per the traces: CLIP adds a missing newline, KEEP copies all remaining
// newlines, STRIP drops the remaining characters.
// NOTE(review): the signature and several conditions are not shown here.
2795 template<
class EventHandler>
2796 template<
class FilterProcessor>
2799 _RYML_CB_ASSERT(this->callbacks(), chomp == CHOMP_CLIP || chomp == CHOMP_KEEP || chomp == CHOMP_STRIP);
2800 _RYML_CB_ASSERT(this->callbacks(), proc.rem().first_not_of(
" \n\r") ==
npos);
2804 #define _c4dbgchomp(fmt, ...) _c4dbgpf("chomp[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2806 #define _c4dbgchomp(...)
2814 _c4dbgchomp(
"found newline and larger indentation. last={}", last);
2815 last = proc.rpos + last + size_t(1) + indentation;
2816 _RYML_CB_ASSERT(this->callbacks(), last <= proc.src.len);
2818 while((proc.rpos < last) && proc.has_more_chars())
2820 const char curr = proc.curr();
2826 _c4dbgchomp(
"newline! remlen={}", proc.rem().len);
2829 csubstr at_next_line = proc.rem();
2830 if(at_next_line.begins_with(
' '))
2832 _c4dbgchomp(
"next line begins with spaces. indentation={}", indentation);
2834 size_t first_non_space = at_next_line.first_not_of(
' ');
2835 _c4dbgchomp(
"first_non_space={}", first_non_space);
2836 if(first_non_space ==
npos)
2838 _c4dbgchomp(
"{} spaces, to the end", at_next_line.len);
2839 first_non_space = at_next_line.len;
2841 if(first_non_space <= indentation)
2843 _c4dbgchomp(
"skip spaces={}<=indentation={}", first_non_space, indentation);
2844 proc.skip(first_non_space);
2848 _c4dbgchomp(
"skip indentation={}<spaces={}", indentation, first_non_space);
2849 proc.skip(indentation);
2851 _c4dbgchomp(
"copy {}={}-{} spaces", first_non_space - indentation, first_non_space, indentation);
2852 proc.copy(first_non_space - indentation);
2861 _c4err(
"parse error");
2873 bool had_one =
false;
2874 while(proc.has_more_chars())
2876 const char curr = proc.curr();
2897 _c4dbgchomp(
"chomp=CLIP: add missing newline @{}", proc.wpos);
2904 _c4dbgchomp(
"chomp=KEEP: copy all remaining new lines of {} characters", proc.rem().len);
2905 while(proc.has_more_chars())
2907 const char curr = proc.curr();
2926 _c4dbgchomp(
"chomp=STRIP: strip {} characters", proc.rem().len);
// Trace helper for block-scalar filtering: prefixes the message with the
// processor's read and write positions. The second definition expands to
// nothing (presumably selected by a debug conditional not shown here).
2938 #define _c4dbgfb(fmt, ...) _c4dbgpf("filt_block[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2940 #define _c4dbgfb(...)
// Skips the indentation at the start of a block-scalar line.
// Counts the spaces before the next non-space character of the remainder.
// When there are fewer spaces than the indentation, only those spaces are
// skipped (per the trace); otherwise exactly `indentation` characters are
// skipped. The section under RYML_NO_COVERAGE__TO_BE_DELETED covers the case
// where the remainder is all spaces.
// @param proc        filter processor positioned at the start of a line.
// @param indentation indentation of the block scalar.
2943 template<
class EventHandler>
2944 template<
class FilterProcessor>
2945 void ParseEngine<EventHandler>::_filter_block_indentation(FilterProcessor &C4_RESTRICT proc,
size_t indentation)
2947 csubstr rem = proc.rem();
2950 size_t first = rem.first_not_of(
' ');
2953 _c4dbgfb(
"{} spaces follow before next nonws character", first);
2954 if(first < indentation)
2956 _c4dbgfb(
"skip {}<{} spaces from indentation", first, indentation);
2961 _c4dbgfb(
"skip {} spaces from indentation", indentation);
2962 proc.skip(indentation);
2965 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
2968 _c4dbgfb(
"all spaces to the end: {} spaces", first);
2972 if(first < indentation)
2974 _c4dbgfb(
"skip everything", first);
2975 proc.skip(proc.src.len - proc.rpos);
2979 _c4dbgfb(
"skip {} spaces from indentation", indentation);
2980 proc.skip(indentation);
// Measure the scalar contents with trailing spaces, '\n' and '\r' trimmed, and
// return that length.
// When chomp is CHOMP_KEEP and the source is non-empty, a loop walks the
// characters of the source (per the trace message, to keep its newlines).
// NOTE(review): the condition guarding the "all whitespace" path and the loop
// body are not visible here.
2988 template<
class EventHandler>
2989 template<
class FilterProcessor>
2990 size_t ParseEngine<EventHandler>::_handle_all_whitespace(FilterProcessor &C4_RESTRICT proc, BlockChomp_e chomp)
2992 csubstr contents = proc.src.trimr(
" \n\r");
2993 _c4dbgfb(
"ws: contents_len={} wslen={}", contents.len, proc.src.len-contents.len);
2996 _c4dbgfb(
"ws: all whitespace: len={}", proc.src.len);
2997 if(chomp == CHOMP_KEEP && proc.src.len)
2999 _c4dbgfb(
"ws: chomp=KEEP all {} newlines", proc.src.count(
'\n'));
3000 while(proc.has_more_chars())
3002 const char curr = proc.curr();
// length of the contents without the trailing whitespace
3014 return contents.len;
3017 template<
class EventHandler>
3018 template<
class FilterProcessor>
3019 size_t ParseEngine<EventHandler>::_extend_to_chomp(FilterProcessor &C4_RESTRICT proc,
size_t contents_len)
3021 _c4dbgfb(
"contents_len={}", contents_len);
3023 _RYML_CB_ASSERT(this->callbacks(), contents_len > 0u);
3027 size_t firstnewl = proc.src.first_of(
'\n', contents_len);
3028 if(firstnewl !=
npos)
3030 contents_len = firstnewl;
3031 _c4dbgfb(
"contents_len={} <--- firstnewl={}", contents_len, firstnewl);
3035 contents_len = proc.src.len;
3036 _c4dbgfb(
"contents_len={} <--- src.len={}", contents_len, proc.src.len);
3039 return contents_len;
// Trace helper for literal block filtering: forwards to _c4dbgpf, prefixing
// the message with the read/write positions of the local `proc`.
3051 #define _c4dbgfbl(fmt, ...) _c4dbgpf("filt_block_lit[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
// No-op variant (presumably selected by a preprocessor conditional that is not shown here).
3053 #define _c4dbgfbl(...)
// Filter a literal block scalar through `proc` and return proc.result().
// Visible steps: measure the contents with _handle_all_whitespace(), extend
// them with _extend_to_chomp(), strip the indentation of the first line, walk
// the contents (stripping indentation again after each newline), and finally
// apply the chomping with _filter_chomp().
// NOTE(review): the condition of the early return and the per-character
// handling inside the loop are not visible here.
3056 template<
class EventHandler>
3057 template<
class FilterProcessor>
3058 auto ParseEngine<EventHandler>::_filter_block_literal(FilterProcessor &C4_RESTRICT proc,
size_t indentation, BlockChomp_e chomp) -> decltype(proc.result())
3060 _c4dbgfbl(
"indentation={} before=[{}]~~~{}~~~", indentation, proc.src.len, proc.src);
3062 size_t contents_len = _handle_all_whitespace(proc, chomp);
3064 return proc.result();
3066 contents_len = _extend_to_chomp(proc, contents_len);
3068 _c4dbgfbl(
"to filter=[{}]~~~{}~~~", contents_len, proc.src.first(contents_len));
3070 _filter_block_indentation(proc, indentation);
// process only the contents; the remainder is left for the chomp step
3073 while(proc.has_more_chars(contents_len))
3075 const char curr = proc.curr();
3076 _c4dbgfbl(
"'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3081 _c4dbgfbl(
"found newline. skip indentation on the next line", curr);
3083 _filter_block_indentation(proc, indentation);
3095 _c4dbgfbl(
"before chomp: #tochomp={} sofar=[{}]~~~{}~~~", proc.rem().len, proc.sofar().len, proc.sofar());
3097 _filter_chomp(proc, chomp, indentation);
3099 _c4dbgfbl(
"final=[{}]~~~{}~~~", proc.sofar().len, proc.sofar());
3101 return proc.result();
// Wrapper forwarding to _filter_block_literal() with a local `proc`.
// NOTE(review): the signature and the construction of `proc` are not visible
// here; presumably one of the filter_scalar_block_literal* entry points.
3106 template<
class EventHandler>
3110 return _filter_block_literal(proc, indentation, chomp);
// Wrapper forwarding to _filter_block_literal() with a local `proc`.
// NOTE(review): the signature and the construction of `proc` are not visible
// here; presumably one of the filter_scalar_block_literal* entry points.
3113 template<
class EventHandler>
3117 return _filter_block_literal(proc, indentation, chomp);
// Trace helper for folded block filtering: forwards to _c4dbgpf, prefixing
// the message with the read/write positions of the local `proc`.
3127 #define _c4dbgfbf(fmt, ...) _c4dbgpf("filt_block_folded[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
// No-op variant (presumably selected by a preprocessor conditional that is not shown here).
3129 #define _c4dbgfbf(...)
// Handle the leading part of a folded block scalar: strip the indentation,
// then walk the first `len` characters. Indentation is stripped again inside
// the loop; when extra leading whitespace (spaces/tabs) is found, the work is
// handed to _filter_block_folded_indented_block(). Per the last trace message,
// the walk stops at a character that is neither space nor newline.
// NOTE(review): the branch selection inside the loop is not visible here.
3133 template<
class EventHandler>
3134 template<
class FilterProcessor>
3135 void ParseEngine<EventHandler>::_filter_block_folded_newlines_leading(FilterProcessor &C4_RESTRICT proc,
size_t indentation,
size_t len)
3137 _filter_block_indentation(proc, indentation);
3138 while(proc.has_more_chars(len))
3140 const char curr = proc.curr();
3141 _c4dbgfbf(
"'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3147 _filter_block_indentation(proc, indentation);
// number of leading spaces/tabs in what remains
3155 size_t first = proc.rem().first_not_of(
" \t");
3158 first = proc.rem().len;
3159 _c4dbgfbf(
"... indentation increased to {}", first);
3160 _filter_block_folded_indented_block(proc, indentation, len, first);
3164 _c4dbgfbf(
"newl leading: not space, not newline. stop.", 0);
// Fold one newline of a run of newlines in a folded block scalar, and return
// the write position recorded for the first newline of the run.
// Per the trace messages: the first newline is turned into a space (its write
// position is remembered), the second turns that space back into a newline
// via proc.set_at(), and subsequent newlines are copied.
// NOTE(review): the dispatch on `num_newl` and the calls that write or skip
// the current character are not visible here.
3170 template<
class EventHandler>
3171 template<
class FilterProcessor>
3172 size_t ParseEngine<EventHandler>::_filter_block_folded_newlines_compress(FilterProcessor &C4_RESTRICT proc,
size_t num_newl,
size_t wpos_at_first_newl)
3177 _c4dbgfbf(
"... this is the first newline. turn into space. wpos={}", proc.wpos);
3178 wpos_at_first_newl = proc.wpos;
3183 _c4dbgfbf(
"... this is the second newline. prev space (at wpos={}) must be newline", wpos_at_first_newl);
3184 _RYML_CB_ASSERT(this->callbacks(), wpos_at_first_newl !=
npos);
3185 _RYML_CB_ASSERT(this->callbacks(), proc.sofar()[wpos_at_first_newl] ==
' ');
// the space written for the first newline must be the last character written
3186 _RYML_CB_ASSERT(this->callbacks(), wpos_at_first_newl + 1u == proc.wpos);
3188 proc.set_at(wpos_at_first_newl,
'\n');
3189 _RYML_CB_ASSERT(this->callbacks(), proc.sofar()[wpos_at_first_newl] ==
'\n');
3192 _c4dbgfbf(
"... subsequent newline (num_newl={}). copy", num_newl);
3196 return wpos_at_first_newl;
// Process a run of newlines inside a folded block scalar; the current
// character is asserted to be '\n' on entry. Each newline goes through
// _filter_block_folded_newlines_compress() and is followed by indentation
// stripping. When extra leading whitespace is found, the block is handed to
// _filter_block_folded_indented_block() and the remembered position of the
// first newline is reset to npos.
// NOTE(review): the branch selection inside the loop is not visible here.
3199 template<
class EventHandler>
3200 template<
class FilterProcessor>
3201 void ParseEngine<EventHandler>::_filter_block_folded_newlines(FilterProcessor &C4_RESTRICT proc,
size_t indentation,
size_t len)
3203 _RYML_CB_ASSERT(this->callbacks(), proc.curr() ==
'\n');
3204 size_t num_newl = 0;
3205 size_t wpos_at_first_newl =
npos;
3206 while(proc.has_more_chars(len))
3208 const char curr = proc.curr();
3209 _c4dbgfbf(
"'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3214 _c4dbgfbf(
"newline. sofar={}", num_newl);
// num_newl is incremented before being passed on
3250 wpos_at_first_newl = _filter_block_folded_newlines_compress(proc, ++num_newl, wpos_at_first_newl);
3251 _filter_block_indentation(proc, indentation);
// number of leading spaces/tabs in what remains
3257 size_t first = proc.rem().first_not_of(
" \t");
3260 first = proc.rem().len;
3261 _c4dbgfbf(
"... indentation increased to {}", first);
3264 _c4dbgfbf(
"... prev space (at wpos={}) must be newline", wpos_at_first_newl);
3265 proc.set_at(wpos_at_first_newl,
'\n');
3269 _c4dbgfbf(
"... add missing newline", wpos_at_first_newl);
3272 _filter_block_folded_indented_block(proc, indentation, len, first);
3274 wpos_at_first_newl =
npos;
3281 _c4dbgfbf(
"not space, not newline. stop.", 0);
// Process a more-indented block inside a folded block scalar.
// On entry the remaining input must start with `curr_indentation` spaces/tabs
// (or consist only of them); those characters are copied verbatim. The loop
// then walks up to `len`, stripping the base indentation after newlines and
// looking at the first non-space character of the following line.
// NOTE(review): the branch selection inside the loop is not visible here; per
// the trace message one of the branches ends the indented block.
3288 template<
class EventHandler>
3289 template<
class FilterProcessor>
3290 void ParseEngine<EventHandler>::_filter_block_folded_indented_block(FilterProcessor &C4_RESTRICT proc,
size_t indentation,
size_t len,
size_t curr_indentation) noexcept
3292 _RYML_CB_ASSERT(this->callbacks(), (proc.rem().first_not_of(
" \t") == curr_indentation) || (proc.rem().first_not_of(
" \t") ==
npos));
// keep the extra indentation as part of the scalar
3293 if(curr_indentation)
3294 proc.copy(curr_indentation);
3295 while(proc.has_more_chars(len))
3297 const char curr = proc.curr();
3298 _c4dbgfbf(
"'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3304 _filter_block_indentation(proc, indentation);
3305 csubstr rem = proc.rem();
3306 const size_t first = rem.first_not_of(
' ');
3307 _c4dbgfbf(
"newline. firstns={}", first);
3310 const char c = rem[first];
3311 _c4dbgfbf(
"firstns={}='{}'", first, _c4prc(c));
3312 if(c ==
'\n' || c ==
'\r')
3318 _c4dbgfbf(
"done with indented block", first);
3322 else if(first !=
npos)
// Filter a folded block scalar through `proc` and return proc.result().
// Visible steps: measure the contents with _handle_all_whitespace(), extend
// them with _extend_to_chomp(), handle the leading newlines, walk the contents
// (delegating newline runs to _filter_block_folded_newlines()), and finally
// apply the chomping with _filter_chomp().
// NOTE(review): the condition of the early return and the per-character
// handling inside the loop are not visible here.
3343 template<
class EventHandler>
3344 template<
class FilterProcessor>
3345 auto ParseEngine<EventHandler>::_filter_block_folded(FilterProcessor &C4_RESTRICT proc,
size_t indentation, BlockChomp_e chomp) -> decltype(proc.result())
3347 _c4dbgfbf(
"indentation={} before=[{}]~~~{}~~~", indentation, proc.src.len, proc.src);
3349 size_t contents_len = _handle_all_whitespace(proc, chomp);
3351 return proc.result();
3353 contents_len = _extend_to_chomp(proc, contents_len);
3355 _c4dbgfbf(
"to filter=[{}]~~~{}~~~", contents_len, proc.src.first(contents_len));
3357 _filter_block_folded_newlines_leading(proc, indentation, contents_len);
// process only the contents; the remainder is left for the chomp step
3360 while(proc.has_more_chars(contents_len))
3362 const char curr = proc.curr();
3363 _c4dbgfbf(
"'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3369 _filter_block_folded_newlines(proc, indentation, contents_len);
3381 _c4dbgfbf(
"before chomp: #tochomp={} sofar=[{}]~~~{}~~~", proc.rem().len, proc.sofar().len, proc.sofar());
3383 _filter_chomp(proc, chomp, indentation);
3385 _c4dbgfbf(
"final=[{}]~~~{}~~~", proc.sofar().len, proc.sofar());
3387 return proc.result();
// Wrapper forwarding to _filter_block_folded() with a local `proc`.
// NOTE(review): the signature and the construction of `proc` are not visible
// here; presumably one of the filter_scalar_block_folded* entry points.
3392 template<
class EventHandler>
3396 return _filter_block_folded(proc, indentation, chomp);
// Wrapper forwarding to _filter_block_folded() with a local `proc`.
// NOTE(review): the signature and the construction of `proc` are not visible
// here; presumably one of the filter_scalar_block_folded* entry points.
3399 template<
class EventHandler>
3403 return _filter_block_folded(proc, indentation, chomp);
// Filter a plain scalar in place, using the scalar's own length as capacity;
// the result is asserted to be valid.
// NOTE(review): the signature and the return statement are not visible here.
3411 template<
class EventHandler>
3414 _c4dbgpf(
"filtering plain scalar: s=[{}]~~~{}~~~", s.len, s);
3415 FilterResult r = this->filter_scalar_plain_in_place(s, s.len, indentation);
3416 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, r.valid());
3417 _c4dbgpf(
"filtering plain scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
// Filter a single-quoted scalar in place, using the scalar's own length as
// capacity; the result is asserted to be valid.
// NOTE(review): the return statement is not visible here.
3423 template<
class EventHandler>
3424 csubstr ParseEngine<EventHandler>::_filter_scalar_squot(substr s)
3426 _c4dbgpf(
"filtering squo scalar: s=[{}]~~~{}~~~", s.len, s);
3427 FilterResult r = this->filter_scalar_squoted_in_place(s, s.len);
3428 _RYML_CB_ASSERT(this->callbacks(), r.valid());
3429 _c4dbgpf(
"filtering squo scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
// Filter a double-quoted scalar. The in-place filter is tried first; when its
// result is not valid (per the trace: not enough space), a buffer of the
// required length is requested from the event handler's arena and the scalar
// is filtered again into that buffer.
// NOTE(review): the return statements are not visible here.
3436 template<
class EventHandler>
3437 csubstr ParseEngine<EventHandler>::_filter_scalar_dquot(substr s)
3439 _c4dbgpf(
"filtering dquo scalar: s=[{}]~~~{}~~~", s.len, s);
3440 FilterResultExtending r = this->filter_scalar_dquoted_in_place(s, s.len);
3441 if(C4_LIKELY(r.valid()))
3443 _c4dbgpf(
"filtering dquo scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
3448 const size_t len = r.required_len();
3449 _c4dbgpf(
"filtering dquo scalar: not enough space: needs {}, have {}", len, s.len);
// &s is handed to the allocation -- presumably so that `s` can be updated if
// the arena relocates; TODO confirm
3450 substr dst = m_evt_handler->alloc_arena(len, &s);
3451 _c4dbgpf(
"filtering dquo scalar: dst.len={}", dst.len);
3452 _RYML_CB_ASSERT(this->callbacks(), dst.len == len);
3453 FilterResult rsd = this->filter_scalar_dquoted(s, dst);
3454 _c4dbgpf(
"filtering dquo scalar: ... result now needs {} was {}", rsd.required_len(), len);
3455 _RYML_CB_ASSERT(this->callbacks(), rsd.required_len() <= len);
3456 _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, rsd.valid());
3457 _c4dbgpf(
"filtering dquo scalar: success! s=[{}]~~~{}~~~", rsd.get().len, rsd.get());
// Filter a literal block scalar. The in-place filter is tried first; when its
// result is not valid (per the trace: not enough space), a buffer of the
// required length is requested from the event handler's arena and the scalar
// is filtered again into that buffer, whose result is checked to be valid.
// NOTE(review): the return statements are not visible here.
3464 template<
class EventHandler>
3465 csubstr ParseEngine<EventHandler>::_filter_scalar_literal(substr s,
size_t indentation, BlockChomp_e chomp)
3467 _c4dbgpf(
"filtering block literal scalar: s=[{}]~~~{}~~~", s.len, s);
3468 FilterResult r = this->filter_scalar_block_literal_in_place(s, s.len, indentation, chomp);
3469 if(C4_LIKELY(r.valid()))
3471 _c4dbgpf(
"filtering block literal scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
3476 _c4dbgpf(
"filtering block literal scalar: not enough space: needs {}, have {}", r.required_len(), s.len);
3477 substr dst = m_evt_handler->alloc_arena(r.required_len(), &s);
3478 FilterResult rsd = this->filter_scalar_block_literal(s, dst, indentation, chomp);
3479 _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, rsd.valid());
3480 _c4dbgpf(
"filtering block literal scalar: success! s=[{}]~~~{}~~~", rsd.get().len, rsd.get());
// Filter a folded block scalar. The in-place filter is tried first; when its
// result is not valid (per the trace: not enough space), a buffer of the
// required length is requested from the event handler's arena and the scalar
// is filtered again into that buffer, whose result is checked to be valid.
// NOTE(review): the return statements are not visible here.
3487 template<
class EventHandler>
3488 csubstr ParseEngine<EventHandler>::_filter_scalar_folded(substr s,
size_t indentation, BlockChomp_e chomp)
3490 _c4dbgpf(
"filtering block folded scalar: s=[{}]~~~{}~~~", s.len, s);
3491 FilterResult r = this->filter_scalar_block_folded_in_place(s, s.len, indentation, chomp);
3492 if(C4_LIKELY(r.valid()))
3494 _c4dbgpf(
"filtering block folded scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
3499 _c4dbgpf(
"filtering block folded scalar: not enough space: needs {}, have {}", r.required_len(), s.len);
3500 substr dst = m_evt_handler->alloc_arena(r.required_len(), &s);
3501 FilterResult rsd = this->filter_scalar_block_folded(s, dst, indentation, chomp);
3502 _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, rsd.valid());
3503 _c4dbgpf(
"filtering block folded scalar: success! s=[{}]~~~{}~~~", rsd.get().len, rsd.get());
// Return the scanned plain key scalar, filtered when applicable.
// Starts from sc.scalar; with scalar filtering enabled in the options it is
// run through _filter_scalar_plain(), otherwise the key is marked as
// unfiltered on the event handler.
// NOTE(review): per the last trace message there is also a path for scalars
// that need no filtering; the condition selecting it is not visible here.
3511 template<
class EventHandler>
3512 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_plain(ScannedScalar
const& C4_RESTRICT sc,
size_t indentation)
3514 csubstr maybe_filtered = sc.scalar;
3517 if(m_options.scalar_filtering())
3519 maybe_filtered = _filter_scalar_plain(sc.scalar, indentation);
3523 _c4dbgp(
"plain scalar left unfiltered");
3524 m_evt_handler->mark_key_scalar_unfiltered();
3529 _c4dbgp(
"plain scalar doesn't need filtering");
3531 return maybe_filtered;
// Return the scanned plain val scalar, filtered when applicable.
// Starts from sc.scalar; with scalar filtering enabled in the options it is
// run through _filter_scalar_plain(), otherwise the val is marked as
// unfiltered on the event handler.
// NOTE(review): per the last trace message there is also a path for scalars
// that need no filtering; the condition selecting it is not visible here.
3534 template<
class EventHandler>
3535 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_plain(ScannedScalar
const& C4_RESTRICT sc,
size_t indentation)
3537 csubstr maybe_filtered = sc.scalar;
3540 if(m_options.scalar_filtering())
3542 maybe_filtered = _filter_scalar_plain(sc.scalar, indentation);
3546 _c4dbgp(
"plain scalar left unfiltered");
3547 m_evt_handler->mark_val_scalar_unfiltered();
3552 _c4dbgp(
"plain scalar doesn't need filtering");
3554 return maybe_filtered;
// Return the scanned single-quoted key scalar, filtered when applicable.
// Starts from sc.scalar; with scalar filtering enabled in the options it is
// run through _filter_scalar_squot(), otherwise the key is marked as
// unfiltered on the event handler.
// NOTE(review): per the last trace message there is also a path for scalars
// that need no filtering; the condition selecting it is not visible here.
3560 template<
class EventHandler>
3561 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_squot(ScannedScalar
const& C4_RESTRICT sc)
3563 csubstr maybe_filtered = sc.scalar;
3566 if(m_options.scalar_filtering())
3568 maybe_filtered = _filter_scalar_squot(sc.scalar);
3572 _c4dbgp(
"squo key scalar left unfiltered");
3573 m_evt_handler->mark_key_scalar_unfiltered();
3578 _c4dbgp(
"squo key scalar doesn't need filtering");
3580 return maybe_filtered;
// Return the scanned single-quoted val scalar, filtered when applicable.
// Starts from sc.scalar; with scalar filtering enabled in the options it is
// run through _filter_scalar_squot(), otherwise the val is marked as
// unfiltered on the event handler.
// NOTE(review): per the last trace message there is also a path for scalars
// that need no filtering; the condition selecting it is not visible here.
3583 template<
class EventHandler>
3584 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_squot(ScannedScalar
const& C4_RESTRICT sc)
3586 csubstr maybe_filtered = sc.scalar;
3589 if(m_options.scalar_filtering())
3591 maybe_filtered = _filter_scalar_squot(sc.scalar);
3595 _c4dbgp(
"squo val scalar left unfiltered");
3596 m_evt_handler->mark_val_scalar_unfiltered();
3601 _c4dbgp(
"squo val scalar doesn't need filtering");
3603 return maybe_filtered;
// Return the scanned double-quoted key scalar, filtered when applicable.
// Starts from sc.scalar; with scalar filtering enabled in the options it is
// run through _filter_scalar_dquot(), otherwise the key is marked as
// unfiltered on the event handler.
// NOTE(review): per the last trace message there is also a path for scalars
// that need no filtering; the condition selecting it is not visible here.
3609 template<
class EventHandler>
3610 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_dquot(ScannedScalar
const& C4_RESTRICT sc)
3612 csubstr maybe_filtered = sc.scalar;
3615 if(m_options.scalar_filtering())
3617 maybe_filtered = _filter_scalar_dquot(sc.scalar);
3621 _c4dbgp(
"dquo scalar left unfiltered");
3622 m_evt_handler->mark_key_scalar_unfiltered();
3627 _c4dbgp(
"dquo scalar doesn't need filtering");
3629 return maybe_filtered;
// Return the scanned double-quoted val scalar, filtered when applicable.
// Starts from sc.scalar; with scalar filtering enabled in the options it is
// run through _filter_scalar_dquot(), otherwise the val is marked as
// unfiltered on the event handler.
// NOTE(review): per the last trace message there is also a path for scalars
// that need no filtering; the condition selecting it is not visible here.
3632 template<
class EventHandler>
3633 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_dquot(ScannedScalar
const& C4_RESTRICT sc)
3635 csubstr maybe_filtered = sc.scalar;
3638 if(m_options.scalar_filtering())
3640 maybe_filtered = _filter_scalar_dquot(sc.scalar);
3644 _c4dbgp(
"dquo scalar left unfiltered");
3645 m_evt_handler->mark_val_scalar_unfiltered();
3650 _c4dbgp(
"dquo scalar doesn't need filtering");
3652 return maybe_filtered;
3658 template<
class EventHandler>
3659 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_literal(ScannedBlock
const& C4_RESTRICT sb)
3661 csubstr maybe_filtered = sb.scalar;
3662 if(m_options.scalar_filtering())
3664 maybe_filtered = _filter_scalar_literal(sb.scalar, sb.indentation, sb.chomp);
3668 _c4dbgp(
"literal scalar left unfiltered");
3669 m_evt_handler->mark_key_scalar_unfiltered();
3671 return maybe_filtered;
3674 template<
class EventHandler>
3675 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_literal(ScannedBlock
const& C4_RESTRICT sb)
3677 csubstr maybe_filtered = sb.scalar;
3678 if(m_options.scalar_filtering())
3680 maybe_filtered = _filter_scalar_literal(sb.scalar, sb.indentation, sb.chomp);
3684 _c4dbgp(
"literal scalar left unfiltered");
3685 m_evt_handler->mark_val_scalar_unfiltered();
3687 return maybe_filtered;
3693 template<
class EventHandler>
3694 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_folded(ScannedBlock
const& C4_RESTRICT sb)
3696 csubstr maybe_filtered = sb.scalar;
3697 if(m_options.scalar_filtering())
3699 maybe_filtered = _filter_scalar_folded(sb.scalar, sb.indentation, sb.chomp);
3703 _c4dbgp(
"folded scalar left unfiltered");
3704 m_evt_handler->mark_key_scalar_unfiltered();
3706 return maybe_filtered;
3709 template<
class EventHandler>
3710 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_folded(ScannedBlock
const& C4_RESTRICT sb)
3712 csubstr maybe_filtered = sb.scalar;
3713 if(m_options.scalar_filtering())
3715 maybe_filtered = _filter_scalar_folded(sb.scalar, sb.indentation, sb.chomp);
3719 _c4dbgp(
"folded scalar left unfiltered");
3720 m_evt_handler->mark_val_scalar_unfiltered();
3722 return maybe_filtered;
// Trace the effect of adding the flags `on` to the state `s`: prints the
// flags being added, the state's flags before, and the flags after (s->flags|on).
// NOTE(review): the statement that actually updates s->flags is not visible here.
3732 template<
class EventHandler>
3733 void ParseEngine<EventHandler>::add_flags(
ParserFlag_t on, ParserState * s)
// one scratch buffer per flag set rendered to text
3735 char buf1_[64], buf2_[64], buf3_[64];
3736 csubstr buf1 = detail::_parser_flags_to_str(buf1_, on);
3737 csubstr buf2 = detail::_parser_flags_to_str(buf2_, s->flags);
3738 csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags|on);
3739 _c4dbgpf(
"state[{}]: add {}: before={} after={}", s->level, buf1, buf2, buf3);
// Trace the effect of adding the flags `on` and removing the flags `off` on
// the state `s`: prints both sets, the state's flags before, and the flags
// after ((s->flags|on)&(~off)).
// NOTE(review): the signature and the statement that actually updates
// s->flags are not visible here.
3743 template<
class EventHandler>
3746 char buf1_[64], buf2_[64], buf3_[64], buf4_[64];
3747 csubstr buf1 = detail::_parser_flags_to_str(buf1_, on);
3748 csubstr buf2 = detail::_parser_flags_to_str(buf2_, off);
3749 csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags);
3750 csubstr buf4 = detail::_parser_flags_to_str(buf4_, ((s->flags|on)&(~off)));
3751 _c4dbgpf(
"state[{}]: add {} / rem {}: before={} after={}", s->level, buf1, buf2, buf3, buf4);
// Trace the effect of removing the flags `off` from the state `s`: prints the
// flags being removed, the state's flags before, and the flags after
// (s->flags&(~off)).
// NOTE(review): the statement that actually updates s->flags is not visible here.
3756 template<
class EventHandler>
3757 void ParseEngine<EventHandler>::rem_flags(
ParserFlag_t off, ParserState * s)
3759 char buf1_[64], buf2_[64], buf3_[64];
3760 csubstr buf1 = detail::_parser_flags_to_str(buf1_, off);
3761 csubstr buf2 = detail::_parser_flags_to_str(buf2_, s->flags);
3762 csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags&(~off));
3763 _c4dbgpf(
"state[{}]: rem {}: before={} after={}", s->level, buf1, buf2, buf3);
// Render the parser flags set in `flags` as text into `buf`, and return the
// written prefix of `buf`. The local _prflag macro tests one flag (all of its
// bits must be set) and appends the flag's spelled-out name, copying only
// when the text fits in the buffer. The final position is checked against
// the buffer length before returning.
// NOTE(review): the separator handling and the list of flags passed to
// _prflag are not visible here.
3767 inline C4_NO_INLINE csubstr detail::_parser_flags_to_str(substr buf,
ParserFlag_t flags)
3770 bool gotone =
false;
3772 #define _prflag(fl) \
3773 if((flags & fl) == (fl)) \
3777 if(pos + 1 < buf.len) \
3781 csubstr fltxt = #fl; \
3782 if(pos + fltxt.len <= buf.len) \
3783 memcpy(buf.str + pos, fltxt.str, fltxt.len); \
3813 RYML_CHECK(pos <= buf.len);
3815 return buf.first(pos);
// Return the part of the parsed buffer starting at loc.offset; the offset is
// asserted to lie inside the buffer.
// NOTE(review): the signature is not visible here.
3825 template<
class EventHandler>
3828 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, loc.
offset < m_buf.len);
3829 return m_buf.sub(loc.
offset);
// Location lookup for a node reference: asserts that the node is readable and
// forwards to the (tree, id) overload.
// NOTE(review): the signature is not visible here.
3832 template<
class EventHandler>
3835 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, node.
readable());
3836 return location(*node.
tree(), node.
id());
// Location lookup for a node of a tree: tries _location_from_node() first and
// otherwise falls back to the location of the start of the buffer.
// NOTE(review): the signature, the declaration of `loc` and the return taken
// when the lookup succeeds are not visible here.
3839 template<
class EventHandler>
3844 if(_location_from_node(tree, node, &loc, 0))
3846 return val_location(m_buf.str);
// Try to find a location for a node. Visible attempts, in order: the node's
// key string, the node's val string (both asserted to lie inside the parsed
// buffer), the container lookup (_location_from_cont), and then recursive
// lookups on `prev`, `next` and `parent` with level+1.
// NOTE(review): the signature, the conditions guarding each attempt, the
// definitions of prev/next/parent and the return statements are not visible here.
3849 template<
class EventHandler>
3854 csubstr k = tree.
key(node);
3855 if(C4_LIKELY(k.str !=
nullptr))
3857 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, k.is_sub(m_buf));
3858 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.is_super(k));
3859 *loc = val_location(k.str);
3866 csubstr v = tree.
val(node);
3867 if(C4_LIKELY(v.str !=
nullptr))
3869 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, v.is_sub(m_buf));
3870 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.is_super(v));
3871 *loc = val_location(v.str);
3878 if(_location_from_cont(tree, node, loc))
3889 if(_location_from_node(tree, prev, loc, level+1))
3898 if(_location_from_node(tree, next, loc, level+1))
3907 if(_location_from_node(tree, parent, loc, level+1))
// Compute the location of a container node (asserted to be a container).
// For a non-stream node the start pointer is taken from the node's val scalar
// string; when the first child has a key, that key's string is compared with
// the start pointer. The location is then derived from the start pointer. A
// separate path uses the start of the parsed buffer.
// NOTE(review): the statement run when the key precedes the start pointer,
// the else structure and the return statements are not visible here.
3916 template<
class EventHandler>
3917 bool ParseEngine<EventHandler>::_location_from_cont(Tree
const& tree,
id_type node, Location *C4_RESTRICT loc)
const
3919 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, tree.is_container(node));
3920 if(!tree.is_stream(node))
3922 const char *node_start = tree._p(node)->m_val.scalar.str;
3923 if(tree.has_children(node))
3925 id_type child = tree.first_child(node);
3926 if(tree.has_key(child))
3929 csubstr k = tree.key(child);
3930 if(k.str && node_start > k.str)
3934 *loc = val_location(node_start);
3939 *loc = val_location(m_buf.str);
// Translate a pointer into the parsed buffer into a location.
// A null pointer yields {m_file, 0, 0, 0}. Otherwise the locations option
// must be enabled (checked), the newline-offset table must have been built
// for the current buffer (asserted), and `val` must point inside the buffer
// (checked). The byte offset of `val` is looked up in the newline-offset
// table: the line is the index of the entry found, and the column is the
// distance from the previous newline, minus one.
// NOTE(review): both a linear scan and a step-based search over the table are
// visible; the condition choosing between them is not.
3945 template<
class EventHandler>
3948 if(C4_UNLIKELY(val ==
nullptr))
3949 return {m_file, 0, 0, 0};
3950 _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_options.locations());
3953 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.str == m_newline_offsets_buf.str);
3954 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.len == m_newline_offsets_buf.len);
3955 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_options.locations());
3956 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !_locations_dirty());
3957 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_newline_offsets !=
nullptr);
3958 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_size > 0);
3960 csubstr src = m_buf;
3961 _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, val !=
nullptr || src.str ==
nullptr);
3962 _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, (val >= src.begin() && val <= src.end()) || (src.str ==
nullptr && val ==
nullptr));
3964 using lineptr_type =
size_t const* C4_RESTRICT;
3965 lineptr_type lineptr =
nullptr;
// byte offset of val from the start of the buffer
3966 size_t offset = (size_t)(val - src.begin());
3970 for(lineptr_type curr = m_newline_offsets, last = m_newline_offsets + m_newline_offsets_size; curr < last; ++curr)
3985 size_t count = m_newline_offsets_size;
3988 lineptr = m_newline_offsets;
3992 it = lineptr + step;
// the entry found must be the first newline offset past `offset`
4004 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, lineptr >= m_newline_offsets);
4005 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, lineptr <= m_newline_offsets + m_newline_offsets_size);
4006 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, *lineptr > offset);
4010 loc.
line = (size_t)(lineptr - m_newline_offsets);
4011 if(lineptr > m_newline_offsets)
4012 loc.
col = (offset - *(lineptr-1) - 1u);
// Build the newline-offset table for the current buffer: remembers the buffer
// the table was built for, sizes the table to one entry per '\n' plus one,
// stores the offset of every '\n', and appends the buffer length as the
// final entry.
// NOTE(review): the signature is not visible here.
4018 template<
class EventHandler>
4021 m_newline_offsets_buf = m_buf;
4022 size_t numnewlines = 1u + m_buf.count(
'\n');
4023 _resize_locations(numnewlines);
4024 m_newline_offsets_size = 0;
4025 for(
size_t i = 0; i < m_buf.len; i++)
4026 if(m_buf[i] ==
'\n')
4027 m_newline_offsets[m_newline_offsets_size++] = i;
// sentinel entry: the end of the buffer
4028 m_newline_offsets[m_newline_offsets_size++] = m_buf.len;
4029 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_size == numnewlines);
4032 template<
class EventHandler>
4033 void ParseEngine<EventHandler>::_resize_locations(
size_t numnewlines)
4035 if(numnewlines > m_newline_offsets_capacity)
4037 if(m_newline_offsets)
4038 _RYML_CB_FREE(m_evt_handler->m_stack.m_callbacks, m_newline_offsets,
size_t, m_newline_offsets_capacity);
4039 m_newline_offsets = _RYML_CB_ALLOC_HINT(m_evt_handler->m_stack.m_callbacks,
size_t, numnewlines, m_newline_offsets);
4040 m_newline_offsets_capacity = numnewlines;
4044 template<
class EventHandler>
4045 bool ParseEngine<EventHandler>::_locations_dirty()
const
4047 return !m_newline_offsets_size;
// Skip leading whitespace and a trailing comment in the remainder of the
// current line, when there is any remainder. If the remainder starts with a
// space or tab it is re-read after the whitespace has been dealt with; if it
// then begins with '#', the whole remainder is consumed as a comment.
// NOTE(review): the call that skips the whitespace is not visible here.
4055 template<
class EventHandler>
4056 void ParseEngine<EventHandler>::_handle_flow_skip_whitespace()
4058 if(m_evt_handler->m_curr->line_contents.rem.len > 0)
4060 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
4061 if(rem.str[0] ==
' ' || rem.str[0] ==
'\t')
4063 _c4dbgpf(
"starts with whitespace: '{}'", _c4prc(rem.str[0]));
4065 rem = m_evt_handler->m_curr->line_contents.rem;
4068 if(rem.begins_with(
'#'))
4070 _c4dbgpf(
"it's a comment: {}", m_evt_handler->m_curr->line_contents.rem);
4071 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
// Store an annotation (string, indentation, line) in the entry at index
// dst->num_entries of `dst`. Raises an error when the fixed-size annotations
// array is already full.
// NOTE(review): the statement advancing dst->num_entries is not visible here.
4080 template<
class EventHandler>
4081 void ParseEngine<EventHandler>::_add_annotation(Annotation *C4_RESTRICT dst, csubstr str,
size_t indentation,
size_t line)
4083 _c4dbgpf(
"store annotation[{}]: '{}' indentation={} line={}", dst->num_entries, str, indentation, line);
4084 if(C4_UNLIKELY(dst->num_entries >= C4_COUNTOF(dst->annotations)))
4085 _c4err(
"too many annotations");
4086 dst->annotations[dst->num_entries].str = str;
4087 dst->annotations[dst->num_entries].indentation = indentation;
4088 dst->annotations[dst->num_entries].line = line;
4092 template<
class EventHandler>
4093 void ParseEngine<EventHandler>::_clear_annotations(Annotation *C4_RESTRICT dst)
4095 dst->num_entries = 0;
// Compiled only when RYML_NO_COVERAGE__TO_BE_DELETED is defined.
// When exactly one anchor or one tag is pending, computes how far to skip:
// the largest of the current reference indentation and the indentation of
// the pending anchor/tag, and then skips spaces up to that column.
// NOTE(review): the return statements are not visible here.
4098 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
4099 template<
class EventHandler>
4100 bool ParseEngine<EventHandler>::_handle_indentation_from_annotations()
4102 if(m_pending_anchors.num_entries == 1u || m_pending_tags.num_entries == 1u)
4104 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.num_entries < 2u && m_pending_tags.num_entries < 2u);
4105 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.annotations[0].line < m_evt_handler->m_curr->pos.line);
// NOTE(review): this assert reads annotations[1] although fewer than two tags
// are pending (see the assert above); index 0 looks intended -- confirm.
4106 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_tags.annotations[1].line < m_evt_handler->m_curr->pos.line);
4107 size_t to_skip = m_evt_handler->m_curr->indref;
4108 if(m_pending_anchors.num_entries)
4109 to_skip = m_pending_anchors.annotations[0].indentation > to_skip ? m_pending_anchors.annotations[0].indentation : to_skip;
4110 if(m_pending_tags.num_entries)
4111 to_skip = m_pending_tags.annotations[0].indentation > to_skip ? m_pending_tags.annotations[0].indentation : to_skip;
4112 _c4dbgpf(
"annotations pending, skip indentation up to {}!", to_skip);
4113 _maybe_skipchars_up_to(
' ', to_skip);
4120 template<
class EventHandler>
4121 bool ParseEngine<EventHandler>::_annotations_require_key_container()
const
4123 return m_pending_tags.num_entries > 1 || m_pending_anchors.num_entries > 1;
4126 template<
class EventHandler>
4127 void ParseEngine<EventHandler>::_check_tag(csubstr tag)
4129 if(!tag.begins_with(
"!<"))
4131 if(C4_UNLIKELY(tag.first_of(
"[]{},") !=
npos))
4132 _RYML_CB_ERR_(m_evt_handler->m_stack.m_callbacks,
"tags must not contain any of '[]{},'", m_evt_handler->m_curr->pos);
4136 if(C4_UNLIKELY(!tag.ends_with(
'>')))
4137 _RYML_CB_ERR_(m_evt_handler->m_stack.m_callbacks,
"malformed tag", m_evt_handler->m_curr->pos);
4141 template<
class EventHandler>
4142 void ParseEngine<EventHandler>::_handle_annotations_before_blck_key_scalar()
4144 _c4dbgpf(
"annotations_before_blck_key_scalar, node={}", m_evt_handler->m_curr->node_id);
4145 if(m_pending_tags.num_entries)
4147 _c4dbgpf(
"annotations_before_blck_key_scalar, #tags={}", m_pending_tags.num_entries);
4148 if(C4_LIKELY(m_pending_tags.num_entries == 1))
4150 _check_tag(m_pending_tags.annotations[0].str);
4151 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4152 _clear_annotations(&m_pending_tags);
4156 _c4err(
"too many tags");
4159 if(m_pending_anchors.num_entries)
4161 _c4dbgpf(
"annotations_before_blck_key_scalar, #anchors={}", m_pending_anchors.num_entries);
4162 if(C4_LIKELY(m_pending_anchors.num_entries == 1))
4164 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4165 _clear_annotations(&m_pending_anchors);
4169 _c4err(
"too many anchors");
4174 template<
class EventHandler>
4175 void ParseEngine<EventHandler>::_handle_annotations_before_blck_val_scalar()
4177 _c4dbgpf(
"annotations_before_blck_val_scalar, node={}", m_evt_handler->m_curr->node_id);
4178 if(m_pending_tags.num_entries)
4180 _c4dbgpf(
"annotations_before_blck_val_scalar, #tags={}", m_pending_tags.num_entries);
4181 if(C4_LIKELY(m_pending_tags.num_entries == 1))
4183 _check_tag(m_pending_tags.annotations[0].str);
4184 m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4185 _clear_annotations(&m_pending_tags);
4189 _c4err(
"too many tags");
4192 if(m_pending_anchors.num_entries)
4194 _c4dbgpf(
"annotations_before_blck_val_scalar, #anchors={}", m_pending_anchors.num_entries);
4195 if(C4_LIKELY(m_pending_anchors.num_entries == 1))
4197 m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4198 _clear_annotations(&m_pending_anchors);
4202 _c4err(
"too many anchors");
4207 template<
class EventHandler>
4208 void ParseEngine<EventHandler>::_handle_annotations_before_start_mapblck(
size_t current_line)
4210 _c4dbgpf(
"annotations_before_start_mapblck, current_line={}", current_line);
4211 if(m_pending_tags.num_entries == 2)
4213 _c4dbgp(
"2 tags, setting entry 0");
4214 _check_tag(m_pending_tags.annotations[0].str);
4215 m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4217 else if(m_pending_tags.num_entries == 1)
4219 _c4dbgpf(
"1 tag. line={}, curr={}", m_pending_tags.annotations[0].line);
4220 if(m_pending_tags.annotations[0].line < current_line)
4222 _c4dbgp(
"...tag is for the map. setting it.");
4223 _check_tag(m_pending_tags.annotations[0].str);
4224 m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4225 _clear_annotations(&m_pending_tags);
4229 if(m_pending_anchors.num_entries == 2)
4231 _c4dbgp(
"2 anchors, setting entry 0");
4232 m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4234 else if(m_pending_anchors.num_entries == 1)
4236 _c4dbgpf(
"1 anchor. line={}, curr={}", m_pending_anchors.annotations[0].line);
4237 if(m_pending_anchors.annotations[0].line < current_line)
4239 _c4dbgp(
"...anchor is for the map. setting it.");
4240 m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4241 _clear_annotations(&m_pending_anchors);
4246 template<
class EventHandler>
4247 void ParseEngine<EventHandler>::_handle_annotations_before_start_mapblck_as_key()
4249 _c4dbgp(
"annotations_before_start_mapblck_as_key");
4250 if(m_pending_tags.num_entries == 2)
4252 _check_tag(m_pending_tags.annotations[0].str);
4253 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4255 if(m_pending_anchors.num_entries == 2)
4257 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
// After a block map has started: apply the pending tag/anchor to the key and
// set the indentation. At most two tags and two anchors may be pending
// (asserted). When any are pending, the key indentation is recomputed with
// _select_indentation_from_annotations(); the key tag/anchor is then taken
// from pending entry 0 or entry 1 depending on the number of entries, and the
// pending list is cleared. Finally the indentation is set.
// NOTE(review): the case labels of the two switches are not visible here;
// presumably entry 0 is used for one pending entry and entry 1 for two.
4261 template<
class EventHandler>
4262 void ParseEngine<EventHandler>::_handle_annotations_and_indentation_after_start_mapblck(
size_t key_indentation,
size_t key_line)
4264 _c4dbgp(
"annotations_after_start_mapblck");
4265 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries <= 2);
4266 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.num_entries <= 2);
4267 if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
4269 key_indentation = _select_indentation_from_annotations(key_indentation, key_line);
4270 switch(m_pending_tags.num_entries)
4273 _check_tag(m_pending_tags.annotations[0].str);
4274 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4275 _clear_annotations(&m_pending_tags);
4278 _check_tag(m_pending_tags.annotations[1].str);
4279 m_evt_handler->set_key_tag(m_pending_tags.annotations[1].str);
4280 _clear_annotations(&m_pending_tags);
4283 switch(m_pending_anchors.num_entries)
4286 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4287 _clear_annotations(&m_pending_anchors);
4290 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[1].str);
4291 _clear_annotations(&m_pending_anchors);
4295 _set_indentation(key_indentation);
// Choose the indentation to use given the pending anchors and tags (at least
// one must be pending; asserted). Starts from the first pending anchor, or
// the first pending tag when there are no anchors, and scans all pending
// anchors and tags comparing their line and indentation with the current
// choice. Returns `val_indentation` when the chosen annotation lies on a line
// before `val_line`, and the annotation's indentation otherwise.
// NOTE(review): the statements run when a comparison succeeds are not visible
// here; presumably they make `curr` point to the compared annotation.
4298 template<
class EventHandler>
4299 size_t ParseEngine<EventHandler>::_select_indentation_from_annotations(
size_t val_indentation,
size_t val_line)
4301 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries || m_pending_anchors.num_entries);
4303 auto const *C4_RESTRICT curr = m_pending_anchors.num_entries ? &m_pending_anchors.annotations[0] : &m_pending_tags.annotations[0];
4304 for(
size_t i = 0; i < m_pending_anchors.num_entries; ++i)
4306 auto const& C4_RESTRICT ann = m_pending_anchors.annotations[i];
4307 if(ann.line > curr->line)
4309 else if(ann.indentation < curr->indentation)
4312 for(
size_t j = 0; j < m_pending_tags.num_entries; ++j)
4314 auto const& C4_RESTRICT ann = m_pending_tags.annotations[j];
4315 if(ann.line > curr->line)
4317 else if(ann.indentation < curr->indentation)
4320 return curr->line < val_line ? val_indentation : curr->indentation;
// Hand a directive line to the event handler. `rem` must be part of the
// remainder of the current line (asserted). The position of the first '#' is
// looked up; one path passes the whole of `rem` as the directive and consumes
// it, the other passes only the text before the '#', with trailing spaces
// and tabs trimmed, and consumes the line up to the '#'.
// NOTE(review): the condition choosing between the two paths is not visible
// here; presumably it is whether a '#' was found.
4323 template<
class EventHandler>
4324 void ParseEngine<EventHandler>::_handle_directive(csubstr rem)
4326 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.is_sub(m_evt_handler->m_curr->line_contents.rem));
4327 const size_t pos = rem.find(
'#');
4328 _c4dbgpf(
"handle_directive: pos={} rem={}", pos, rem);
4331 m_evt_handler->add_directive(rem);
4332 _line_progressed(rem.len);
4336 csubstr to_comment = rem.first(pos);
4337 csubstr trimmed = to_comment.trimr(
" \t");
4338 m_evt_handler->add_directive(trimmed);
4339 _line_progressed(pos);
// Handles parsing inside a flow sequence while in JSON mode.
// Entry state (asserted): RSEQ and FLOW set, RKEY clear, and exactly one of
// RVAL (a value is expected) or RNXT (a separator or the end of the sequence
// is expected). Malformed input is reported through _c4err().
4347 template<
class EventHandler>
4348 void ParseEngine<EventHandler>::_handle_seq_json()
4351 _c4dbgpf(
"handle2_seq_json: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
4353 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
4354 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RSEQ));
4355 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
FLOW));
4356 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL|
RNXT));
// exactly one of RVAL / RNXT
4357 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RVAL) != has_all(
RNXT));
// skip whitespace, then look at what remains of the current line
4359 _handle_flow_skip_whitespace();
4360 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// ---- RVAL: a value is expected. The first remaining character selects the
// action (the dispatch labels are not visible in this excerpt).
4366 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
4367 const char first = rem.str[0];
// the trace label names this handler (seqjson); it previously said mapjson
4368 _c4dbgpf(
"seqjson[RVAL]: '{}'", first);
// double-quoted scalar value
4373 _c4dbgp(
"seqjson[RVAL]: scanning double-quoted scalar");
4374 ScannedScalar sc = _scan_scalar_dquot();
4375 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
4376 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
// nested sequence: open it and consume the opening character
4382 _c4dbgp(
"seqjson[RVAL]: start child seqjson");
4384 m_evt_handler->begin_seq_val_flow();
4386 _line_progressed(1);
// nested map: open it, consume the opening character, and leave this handler
4391 _c4dbgp(
"seqjson[RVAL]: start child mapjson");
4393 m_evt_handler->begin_map_val_flow();
4395 _line_progressed(1);
4396 goto seqjson_finish;
// end of this sequence
4400 _c4dbgp(
"seqjson[RVAL]: end!");
4402 m_evt_handler->end_seq();
4403 _line_progressed(1);
4405 goto seqjson_finish;
// otherwise try a plain scalar; if that fails too, the input is malformed
4411 if(_scan_scalar_seq_json(&sc))
4413 _c4dbgp(
"seqjson[RVAL]: it's a plain scalar.");
4414 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
4415 m_evt_handler->set_val_scalar_plain(maybe_filtered);
4420 _c4err(
"parse error");
// ---- RNXT: a separator or the end of the sequence is expected
4427 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT));
4428 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
4429 const char first = rem.str[0];
// the trace label names this handler (seqjson); it previously said mapjson
4430 _c4dbgpf(
"seqjson[RNXT]: '{}'", first);
// separator: move on to the next sibling value
4435 _c4dbgp(
"seqjson[RNXT]: expect next val");
4437 m_evt_handler->add_sibling();
4438 _line_progressed(1);
// end of this sequence
4443 _c4dbgp(
"seqjson[RNXT]: end!");
4444 m_evt_handler->end_seq();
4445 _line_progressed(1);
4446 goto seqjson_finish;
// anything else is malformed
4449 _c4err(
"parse error");
// ---- continue with the rest of the input; running out of file while the
// sequence is still open is an error
4454 _c4dbgt(
"seqjson: go again", 0);
4455 if(_finished_line())
4457 if(C4_LIKELY(!_finished_file()))
4465 _c4err(
"missing terminating ]");
4471 _c4dbgp(
"seqjson: finish");
// Handles parsing inside a flow map while in JSON mode.
// Entry state (asserted): RMAP and FLOW set, QMRK clear, and exactly one of
// RKEY (a key is expected), RKCL (the colon after a key is expected),
// RVAL (a value is expected) or RNXT (a separator or the end of the map is
// expected). Malformed input is reported through _c4err().
4477 template<
class EventHandler>
4478 void ParseEngine<EventHandler>::_handle_map_json()
4481 _c4dbgpf(
"handle2_map_json: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
4483 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP));
4484 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
FLOW));
4485 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
4486 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RKCL|
RVAL|
RNXT));
// exactly one of the four sub-states is active
4487 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(
RKEY) + has_any(
RKCL) + has_any(
RVAL) + has_any(
RNXT)));
// skip whitespace, then look at what remains of the current line
4489 _handle_flow_skip_whitespace();
4490 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// ---- RKEY: a key is expected. The first remaining character selects the
// action (the dispatch labels are not visible in this excerpt).
4496 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
4497 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
4498 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
4499 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
4500 const char first = rem.str[0];
4501 _c4dbgpf(
"mapjson[RKEY]: '{}'", first);
// double-quoted scalar key
4506 _c4dbgp(
"mapjson[RKEY]: scanning double-quoted scalar");
4507 ScannedScalar sc = _scan_scalar_dquot();
4508 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
4509 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
// end of this map
4515 _c4dbgp(
"mapjson[RKEY]: end!");
4516 m_evt_handler->end_map();
4517 _line_progressed(1);
4518 goto mapjson_finish;
// anything else is malformed
4521 _c4err(
"parse error");
// ---- RVAL: a value is expected
4524 else if(has_any(
RVAL))
4526 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
4527 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
4528 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
4529 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
4530 const char first = rem.str[0];
4531 _c4dbgpf(
"mapjson[RVAL]: '{}'", first);
// double-quoted scalar value
4536 _c4dbgp(
"mapjson[RVAL]: scanning double-quoted scalar");
4537 ScannedScalar sc = _scan_scalar_dquot();
4538 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
4539 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
// nested sequence as value: the new state takes the parent's reference
// indentation; then leave this handler
4545 _c4dbgp(
"mapjson[RVAL]: start val seqjson");
4547 m_evt_handler->begin_seq_val_flow();
4548 _set_indentation(m_evt_handler->m_parent->indref);
4550 _line_progressed(1);
4551 goto mapjson_finish;
// nested map as value: the new state takes the parent's reference indentation
4555 _c4dbgp(
"mapjson[RVAL]: start val mapjson");
4557 m_evt_handler->begin_map_val_flow();
4558 _set_indentation(m_evt_handler->m_parent->indref);
4560 _line_progressed(1);
// otherwise try a plain scalar; if that fails too, the input is malformed
4567 if(_scan_scalar_map_json(&sc))
4569 _c4dbgp(
"mapjson[RVAL]: plain scalar.");
4570 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
4571 m_evt_handler->set_val_scalar_plain(maybe_filtered);
4576 _c4err(
"parse error");
// ---- RKCL: the colon after the key is expected
4582 else if(has_any(
RKCL))
4584 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
4585 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
4586 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
4587 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
4588 const char first = rem.str[0];
4589 _c4dbgpf(
"mapjson[RKCL]: '{}'", first);
// colon found: consume it
4592 _c4dbgp(
"mapjson[RKCL]: found the colon");
4594 _line_progressed(1);
// anything else is malformed
4598 _c4err(
"parse error");
// ---- RNXT: a separator or the end of the map is expected
4601 else if(has_any(
RNXT))
4603 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
4604 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
4605 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
4606 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
4607 _c4dbgpf(
"mapjson[RNXT]: '{}'", rem.str[0]);
// comma: move on to the next key/value pair
4608 if(rem.begins_with(
','))
4610 _c4dbgp(
"mapjson[RNXT]: expect next keyval");
4611 m_evt_handler->add_sibling();
4613 _line_progressed(1);
// closing brace: end of this map
4615 else if(rem.begins_with(
'}'))
4617 _c4dbgp(
"mapjson[RNXT]: end!");
4618 m_evt_handler->end_map();
4619 _line_progressed(1);
4620 goto mapjson_finish;
// anything else is malformed
4624 _c4err(
"parse error");
// ---- continue with the rest of the input; running out of file while the
// map is still open is an error
4629 _c4dbgt(
"mapjson: go again", 0);
4630 if(_finished_line())
4632 if(C4_LIKELY(!_finished_file()))
4640 _c4err(
"missing terminating }");
4646 _c4dbgp(
"mapjson: finish");
// Handles parsing of a map that lives implicitly inside a flow sequence
// (state RSEQIMAP).
// Entry state (asserted): RSEQIMAP set, RKEY clear, exactly one of RVAL,
// RNXT, QMRK or RKCL set, and at least 3 entries on the handler's stack.
// Malformed input is reported through _c4err().
4652 template<
class EventHandler>
4653 void ParseEngine<EventHandler>::_handle_seq_imap()
4656 _c4dbgpf(
"handle2_seq_imap: node_id={} level={} indref={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
4658 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RSEQIMAP));
4659 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
4660 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL|
RNXT|
QMRK|
RKCL));
// exactly one of the four sub-states is active
4661 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == has_all(
RVAL) + has_all(
RNXT) + has_all(
QMRK) + has_all(
RKCL));
4662 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 3);
// skip whitespace, then look at what remains of the current line
4664 _handle_flow_skip_whitespace();
4665 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// ---- RVAL: the value of the implicit map is expected. Once a scalar value
// is set, the implicit map is ended right away.
4671 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL));
4672 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
4673 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
4674 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
4675 const char first = rem.str[0];
4676 _c4dbgpf(
"seqimap[RVAL]: '{}'", _c4prc(first));
// single-quoted scalar value (the opening test is not visible in this excerpt)
4680 _c4dbgp(
"seqimap[RVAL]: scanning single-quoted scalar");
4681 sc = _scan_scalar_squot();
4682 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
4683 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
4684 m_evt_handler->end_map();
4685 goto seqimap_finish;
// double-quoted scalar value
4687 else if(first ==
'"')
4689 _c4dbgp(
"seqimap[RVAL]: scanning double-quoted scalar");
4690 sc = _scan_scalar_dquot();
4691 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
4692 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
4693 m_evt_handler->end_map();
4694 goto seqimap_finish;
// plain scalar value
4697 else if(_scan_scalar_plain_map_flow(&sc))
4699 _c4dbgp(
"seqimap[RVAL]: it's a scalar.");
4700 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
4701 m_evt_handler->set_val_scalar_plain(maybe_filtered);
4702 m_evt_handler->end_map();
4703 goto seqimap_finish;
// nested flow sequence as value: the new state takes the parent's reference
// indentation
4705 else if(first ==
'[')
4707 _c4dbgp(
"seqimap[RVAL]: start child seqflow");
4709 m_evt_handler->begin_seq_val_flow();
4711 _set_indentation(m_evt_handler->m_parent->indref);
4712 _line_progressed(1);
4713 goto seqimap_finish;
// nested flow map as value
4715 else if(first ==
'{')
4717 _c4dbgp(
"seqimap[RVAL]: start child mapflow");
4719 m_evt_handler->begin_map_val_flow();
4721 _set_indentation(m_evt_handler->m_parent->indref);
4722 _line_progressed(1);
4723 goto seqimap_finish;
// separator or end of the enclosing sequence: the value is empty. The
// character itself is not consumed here.
4725 else if(first ==
',' || first ==
']')
4727 _c4dbgp(
"seqimap[RVAL]: finish without val.");
4728 m_evt_handler->set_val_scalar_plain({});
4729 m_evt_handler->end_map();
4730 goto seqimap_finish;
// anchor on the value
4732 else if(first ==
'&')
4734 csubstr anchor = _scan_anchor();
4735 _c4dbgp(
"seqimap[RVAL]: anchor!");
4736 m_evt_handler->set_val_anchor(anchor);
// reference as the value
4738 else if(first ==
'*')
4740 csubstr ref = _scan_ref_seq();
4741 _c4dbgp(
"seqimap[RVAL]: ref!");
4742 m_evt_handler->set_val_ref(ref);
// anything else is malformed
4747 _c4err(
"parse error");
// ---- RNXT: only a separator or the end of the enclosing sequence may
// follow; either one ends the implicit map without being consumed here
4750 else if(has_any(
RNXT))
4752 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT));
4753 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
4754 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
4755 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
4756 const char first = rem.str[0];
4757 _c4dbgpf(
"seqimap[RNXT]: '{}'", _c4prc(first));
4758 if(first ==
',' || first ==
']')
4762 _c4dbgp(
"seqimap: done");
4763 m_evt_handler->end_map();
4764 goto seqimap_finish;
4768 _c4err(
"parse error");
// ---- QMRK: the key of the implicit map is expected (explicit-key state)
4771 else if(has_any(
QMRK))
4773 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
QMRK));
4774 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
4775 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
4776 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
4777 const char first = rem.str[0];
4778 _c4dbgpf(
"seqimap[QMRK]: '{}'", _c4prc(first));
// single-quoted scalar key (the opening test is not visible in this excerpt)
4782 _c4dbgp(
"seqimap[QMRK]: scanning single-quoted scalar");
4783 sc = _scan_scalar_squot();
4784 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
4785 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
// double-quoted scalar key
4789 else if(first ==
'"')
4791 _c4dbgp(
"seqimap[QMRK]: scanning double-quoted scalar");
4792 sc = _scan_scalar_dquot();
4793 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
4794 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
// plain scalar key
4799 else if(_scan_scalar_plain_map_flow(&sc))
4801 _c4dbgp(
"seqimap[QMRK]: it's a scalar.");
4802 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
4803 m_evt_handler->set_key_scalar_plain(maybe_filtered);
// flow sequence used as the key
4807 else if(first ==
'[')
4809 _c4dbgp(
"seqimap[QMRK]: start child seqflow");
4811 m_evt_handler->begin_seq_key_flow();
4813 _set_indentation(m_evt_handler->m_parent->indref);
4814 _line_progressed(1);
4815 goto seqimap_finish;
// flow map used as the key
4817 else if(first ==
'{')
4819 _c4dbgp(
"seqimap[QMRK]: start child mapflow");
4821 m_evt_handler->begin_map_key_flow();
4823 _set_indentation(m_evt_handler->m_parent->indref);
4824 _line_progressed(1);
4825 goto seqimap_finish;
// separator or end of the enclosing sequence: both key and value are empty
4827 else if(first ==
',' || first ==
']')
4829 _c4dbgp(
"seqimap[QMRK]: finish without key.");
4830 m_evt_handler->set_key_scalar_plain({});
4831 m_evt_handler->set_val_scalar_plain({});
4832 m_evt_handler->end_map();
4833 goto seqimap_finish;
// anchor on the key
4835 else if(first ==
'&')
4837 csubstr anchor = _scan_anchor();
4838 _c4dbgp(
"seqimap[QMRK]: anchor!");
4839 m_evt_handler->set_key_anchor(anchor);
// reference as the key
4841 else if(first ==
'*')
4843 csubstr ref = _scan_ref_seq();
4844 _c4dbgp(
"seqimap[QMRK]: ref!");
4845 m_evt_handler->set_key_ref(ref);
// anything else is malformed
4850 _c4err(
"parse error");
// ---- RKCL: the colon after the key is expected
4853 else if(has_any(
RKCL))
4855 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
4856 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
4857 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
4858 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RKCL));
4859 const char first = rem.str[0];
4860 _c4dbgpf(
"seqimap[RKCL]: '{}'", _c4prc(first));
// colon found: consume it
4863 _c4dbgp(
"seqimap[RKCL]: found ':'");
4865 _line_progressed(1);
// separator or end of the enclosing sequence right after the key: the value
// is empty
4868 else if(first ==
',' || first ==
']')
4870 _c4dbgp(
"seqimap[RKCL]: found ','. finish without val");
4871 m_evt_handler->set_val_scalar_plain({});
4872 m_evt_handler->end_map();
4873 goto seqimap_finish;
// anything else is malformed
4877 _c4err(
"parse error");
// ---- continue with the rest of the input; running out of file here is an
// error
4882 _c4dbgt(
"seqimap: go again", 0);
4883 if(_finished_line())
4885 if(C4_LIKELY(!_finished_file()))
4893 _c4err(
"parse error");
4899 _c4dbgp(
"seqimap: finish");
// Handles parsing inside a flow sequence.
// Entry state (asserted): RSEQ and FLOW set, RKEY clear, exactly one of
// RVAL (a value is expected) or RNXT (a separator or the end of the sequence
// is expected), and a reference indentation different from npos.
// Malformed input is reported through _c4err().
4905 template<
class EventHandler>
4906 void ParseEngine<EventHandler>::_handle_seq_flow()
4909 _c4dbgpf(
"handle2_seq_flow: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
4911 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
4912 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RSEQ));
4913 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
FLOW));
4914 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL|
RNXT));
// exactly one of RVAL / RNXT
4915 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RVAL) != has_all(
RNXT));
4916 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref !=
npos);
// skip whitespace, then look at what remains of the current line
4918 _handle_flow_skip_whitespace();
4919 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// ---- RVAL: a value is expected
4925 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
4926 const char first = rem.str[0];
// single-quoted scalar value (the opening test is not visible in this excerpt)
4930 _c4dbgp(
"seqflow[RVAL]: scanning single-quoted scalar");
4931 sc = _scan_scalar_squot();
4932 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
4933 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
// double-quoted scalar value
4936 else if(first ==
'"')
4938 _c4dbgp(
"seqflow[RVAL]: scanning double-quoted scalar");
4939 sc = _scan_scalar_dquot();
4940 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
4941 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
// plain scalar value
4945 else if(_scan_scalar_plain_seq_flow(&sc))
4947 _c4dbgp(
"seqflow[RVAL]: it's a scalar.");
4948 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
4949 m_evt_handler->set_val_scalar_plain(maybe_filtered);
// nested flow sequence: the new state takes the parent's reference indentation
4952 else if(first ==
'[')
4954 _c4dbgp(
"seqflow[RVAL]: start child seqflow");
4956 m_evt_handler->begin_seq_val_flow();
4957 _set_indentation(m_evt_handler->m_parent->indref);
4959 _line_progressed(1);
// nested flow map: open it and leave this handler
4961 else if(first ==
'{')
4963 _c4dbgp(
"seqflow[RVAL]: start child mapflow");
4965 m_evt_handler->begin_map_val_flow();
4966 _set_indentation(m_evt_handler->m_parent->indref);
4968 _line_progressed(1);
4969 goto seqflow_finish;
// end of this sequence
4971 else if(first ==
']')
4973 _c4dbgp(
"seqflow[RVAL]: end!");
4974 _line_progressed(1);
4975 m_evt_handler->end_seq();
4976 goto seqflow_finish;
// reference as the value
4978 else if(first ==
'*')
4980 csubstr ref = _scan_ref_seq();
4981 _c4dbgpf(
"seqflow[RVAL]: ref! [{}]~~~{}~~~", ref.len, ref);
4982 m_evt_handler->set_val_ref(ref);
// anchor on the value; when a comma follows directly, the anchored value is
// an empty scalar and the next sibling is started
4985 else if(first ==
'&')
4987 csubstr anchor = _scan_anchor();
4988 _c4dbgpf(
"seqflow[RVAL]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
4989 m_evt_handler->set_val_anchor(anchor);
4990 if(_maybe_scan_following_comma())
4992 _c4dbgp(
"seqflow[RVAL]: empty scalar!");
4993 m_evt_handler->set_val_scalar_plain({});
4994 m_evt_handler->add_sibling();
// tag on the value; same empty-scalar handling when a comma follows directly
4997 else if(first ==
'!')
4999 csubstr tag = _scan_tag();
5000 _c4dbgpf(
"seqflow[RVAL]: tag! [{}]~~~{}~~~", tag.len, tag);
5002 m_evt_handler->set_val_tag(tag);
5003 if(_maybe_scan_following_comma())
5005 _c4dbgp(
"seqflow[RVAL]: empty scalar!");
5006 m_evt_handler->set_val_scalar_plain({});
5007 m_evt_handler->add_sibling();
// a colon where a value was expected: this element is really a map whose
// key is empty
5010 else if(first ==
':')
5012 _c4dbgpf(
"seqflow[RVAL]: actually seqimap at node[{}], with empty key", m_evt_handler->m_curr->node_id);
5014 m_evt_handler->begin_map_val_flow();
5015 _set_indentation(m_evt_handler->m_parent->indref);
5016 m_evt_handler->set_key_scalar_plain({});
5018 _line_progressed(1);
5019 goto seqflow_finish;
// question mark: this element is a map with an explicit key; the fact is
// recorded in m_was_inside_qmrk
5021 else if(first ==
'?')
5023 _c4dbgp(
"seqflow[RVAL]: start child mapflow, explicit key");
5025 m_was_inside_qmrk =
true;
5026 m_evt_handler->begin_map_val_flow();
5027 _set_indentation(m_evt_handler->m_parent->indref);
5029 _line_progressed(1);
5030 _maybe_skip_whitespace_tokens();
5031 goto seqflow_finish;
// anything else is malformed
5035 _c4err(
"parse error");
// ---- RNXT: a separator or the end of the sequence is expected
5040 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT));
5041 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
5042 const char first = rem.str[0];
// separator: move on to the next sibling value (the opening test is not
// visible in this excerpt)
5045 _c4dbgp(
"seqflow[RNXT]: expect next val");
5047 m_evt_handler->add_sibling();
5048 _line_progressed(1);
// end of this sequence
5050 else if(first ==
']')
5052 _c4dbgp(
"seqflow[RNXT]: end!");
5053 m_evt_handler->end_seq();
5054 _line_progressed(1);
5055 goto seqflow_finish;
// a colon after a value: the value just read is really the first key of a
// new map
5057 else if(first ==
':')
5059 _c4dbgpf(
"seqflow[RNXT]: actually seqimap at node[{}]", m_evt_handler->m_curr->node_id);
5060 m_evt_handler->actually_val_is_first_key_of_new_map_flow();
5061 _set_indentation(m_evt_handler->m_parent->indref);
5062 _line_progressed(1);
5064 goto seqflow_finish;
// anything else is malformed
5068 _c4err(
"parse error");
// ---- continue with the rest of the input; running out of file while the
// sequence is still open is an error
5073 _c4dbgt(
"seqflow: go again", 0);
5074 if(_finished_line())
5076 if(C4_LIKELY(!_finished_file()))
5084 _c4err(
"missing terminating ]");
5090 _c4dbgp(
"seqflow: finish");
// Handles parsing inside a flow map.
// Entry state (asserted): RMAP and FLOW set, and exactly one of RKEY (a key
// is expected), RKCL (the colon after a key is expected), RVAL (a value is
// expected), RNXT (a separator or the end of the map is expected) or QMRK
// (the key of an explicit-key entry is expected).
// Malformed input is reported through _c4err().
5096 template<
class EventHandler>
5097 void ParseEngine<EventHandler>::_handle_map_flow()
5100 _c4dbgpf(
"handle2_map_flow: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5102 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP));
5103 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
FLOW));
// exactly one of the five sub-states is active
5105 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(
RKEY) + has_any(
RKCL) + has_any(
RVAL) + has_any(
RNXT) + has_any(
QMRK)));
// skip whitespace, then look at what remains of the current line
5107 _handle_flow_skip_whitespace();
5108 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// ---- RKEY: a key is expected
5114 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
5115 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
5116 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
5117 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
5118 const char first = rem.str[0];
5119 _c4dbgpf(
"mapflow[RKEY]: '{}'", first);
// single-quoted scalar key (the opening test is not visible in this excerpt)
5123 _c4dbgp(
"mapflow[RKEY]: scanning single-quoted scalar");
5124 sc = _scan_scalar_squot();
5125 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5126 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
// double-quoted scalar key
5129 else if(first ==
'"')
5131 _c4dbgp(
"mapflow[RKEY]: scanning double-quoted scalar");
5132 sc = _scan_scalar_dquot();
5133 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5134 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
// plain scalar key
5138 else if(_scan_scalar_plain_map_flow(&sc))
5140 _c4dbgp(
"mapflow[RKEY]: plain scalar");
5141 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5142 m_evt_handler->set_key_scalar_plain(maybe_filtered);
// question mark: explicit key; consume it and any whitespace after it
5145 else if(first ==
'?')
5147 _c4dbgp(
"mapflow[RKEY]: explicit key");
5148 _line_progressed(1);
5150 _maybe_skip_whitespace_tokens();
// colon where a key was expected: the key is empty
5152 else if(first ==
':')
5154 _c4dbgp(
"mapflow[RKEY]: setting empty key");
5155 m_evt_handler->set_key_scalar_plain({});
5157 _line_progressed(1);
5158 _maybe_skip_whitespace_tokens();
// end of this map
5160 else if(first ==
'}')
5162 _c4dbgp(
"mapflow[RKEY]: end!");
5163 m_evt_handler->end_map();
5164 _line_progressed(1);
5165 goto mapflow_finish;
// anchor on the key
5167 else if(first ==
'&')
5169 csubstr anchor = _scan_anchor();
5170 _c4dbgpf(
"mapflow[RKEY]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
5171 m_evt_handler->set_key_anchor(anchor);
// reference as the key
5173 else if(first ==
'*')
5175 csubstr ref = _scan_ref_map();
5176 _c4dbgpf(
"mapflow[RKEY]: key ref! [{}]~~~{}~~~", ref.len, ref);
5177 m_evt_handler->set_key_ref(ref);
// flow sequence used as the key: the new state takes the parent's reference
// indentation
5180 else if(first ==
'[')
5185 _c4dbgp(
"mapflow[RKEY]: start child seqflow (!)");
5187 m_evt_handler->begin_seq_key_flow();
5189 _set_indentation(m_evt_handler->m_parent->indref);
5190 _line_progressed(1);
5191 goto mapflow_finish;
// flow map used as the key
5193 else if(first ==
'{')
5198 _c4dbgp(
"mapflow[RKEY]: start child mapflow (!)");
5200 m_evt_handler->begin_map_key_flow();
5202 _set_indentation(m_evt_handler->m_parent->indref);
5203 _line_progressed(1);
// tag on the key
5206 else if(first ==
'!')
5208 csubstr tag = _scan_tag();
5209 _c4dbgpf(
"mapflow[RKEY]: tag! [{}]~~~{}~~~", tag.len, tag);
5211 m_evt_handler->set_key_tag(tag);
// anything else is malformed
5215 _c4err(
"parse error");
// ---- RKCL: the colon after the key is expected
5218 else if(has_any(
RKCL))
5220 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
5221 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
5222 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
5223 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
5224 const char first = rem.str[0];
5225 _c4dbgpf(
"mapflow[RKCL]: '{}'", first);
// colon found: consume it
5228 _c4dbgp(
"mapflow[RKCL]: found the colon");
5230 _line_progressed(1);
// closing brace right after the key: the value is empty and the map ends
5232 else if(first ==
'}')
5234 _c4dbgp(
"mapflow[RKCL]: end with missing val!");
5236 m_evt_handler->set_val_scalar_plain({});
5237 m_evt_handler->end_map();
5238 _line_progressed(1);
5239 goto mapflow_finish;
// comma right after the key: the value is empty; move on to the next pair
5241 else if(first ==
',')
5243 _c4dbgp(
"mapflow[RKCL]: got comma. val is missing");
5244 m_evt_handler->set_val_scalar_plain({});
5245 m_evt_handler->add_sibling();
5247 _line_progressed(1);
// anything else is malformed
5251 _c4err(
"parse error");
// ---- RVAL: a value is expected
5254 else if(has_any(
RVAL))
5256 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
5257 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
5258 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
5259 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
5260 const char first = rem.str[0];
5261 _c4dbgpf(
"mapflow[RVAL]: '{}'", first);
// single-quoted scalar value (the opening test is not visible in this excerpt)
5265 _c4dbgp(
"mapflow[RVAL]: scanning single-quoted scalar");
5266 sc = _scan_scalar_squot();
5267 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5268 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
// double-quoted scalar value
5271 else if(first ==
'"')
5273 _c4dbgp(
"mapflow[RVAL]: scanning double-quoted scalar");
5274 sc = _scan_scalar_dquot();
5275 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5276 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
// plain scalar value
5280 else if(_scan_scalar_plain_map_flow(&sc))
5282 _c4dbgp(
"mapflow[RVAL]: plain scalar.");
5283 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5284 m_evt_handler->set_val_scalar_plain(maybe_filtered);
// nested flow sequence as value: open it and leave this handler
5287 else if(first ==
'[')
5289 _c4dbgp(
"mapflow[RVAL]: start val seqflow");
5291 m_evt_handler->begin_seq_val_flow();
5292 _set_indentation(m_evt_handler->m_parent->indref);
5294 _line_progressed(1);
5295 goto mapflow_finish;
// nested flow map as value
5297 else if(first ==
'{')
5299 _c4dbgp(
"mapflow[RVAL]: start val mapflow");
5301 m_evt_handler->begin_map_val_flow();
5302 _set_indentation(m_evt_handler->m_parent->indref);
5304 _line_progressed(1);
// closing brace where a value was expected: the value is empty and the map ends
5307 else if(first ==
'}')
5309 _c4dbgp(
"mapflow[RVAL]: end!");
5310 m_evt_handler->set_val_scalar_plain({});
5311 m_evt_handler->end_map();
5312 _line_progressed(1);
5313 goto mapflow_finish;
// reference as the value (the trace previously said "key ref" although the
// value is what gets set)
5315 else if(first ==
'*')
5317 csubstr ref = _scan_ref_map();
5318 _c4dbgpf(
"mapflow[RVAL]: val ref! [{}]~~~{}~~~", ref.len, ref);
5319 m_evt_handler->set_val_ref(ref);
// anchor on the value (the trace previously said "key anchor" although the
// value is what gets set)
5322 else if(first ==
'&')
5324 csubstr anchor = _scan_anchor();
5325 _c4dbgpf(
"mapflow[RVAL]: val anchor! [{}]~~~{}~~~", anchor.len, anchor);
5326 m_evt_handler->set_val_anchor(anchor);
// tag on the value
5328 else if(first ==
'!')
5330 csubstr tag = _scan_tag();
5331 _c4dbgpf(
"mapflow[RVAL]: tag! [{}]~~~{}~~~", tag.len, tag);
5333 m_evt_handler->set_val_tag(tag);
// anything else is malformed
5337 _c4err(
"parse error");
// ---- RNXT: a separator or the end of the map is expected
5340 else if(has_any(
RNXT))
5342 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
5343 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
5344 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
5345 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
5346 _c4dbgpf(
"mapflow[RNXT]: '{}'", rem.str[0]);
// comma: move on to the next key/value pair
5347 if(rem.begins_with(
','))
5349 _c4dbgp(
"mapflow[RNXT]: expect next keyval");
5350 m_evt_handler->add_sibling();
5352 _line_progressed(1);
// closing brace: end of this map
5354 else if(rem.begins_with(
'}'))
5356 _c4dbgp(
"mapflow[RNXT]: end!");
5357 m_evt_handler->end_map();
5358 _line_progressed(1);
5359 goto mapflow_finish;
// anything else is malformed
5363 _c4err(
"parse error");
// ---- QMRK: the key of an explicit-key entry is expected
5366 else if(has_any(
QMRK))
5368 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
5369 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
5370 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
5371 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
5372 const char first = rem.str[0];
5373 _c4dbgpf(
"mapflow[QMRK]: '{}'", first);
// single-quoted scalar key (the opening test is not visible in this excerpt)
5377 _c4dbgp(
"mapflow[QMRK]: scanning single-quoted scalar");
5378 sc = _scan_scalar_squot();
5379 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5380 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
// double-quoted scalar key
5383 else if(first ==
'"')
5385 _c4dbgp(
"mapflow[QMRK]: scanning double-quoted scalar");
5386 sc = _scan_scalar_dquot();
5387 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5388 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
// plain scalar key
5392 else if(_scan_scalar_plain_map_flow(&sc))
5394 _c4dbgp(
"mapflow[QMRK]: plain scalar");
5395 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5396 m_evt_handler->set_key_scalar_plain(maybe_filtered);
// colon where a key was expected: the key is empty
5399 else if(first ==
':')
5401 _c4dbgp(
"mapflow[QMRK]: setting empty key");
5402 m_evt_handler->set_key_scalar_plain({});
5404 _line_progressed(1);
5405 _maybe_skip_whitespace_tokens();
// closing brace: both key and value are empty and the map ends
5407 else if(first ==
'}')
5409 _c4dbgp(
"mapflow[QMRK]: end!");
5410 m_evt_handler->set_key_scalar_plain({});
5411 m_evt_handler->set_val_scalar_plain({});
5412 m_evt_handler->end_map();
5413 _line_progressed(1);
5414 goto mapflow_finish;
// anchor on the key
5416 else if(first ==
'&')
5418 csubstr anchor = _scan_anchor();
5419 _c4dbgpf(
"mapflow[QMRK]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
5420 m_evt_handler->set_key_anchor(anchor);
// reference as the key
5422 else if(first ==
'*')
5424 csubstr ref = _scan_ref_map();
5425 _c4dbgpf(
"mapflow[QMRK]: key ref! [{}]~~~{}~~~", ref.len, ref);
5426 m_evt_handler->set_key_ref(ref);
// flow sequence used as the key
5429 else if(first ==
'[')
5434 _c4dbgp(
"mapflow[QMRK]: start child seqflow (!)");
5436 m_evt_handler->begin_seq_key_flow();
5438 _set_indentation(m_evt_handler->m_parent->indref);
5439 _line_progressed(1);
5440 goto mapflow_finish;
// flow map used as the key
5442 else if(first ==
'{')
5447 _c4dbgp(
"mapflow[QMRK]: start child mapflow (!)");
5449 m_evt_handler->begin_map_key_flow();
5450 _set_indentation(m_evt_handler->m_parent->indref);
5452 _line_progressed(1);
// tag on the key
5455 else if(first ==
'!')
5457 csubstr tag = _scan_tag();
5458 _c4dbgpf(
"mapflow[QMRK]: tag! [{}]~~~{}~~~", tag.len, tag);
5460 m_evt_handler->set_key_tag(tag);
// anything else is malformed
5464 _c4err(
"parse error");
// ---- continue with the rest of the input; running out of file while the
// map is still open is an error
5469 _c4dbgt(
"mapflow: go again", 0);
5470 if(_finished_line())
5472 if(C4_LIKELY(!_finished_file()))
5480 _c4err(
"missing terminating }");
5486 _c4dbgp(
"mapflow: finish");
// Handles input while the parser state is a block-style sequence.
//
// The asserts at the top require the RSEQ and BLCK flags and exactly one of
// RVAL / RNXT. Going by the debug tags, the RVAL part scans the value of the
// current sequence entry and the RNXT part decides what follows an entry.
// All parse events are reported through m_evt_handler; unexpected input is
// reported through _c4err("parse error").
//
// NOTE(review): braces, else lines, goto labels and some branch conditions
// are not visible in this excerpt. The nesting described in the comments
// below is inferred from statement order and debug strings -- confirm it
// against the complete file before relying on it.
5492 template<
class EventHandler>
5493 void ParseEngine<EventHandler>::_handle_seq_block()
5496 _c4dbgpf(
"handle2_seq_block: seq_id={} node_id={} level={} indent={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
// state preconditions: block sequence, and exactly one of RVAL / RNXT set
5498 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RSEQ));
5499 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
BLCK));
5500 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL|
RNXT));
5501 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(
RVAL) + has_any(
RNXT)));
// run the comment-skipping helper, then take the remainder of the current line
5503 _maybe_skip_comment();
5504 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// ---------------------------------------------------------------------
// RVAL part (debug tag seqblck[RVAL]); RNXT must not be set here
// ---------------------------------------------------------------------
5510 _c4dbgpf(
"seqblck[RVAL]: col={}", m_evt_handler->m_curr->pos.col);
5511 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
// at the beginning of a line, first deal with the indentation of that line
5512 if(m_evt_handler->m_curr->at_line_beginning())
5514 _c4dbgpf(
"seqblck[RVAL]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
// indentation_ge(): consume the indentation and refresh rem
5515 if(m_evt_handler->m_curr->indentation_ge())
5517 _c4dbgpf(
"seqblck[RVAL]: skip {} from indentation", m_evt_handler->m_curr->line_contents.indentation);
5518 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
5519 rem = m_evt_handler->m_curr->line_contents.rem;
// indentation_lt(): pop out of this sequence and leave via seqblck_finish
5523 else if(m_evt_handler->m_curr->indentation_lt())
5525 _c4dbgp(
"seqblck[RVAL]: smaller indentation!");
5526 _handle_indentation_pop_from_block_seq();
5527 goto seqblck_finish;
// indentation == npos is reported as an empty line: consume what is left of it
5529 else if(m_evt_handler->m_curr->line_contents.indentation ==
npos)
5531 _c4dbgp(
"seqblck[RVAL]: empty line!");
5532 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
// code guarded by RYML_NO_COVERAGE__TO_BE_DELETED: when rem starts with a
// space, _handle_indentation_from_annotations() may consume input, after
// which rem is refreshed.
// NOTE(review): the end of this preprocessor guard is not visible here.
5536 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
5544 if(rem.str[0] ==
' ')
5546 if(_handle_indentation_from_annotations())
5548 _c4dbgp(
"seqblck[RVAL]: annotations!");
5549 rem = m_evt_handler->m_curr->line_contents.rem;
// rem must be non-empty: its first character selects the branch below
5556 _RYML_CB_ASSERT(callbacks(), rem.len);
5557 _c4dbgpf(
"seqblck[RVAL]: '{}' node_id={}", rem.str[0], m_evt_handler->m_curr->node_id);
5558 const char first = rem.str[0];
// line and column where this token starts; both are handed to the
// annotation helpers and to _add_annotation() further down
5559 const size_t startline = m_evt_handler->m_curr->pos.line;
5562 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
// single-quoted scalar: set as the entry value, unless a colon follows, in
// which case a child block map is started and the scalar becomes its key.
// NOTE(review): the condition introducing this branch is not visible here.
5566 _c4dbgp(
"seqblck[RVAL]: single-quoted scalar");
5567 sc = _scan_scalar_squot();
5568 if(!_maybe_scan_following_colon())
5570 _c4dbgp(
"seqblck[RVAL]: set as val");
5571 _handle_annotations_before_blck_val_scalar();
5572 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5573 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
5578 _c4dbgp(
"seqblck[RVAL]: start mapblck, set scalar as key");
5580 _handle_annotations_before_start_mapblck(startline);
5581 m_evt_handler->begin_map_val_block();
5582 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5583 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5584 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
5586 _maybe_skip_whitespace_tokens();
5587 goto seqblck_finish;
// double-quoted scalar: same two outcomes as the single-quoted case
5590 else if(first ==
'"')
5592 _c4dbgp(
"seqblck[RVAL]: double-quoted scalar");
5593 sc = _scan_scalar_dquot();
5594 if(!_maybe_scan_following_colon())
5596 _c4dbgp(
"seqblck[RVAL]: set as val");
5597 _handle_annotations_before_blck_val_scalar();
5598 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5599 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5604 _c4dbgp(
"seqblck[RVAL]: start mapblck, set scalar as key");
5606 _handle_annotations_before_start_mapblck(startline);
5607 m_evt_handler->begin_map_val_block();
5608 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5609 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5610 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5612 _maybe_skip_whitespace_tokens();
5613 goto seqblck_finish;
// '|' block-literal scalar: scanned by _scan_block() with indref + 1 and
// always set as the entry value
5619 else if(first ==
'|')
5621 _c4dbgp(
"seqblck[RVAL]: block-literal scalar");
5623 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
5624 _handle_annotations_before_blck_val_scalar();
5625 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
5626 m_evt_handler->set_val_scalar_literal(maybe_filtered);
// '>' block-folded scalar: same handling, with the folded filter and event
5629 else if(first ==
'>')
5631 _c4dbgp(
"seqblck[RVAL]: block-folded scalar");
5633 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
5634 _handle_annotations_before_blck_val_scalar();
5635 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
5636 m_evt_handler->set_val_scalar_folded(maybe_filtered);
// plain scalar. Outcomes, judging by the debug strings:
//  - no colon follows: the scalar is the entry value
//  - colon follows and startindent > indref: start a child block map with
//    the scalar as its key
//  - colon follows and the parent state sits at startindent and passes the
//    RMAP|BLCK check: the current entry gets an empty value, the sequence
//    ends, and the scalar becomes the next key of the parent
//  - otherwise: parse error
5639 else if(_scan_scalar_plain_seq_blck(&sc))
5641 _c4dbgp(
"seqblck[RVAL]: plain scalar.");
5642 if(!_maybe_scan_following_colon())
5644 _c4dbgp(
"seqblck[RVAL]: set as val");
5645 _handle_annotations_before_blck_val_scalar();
5646 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5647 m_evt_handler->set_val_scalar_plain(maybe_filtered);
5652 if(startindent > m_evt_handler->m_curr->indref)
5654 _c4dbgp(
"seqblck[RVAL]: start mapblck, set scalar as key");
5656 _handle_annotations_before_start_mapblck(startline);
5657 m_evt_handler->begin_map_val_block();
5658 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5659 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5660 m_evt_handler->set_key_scalar_plain(maybe_filtered);
5662 _maybe_skip_whitespace_tokens();
5663 goto seqblck_finish;
// NOTE(review): this check uses has_any(RMAP|BLCK, parent) while the
// comparable indentless-seq check in the RNXT part uses has_all(RMAP|BLCK,
// parent) -- confirm whether the difference is intended.
5665 else if(m_evt_handler->m_parent && m_evt_handler->m_parent->indref == startindent && has_any(
RMAP|
BLCK, m_evt_handler->m_parent))
5667 _c4dbgp(
"seqblck[RVAL]: empty val + end indentless seq + set key");
5668 m_evt_handler->set_val_scalar_plain({});
5669 m_evt_handler->end_seq();
5670 m_evt_handler->add_sibling();
5671 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5672 m_evt_handler->set_key_scalar_plain(maybe_filtered);
5674 _maybe_skip_whitespace_tokens();
5675 goto seqblck_finish;
5679 _c4err(
"parse error");
// '[' starts a child flow sequence: consume the bracket and set the
// indentation to the parent indref + 1.
// NOTE(review): no _handle_annotations_before_blck_val_scalar() call is
// visible in this branch, unlike the '{' branch below -- confirm.
5683 else if(first ==
'[')
5685 _c4dbgp(
"seqblck[RVAL]: start child seqflow");
5687 m_evt_handler->begin_seq_val_flow();
5689 _line_progressed(1);
5690 _set_indentation(m_evt_handler->m_parent->indref + 1u);
5691 goto seqblck_finish;
// '{' starts a child flow map, after pending annotations are handled
5693 else if(first ==
'{')
5695 _c4dbgp(
"seqblck[RVAL]: start child mapflow");
5697 _handle_annotations_before_blck_val_scalar();
5698 m_evt_handler->begin_map_val_flow();
5700 _line_progressed(1);
5701 _set_indentation(m_evt_handler->m_parent->indref + 1u);
5702 goto seqblck_finish;
// '-' seen while a value is expected:
//  - at the same column as indref: the previous entry had an empty value;
//    emit it and add a sibling
//  - otherwise (asserted to be more indented): start a child block sequence
//    and save the indentation
// in both visible paths the '-' and following whitespace tokens are consumed
5704 else if(first ==
'-')
5706 if(startindent == m_evt_handler->m_curr->indref)
5708 _c4dbgp(
"seqblck[RVAL]: prev val was empty");
5709 _handle_annotations_before_blck_val_scalar();
5710 m_evt_handler->set_val_scalar_plain({});
5712 m_evt_handler->add_sibling();
5716 _c4dbgp(
"seqblck[RVAL]: start child seqblck");
5717 _RYML_CB_ASSERT(this->callbacks(), startindent > m_evt_handler->m_curr->indref);
5719 _handle_annotations_before_blck_val_scalar();
5720 m_evt_handler->begin_seq_val_block();
5722 _save_indentation();
5725 _line_progressed(1);
5726 _maybe_skip_whitespace_tokens();
// ':' with no preceding scalar: start a child block map whose first key is empty
5728 else if(first ==
':')
5730 _c4dbgp(
"seqblck[RVAL]: start child mapblck with empty key");
5732 _handle_annotations_before_start_mapblck(startline);
5733 m_evt_handler->begin_map_val_block();
5734 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5735 m_evt_handler->set_key_scalar_plain({});
5737 _line_progressed(1);
5738 _maybe_skip_whitespace_tokens();
5739 goto seqblck_finish;
// '&' anchor: not applied right away; stored in m_pending_anchors together
// with the column and line where it started
5741 else if(first ==
'&')
5743 const csubstr anchor = _scan_anchor();
5744 _c4dbgpf(
"seqblck[RVAL]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
5747 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
// '*' reference: set as the entry value, unless a colon follows, in which
// case a child block map is started with the reference as its key
5749 else if(first ==
'*')
5751 csubstr ref = _scan_ref_seq();
5752 _c4dbgpf(
"seqblck[RVAL]: ref! [{}]~~~{}~~~", ref.len, ref);
5753 if(!_maybe_scan_following_colon())
5755 _c4dbgp(
"seqblck[RVAL]: set ref as val!");
5756 _handle_annotations_before_blck_val_scalar();
5757 m_evt_handler->set_val_ref(ref);
5762 _c4dbgp(
"seqblck[RVAL]: ref is key of map");
5764 _handle_annotations_before_start_mapblck(startline);
5765 m_evt_handler->begin_map_val_block();
5766 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5767 m_evt_handler->set_key_ref(ref);
5769 _set_indentation(startindent);
5770 _maybe_skip_whitespace_tokens();
5771 goto seqblck_finish;
// '!' tag: stored in m_pending_tags, same as anchors above
5774 else if(first ==
'!')
5776 csubstr tag = _scan_tag();
5777 _c4dbgpf(
"seqblck[RVAL]: val tag! [{}]~~~{}~~~", tag.len, tag);
5780 _add_annotation(&m_pending_tags, tag, startindent, startline);
// '?' explicit key: record it in m_was_inside_qmrk, start a child block
// map, save the indentation and consume the '?'
5782 else if(first ==
'?')
5784 _c4dbgp(
"seqblck[RVAL]: start child mapblck, explicit key");
5786 m_was_inside_qmrk =
true;
5787 m_evt_handler->begin_map_val_block();
5789 _save_indentation();
5790 _line_progressed(1);
5791 _maybe_skip_whitespace_tokens();
5792 goto seqblck_finish;
// no RVAL branch matched
5796 _c4err(
"parse error");
// ---------------------------------------------------------------------
// RNXT part (debug tag seqblck[RNXT]); RVAL must not be set here
// ---------------------------------------------------------------------
5801 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT));
5802 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
5806 _c4dbgpf(
"seqblck[RNXT]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
// RNXT input is only accepted at the beginning of a line
5807 if(C4_UNLIKELY(!_at_line_begin()))
5808 _c4err(
"parse error");
// indentation_ge(): skip indref characters plus whitespace tokens
5809 if(m_evt_handler->m_curr->indentation_ge())
5811 _c4dbgpf(
"seqblck[RNXT]: skip {} from indref", m_evt_handler->m_curr->indref);
5812 _line_progressed(m_evt_handler->m_curr->indref);
5813 _maybe_skip_whitespace_tokens();
5814 rem = m_evt_handler->m_curr->line_contents.rem;
// indentation_lt(): pop enclosing states. Per the debug strings the result
// is either still a block sequence (consume the indentation and continue)
// or not (leave via seqblck_finish).
// NOTE(review): the condition separating the two outcomes is not visible here.
5818 else if(m_evt_handler->m_curr->indentation_lt())
5820 _c4dbgp(
"seqblck[RNXT]: smaller indentation!");
5821 _handle_indentation_pop_from_block_seq();
5824 _c4dbgp(
"seqblck[RNXT]: still seqblck!");
5825 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT));
5826 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
5827 rem = m_evt_handler->m_curr->line_contents.rem;
5833 _c4dbgp(
"seqblck[RNXT]: no longer seqblck!");
5834 goto seqblck_finish;
// indentation == npos: blank line, consume the rest of it.
// NOTE(review): the len={} placeholder below is given the rem substring
// itself rather than rem.len (debug output only) -- confirm.
5837 else if(m_evt_handler->m_curr->line_contents.indentation ==
npos)
5839 _c4dbgpf(
"seqblck[RNXT]: blank line, len={}", m_evt_handler->m_curr->line_contents.rem);
5840 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
5841 rem = m_evt_handler->m_curr->line_contents.rem;
// dispatch on the first remaining character
5848 const char first = rem.str[0];
5849 _c4dbgpf(
"seqblck[RNXT]: '{}' node_id={}", first, m_evt_handler->m_curr->node_id);
// Presumably the '-' case (its own condition is not visible here): the
// dash introduces the next entry when the sequence or the line is indented
// or rem is not a document-begin token; otherwise a new document is
// started and three characters are consumed.
5852 if(m_evt_handler->m_curr->indref > 0 || m_evt_handler->m_curr->line_contents.indentation > 0 || !_is_doc_begin_token(rem))
5854 _c4dbgp(
"seqblck[RNXT]: expect next val");
5856 m_evt_handler->add_sibling();
5857 _line_progressed(1);
5858 _maybe_skip_whitespace_tokens();
5862 _c4dbgp(
"seqblck[RNXT]: start doc");
5863 _start_doc_suddenly();
5864 _line_progressed(3);
5865 _maybe_skip_whitespace_tokens();
5866 goto seqblck_finish;
// ':' after the sequence: valid only when a parent state exists with RMAP
// set; the sequence was then the '?' key of that map, so it is ended here
5869 else if(first ==
':')
5875 auto const *C4_RESTRICT prev_state = m_evt_handler->m_parent;
5876 if(C4_LIKELY(prev_state && (prev_state->flags &
RMAP)))
5878 _c4dbgp(
"seqblck[RNXT]: actually this seq was '?' key of parent map");
5879 m_evt_handler->end_seq();
5880 goto seqblck_finish;
5884 _c4err(
"parse error");
// '.': accepted only when the two characters after it are dots, followed
// by end of rem or a space; the document is then ended and three
// characters are consumed
5887 else if(first ==
'.')
5889 _c4dbgp(
"seqblck[RNXT]: maybe doc?");
5890 csubstr rs = rem.sub(1);
5891 if(rs ==
".." || rs.begins_with(
".. "))
5893 _c4dbgp(
"seqblck[RNXT]: end+start doc");
5894 _end_doc_suddenly();
5895 _line_progressed(3);
5896 _maybe_skip_whitespace_tokens();
5897 goto seqblck_finish;
5901 _c4err(
"parse error");
// debug output: print every state currently in m_stack
5910 for(
auto const& s : m_evt_handler->m_stack)
5912 _dbg_printf(
"state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(flagbuf_, s.flags));
// any other character: accepted only when the parent state has both RMAP
// and BLCK and the same indref as this sequence (called an indentless seq
// in the debug string). The parser then pops to the parent, asserts it is
// in a block map, and adds a sibling there.
5915 if(m_evt_handler->m_parent && has_all(
RMAP|
BLCK, m_evt_handler->m_parent) && m_evt_handler->m_curr->indref == m_evt_handler->m_parent->indref)
5917 _c4dbgpf(
"seqblck[RNXT]: end indentless seq, go to parent={}. node={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id);
5918 _RYML_CB_ASSERT(this->callbacks(), m_evt_handler->m_curr != m_evt_handler->m_parent);
5919 _handle_indentation_pop(m_evt_handler->m_parent);
5920 _RYML_CB_ASSERT(this->callbacks(), has_all(
RMAP|
BLCK));
5921 m_evt_handler->add_sibling();
5923 goto seqblck_finish;
5927 _c4err(
"parse error");
// "go again" tail: once the current line is finished, end of file leads to
// seqblck_finish.
// NOTE(review): the steps that advance to the next line and loop back are
// not visible in this excerpt.
5933 _c4dbgt(
"seqblck: go again", 0);
5934 if(_finished_line())
5938 if(_finished_file())
5940 _c4dbgp(
"seqblck: finish!");
5942 goto seqblck_finish;
// common exit point (target of the goto seqblck_finish statements above)
5949 _c4dbgp(
"seqblck: finish");
5955 template<
class EventHandler>
5956 void ParseEngine<EventHandler>::_handle_map_block()
5959 _c4dbgpf(
"handle2_map_block: map_id={} node_id={} level={} indref={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5962 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP));
5963 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
BLCK));
5965 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(
RKEY) + has_any(
RKCL) + has_any(
RVAL) + has_any(
RNXT) + has_any(
QMRK)));
5967 _maybe_skip_comment();
5968 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
5974 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
5975 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
5976 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
5977 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
5981 if(m_evt_handler->m_curr->at_line_beginning())
5983 if(m_evt_handler->m_curr->indentation_eq())
5985 _c4dbgpf(
"mapblck[RKEY]: skip {} from indref", m_evt_handler->m_curr->indref);
5986 _line_progressed(m_evt_handler->m_curr->indref);
5987 rem = m_evt_handler->m_curr->line_contents.rem;
5991 else if(m_evt_handler->m_curr->indentation_lt())
5993 _c4dbgp(
"mapblck[RKEY]: smaller indentation!");
5994 _handle_indentation_pop_from_block_map();
5995 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
5998 _c4dbgp(
"mapblck[RKEY]: still mapblck!");
5999 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY));
6000 rem = m_evt_handler->m_curr->line_contents.rem;
6006 _c4dbgp(
"mapblck[RKEY]: no longer mapblck!");
6007 goto mapblck_finish;
6012 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_gt());
6013 _c4err(
"invalid indentation");
6019 const char first = rem.str[0];
6020 const size_t startline = m_evt_handler->m_curr->pos.line;
6021 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
6022 _c4dbgpf(
"mapblck[RKEY]: '{}'", first);
6026 _c4dbgp(
"mapblck[RKEY]: scanning single-quoted scalar");
6027 sc = _scan_scalar_squot();
6028 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
6029 _handle_annotations_before_blck_key_scalar();
6030 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6032 if(!_maybe_scan_following_colon())
6033 _c4err(
"could not find ':' colon after key");
6034 _maybe_skip_whitespace_tokens();
6036 else if(first ==
'"')
6038 _c4dbgp(
"mapblck[RKEY]: scanning double-quoted scalar");
6039 sc = _scan_scalar_dquot();
6040 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
6041 _handle_annotations_before_blck_key_scalar();
6042 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6044 if(!_maybe_scan_following_colon())
6045 _c4err(
"could not find ':' colon after key");
6046 _maybe_skip_whitespace_tokens();
6050 else if(C4_UNLIKELY(first ==
'|'))
6052 _c4err(
"block literal keys must be enclosed in '?'");
6054 else if(C4_UNLIKELY(first ==
'>'))
6056 _c4err(
"block literal keys must be enclosed in '?'");
6058 else if(_scan_scalar_plain_map_blck(&sc))
6060 _c4dbgp(
"mapblck[RKEY]: plain scalar");
6061 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
6062 _handle_annotations_before_blck_key_scalar();
6063 m_evt_handler->set_key_scalar_plain(maybe_filtered);
6065 if(!_maybe_scan_following_colon())
6066 _c4err(
"could not find ':' colon after key");
6067 _maybe_skip_whitespace_tokens();
6069 else if(first ==
'?')
6071 _c4dbgp(
"mapblck[RKEY]: key token!");
6073 _line_progressed(1);
6074 _maybe_skip_whitespace_tokens();
6075 m_was_inside_qmrk =
true;
6078 else if(first ==
':')
6080 _c4dbgp(
"mapblck[RKEY]: setting empty key");
6081 _handle_annotations_before_blck_key_scalar();
6082 m_evt_handler->set_key_scalar_plain({});
6084 _line_progressed(1);
6085 _maybe_skip_whitespace_tokens();
6087 else if(first ==
'*')
6089 csubstr ref = _scan_ref_map();
6090 _c4dbgpf(
"mapblck[RKEY]: key ref! [{}]~~~{}~~~", ref.len, ref);
6091 _handle_annotations_before_blck_key_scalar();
6092 m_evt_handler->set_key_ref(ref);
6094 if(!_maybe_scan_following_colon())
6095 _c4err(
"could not find ':' colon after key");
6096 _maybe_skip_whitespace_tokens();
6098 else if(first ==
'&')
6100 csubstr anchor = _scan_anchor();
6101 _c4dbgpf(
"mapblck[RKEY]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
6102 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
6104 else if(first ==
'!')
6106 csubstr tag = _scan_tag();
6107 _c4dbgpf(
"mapblck[RKEY]: key tag! [{}]~~~{}~~~", tag.len, tag);
6108 _add_annotation(&m_pending_tags, tag, startindent, startline);
6110 else if(first ==
'[')
6115 _c4dbgp(
"mapblck[RKEY]: start child seqflow (!)");
6117 _handle_annotations_before_blck_key_scalar();
6118 m_evt_handler->begin_seq_key_flow();
6120 _line_progressed(1);
6121 _set_indentation(startindent);
6122 goto mapblck_finish;
6124 else if(first ==
'{')
6129 _c4dbgp(
"mapblck[RKEY]: start child mapflow (!)");
6131 _handle_annotations_before_blck_key_scalar();
6132 m_evt_handler->begin_map_key_flow();
6134 _line_progressed(1);
6135 _set_indentation(startindent);
6136 goto mapblck_finish;
6138 else if(first ==
'-')
6140 _c4dbgp(
"mapblck[RKEY]: maybe doc?");
6141 if(m_evt_handler->m_curr->line_contents.indentation == 0 && _is_doc_begin_token(rem))
6143 _c4dbgp(
"mapblck[RKEY]: end+start doc");
6144 _start_doc_suddenly();
6145 _line_progressed(3);
6146 _maybe_skip_whitespace_tokens();
6147 goto mapblck_finish;
6151 _c4err(
"parse error");
6154 else if(first ==
'.')
6156 _c4dbgp(
"mapblck[RKEY]: maybe end doc?");
6157 if(m_evt_handler->m_curr->line_contents.indentation == 0 && _is_doc_end_token(rem))
6159 _c4dbgp(
"mapblck[RKEY]: end doc");
6160 _end_doc_suddenly();
6161 _line_progressed(3);
6162 _maybe_skip_whitespace_tokens();
6163 goto mapblck_finish;
6167 _c4err(
"parse error");
6171 else if(first ==
'\t')
6173 _c4dbgp(
"mapblck[RKEY]: skip tabs");
6174 _maybe_skipchars(
'\t');
6178 _c4err(
"parse error");
6181 else if(has_any(
RKCL))
6183 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
6184 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
6185 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
6186 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
6190 if(m_evt_handler->m_curr->at_line_beginning())
6192 if(m_evt_handler->m_curr->indentation_eq())
6194 _c4dbgpf(
"mapblck[RKCL]: skip {} from indref", m_evt_handler->m_curr->indref);
6195 _line_progressed(m_evt_handler->m_curr->indref);
6196 rem = m_evt_handler->m_curr->line_contents.rem;
6200 else if(C4_UNLIKELY(m_evt_handler->m_curr->indentation_lt()))
6202 _c4err(
"invalid indentation");
6205 const char first = rem.str[0];
6206 _c4dbgpf(
"mapblck[RKCL]: '{}'", first);
6209 _c4dbgp(
"mapblck[RKCL]: found the colon");
6211 _line_progressed(1);
6212 _maybe_skip_whitespace_tokens();
6214 else if(first ==
'?')
6216 _c4dbgp(
"mapblck[RKCL]: got '?'. val was empty");
6217 _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_was_inside_qmrk);
6218 m_evt_handler->set_val_scalar_plain({});
6219 m_evt_handler->add_sibling();
6221 _line_progressed(1);
6222 _maybe_skip_whitespace_tokens();
6224 else if(first ==
'-')
6226 if(m_evt_handler->m_curr->indref == 0 || m_evt_handler->m_curr->line_contents.indentation == 0 || _is_doc_begin_token(rem))
6228 _c4dbgp(
"mapblck[RKCL]: end+start doc");
6229 _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, _is_doc_begin_token(rem));
6230 _start_doc_suddenly();
6231 _line_progressed(3);
6232 _maybe_skip_whitespace_tokens();
6233 goto mapblck_finish;
6237 _c4err(
"parse error");
6240 else if(first ==
'.')
6242 _c4dbgp(
"mapblck[RKCL]: maybe end doc?");
6243 csubstr rs = rem.sub(1);
6244 if(rs ==
".." || rs.begins_with(
".. "))
6246 _c4dbgp(
"mapblck[RKCL]: end+start doc");
6247 _end_doc_suddenly();
6248 _line_progressed(3);
6249 goto mapblck_finish;
6253 _c4err(
"parse error");
6256 else if(m_was_inside_qmrk)
6258 _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_eq());
6259 _c4dbgp(
"mapblck[RKCL]: missing :");
6260 m_evt_handler->set_val_scalar_plain({});
6261 m_evt_handler->add_sibling();
6262 m_was_inside_qmrk =
false;
6267 _c4err(
"parse error");
6270 else if(has_any(
RVAL))
6272 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
6273 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
6274 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
6275 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
6279 if(m_evt_handler->m_curr->at_line_beginning())
6281 _c4dbgpf(
"mapblck[RVAL]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
6282 m_evt_handler->m_curr->more_indented =
false;
6283 if(m_evt_handler->m_curr->indref ==
npos)
6285 _c4dbgpf(
"mapblck[RVAL]: setting indentation={}", m_evt_handler->m_parent->indref);
6286 _set_indentation(m_evt_handler->m_curr->line_contents.indentation);
6287 _line_progressed(m_evt_handler->m_curr->indref);
6288 rem = m_evt_handler->m_curr->line_contents.rem;
6292 else if(m_evt_handler->m_curr->indentation_eq())
6294 _c4dbgp(
"mapblck[RVAL]: skip indentation!");
6295 _line_progressed(m_evt_handler->m_curr->indref);
6296 rem = m_evt_handler->m_curr->line_contents.rem;
6324 else if(m_evt_handler->m_curr->indentation_gt())
6326 _c4dbgp(
"mapblck[RVAL]: more indented!");
6327 m_evt_handler->m_curr->more_indented =
true;
6328 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6329 rem = m_evt_handler->m_curr->line_contents.rem;
6333 else if(m_evt_handler->m_curr->indentation_lt())
6335 _c4dbgp(
"mapblck[RVAL]: smaller indentation!");
6336 _handle_indentation_pop_from_block_map();
6339 _c4dbgp(
"mapblck[RVAL]: still mapblck!");
6340 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6345 _c4dbgp(
"mapblck[RVAL]: no longer mapblck!");
6346 goto mapblck_finish;
6349 else if(m_evt_handler->m_curr->line_contents.indentation ==
npos)
6351 _c4dbgp(
"mapblck[RVAL]: empty line!");
6352 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
6359 const char first = rem.str[0];
6360 const size_t startline = m_evt_handler->m_curr->pos.line;
6361 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
6362 _c4dbgpf(
"mapblck[RVAL]: '{}'", first);
6366 _c4dbgp(
"mapblck[RVAL]: scanning single-quoted scalar");
6367 sc = _scan_scalar_squot();
6368 if(!_maybe_scan_following_colon())
6370 _c4dbgp(
"mapblck[RVAL]: set as val");
6371 _handle_annotations_before_blck_val_scalar();
6372 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
6373 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
6378 if(startindent != m_evt_handler->m_curr->indref)
6380 _c4dbgp(
"mapblck[RVAL]: start new block map, set scalar as key");
6381 _handle_annotations_before_start_mapblck(startline);
6383 m_evt_handler->begin_map_val_block();
6384 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6385 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
6386 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6387 _maybe_skip_whitespace_tokens();
6388 _set_indentation(m_evt_handler->m_curr->line_contents.indentation);
6394 _c4dbgp(
"mapblck[RVAL]: prev val empty+this is a key");
6395 m_evt_handler->set_val_scalar_plain({});
6396 m_evt_handler->add_sibling();
6397 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
6398 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6400 _maybe_skip_whitespace_tokens();
6404 else if(first ==
'"')
6406 _c4dbgp(
"mapblck[RVAL]: scanning double-quoted scalar");
6407 sc = _scan_scalar_dquot();
6408 if(!_maybe_scan_following_colon())
6410 _c4dbgp(
"mapblck[RVAL]: set as val");
6411 _handle_annotations_before_blck_val_scalar();
6412 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
6413 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
6418 if(startindent != m_evt_handler->m_curr->indref)
6420 _c4dbgp(
"mapblck[RVAL]: start new block map, set scalar as key");
6421 _handle_annotations_before_start_mapblck(startline);
6423 m_evt_handler->begin_map_val_block();
6424 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6425 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
6426 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6427 _maybe_skip_whitespace_tokens();
6428 _set_indentation(m_evt_handler->m_curr->line_contents.indentation);
6434 _c4dbgp(
"mapblck[RVAL]: prev val empty+this is a key");
6435 m_evt_handler->set_val_scalar_plain({});
6436 m_evt_handler->add_sibling();
6437 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
6438 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6440 _maybe_skip_whitespace_tokens();
6446 else if(first ==
'|')
6448 _c4dbgp(
"mapblck[RVAL]: scanning block-literal scalar");
6450 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6451 _handle_annotations_before_blck_val_scalar();
6452 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
6453 m_evt_handler->set_val_scalar_literal(maybe_filtered);
6456 else if(first ==
'>')
6458 _c4dbgp(
"mapblck[RVAL]: scanning block-folded scalar");
6460 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6461 _handle_annotations_before_blck_val_scalar();
6462 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
6463 m_evt_handler->set_val_scalar_folded(maybe_filtered);
6466 else if(_scan_scalar_plain_map_blck(&sc))
6468 _c4dbgp(
"mapblck[RVAL]: plain scalar.");
6469 if(!_maybe_scan_following_colon())
6471 _c4dbgp(
"mapblck[RVAL]: set as val");
6472 _handle_annotations_before_blck_val_scalar();
6473 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
6474 m_evt_handler->set_val_scalar_plain(maybe_filtered);
6479 if(startindent != m_evt_handler->m_curr->indref)
6481 _c4dbgpf(
"mapblck[RVAL]: start new block map, set scalar as key {}", m_evt_handler->m_curr->indref);
6483 _handle_annotations_before_start_mapblck(startline);
6484 m_evt_handler->begin_map_val_block();
6485 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6486 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
6487 m_evt_handler->set_key_scalar_plain(maybe_filtered);
6488 _maybe_skip_whitespace_tokens();
6489 _set_indentation(m_evt_handler->m_curr->line_contents.indentation);
6495 _c4dbgp(
"mapblck[RVAL]: prev val empty+this is a key");
6496 _handle_annotations_before_blck_val_scalar();
6497 m_evt_handler->set_val_scalar_plain({});
6498 m_evt_handler->add_sibling();
6499 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
6500 m_evt_handler->set_key_scalar_plain(maybe_filtered);
6502 _maybe_skip_whitespace_tokens();
6506 else if(first ==
'-')
6510 _c4dbgp(
"mapblck[RVAL]: start val seqblck");
6512 _handle_annotations_before_blck_val_scalar();
6513 m_evt_handler->begin_seq_val_block();
6515 _set_indentation(startindent);
6516 _line_progressed(1);
6517 _maybe_skip_whitespace_tokens();
6518 goto mapblck_finish;
6520 else if(m_evt_handler->m_curr->indref == 0 || m_evt_handler->m_curr->line_contents.indentation == 0 || _is_doc_begin_token(rem))
6522 _c4dbgp(
"mapblck[RVAL]: end+start doc");
6523 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, _is_doc_begin_token(rem));
6524 _start_doc_suddenly();
6525 _line_progressed(3);
6526 _maybe_skip_whitespace_tokens();
6527 goto mapblck_finish;
6531 _c4err(
"parse error");
6534 else if(first ==
'[')
6536 _c4dbgp(
"mapblck[RVAL]: start val seqflow");
6538 _handle_annotations_before_blck_val_scalar();
6539 m_evt_handler->begin_seq_val_flow();
6541 _set_indentation(m_evt_handler->m_curr->indref + 1u);
6542 _line_progressed(1);
6543 goto mapblck_finish;
6545 else if(first ==
'{')
6547 _c4dbgp(
"mapblck[RVAL]: start val mapflow");
6549 _handle_annotations_before_blck_val_scalar();
6550 m_evt_handler->begin_map_val_flow();
6552 m_evt_handler->m_curr->scalar_col = m_evt_handler->m_curr->line_contents.indentation;
6553 _set_indentation(m_evt_handler->m_curr->indref + 1u);
6554 _line_progressed(1);
6555 goto mapblck_finish;
6557 else if(first ==
'*')
6559 csubstr ref = _scan_ref_map();
6560 _c4dbgpf(
"mapblck[RVAL]: ref! [{}]~~~{}~~~", ref.len, ref);
6561 if(startindent == m_evt_handler->m_curr->indref)
6563 _c4dbgpf(
"mapblck[RVAL]: same indentation {}", startindent);
6564 m_evt_handler->set_val_ref(ref);
6569 _c4dbgpf(
"mapblck[RVAL]: larger indentation {}>{}", startindent, m_evt_handler->m_curr->indref);
6570 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, startindent > m_evt_handler->m_curr->indref);
6571 if(_maybe_scan_following_colon())
6573 _c4dbgp(
"mapblck[RVAL]: start child map, block");
6575 _handle_annotations_before_blck_val_scalar();
6576 m_evt_handler->begin_map_val_block();
6577 m_evt_handler->set_key_ref(ref);
6578 _set_indentation(startindent);
6584 _c4dbgp(
"mapblck[RVAL]: was val ref");
6585 _handle_annotations_before_blck_val_scalar();
6586 m_evt_handler->set_val_ref(ref);
6590 _maybe_skip_whitespace_tokens();
6592 else if(first ==
'&')
6594 csubstr anchor = _scan_anchor();
6595 _c4dbgpf(
"mapblck[RVAL]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
6596 if(startindent == m_evt_handler->m_curr->indref)
6598 _c4dbgp(
"mapblck[RVAL]: anchor for next key. val is missing!");
6599 m_evt_handler->set_val_scalar_plain({});
6600 m_evt_handler->add_sibling();
6605 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
6607 else if(first ==
'!')
6609 csubstr tag = _scan_tag();
6610 _c4dbgpf(
"mapblck[RVAL]: tag! [{}]~~~{}~~~", tag.len, tag);
6611 if(startindent == m_evt_handler->m_curr->indref)
6613 _c4dbgp(
"mapblck[RVAL]: tag for next key. val is missing!");
6614 _handle_annotations_before_blck_val_scalar();
6615 m_evt_handler->set_val_scalar_plain({});
6616 m_evt_handler->add_sibling();
6621 _add_annotation(&m_pending_tags, tag, startindent, startline);
6623 else if(first ==
'?')
6625 if(startindent == m_evt_handler->m_curr->indref)
6627 _c4dbgp(
"mapblck[RVAL]: got '?'. val was empty");
6628 _handle_annotations_before_blck_val_scalar();
6629 m_evt_handler->set_val_scalar_plain({});
6630 m_evt_handler->add_sibling();
6633 else if(startindent > m_evt_handler->m_curr->indref)
6635 _c4dbgp(
"mapblck[RVAL]: start val mapblck");
6637 _handle_annotations_before_blck_val_scalar();
6638 m_evt_handler->begin_map_val_block();
6640 _set_indentation(startindent);
6644 _c4err(
"parse error");
6646 m_was_inside_qmrk =
true;
6647 _line_progressed(1);
6648 _maybe_skip_whitespace_tokens();
6651 else if(first ==
':')
6653 if(startindent == m_evt_handler->m_curr->indref)
6655 _c4dbgp(
"mapblck[RVAL]: got ':'. val was empty, next key as well");
6656 m_evt_handler->set_val_scalar_plain({});
6657 m_evt_handler->add_sibling();
6658 m_evt_handler->set_key_scalar_plain({});
6659 _line_progressed(1);
6660 _maybe_skip_whitespace_tokens();
6665 _c4err(
"parse error");
6668 else if(first ==
'.')
6670 _c4dbgp(
"mapblck[RVAL]: maybe doc?");
6671 csubstr rs = rem.sub(1);
6672 if(rs ==
".." || rs.begins_with(
".. "))
6674 _c4dbgp(
"seqblck[RVAL]: end doc expl");
6675 _end_doc_suddenly();
6676 _line_progressed(3);
6677 _maybe_skip_whitespace_tokens();
6678 goto mapblck_finish;
6682 _c4err(
"parse error");
6686 else if(first ==
'\t')
6688 _c4dbgp(
"mapblck[RVAL]: skip tabs");
6689 _maybe_skipchars(
'\t');
6693 _c4err(
"parse error");
6696 else if(has_any(
RNXT))
6698 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
6699 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
6700 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
6701 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
6705 if(m_evt_handler->m_curr->at_line_beginning())
6707 _c4dbgpf(
"mapblck[RNXT]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
6708 if(m_evt_handler->m_curr->indentation_eq())
6710 _c4dbgpf(
"mapblck[RNXT]: skip {} from indref", m_evt_handler->m_curr->indref);
6711 _line_progressed(m_evt_handler->m_curr->indref);
6712 _c4dbgp(
"mapblck[RNXT]: speculatively expect next keyval");
6713 m_evt_handler->add_sibling();
6717 else if(m_evt_handler->m_curr->indentation_lt())
6719 _c4dbgp(
"mapblck[RNXT]: smaller indentation!");
6720 _handle_indentation_pop_from_block_map();
6723 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6726 _c4dbgp(
"mapblck[RNXT]: speculatively expect next keyval");
6727 m_evt_handler->add_sibling();
6734 goto mapblck_finish;
6741 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.len > 0);
6742 const char first = rem.str[0];
6743 _c4dbgpf(
"mapblck[RNXT]: '{}'", _c4prc(first));
6746 if(m_evt_handler->m_curr->more_indented)
6748 _c4dbgp(
"mapblck[RNXT]: start child block map");
6749 C4_NOT_IMPLEMENTED();
6751 _line_progressed(1);
6752 _set_indentation(m_evt_handler->m_curr->scalar_col);
6753 m_evt_handler->m_curr->more_indented =
false;
6758 _c4err(
"parse error");
6761 else if(first ==
' ')
6763 _c4dbgp(
"mapblck[RNXT]: skip spaces");
6764 _maybe_skip_whitespace_tokens();
6768 _c4err(
"parse error");
6771 else if(has_any(
QMRK))
6773 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
6774 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
6775 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
6776 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
6780 if(m_evt_handler->m_curr->at_line_beginning())
6782 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.indentation !=
npos);
6783 if(m_evt_handler->m_curr->indentation_eq())
6785 _c4dbgpf(
"mapblck[QMRK]: skip {} from indref", m_evt_handler->m_curr->indref);
6786 _line_progressed(m_evt_handler->m_curr->indref);
6787 rem = m_evt_handler->m_curr->line_contents.rem;
6791 else if(m_evt_handler->m_curr->indentation_lt())
6793 _c4dbgp(
"mapblck[QMRK]: smaller indentation!");
6794 _handle_indentation_pop_from_block_map();
6795 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6798 _c4dbgp(
"mapblck[QMRK]: still mapblck!");
6799 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
QMRK));
6800 rem = m_evt_handler->m_curr->line_contents.rem;
6806 _c4dbgp(
"mapblck[QMRK]: no longer mapblck!");
6807 goto mapblck_finish;
6813 _c4dbgp(
"mapblck[QMRK]: larger indentation !");
6814 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6815 rem = m_evt_handler->m_curr->line_contents.rem;
6823 const char first = rem.str[0];
6824 const size_t startline = m_evt_handler->m_curr->pos.line;
6825 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
6826 _c4dbgpf(
"mapblck[QMRK]: '{}'", first);
6830 _c4dbgp(
"mapblck[QMRK]: scanning single-quoted scalar");
6831 sc = _scan_scalar_squot();
6832 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
6833 if(!_maybe_scan_following_colon())
6835 _c4dbgp(
"mapblck[QMRK]: set as key");
6836 _handle_annotations_before_blck_key_scalar();
6837 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6842 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), set scalar as key");
6844 _handle_annotations_before_start_mapblck_as_key();
6845 m_evt_handler->begin_map_key_block();
6846 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6847 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6848 _maybe_skip_whitespace_tokens();
6849 _set_indentation(startindent);
6854 else if(first ==
'"')
6856 _c4dbgp(
"mapblck[QMRK]: scanning double-quoted scalar");
6857 sc = _scan_scalar_dquot();
6858 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
6859 if(!_maybe_scan_following_colon())
6861 _c4dbgp(
"mapblck[QMRK]: set as key");
6862 _handle_annotations_before_blck_key_scalar();
6863 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6868 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), set scalar as key");
6870 _handle_annotations_before_start_mapblck_as_key();
6871 m_evt_handler->begin_map_key_block();
6872 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6873 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6874 _maybe_skip_whitespace_tokens();
6875 _set_indentation(startindent);
6880 else if(first ==
'|')
6882 _c4dbgp(
"mapblck[QMRK]: scanning block-literal scalar");
6884 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6885 csubstr maybe_filtered = _maybe_filter_key_scalar_literal(sb);
6886 _handle_annotations_before_blck_key_scalar();
6887 m_evt_handler->set_key_scalar_literal(maybe_filtered);
6890 else if(first ==
'>')
6892 _c4dbgp(
"mapblck[QMRK]: scanning block-literal scalar");
6894 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6895 csubstr maybe_filtered = _maybe_filter_key_scalar_folded(sb);
6896 _handle_annotations_before_blck_key_scalar();
6897 m_evt_handler->set_key_scalar_folded(maybe_filtered);
6900 else if(_scan_scalar_plain_map_blck(&sc))
6902 _c4dbgp(
"mapblck[QMRK]: plain scalar");
6903 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
6904 if(!_maybe_scan_following_colon())
6906 _c4dbgp(
"mapblck[QMRK]: set as key");
6907 _handle_annotations_before_blck_key_scalar();
6908 m_evt_handler->set_key_scalar_plain(maybe_filtered);
6913 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), set scalar as key");
6915 _handle_annotations_before_start_mapblck_as_key();
6916 m_evt_handler->begin_map_key_block();
6917 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6918 m_evt_handler->set_key_scalar_plain(maybe_filtered);
6919 _maybe_skip_whitespace_tokens();
6920 _set_indentation(startindent);
6925 else if(first ==
':')
6927 if(startindent == m_evt_handler->m_curr->indref)
6929 _c4dbgp(
"mapblck[QMRK]: empty key");
6931 _handle_annotations_before_blck_key_scalar();
6932 m_evt_handler->set_key_scalar_plain({});
6933 _line_progressed(1);
6934 _maybe_skip_whitespace_tokens();
6938 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), empty key");
6940 _handle_annotations_before_start_mapblck_as_key();
6941 m_evt_handler->begin_map_key_block();
6942 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6943 m_evt_handler->set_key_scalar_plain({});
6944 _line_progressed(1);
6945 _maybe_skip_whitespace_tokens();
6946 _set_indentation(startindent);
6951 else if(first ==
'*')
6953 csubstr ref = _scan_ref_map();
6954 _c4dbgpf(
"mapblck[QMRK]: key ref! [{}]~~~{}~~~", ref.len, ref);
6955 if(!_maybe_scan_following_colon())
6957 _c4dbgp(
"mapblck[QMRK]: set ref as key");
6958 _handle_annotations_before_blck_key_scalar();
6959 m_evt_handler->set_key_ref(ref);
6964 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), set ref as key");
6966 _handle_annotations_before_blck_key_scalar();
6967 m_evt_handler->begin_map_key_block();
6968 m_evt_handler->set_key_ref(ref);
6969 _set_indentation(startindent);
6973 _maybe_skip_whitespace_tokens();
6975 else if(first ==
'&')
6977 csubstr anchor = _scan_anchor();
6978 _c4dbgpf(
"mapblck[QMRK]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
6979 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
6981 else if(first ==
'!')
6983 csubstr tag = _scan_tag();
6984 _c4dbgpf(
"mapblck[QMRK]: key tag! [{}]~~~{}~~~", tag.len, tag);
6985 _add_annotation(&m_pending_tags, tag, startindent, startline);
6987 else if(first ==
'-')
6989 _c4dbgp(
"mapblck[QMRK]: maybe doc?");
6990 csubstr rs = rem.sub(1);
6991 if(rs ==
"--" || rs.begins_with(
"-- "))
6993 _c4dbgp(
"mapblck[QMRK]: end+start doc");
6994 _start_doc_suddenly();
6995 _line_progressed(3);
6999 _c4dbgp(
"mapblck[QMRK]: start child seqblck (!)");
7001 m_evt_handler->begin_seq_key_block();
7003 _set_indentation(startindent);
7004 _line_progressed(1);
7006 _maybe_skip_whitespace_tokens();
7007 goto mapblck_finish;
7009 else if(first ==
'[')
7011 _c4dbgp(
"mapblck[QMRK]: start child seqflow (!)");
7013 m_evt_handler->begin_seq_key_flow();
7015 _set_indentation(m_evt_handler->m_parent->indref);
7016 _line_progressed(1);
7017 goto mapblck_finish;
7019 else if(first ==
'{')
7021 _c4dbgp(
"mapblck[QMRK]: start child mapblck (!)");
7023 m_evt_handler->begin_map_key_flow();
7025 _set_indentation(m_evt_handler->m_parent->indref);
7026 _line_progressed(1);
7027 goto mapblck_finish;
7029 else if(first ==
'?')
7031 _c4dbgp(
"mapblck[QMRK]: another QMRK '?'");
7032 m_evt_handler->set_key_scalar_plain({});
7033 m_evt_handler->set_val_scalar_plain({});
7034 m_evt_handler->add_sibling();
7035 _line_progressed(1);
7037 else if(first ==
'.')
7039 _c4dbgp(
"mapblck[QMRK]: maybe end doc?");
7040 csubstr rs = rem.sub(1);
7041 if(rs ==
".." || rs.begins_with(
".. "))
7043 _c4dbgp(
"mapblck[QMRK]: end+start doc");
7044 _end_doc_suddenly();
7045 _line_progressed(3);
7046 goto mapblck_finish;
7050 _c4err(
"parse error");
7055 _c4err(
"parse error");
7060 _c4dbgt(
"mapblck: again", 0);
7061 if(_finished_line())
7065 if(_finished_file())
7067 _c4dbgp(
"mapblck: file finished!");
7069 goto mapblck_finish;
7076 _c4dbgp(
"mapblck: finish");
// Handles the top-level "unknown" state when parsing JSON: inspects the
// remaining text of the current line and either opens a flow sequence ('['),
// opens a flow map ('{'), or scans a scalar value (double-quoted or plain).
// A scalar followed by a colon is rejected with a parse error.
// NOTE(review): several lines of this function (braces, some conditions and
// declarations) are not visible in this view; the comments below describe
// only the calls that are visible.
7082 template<
class EventHandler>
7083 void ParseEngine<EventHandler>::_handle_unk_json()
7085 _c4dbgpf(
"handle_unk_json indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
// Preconditions on the parser state flags: none of RNXT/RSEQ/RMAP may be set,
// and RTOP must be set.
7087 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT|
RSEQ|
RMAP));
7088 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RTOP));
7090 _maybe_skip_comment();
7091 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// Skip leading spaces/tabs: advance to the first non-blank character, or to
// the end of the remaining text when there is no such character.
7095 size_t pos = rem.first_not_of(
" \t");
7098 pos = pos !=
npos ? pos : rem.len;
7099 _c4dbgpf(
"skipping indentation of {}", pos);
7100 _line_progressed(pos);
// Re-read the remaining text after the line position has advanced.
7101 rem = m_evt_handler->m_curr->line_contents.rem;
7104 _c4dbgpf(
"rem is now [{}]~~~{}~~~", rem.len, rem);
// '[' opens a flow sequence as the value.
7107 if(rem.begins_with(
'['))
7109 _c4dbgp(
"it's a seq");
7110 m_evt_handler->check_trailing_doc_token();
7112 m_evt_handler->begin_seq_val_flow();
// The indentation is set to the column of the opening bracket.
7114 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7115 m_doc_empty =
false;
// Consume the '[' character.
7116 _line_progressed(1);
// '{' opens a flow map as the value.
7118 else if(rem.begins_with(
'{'))
7120 _c4dbgp(
"it's a map");
7121 m_evt_handler->check_trailing_doc_token();
7123 m_evt_handler->begin_map_val_flow();
7125 m_doc_empty =
false;
// The indentation is set to the column of the opening brace.
7126 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
// Consume the '{' character.
7127 _line_progressed(1);
// Otherwise the content is treated as a scalar.
7131 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(
SSCL));
7132 _maybe_skip_whitespace_tokens();
7133 csubstr s = m_evt_handler->m_curr->line_contents.rem;
7136 const size_t startindent = m_evt_handler->m_curr->line_contents.indentation;
7137 const char first = s.str[0];
// Double-quoted scalar (the guarding condition on `first` is not visible here).
7141 _c4dbgp(
"runk_json: scanning double-quoted scalar");
7142 m_evt_handler->check_trailing_doc_token();
7145 m_doc_empty =
false;
7146 sc = _scan_scalar_dquot();
7147 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
// No colon follows the scalar: it becomes the value.
7148 if(!_maybe_scan_following_colon())
7150 _c4dbgp(
"runk_json: set as val");
7151 _handle_annotations_before_blck_val_scalar();
7152 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
// NOTE(review): presumably the else-branch of the colon check above, i.e. a
// colon after a top-level scalar is an error here -- confirm against the full file.
7156 _c4err(
"parse error");
// Plain (unquoted) scalar.
7159 else if(_scan_scalar_plain_unk(&sc))
7161 _c4dbgp(
"runk_json: got a plain scalar");
7162 m_evt_handler->check_trailing_doc_token();
7165 m_doc_empty =
false;
7166 if(!_maybe_scan_following_colon())
7168 _c4dbgp(
"runk_json: set as val");
7169 _handle_annotations_before_blck_val_scalar();
7170 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
7171 m_evt_handler->set_val_scalar_plain(maybe_filtered);
// NOTE(review): presumably reached when a colon does follow the plain scalar.
7175 _c4err(
"parse error");
// NOTE(review): presumably the fallback when nothing above matched.
7180 _c4err(
"parse error");
// Handles the top-level "unknown" state when parsing YAML: inspects the
// remaining text of the current line and decides what it starts. Visible
// cases: document begin/end tokens and '%' directives (only at column zero,
// at the beginning of a line), flow seq/map, block seq ('-'), complex key
// ('?'), empty key (':'), anchors ('&'), references ('*'), tags ('!'), and
// scalars (single-quoted, double-quoted, literal '|', folded '>', plain).
// A scalar followed by a colon starts a new block map with that scalar as
// its first key; otherwise the scalar becomes the value.
// NOTE(review): several lines of this function (braces, some conditions and
// declarations) are not visible in this view; the comments below describe
// only the calls that are visible.
7188 template<
class EventHandler>
7189 void ParseEngine<EventHandler>::_handle_unk()
7191 _c4dbgpf(
"handle_unk indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
// Preconditions on the parser state flags: none of RNXT/RSEQ/RMAP may be set,
// and RTOP must be set.
7193 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT|
RSEQ|
RMAP));
7194 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RTOP));
7196 _maybe_skip_comment();
7197 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// Skip leading spaces/tabs: advance to the first non-blank character, or to
// the end of the remaining text when there is no such character.
7201 size_t pos = rem.first_not_of(
" \t");
7204 pos = pos !=
npos ? pos : rem.len;
7205 _c4dbgpf(
"skipping {} whitespace characters", pos);
7206 _line_progressed(pos);
// Re-read the remaining text after the line position has advanced.
7207 rem = m_evt_handler->m_curr->line_contents.rem;
7210 _c4dbgpf(
"rem is now [{}]~~~{}~~~", rem.len, rem);
// Document tokens and directives are only considered when the line has zero
// indentation and the parser is at the beginning of the line.
7213 if(m_evt_handler->m_curr->line_contents.indentation == 0u && _at_line_begin())
7215 const char first = rem.str[0];
7216 _c4dbgp(
"rtop: zero indent + at line begin");
// Possible document-begin token (the guarding condition on `first` is not
// visible here).
7219 _c4dbgp(
"rtop: suspecting doc");
7220 if(_is_doc_begin_token(rem))
7222 _c4dbgp(
"rtop: begin doc");
7225 _set_indentation(0);
// Consume the three characters of the token, then any whitespace after it.
7227 _line_progressed(3u);
7228 _maybe_skip_whitespace_tokens();
// Possible document-end token.
7232 else if(first ==
'.')
7234 _c4dbgp(
"rtop: suspecting doc end");
7235 if(_is_doc_end_token(rem))
7237 _c4dbgp(
"rtop: end doc");
7244 _c4dbgp(
"rtop: ignore end doc");
7247 _line_progressed(3u);
7248 _maybe_skip_whitespace_tokens();
// '%' introduces a directive. It is an error when the current document is
// not empty and the NDOC flag is not set.
7252 else if(first ==
'%')
7254 _c4dbgpf(
"directive: {}", rem);
7255 if(C4_UNLIKELY(!m_doc_empty && has_none(
NDOC)))
7256 _RYML_CB_ERR(m_evt_handler->m_stack.m_callbacks,
"need document footer before directives");
7257 _handle_directive(rem);
// General dispatch on the first remaining character.
7263 char first = rem.str[0];
// Flow sequence (per the debug messages below; the guarding condition on
// `first` is not visible here).
7267 m_evt_handler->check_trailing_doc_token();
7269 m_doc_empty =
false;
7270 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col(rem);
// Common case: the flow seq is a value.
7271 if(C4_LIKELY( ! _annotations_require_key_container()))
7273 _c4dbgp(
"it's a seq, flow");
7274 _handle_annotations_before_blck_val_scalar();
7275 m_evt_handler->begin_seq_val_flow();
7277 _set_indentation(startindent);
// Otherwise a block map is started first and the flow seq becomes its key.
7281 _c4dbgp(
"start new block map, set flow seq as key (!)");
7282 _handle_annotations_before_start_mapblck(m_evt_handler->m_curr->pos.line);
7283 m_evt_handler->begin_map_val_block();
7285 _handle_annotations_and_indentation_after_start_mapblck(startindent, m_evt_handler->m_curr->pos.line);
7286 m_evt_handler->begin_seq_key_flow();
7288 _set_indentation(startindent);
// Consume the opening character.
7290 _line_progressed(1);
// '{' opens a flow map, as a value or as the key of a new block map.
7292 else if(first ==
'{')
7294 m_evt_handler->check_trailing_doc_token();
7296 m_doc_empty =
false;
7297 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col(rem);
7298 if(C4_LIKELY( ! _annotations_require_key_container()))
7300 _c4dbgp(
"it's a map, flow");
7301 _handle_annotations_before_blck_val_scalar();
7302 m_evt_handler->begin_map_val_flow();
7304 _set_indentation(startindent);
7308 _c4dbgp(
"start new block map, set flow map as key (!)");
7309 _handle_annotations_before_start_mapblck(m_evt_handler->m_curr->pos.line);
7310 m_evt_handler->begin_map_val_block();
7312 _handle_annotations_and_indentation_after_start_mapblck(startindent, m_evt_handler->m_curr->pos.line);
7313 m_evt_handler->begin_map_key_flow();
7315 _set_indentation(startindent);
// Consume the '{' character.
7317 _line_progressed(1);
// '-' recognized as a block token starts a block sequence.
7319 else if(first ==
'-' && _is_blck_token(rem))
7321 _c4dbgp(
"it's a seq, block");
7322 m_evt_handler->check_trailing_doc_token();
7324 _handle_annotations_before_blck_val_scalar();
7325 m_evt_handler->begin_seq_val_block();
7327 m_doc_empty =
false;
// The indentation is set to the column of the '-'.
7328 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7329 _line_progressed(1);
7330 _maybe_skip_whitespace_tokens();
// '?' recognized as a block token starts a block map whose key is complex.
7332 else if(first ==
'?' && _is_blck_token(rem))
7334 _c4dbgp(
"it's a map + this key is complex");
7335 m_evt_handler->check_trailing_doc_token();
7337 _handle_annotations_before_blck_val_scalar();
7338 m_evt_handler->begin_map_val_block();
7340 m_doc_empty =
false;
7341 m_was_inside_qmrk =
true;
7342 _save_indentation();
7343 _line_progressed(1);
7344 _maybe_skip_whitespace_tokens();
// ':' recognized as a block token. Two alternatives are visible below: a map
// with an empty key, or the previous value reinterpreted as the first key of
// a new block map. The condition selecting between them is not visible here.
7346 else if(first ==
':' && _is_blck_token(rem))
7350 _c4dbgp(
"it's a map with an empty key");
7351 m_evt_handler->check_trailing_doc_token();
7353 _handle_annotations_before_blck_val_scalar();
7354 m_evt_handler->begin_map_val_block();
7355 m_evt_handler->set_key_scalar_plain({});
7356 m_doc_empty =
false;
7357 _save_indentation();
7361 _c4dbgp(
"actually prev val is a key!");
// The indentation reference is read before the handler call and applied
// again afterwards.
7362 size_t prev_indentation = m_evt_handler->m_curr->indref;
7363 m_evt_handler->actually_val_is_first_key_of_new_map_block();
7364 _set_indentation(prev_indentation);
// Consume the ':' character and any whitespace after it.
7367 _line_progressed(1);
7368 _maybe_skip_whitespace_tokens();
// '&' anchor: recorded as pending, together with the column and line at
// which it was found; the indentation is set to that column.
7370 else if(first ==
'&')
7372 csubstr anchor = _scan_anchor();
7373 _c4dbgpf(
"anchor! [{}]~~~{}~~~", anchor.len, anchor);
7374 m_evt_handler->check_trailing_doc_token();
7376 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
7377 const size_t line = m_evt_handler->m_curr->pos.line;
7378 _add_annotation(&m_pending_anchors, anchor, indentation, line);
7379 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7380 m_doc_empty =
false;
// '*' reference: either the value itself, or (when a colon follows) the
// first key of a new block map.
7382 else if(first ==
'*')
7384 csubstr ref = _scan_ref_map();
7385 _c4dbgpf(
"ref! [{}]~~~{}~~~", ref.len, ref);
7386 m_evt_handler->check_trailing_doc_token();
7388 m_doc_empty =
false;
7389 if(!_maybe_scan_following_colon())
7391 _c4dbgp(
"runk: set val ref");
7392 _handle_annotations_before_blck_val_scalar();
7393 m_evt_handler->set_val_ref(ref);
7397 _c4dbgp(
"runk: start new block map, set ref as key");
7398 const size_t startindent = m_evt_handler->m_curr->line_contents.indentation;
7399 const size_t startline = m_evt_handler->m_curr->pos.line;
7400 _handle_annotations_before_start_mapblck(startline);
7401 m_evt_handler->begin_map_val_block();
7402 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7403 m_evt_handler->set_key_ref(ref);
7404 _maybe_skip_whitespace_tokens();
7405 _set_indentation(startindent);
// '!' tag: recorded as pending, together with the column and line at which
// it was found.
7409 else if(first ==
'!')
7411 csubstr tag = _scan_tag();
7412 _c4dbgpf(
"unk: val tag! [{}]~~~{}~~~", tag.len, tag);
7415 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
7416 const size_t line = m_evt_handler->m_curr->pos.line;
7417 _add_annotation(&m_pending_tags, tag, indentation, line);
// Anything else is treated as a scalar.
7421 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(
SSCL));
7422 _maybe_skip_whitespace_tokens();
7423 csubstr s = m_evt_handler->m_curr->line_contents.rem;
// Start position of the scalar, used if it turns out to be a key.
7426 const size_t startindent = m_evt_handler->m_curr->line_contents.indentation;
7427 const size_t startline = m_evt_handler->m_curr->pos.line;
// Single-quoted scalar (the guarding condition on `first` is not visible here).
7432 _c4dbgp(
"runk: scanning single-quoted scalar");
7433 m_evt_handler->check_trailing_doc_token();
7436 m_doc_empty =
false;
7437 sc = _scan_scalar_squot();
// No colon follows: the scalar is the value.
7438 if(!_maybe_scan_following_colon())
7440 _c4dbgp(
"runk: set as val");
7441 _handle_annotations_before_blck_val_scalar();
7442 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
7443 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
// A colon follows: start a block map and use the scalar as its first key.
7447 _c4dbgp(
"runk: start new block map, set scalar as key");
7448 _handle_annotations_before_start_mapblck(startline);
7449 m_evt_handler->begin_map_val_block();
7450 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7451 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
7452 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7453 _maybe_skip_whitespace_tokens();
7454 _set_indentation(startindent);
// Double-quoted scalar: same value-or-key handling as above.
7458 else if(first ==
'"')
7460 _c4dbgp(
"runk: scanning double-quoted scalar");
7461 m_evt_handler->check_trailing_doc_token();
7464 m_doc_empty =
false;
7465 sc = _scan_scalar_dquot();
7466 if(!_maybe_scan_following_colon())
7468 _c4dbgp(
"runk: set as val");
7469 _handle_annotations_before_blck_val_scalar();
7470 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
7471 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
7475 _c4dbgp(
"runk: start new block map, set double-quoted scalar as key");
7476 _handle_annotations_before_start_mapblck(startline);
7477 m_evt_handler->begin_map_val_block();
7478 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7479 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
7480 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7481 _maybe_skip_whitespace_tokens();
7482 _set_indentation(startindent);
// '|' block literal scalar: accepted as a value only.
7486 else if(first ==
'|')
7488 _c4dbgp(
"runk: scanning block-literal scalar");
7489 m_evt_handler->check_trailing_doc_token();
7492 m_doc_empty =
false;
7494 _scan_block(&sb, startindent);
7495 if(C4_LIKELY(!_maybe_scan_following_colon()))
7497 _c4dbgp(
"runk: set as val");
7498 _handle_annotations_before_blck_val_scalar();
7499 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
7500 m_evt_handler->set_val_scalar_literal(maybe_filtered);
// NOTE(review): presumably reached when a colon follows the block scalar.
7504 _c4err(
"block literal keys must be enclosed in '?'");
// '>' block folded scalar: accepted as a value only.
7507 else if(first ==
'>')
7509 _c4dbgp(
"runk: scanning block-folded scalar");
7510 m_evt_handler->check_trailing_doc_token();
7513 m_doc_empty =
false;
7515 _scan_block(&sb, startindent);
7516 if(C4_LIKELY(!_maybe_scan_following_colon()))
7518 _c4dbgp(
"runk: set as val");
7519 _handle_annotations_before_blck_val_scalar();
7520 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
7521 m_evt_handler->set_val_scalar_folded(maybe_filtered);
// NOTE(review): presumably reached when a colon follows the block scalar.
7525 _c4err(
"block folded keys must be enclosed in '?'");
// Plain (unquoted) scalar: value, or first key of a new block map when a
// colon follows.
7528 else if(_scan_scalar_plain_unk(&sc))
7530 _c4dbgp(
"runk: got a plain scalar");
7531 m_evt_handler->check_trailing_doc_token();
7534 m_doc_empty =
false;
7535 if(!_maybe_scan_following_colon())
7537 _c4dbgp(
"runk: set as val");
7538 _handle_annotations_before_blck_val_scalar();
7539 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
7540 m_evt_handler->set_val_scalar_plain(maybe_filtered);
7544 _c4dbgp(
"runk: start new block map, set scalar as key");
7545 _handle_annotations_before_start_mapblck(startline);
7546 m_evt_handler->begin_map_val_block();
7547 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7548 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
7549 m_evt_handler->set_key_scalar_plain(maybe_filtered);
7550 _maybe_skip_whitespace_tokens();
7551 _set_indentation(startindent);
// Handles the USTY state, in which the kind of the top-level container is
// not yet settled. Per the debug labels, three situations are handled:
//  - usty[RSEQ]: the incoming content must be a sequence (flow '[' or block
//    '-'); anything else is an error.
//  - usty[RMAP]: the incoming content must be a map (flow, complex key '?',
//    empty key ':', or a scalar/reference followed by a colon); a bare value
//    or a sequence is an error.
//  - usty[UNK]: any container or scalar is accepted, similarly to the
//    regular top-level unknown handler.
// NOTE(review): several lines of this function (braces, some conditions and
// declarations) are not visible in this view; the comments below describe
// only the calls that are visible.
7561 template<
class EventHandler>
7562 C4_COLD
void ParseEngine<EventHandler>::_handle_usty()
// Both arguments get their own placeholder, as in the sibling handlers.
7564 _c4dbgpf(
"handle_usty indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
// Precondition: neither BLCK nor FLOW may be set.
7566 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
BLCK|
FLOW));
7568 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
7571 _c4dbgp(
"usty[RNXT]: finishing!");
7576 _maybe_skip_comment();
7577 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// Skip leading spaces/tabs: advance to the first non-blank character, or to
// the end of the remaining text when there is no such character.
7581 size_t pos = rem.first_not_of(
" \t");
7584 pos = pos !=
npos ? pos : rem.len;
7585 _c4dbgpf(
"skipping indentation of {}", pos);
7586 _line_progressed(pos);
// Re-read the remaining text after the line position has advanced.
7587 rem = m_evt_handler->m_curr->line_contents.rem;
7590 _c4dbgpf(
"rem is now [{}]~~~{}~~~", rem.len, rem);
7593 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.len > 0);
7594 size_t startindent = m_evt_handler->m_curr->line_contents.indentation;
7595 char first = rem.str[0];
// ---- usty[RSEQ] (the guarding condition is not visible here) ----
7598 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(
RMAP));
7599 _c4dbgpf(
"usty[RSEQ]: first='{}'", _c4prc(first));
// Flow sequence.
7602 _c4dbgp(
"usty[RSEQ]: it's a flow seq. merging it");
7604 m_evt_handler->_push();
7606 _set_indentation(startindent);
7607 _line_progressed(1);
7608 _maybe_skip_whitespace_tokens();
// Block sequence.
7610 else if(first ==
'-' && _is_blck_token(rem))
7612 _c4dbgp(
"usty[RSEQ]: it's a block seq. merging it");
7614 m_evt_handler->_push();
7616 _set_indentation(startindent);
7617 _line_progressed(1);
7618 _maybe_skip_whitespace_tokens();
// Anything that is not a sequence is rejected.
7622 _c4err(
"can only parse a seq into an existing seq");
// ---- usty[RMAP] ----
7625 else if(has_any(
RMAP))
7627 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(
RSEQ));
7628 _c4dbgpf(
"usty[RMAP]: first='{}'", _c4prc(first));
// Flow map (the guarding condition on `first` is not visible here).
7631 _c4dbgp(
"usty[RMAP]: it's a flow map. merging it");
7633 _handle_annotations_before_blck_val_scalar();
7634 m_evt_handler->_push();
7636 _set_indentation(startindent);
7637 _line_progressed(1);
7638 _maybe_skip_whitespace_tokens();
// '?' recognized as a block token: block map with a complex key.
7640 else if(first ==
'?' && _is_blck_token(rem))
7642 _c4dbgp(
"usty[RMAP]: it's a block map + this key is complex");
7644 _handle_annotations_before_blck_val_scalar();
7645 m_evt_handler->_push();
7647 m_was_inside_qmrk =
true;
7648 _save_indentation();
7649 _line_progressed(1);
7650 _maybe_skip_whitespace_tokens();
// ':' recognized as a block token: block map whose first key is empty.
7652 else if(first ==
':' && _is_blck_token(rem))
7654 _c4dbgp(
"usty[RMAP]: it's a map with an empty key");
7656 _handle_annotations_before_blck_val_scalar();
7657 m_evt_handler->_push();
7658 m_evt_handler->set_key_scalar_plain({});
7660 _save_indentation();
7661 _line_progressed(1);
7662 _maybe_skip_whitespace_tokens();
// '&' anchor: recorded as pending, together with its column and line.
7664 else if(rem.begins_with(
'&'))
7666 csubstr anchor = _scan_anchor();
7667 _c4dbgpf(
"usty[RMAP]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
7668 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
7669 const size_t line = m_evt_handler->m_curr->pos.line;
7670 _add_annotation(&m_pending_anchors, anchor, indentation, line);
7671 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
// '*' reference: only valid here as a key, i.e. when a colon follows.
7673 else if(first ==
'*')
7675 csubstr ref = _scan_ref_map();
7676 _c4dbgpf(
"usty[RMAP]: ref! [{}]~~~{}~~~", ref.len, ref);
7677 if(!_maybe_scan_following_colon())
7679 _c4err(
"cannot read a VAL to a map");
7683 _c4dbgp(
"usty[RMAP]: start new block map, set ref as key");
7684 const size_t startline = m_evt_handler->m_curr->pos.line;
7686 _handle_annotations_before_start_mapblck(startline);
7687 m_evt_handler->_push();
7688 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7689 m_evt_handler->set_key_ref(ref);
7690 _maybe_skip_whitespace_tokens();
7691 _set_indentation(startindent);
// '!' tag: recorded as pending, together with its column and line.
7695 else if(first ==
'!')
7697 csubstr tag = _scan_tag();
7698 _c4dbgpf(
"usty[RMAP]: val tag! [{}]~~~{}~~~", tag.len, tag);
7701 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
7702 const size_t line = m_evt_handler->m_curr->pos.line;
7703 _add_annotation(&m_pending_tags, tag, indentation, line);
// A sequence (flow or block) is rejected in this state.
7705 else if(first ==
'[' || (first ==
'-' && _is_blck_token(rem)))
7707 _c4err(
"cannot parse a seq into an existing map");
// Otherwise try a scalar, which is only valid here as a key.
7711 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(
SSCL));
7712 startindent = m_evt_handler->m_curr->line_contents.indentation;
7713 const size_t startline = m_evt_handler->m_curr->pos.line;
7715 _c4dbgpf(
"usty[RMAP]: maybe scalar. first='{}'", _c4prc(first));
// Single-quoted scalar (the guarding condition on `first` is not visible here).
7718 _c4dbgp(
"usty[RMAP]: scanning single-quoted scalar");
7719 sc = _scan_scalar_squot();
7720 if(!_maybe_scan_following_colon())
7722 _c4err(
"cannot read a VAL to a map");
7726 _c4dbgp(
"usty[RMAP]: start new block map, set scalar as key");
7728 _handle_annotations_before_start_mapblck(startline);
7729 m_evt_handler->_push();
7730 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7731 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
7732 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7733 _set_indentation(startindent);
7735 _maybe_skip_whitespace_tokens();
// Double-quoted scalar.
7738 else if(first ==
'"')
7740 _c4dbgp(
"usty[RMAP]: scanning double-quoted scalar");
7741 sc = _scan_scalar_dquot();
7742 if(!_maybe_scan_following_colon())
7744 _c4err(
"cannot read a VAL to a map");
7748 _c4dbgp(
"usty[RMAP]: start new block map, set double-quoted scalar as key");
7750 _handle_annotations_before_start_mapblck(startline);
7751 m_evt_handler->_push();
7752 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7753 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
7754 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7755 _set_indentation(startindent);
7757 _maybe_skip_whitespace_tokens();
// Block scalars cannot be plain keys; each style reports its own message.
7760 else if(first ==
'|')
7762 _c4err(
"block literal keys must be enclosed in '?'");
7764 else if(first ==
'>')
7766 _c4err(
"block folded keys must be enclosed in '?'");
// Plain (unquoted) scalar.
7768 else if(_scan_scalar_plain_unk(&sc))
7770 _c4dbgp(
"usty[RMAP]: got a plain scalar");
7771 if(!_maybe_scan_following_colon())
7773 _c4err(
"cannot read a VAL to a map");
7777 _c4dbgp(
"usty[RMAP]: start new block map, set scalar as key");
7779 _handle_annotations_before_start_mapblck(startline);
7780 m_evt_handler->_push();
7781 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7782 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
7783 m_evt_handler->set_key_scalar_plain(maybe_filtered);
7784 _set_indentation(startindent);
7786 _maybe_skip_whitespace_tokens();
// NOTE(review): presumably the fallback when no scalar could be scanned.
7791 _c4err(
"parse error");
// ---- usty[UNK] (the guarding line is not visible here) ----
7797 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(
RSEQ));
7798 _c4dbgpf(
"usty[UNK]: first='{}'", _c4prc(first));
// Flow sequence (the guarding condition on `first` is not visible here).
7801 _c4dbgp(
"usty[UNK]: it's a flow seq");
7803 _handle_annotations_before_blck_val_scalar();
7804 m_evt_handler->begin_seq_val_flow();
7806 _set_indentation(startindent);
7807 _line_progressed(1);
7808 _maybe_skip_whitespace_tokens();
// Block sequence.
7810 else if(first ==
'-' && _is_blck_token(rem))
7812 _c4dbgp(
"usty[UNK]: it's a block seq");
7814 _handle_annotations_before_blck_val_scalar();
7815 m_evt_handler->begin_seq_val_block();
7817 _set_indentation(startindent);
7818 _line_progressed(1);
7819 _maybe_skip_whitespace_tokens();
// Flow map.
7821 else if(first ==
'{')
7823 _c4dbgp(
"usty[UNK]: it's a flow map");
7825 _handle_annotations_before_blck_val_scalar();
7826 m_evt_handler->begin_map_val_flow();
7828 _set_indentation(startindent);
7829 _line_progressed(1);
7830 _maybe_skip_whitespace_tokens();
// '?' recognized as a block token: block map with a complex key.
7832 else if(first ==
'?' && _is_blck_token(rem))
7834 _c4dbgp(
"usty[UNK]: it's a map + this key is complex");
7836 _handle_annotations_before_blck_val_scalar();
7837 m_evt_handler->begin_map_val_block();
7839 m_was_inside_qmrk =
true;
7840 _save_indentation();
7841 _line_progressed(1);
7842 _maybe_skip_whitespace_tokens();
// ':' recognized as a block token: block map whose first key is empty.
7844 else if(first ==
':' && _is_blck_token(rem))
7846 _c4dbgp(
"usty[UNK]: it's a map with an empty key");
7848 _handle_annotations_before_blck_val_scalar();
7849 m_evt_handler->begin_map_val_block();
7850 m_evt_handler->set_key_scalar_plain({});
7852 _save_indentation();
7853 _line_progressed(1);
7854 _maybe_skip_whitespace_tokens();
// '&' anchor: recorded as pending, together with its column and line.
7856 else if(first ==
'&')
7858 csubstr anchor = _scan_anchor();
7859 _c4dbgpf(
"usty[UNK]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
7860 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
7861 const size_t line = m_evt_handler->m_curr->pos.line;
7862 _add_annotation(&m_pending_anchors, anchor, indentation, line);
7863 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
// '*' reference: the value itself, or the first key of a new block map when
// a colon follows.
7865 else if(first ==
'*')
7867 csubstr ref = _scan_ref_map();
7868 _c4dbgpf(
"usty[UNK]: ref! [{}]~~~{}~~~", ref.len, ref);
7869 if(!_maybe_scan_following_colon())
7871 _c4dbgp(
"usty[UNK]: set val ref");
7872 _handle_annotations_before_blck_val_scalar();
7873 m_evt_handler->set_val_ref(ref);
7877 _c4dbgp(
"usty[UNK]: start new block map, set ref as key");
7878 const size_t startline = m_evt_handler->m_curr->pos.line;
7880 _handle_annotations_before_start_mapblck(startline);
7881 m_evt_handler->begin_map_val_block();
7882 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7883 m_evt_handler->set_key_ref(ref);
7884 _maybe_skip_whitespace_tokens();
7885 _set_indentation(startindent);
// '!' tag: recorded as pending, together with its column and line.
7889 else if(first ==
'!')
7891 csubstr tag = _scan_tag();
7892 _c4dbgpf(
"usty[UNK]: val tag! [{}]~~~{}~~~", tag.len, tag);
7895 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
7896 const size_t line = m_evt_handler->m_curr->pos.line;
7897 _add_annotation(&m_pending_tags, tag, indentation, line);
// Otherwise try a scalar: a value, or a key when a colon follows.
7901 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(
SSCL));
7902 startindent = m_evt_handler->m_curr->line_contents.indentation;
7903 const size_t startline = m_evt_handler->m_curr->pos.line;
7906 _c4dbgpf(
"usty[UNK]: maybe scalar. first='{}'", _c4prc(first));
// Single-quoted scalar (the guarding condition on `first` is not visible here).
7909 _c4dbgp(
"usty[UNK]: scanning single-quoted scalar");
7910 sc = _scan_scalar_squot();
7911 if(!_maybe_scan_following_colon())
7913 _c4dbgp(
"usty[UNK]: set as val");
7914 _handle_annotations_before_blck_val_scalar();
7915 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
7916 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
7921 _c4dbgp(
"usty[UNK]: start new block map, set scalar as key");
7923 _handle_annotations_before_start_mapblck(startline);
7924 m_evt_handler->begin_map_val_block();
7925 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7926 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
7927 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7928 _set_indentation(startindent);
7930 _maybe_skip_whitespace_tokens();
// Double-quoted scalar.
7933 else if(first ==
'"')
7935 _c4dbgp(
"usty[UNK]: scanning double-quoted scalar");
7936 sc = _scan_scalar_dquot();
7937 if(!_maybe_scan_following_colon())
7939 _c4dbgp(
"usty[UNK]: set as val");
7940 _handle_annotations_before_blck_val_scalar();
7941 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
7942 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
7947 _c4dbgp(
"usty[UNK]: start new block map, set double-quoted scalar as key");
7949 _handle_annotations_before_start_mapblck(startline);
7950 m_evt_handler->begin_map_val_block();
7951 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7952 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
7953 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7954 _set_indentation(startindent);
7956 _maybe_skip_whitespace_tokens();
// '|' block literal scalar: set as the value.
7959 else if(first ==
'|')
7961 _c4dbgp(
"usty[UNK]: scanning block-literal scalar");
7963 _scan_block(&sb, startindent);
7964 _c4dbgp(
"usty[UNK]: set as val");
7965 _handle_annotations_before_blck_val_scalar();
7966 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
7967 m_evt_handler->set_val_scalar_literal(maybe_filtered);
// '>' block folded scalar: set as the value.
7970 else if(first ==
'>')
7972 _c4dbgp(
"usty[UNK]: scanning block-folded scalar");
7974 _scan_block(&sb, startindent);
7975 _c4dbgp(
"usty[UNK]: set as val");
7976 _handle_annotations_before_blck_val_scalar();
7977 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
7978 m_evt_handler->set_val_scalar_folded(maybe_filtered);
// Plain (unquoted) scalar.
7981 else if(_scan_scalar_plain_unk(&sc))
7983 _c4dbgp(
"usty[UNK]: got a plain scalar");
7984 if(!_maybe_scan_following_colon())
7986 _c4dbgp(
"usty[UNK]: set as val");
7987 _handle_annotations_before_blck_val_scalar();
7988 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
7989 m_evt_handler->set_val_scalar_plain(maybe_filtered);
7994 _c4dbgp(
"usty[UNK]: start new block map, set scalar as key");
7996 _handle_annotations_before_start_mapblck(startline);
7997 m_evt_handler->begin_map_val_block();
7998 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7999 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
8000 m_evt_handler->set_key_scalar_plain(maybe_filtered);
8001 _set_indentation(startindent);
8003 _maybe_skip_whitespace_tokens();
// NOTE(review): presumably the fallback when no scalar could be scanned.
8008 _c4err(
"parse error");
// Top-level parse driver: starts the parse on the event handler, begins the
// stream, then loops until the file is finished, dispatching each non-empty
// line remainder according to the current state flags, and finally calls
// finish_parse() on the handler.
// NOTE(review): the signature line and the bodies of the dispatch branches
// are not visible in this view. Only RMAP and RUNK branches are visible here,
// so this is presumably the JSON entry point -- confirm against the full file.
8017 template<
class EventHandler>
// The handler's state stack must hold at least one entry.
8020 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
// Passes the file name, the arena relocation callback and this engine to the handler.
8024 m_evt_handler->start_parse(filename.str, &_s_relocate_arena,
this);
8025 m_evt_handler->begin_stream();
// Outer loop: until the file is finished.
8026 while( ! _finished_file())
// Inner loop: until the current line is finished.
8029 while( ! _finished_line())
// There must be unparsed text left on the line at this point.
8032 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! m_evt_handler->m_curr->line_contents.rem.empty());
// State dispatch (an earlier branch and the handler calls are not visible here).
8037 else if(has_any(
RMAP))
8041 else if(has_any(
RUNK))
// No known state flag matched.
8047 _c4err(
"internal error");
8050 if(_finished_file())
8055 m_evt_handler->finish_parse();
// Top-level parse driver, templated on the event handler type.
// NOTE(review): the signature line is not visible in this listing; this is
// presumably parse_in_place_ev (it additionally dispatches on the BLCK and
// USTY states) -- confirm against the full source.
8061 template<
class EventHandler>
// The handler's state stack must already hold at least one entry.
8064 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
// Announce the parse to the handler, passing the filename string, the
// address of _s_relocate_arena and this engine.
8068 m_evt_handler->start_parse(filename.str, &_s_relocate_arena,
this);
8069 m_evt_handler->begin_stream();
// Outer loop: runs until _finished_file() reports true.
8070 while( ! _finished_file())
// Inner loop: runs until _finished_line() reports true.
8073 while( ! _finished_line())
// Inside the inner loop the remaining contents of the current line must not be empty.
8076 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! m_evt_handler->m_curr->line_contents.rem.empty());
// The branch enclosing this assertion is not visible; where it is reached,
// the RMAP flag is required to be set.
8087 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP));
// BLCK state: this branch calls _handle_seq_block() or, after asserting
// RMAP, _handle_map_block(). The condition choosing between the two is not
// visible in this listing.
8096 else if(has_any(
BLCK))
8100 _handle_seq_block();
8104 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP));
8105 _handle_map_block();
// RUNK and USTY states: the branch bodies are not visible in this listing.
8108 else if(has_any(
RUNK))
8112 else if(has_any(
USTY))
// None of the tested states matched: report an internal error.
8118 _c4err(
"internal error");
// Re-check for end of file (the guarded statement is not visible here).
8121 if(_finished_file())
// Tell the handler that parsing is complete.
8126 m_evt_handler->finish_parse();
8132 #undef _c4dbgnextline
8134 #if defined(_MSC_VER)
8135 # pragma warning(pop)
8136 #elif defined(__clang__)
8137 # pragma clang diagnostic pop
8138 #elif defined(__GNUC__)
8139 # pragma GCC diagnostic pop
Lightweight generic type-safe wrappers for converting individual values to/from strings.
Holds a pointer to an existing tree, and a node id.
Tree const * tree() const noexcept
id_type id() const noexcept
bool readable() const noexcept
because a ConstNodeRef cannot be used to write to the tree, readable() has the same meaning as !...
This is the main driver of parsing logic: it scans the YAML or JSON source for tokens,...
Location location(Tree const &tree, id_type node_id) const
Get the location of a node of the last tree to be parsed by this parser.
FilterResult filter_scalar_plain(csubstr scalar, substr dst, size_t indentation)
filter a plain scalar
csubstr location_contents(Location const &loc) const
Get the string starting at a particular location, to the end of the parsed source buffer.
FilterResult filter_scalar_squoted(csubstr scalar, substr dst)
filter a single-quoted scalar
ParseEngine(EventHandler *evt_handler, ParserOptions opts={})
FilterResult filter_scalar_dquoted(csubstr scalar, substr dst)
filter a double-quoted scalar
void parse_json_in_place_ev(csubstr filename, substr src)
parse JSON in place, emitting events to the current handler
Location val_location(const char *val) const
Given a pointer to a buffer position, get the location.
FilterResult filter_scalar_plain_in_place(substr scalar, size_t cap, size_t indentation)
filter a plain scalar in place
FilterResult filter_scalar_squoted_in_place(substr scalar, size_t cap)
filter a single-quoted scalar in place
FilterResultExtending filter_scalar_dquoted_in_place(substr scalar, size_t cap)
filter a double-quoted scalar in place
void parse_in_place_ev(csubstr filename, substr src)
parse YAML in place, emitting events to the current handler
FilterResult filter_scalar_block_literal_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
filter a block-literal scalar in place
FilterResult filter_scalar_block_literal(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
filter a block-literal scalar
FilterResult filter_scalar_block_folded_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
filter a block-folded scalar in place
ParseEngine & operator=(ParseEngine &&)
FilterResult filter_scalar_block_folded(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
filter a block-folded scalar
NodeType type(id_type node) const
id_type prev_sibling(id_type node) const
bool has_key(id_type node) const
id_type parent(id_type node) const
id_type next_sibling(id_type node) const
csubstr const & key(id_type node) const
bool has_val(id_type node) const
csubstr const & val(id_type node) const
bool is_container(id_type node) const
#define RYML_ERRMSG_SIZE
size for the error message buffer
#define RYML_LOCATIONS_SMALL_THRESHOLD
threshold at which a location search will revert from linear to binary search.
bool atou(csubstr str, T *v) noexcept
Convert a trimmed string to an unsigned integral value.
@ NOTYPE
no node type or style is set
bool read_hex(csubstr s, I *v) noexcept
read a hexadecimal integer from a string.
size_t to_chars(substr buf, uint8_t v) noexcept
RYML_ID_TYPE id_type
The type of a node id in the YAML tree; to override the default type, define the macro RYML_ID_TYPE t...
@ npos
a null string position
size_t _find_last_newline_and_larger_indentation(csubstr s, size_t indentation) noexcept
@ RTOP
reading at top level
@ BLCK
reading in block mode
@ RSET
the (implicit) map being read is a !!set.
@ RNXT
read next val or keyval
@ FLOW
reading is inside explicit flow chars: [] or {}
@ RUNK
reading unknown state (when starting): must determine whether scalar, map or seq
@ RKEY
reading a scalar as key
@ RKCL
reading the key colon (i.e. the ':' after the key in the map)
@ NDOC
no document mode. a document has ended and another has not started yet.
@ QSCL
stored scalar was quoted
@ USTY
reading in unknown style mode - must determine FLOW or BLCK reading an implicit map nested in an expl...
@ QMRK
reading an explicit key (? key)
@ SSCL
there's a stored scalar
@ RVAL
reading a scalar as val
int ParserFlag_t
data type for ParserState_e
#define _RYML_WITHOUT_TAB_TOKENS(...)
#define _ryml_relocate(s)
#define _c4dbgfsq(fmt,...)
#define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without)
#define _RYML_WITH_TAB_TOKENS(...)
#define _c4dbgfps(fmt,...)
Filters an input string into a different output string.
size_t offset
number of bytes from the beginning of the source buffer
Options to give to the parser to control its behavior.