1 #ifndef _C4_YML_PARSE_ENGINE_DEF_HPP_
2 #define _C4_YML_PARSE_ENGINE_DEF_HPP_
5 #include "c4/error.hpp"
11 #include "c4/yml/detail/dbgprint.hpp"
14 #include <c4/dump.hpp>
15 #include "c4/yml/detail/print.hpp"
// Error-reporting macros. Both expand to a call of this->_err(), so they can
// only be used inside member functions of a class that provides _err().
// NOTE(review): each macro is defined twice below; the preprocessor conditional
// that selects one flavor is not visible in this excerpt.
// Flavor 1: triggers RYML_DEBUG_BREAK() and prefixes the message with "ERROR:"
// plus the __FILE__:__LINE__ of the call site.
16 #define _c4err_(fmt, ...) do { RYML_DEBUG_BREAK(); this->_err("ERROR:\n" "{}:{}: " fmt, __FILE__, __LINE__, __VA_ARGS__); } while(0)
17 #define _c4err(fmt) do { RYML_DEBUG_BREAK(); this->_err("ERROR:\n" "{}:{}: " fmt, __FILE__, __LINE__); } while(0)
// Flavor 2: no debug break and no source location. Here _c4err() passes its
// argument as the value of a "{}" placeholder rather than splicing it into the
// format string.
19 #define _c4err_(fmt, ...) this->_err("ERROR: " fmt, __VA_ARGS__)
20 #define _c4err(fmt) this->_err("ERROR: {}", fmt)
// Compile-time switches keyed on RYML_WITH_TAB_TOKENS:
//  - _RYML_WITH_TAB_TOKENS(...)    keeps its arguments only when the option is defined
//  - _RYML_WITHOUT_TAB_TOKENS(...) keeps its arguments only when it is not defined
//  - _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) selects one of its two arguments
24 #if defined(RYML_WITH_TAB_TOKENS)
25 #define _RYML_WITH_TAB_TOKENS(...) __VA_ARGS__
26 #define _RYML_WITHOUT_TAB_TOKENS(...)
27 #define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) with
// Definitions for the opposite case (presumably the #else branch; the directive
// itself is not visible in this excerpt).
29 #define _RYML_WITH_TAB_TOKENS(...)
30 #define _RYML_WITHOUT_TAB_TOKENS(...) __VA_ARGS__
31 #define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) without
// Debug helper: prints a separator and then the line number and byte offset
// stored in the current parser state (m_evt_handler->m_curr->pos).
// NOTE(review): the macro body uses line continuations and extends beyond the
// lines visible here.
36 #define _c4dbgnextline() \
38 _c4dbgq("\n-----------"); \
39 _c4dbgt("handling line={}, offset={}B", \
40 m_evt_handler->m_curr->pos.line, \
41 m_evt_handler->m_curr->pos.offset); \
// Save the compiler's diagnostic state and silence selected warnings.
// NOTE(review): the opening conditional of this chain (presumably an MSVC
// check) and the matching "pop" are not visible in this excerpt.
// MSVC: C4296 (expression is always true/false), C4702 (unreachable code).
46 # pragma warning(push)
47 # pragma warning(disable: 4296)
48 # pragma warning(disable: 4702)
49 #elif defined(__clang__)
50 # pragma clang diagnostic push
51 # pragma clang diagnostic ignored "-Wtype-limits"
52 # pragma clang diagnostic ignored "-Wformat-nonliteral"
53 # pragma clang diagnostic ignored "-Wold-style-cast"
54 #elif defined(__GNUC__)
55 # pragma GCC diagnostic push
56 # pragma GCC diagnostic ignored "-Wtype-limits"
57 # pragma GCC diagnostic ignored "-Wformat-nonliteral"
58 # pragma GCC diagnostic ignored "-Wold-style-cast"
// NOTE(review): the next pragma is likely guarded by a GCC version check that
// is not visible here.
60 # pragma GCC diagnostic ignored "-Wduplicated-branches"
// Predicate over a candidate block-token string.
// Preconditions (asserted): `s` is non-empty and its first character is one of
// '-', ':' or '?'.
// NOTE(review): the return expression is not visible in this excerpt.
71 C4_HOT C4_ALWAYS_INLINE
bool _is_blck_token(csubstr s) noexcept
73 RYML_ASSERT(s.len > 0);
74 RYML_ASSERT(s.str[0] ==
'-' || s.str[0] ==
':' || s.str[0] ==
'?');
// Checks whether `s` starts with the three-dash marker "---".
// Preconditions (asserted): `s` begins with '-' and does not end with a
// newline or carriage return.
78 inline bool _is_doc_begin_token(csubstr s)
80 RYML_ASSERT(s.begins_with(
'-'));
81 RYML_ASSERT(!s.ends_with(
"\n"));
82 RYML_ASSERT(!s.ends_with(
"\r"));
// Requires at least three characters, the 2nd and 3rd being '-'.
// NOTE(review): the expression continues on lines not visible here.
83 return (s.len >= 3 && s.str[1] ==
'-' && s.str[2] ==
'-')
// Checks whether `s` starts with the three-dot marker "...".
// Preconditions (asserted): `s` begins with '.' and does not end with a
// newline or carriage return.
87 inline bool _is_doc_end_token(csubstr s)
89 RYML_ASSERT(s.begins_with(
'.'));
90 RYML_ASSERT(!s.ends_with(
"\n"));
91 RYML_ASSERT(!s.ends_with(
"\r"));
// Requires at least three characters, the 2nd and 3rd being '.'.
// NOTE(review): the expression continues on lines not visible here.
92 return (s.len >= 3 && s.str[1] ==
'.' && s.str[2] ==
'.')
// Combined check for the "---" and "..." markers.
// NOTE(review): only the two return statements are visible; the code that
// selects between them (and any length checks) is not shown in this excerpt.
96 inline bool _is_doc_token(csubstr s) noexcept
// Dash case: 2nd and 3rd characters must be '-'.
124 return (s.str[1] ==
'-' && s.str[2] ==
'-')
// Dot case: 2nd and 3rd characters must be '.'.
128 return (s.str[1] ==
'.' && s.str[2] ==
'.')
// Tests whether `s` starts with one of the literals "false", "true" or "null".
// NOTE(review): the return statements are not visible here; callers in this
// file use the result as a length (s.first(len)), so presumably the length of
// the matched literal is returned, and 0 when nothing matches -- confirm.
135 inline size_t _is_special_json_scalar(csubstr s)
141 if(s.len >= 5 && s.begins_with(
"false"))
145 if(s.len >= 4 && s.begins_with(
"true"))
149 if(s.len >= 4 && s.begins_with(
"null"))
// Returns 1 when `nl` followed by `following` forms a two-character line
// break ("\n\r" or "\r\n"), and 0 otherwise. The boolean result is converted
// to size_t, which lets callers add it directly to an offset.
159 C4_ALWAYS_INLINE
size_t _extend_from_combined_newline(
char nl,
char following)
161 return (nl ==
'\n' && following ==
'\r') || (nl ==
'\r' && following ==
'\n');
// Returns the part of `rem` that follows its first line break.
// NOTE(review): the handling of "no line break found" / empty remainder and the
// final return are not visible in this excerpt.
165 inline substr from_next_line(substr rem)
// Locate the first '\r' or '\n'.
167 size_t nlpos = rem.first_of(
"\r\n");
170 const char nl = rem[nlpos];
171 rem = rem.right_of(nlpos);
// A "\r\n" or "\n\r" pair counts as a single line break.
174 if(_extend_from_combined_newline(nl, rem.front()))
// Scans `r` forward from position *i (which must hold '\n', asserted),
// advancing *i in place, and returns the counter numnl_following.
// NOTE(review): the branch bodies are not visible; presumably '\n' increments
// the counter, blanks (' ', '\t', '\r') are skipped, and anything else stops
// the scan -- confirm against the full source.
182 inline size_t _count_following_newlines(csubstr r,
size_t *C4_RESTRICT i)
184 RYML_ASSERT(r[*i] ==
'\n');
185 size_t numnl_following = 0;
187 for( ; *i < r.len; ++(*i))
189 if(r.str[*i] ==
'\n')
192 else if(r.str[*i] ==
' ' || r.str[*i] ==
'\t' || r.str[*i] ==
'\r')
197 return numnl_following;
// Indentation-aware overload: scans `r` forward from position *i (which must
// hold '\n', asserted), advancing *i in place, and returns numnl_following.
// NOTE(review): two scanning loops are visible, but the condition choosing
// between them and most branch bodies are not shown in this excerpt.
202 inline size_t _count_following_newlines(csubstr r,
size_t *C4_RESTRICT i,
size_t indentation)
204 RYML_ASSERT(r[*i] ==
'\n');
205 size_t numnl_following = 0;
// First loop: same character classes as the overload without indentation.
209 for( ; *i < r.len; ++(*i))
211 if(r.str[*i] ==
'\n')
214 else if(r.str[*i] ==
' ' || r.str[*i] ==
'\t' || r.str[*i] ==
'\r')
// Second loop: after each '\n', an inner loop walks over up to `indentation`
// leading spaces / carriage returns (`stop` marks that limit).
222 for( ; *i < r.len; ++(*i))
224 if(r.str[*i] ==
'\n')
228 size_t stop = *i + indentation;
229 for( ; *i < r.len; ++(*i))
231 if(r.str[*i] !=
' ' && r.str[*i] !=
'\r')
233 RYML_ASSERT(*i < stop);
238 else if(r.str[*i] ==
' ' || r.str[*i] ==
'\t' || r.str[*i] ==
'\r')
244 return numnl_following;
254 template<
class EventHandler>
// Constructor storing an event handler.
// NOTE(review): the signature and some initializers are not visible in this
// excerpt.
// Starts with no pending anchors, the question-mark flag cleared, and empty
// newline-offset bookkeeping.
261 template<
class EventHandler>
266 , m_evt_handler(evt_handler)
267 , m_pending_anchors()
269 , m_was_inside_qmrk(false)
273 , m_newline_offsets()
274 , m_newline_offsets_size(0)
275 , m_newline_offsets_capacity(0)
276 , m_newline_offsets_buf()
// The handler is dereferenced throughout the class, so it must be non-null.
278 RYML_CHECK(evt_handler);
// Constructor from another engine `that`, taking its members as-is.
// NOTE(review): the signature is not visible. The newline-offsets pointer is
// taken over rather than duplicated, which suggests this is the move
// constructor -- confirm.
281 template<
class EventHandler>
283 : m_options(that.m_options)
284 , m_file(that.m_file)
286 , m_evt_handler(that.m_evt_handler)
287 , m_pending_anchors(that.m_pending_anchors)
288 , m_pending_tags(that.m_pending_tags)
// This flag is reset, not taken from `that`.
289 , m_was_inside_qmrk(
false)
293 , m_newline_offsets(that.m_newline_offsets)
294 , m_newline_offsets_size(that.m_newline_offsets_size)
295 , m_newline_offsets_capacity(that.m_newline_offsets_capacity)
296 , m_newline_offsets_buf(that.m_newline_offsets_buf)
// Constructor from another engine `that` which duplicates the newline-offsets
// array instead of sharing the pointer.
// NOTE(review): the signature is not visible; this looks like the copy
// constructor -- confirm.
301 template<
class EventHandler>
303 : m_options(that.m_options)
304 , m_file(that.m_file)
306 , m_evt_handler(that.m_evt_handler)
307 , m_pending_anchors(that.m_pending_anchors)
308 , m_pending_tags(that.m_pending_tags)
309 , m_was_inside_qmrk(false)
// The location bookkeeping starts empty and is filled in the body below.
313 , m_newline_offsets()
314 , m_newline_offsets_size()
315 , m_newline_offsets_capacity()
316 , m_newline_offsets_buf()
// Only when `that` has location storage: size our own array to the same
// capacity, then copy the entries that are in use.
318 if(that.m_newline_offsets_capacity)
320 _resize_locations(that.m_newline_offsets_capacity);
321 _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity == that.m_newline_offsets_capacity);
322 memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size *
sizeof(
size_t));
323 m_newline_offsets_size = that.m_newline_offsets_size;
// Member-wise assignment from `that`; the newline-offsets pointer itself is
// assigned, with no duplication of the array.
// NOTE(review): the signature and any preceding cleanup are not visible;
// presumably the move-assignment operator -- confirm.
327 template<
class EventHandler>
331 m_options = (that.m_options);
332 m_file = (that.m_file);
333 m_buf = (that.m_buf);
334 m_evt_handler = that.m_evt_handler;
335 m_pending_anchors = that.m_pending_anchors;
336 m_pending_tags = that.m_pending_tags;
337 m_was_inside_qmrk = that.m_was_inside_qmrk;
338 m_doc_empty = that.m_doc_empty;
339 m_prev_colon = that.m_prev_colon;
340 m_encoding = that.m_encoding;
341 m_newline_offsets = (that.m_newline_offsets);
342 m_newline_offsets_size = (that.m_newline_offsets_size);
343 m_newline_offsets_capacity = (that.m_newline_offsets_capacity);
344 m_newline_offsets_buf = (that.m_newline_offsets_buf);
// Assignment from `that` which copies the newline-offset entries into this
// engine's own array.
// NOTE(review): the signature is not visible; presumably the copy-assignment
// operator -- confirm.
349 template<
class EventHandler>
355 m_options = (that.m_options);
356 m_file = (that.m_file);
357 m_buf = (that.m_buf);
358 m_evt_handler = that.m_evt_handler;
359 m_pending_anchors = that.m_pending_anchors;
360 m_pending_tags = that.m_pending_tags;
361 m_was_inside_qmrk = that.m_was_inside_qmrk;
362 m_doc_empty = that.m_doc_empty;
363 m_prev_colon = that.m_prev_colon;
364 m_encoding = that.m_encoding;
// Resize only when the existing storage is smaller than that of `that`.
365 if(that.m_newline_offsets_capacity > m_newline_offsets_capacity)
366 _resize_locations(that.m_newline_offsets_capacity);
367 _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_capacity);
368 _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_size);
// Copy only the entries in use; the capacity stays whatever it is here.
369 memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size *
sizeof(
size_t));
370 m_newline_offsets_size = that.m_newline_offsets_size;
371 m_newline_offsets_buf = that.m_newline_offsets_buf;
// Resets members to their value-initialized state. Nothing is released here:
// the newline-offsets pointer is simply overwritten.
// NOTE(review): the signature and several assignments are not visible in this
// excerpt.
376 template<
class EventHandler>
383 m_pending_anchors = {};
385 m_was_inside_qmrk =
false;
389 m_newline_offsets = {};
390 m_newline_offsets_size = {};
391 m_newline_offsets_capacity = {};
392 m_newline_offsets_buf = {};
// Releases the newline-offsets array, if any, through the event handler's
// callbacks, and resets the related bookkeeping members.
395 template<
class EventHandler>
396 void ParseEngine<EventHandler>::_free()
398 if(m_newline_offsets)
// The stored capacity is passed to the free macro along with the pointer.
400 _RYML_CB_FREE(m_evt_handler->m_stack.m_callbacks, m_newline_offsets,
size_t, m_newline_offsets_capacity);
401 m_newline_offsets =
nullptr;
402 m_newline_offsets_size = 0u;
403 m_newline_offsets_capacity = 0u;
404 m_newline_offsets_buf =
nullptr;
// Clears per-parse state (pending anchors, question-mark flag, ...) and, when
// the options request location tracking, prepares the location data.
// NOTE(review): some statements are not visible in this excerpt.
411 template<
class EventHandler>
412 void ParseEngine<EventHandler>::_reset()
414 m_pending_anchors = {};
417 m_was_inside_qmrk =
false;
420 if(m_options.locations())
422 _prepare_locations();
// Rebases strings that point into `prev_arena` so that they point to the same
// offset inside `next_arena`. The local macro _ryml_relocate(s) does this for
// one substring: if `s` lies within prev_arena, its pointer is moved by the
// same distance into next_arena. It is then applied over the pending tags and
// pending anchors entries.
// NOTE(review): the loop bodies and other uses of the macro are not visible.
429 template<
class EventHandler>
430 void ParseEngine<EventHandler>::_relocate_arena(csubstr prev_arena, substr next_arena)
432 #define _ryml_relocate(s) \
433 if((s).is_sub(prev_arena)) \
435 (s).str = next_arena.str + ((s).str - prev_arena.str); \
439 for(
size_t i = 0; i < m_pending_tags.num_entries; ++i)
441 for(
size_t i = 0; i < m_pending_anchors.num_entries; ++i)
443 #undef _ryml_relocate
// Callback adapter: treats the opaque `data` pointer as a ParseEngine and
// forwards to its _relocate_arena().
446 template<
class EventHandler>
447 void ParseEngine<EventHandler>::_s_relocate_arena(
void* data, csubstr prev_arena, substr next_arena)
449 ((ParseEngine*)data)->_relocate_arena(prev_arena, next_arena);
// Emits, through `dumpfn`, a description of the current parse position: an
// optional "file:" prefix, "line:col: ", the current line's contents (cut at
// 80 characters), a "^~~~" marker under the unread part of the line, and the
// flags of the top parser state.
// NOTE(review): several conditionals and declarations (e.g. flagbuf_) are not
// visible in this excerpt.
455 template<
class EventHandler>
456 template<
class DumpFn>
457 void ParseEngine<EventHandler>::_fmt_msg(DumpFn &&dumpfn)
const
459 auto const *
const C4_RESTRICT st = m_evt_handler->m_curr;
460 auto const& lc = st->line_contents;
461 csubstr contents = lc.stripped;
// `offs` tracks the width of the prefix printed before the line contents, so
// that the marker line can be aligned. The 3 covers the literal characters of
// the "{}:{}: " format; to_chars() on an empty buffer presumably returns the
// number of characters required -- confirm.
465 size_t offs = 3u +
to_chars(substr{}, st->pos.line) +
to_chars(substr{}, st->pos.col);
468 detail::_dump(std::forward<DumpFn>(dumpfn),
"{}:", m_file);
469 offs += m_file.len + 1;
471 detail::_dump(std::forward<DumpFn>(dumpfn),
"{}:{}: ", st->pos.line, st->pos.col);
// Show at most 80 characters of the line; longer lines get a "..." suffix.
472 csubstr maybe_full_content = (contents.len < 80u ? contents : contents.first(80u));
473 csubstr maybe_ellipsis = (contents.len < 80u ? csubstr{} : csubstr(
"..."));
474 detail::_dump(std::forward<DumpFn>(dumpfn),
"{}{} (size={})\n", maybe_full_content, maybe_ellipsis, contents.len);
// Marker line: pad to the column where `rem` starts, then '^' followed by '~'
// characters (capped at 80 columns in total).
476 size_t firstcol = (size_t)(lc.rem.begin() - lc.full.begin());
477 size_t lastcol = firstcol + lc.rem.len;
478 for(
size_t i = 0; i < offs + firstcol; ++i)
479 std::forward<DumpFn>(dumpfn)(
" ");
480 std::forward<DumpFn>(dumpfn)(
"^");
481 for(
size_t i = 1, e = (lc.rem.len < 80u ? lc.rem.len : 80u); i < e; ++i)
482 std::forward<DumpFn>(dumpfn)(
"~");
// Columns are reported 1-based.
483 detail::_dump(std::forward<DumpFn>(dumpfn),
"{} (cols {}-{})\n", maybe_ellipsis, firstcol+1, lastcol+1);
487 std::forward<DumpFn>(dumpfn)(
"\n");
494 detail::_dump(std::forward<DumpFn>(dumpfn),
"top state: {}\n", detail::_parser_flags_to_str(flagbuf_, m_evt_handler->m_curr->flags));
// Formats an error message from `fmt` and `args`, cancels the parse on the
// event handler, and invokes the user error callback with the message, its
// length, the current position and the callbacks' user data.
// NOTE(review): the declarations of `errmsg` and `len` are not visible in this
// excerpt.
502 template<
class EventHandler>
503 template<
class ...Args>
504 void ParseEngine<EventHandler>::_err(csubstr fmt, Args
const& C4_RESTRICT ...args)
const
// Every formatted piece is appended to the message buffer via the writer.
507 detail::_SubstrWriter writer(errmsg);
508 auto dumpfn = [&writer](csubstr s){ writer.append(s); };
509 detail::_dump(dumpfn, fmt, args...);
513 m_evt_handler->cancel_parse();
514 m_evt_handler->m_stack.m_callbacks.m_error(errmsg, len, m_evt_handler->m_curr->pos, m_evt_handler->m_stack.m_callbacks.m_user_data);
// Formats `fmt` with `args` and writes the result to stdout.
520 template<
class EventHandler>
521 template<
class ...Args>
522 void ParseEngine<EventHandler>::_dbg(csubstr fmt, Args
const& C4_RESTRICT ...args)
const
// Pieces with a null pointer are skipped rather than handed to fwrite().
526 auto dumpfn = [](csubstr s){
if(s.str) fwrite(s.str, 1, s.len, stdout); };
527 detail::_dump(dumpfn, fmt, args...);
// Tells whether the current offset has reached or passed the end of the
// source buffer.
// NOTE(review): the return statement is not visible in this excerpt.
536 template<
class EventHandler>
537 bool ParseEngine<EventHandler>::_finished_file()
const
539 bool ret = m_evt_handler->m_curr->pos.offset >= m_buf.len;
542 _c4dbgp(
"finished file!!!");
// True when nothing is left unread in the current line (`rem` is empty).
547 template<
class EventHandler>
548 C4_HOT C4_ALWAYS_INLINE
bool ParseEngine<EventHandler>::_finished_line()
const
550 return m_evt_handler->m_curr->line_contents.rem.empty();
// Advances the current line past `pos` leading whitespace characters, if any.
// NOTE(review): the computation of `pos` (and which characters count as
// whitespace) is not visible in this excerpt.
556 template<
class EventHandler>
557 void ParseEngine<EventHandler>::_maybe_skip_whitespace_tokens()
559 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
565 _c4dbgpf(
"skip {} whitespace characters", pos);
566 _line_progressed(pos);
// If the unread part of the line starts with `c`, skips the whole leading run
// of that character; otherwise does nothing.
// NOTE(review): the handling of a line consisting only of `c` (first_not_of
// finding nothing) is not visible in this excerpt.
570 template<
class EventHandler>
571 void ParseEngine<EventHandler>::_maybe_skipchars(
char c)
573 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
574 if(rem.len && rem.str[0] == c)
576 size_t pos = rem.first_not_of(c);
579 _c4dbgpf(
"skip {}x'{}'", pos, c);
580 _line_progressed(pos);
// Like skipping a leading run of `c`, but bounded by `max_to_skip`.
// Compiled only when RYML_NO_COVERAGE__TO_BE_DELETED is defined.
// NOTE(review): the statement under `if(pos > max_to_skip)` (presumably a
// clamp) and the closing #endif are not visible in this excerpt.
584 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
585 template<
class EventHandler>
586 void ParseEngine<EventHandler>::_maybe_skipchars_up_to(
char c,
size_t max_to_skip)
588 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
589 if(rem.len && rem.str[0] == c)
591 size_t pos = rem.first_not_of(c);
594 if(pos > max_to_skip)
596 _c4dbgpf(
"skip {}x'{}'", pos, c);
597 _line_progressed(pos);
// Skips the leading run of characters drawn from the set `chars`.
// Precondition (asserted): the unread part of the line begins with one of them.
// NOTE(review): the second template header (declaring N) and the condition
// guarding the fallback assignment below are not visible in this excerpt.
602 template<
class EventHandler>
604 void ParseEngine<EventHandler>::_skipchars(
const char (&chars)[N])
606 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.begins_with_any(chars));
607 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(chars);
// Fallback: skip to the end of the line (presumably when no other character
// was found).
609 pos = m_evt_handler->m_curr->line_contents.rem.len;
610 _c4dbgpf(
"skip {} characters", pos);
611 _line_progressed(pos);
// Consumes a comment that runs to the end of the current line.
// Preconditions (asserted): the unread part starts with '#' and lies within
// the full line.
// Raises an error when the '#' is not at the start of the line and is not
// preceded by a space or tab.
614 template<
class EventHandler>
615 void ParseEngine<EventHandler>::_skip_comment()
617 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.begins_with(
'#'));
618 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.is_sub(m_evt_handler->m_curr->line_contents.full));
619 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
620 csubstr full = m_evt_handler->m_curr->line_contents.full;
// The line itself does not start with '#': inspect the character just before
// the comment.
622 if(!full.begins_with(
'#'))
624 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.str > full.str);
625 const char c = full[(size_t)(rem.str - full.str - 1)];
626 if(C4_UNLIKELY(c !=
' ' && c !=
'\t'))
627 _RYML_CB_ERR(m_evt_handler->m_stack.m_callbacks,
"comment not preceded by whitespace");
// Otherwise the comment must begin exactly at the start of the line.
631 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.str == full.str);
633 _c4dbgpf(
"comment was '{}'", rem);
// Everything up to the end of the line belongs to the comment.
634 _line_progressed(rem.len);
// If, after leading spaces, the unread part of the line starts with '#',
// advances over those spaces up to the '#'.
// NOTE(review): the statement that then consumes the comment is not visible in
// this excerpt. Only ' ' is trimmed here, not tabs.
637 template<
class EventHandler>
638 void ParseEngine<EventHandler>::_maybe_skip_comment()
640 csubstr s = m_evt_handler->m_curr->line_contents.rem.triml(
' ');
641 if(s.begins_with(
'#'))
// Number of spaces trimmed = distance between the two start pointers.
643 _line_progressed((
size_t)(s.str - m_evt_handler->m_curr->line_contents.rem.str));
// Skips leading spaces/tabs in the unread part of the line, then checks
// whether the next character is ':'.
// NOTE(review): the return statements and what happens to the ':' itself are
// not visible in this excerpt.
648 template<
class EventHandler>
649 bool ParseEngine<EventHandler>::_maybe_scan_following_colon() noexcept
651 if(m_evt_handler->m_curr->line_contents.rem.len)
653 if(m_evt_handler->m_curr->line_contents.rem.str[0] ==
' ' || m_evt_handler->m_curr->line_contents.rem.str[0] ==
'\t')
655 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(
" \t");
// Fallback to the end of the line (presumably when only blanks remain; the
// guarding condition is not visible).
657 pos = m_evt_handler->m_curr->line_contents.rem.len;
658 _c4dbgpf(
"skip {}x'{}'", pos,
' ');
659 _line_progressed(pos);
661 if(m_evt_handler->m_curr->line_contents.rem.len && (m_evt_handler->m_curr->line_contents.rem.str[0] ==
':'))
663 _c4dbgp(
"found ':' colon next");
// Skips leading spaces/tabs in the unread part of the line, then checks
// whether the next character is ','.
// NOTE(review): the return statements and what happens to the ',' itself are
// not visible in this excerpt.
671 template<
class EventHandler>
672 bool ParseEngine<EventHandler>::_maybe_scan_following_comma() noexcept
674 if(m_evt_handler->m_curr->line_contents.rem.len)
676 if(m_evt_handler->m_curr->line_contents.rem.str[0] ==
' ' || m_evt_handler->m_curr->line_contents.rem.str[0] ==
'\t')
678 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(
" \t");
// Fallback to the end of the line (presumably when only blanks remain; the
// guarding condition is not visible).
680 pos = m_evt_handler->m_curr->line_contents.rem.len;
681 _c4dbgpf(
"skip {}x'{}'", pos,
' ');
682 _line_progressed(pos);
684 if(m_evt_handler->m_curr->line_contents.rem.len && (m_evt_handler->m_curr->line_contents.rem.str[0] ==
','))
686 _c4dbgp(
"found ',' comma next");
// Reads an anchor at the current position.
// Precondition (asserted): the unread part of the line starts with '&'.
// The anchor is the text between the '&' and the first space; the '&', the
// anchor and any spaces after it are consumed.
// NOTE(review): the return statement is not visible in this excerpt.
697 template<
class EventHandler>
698 csubstr ParseEngine<EventHandler>::_scan_anchor()
700 csubstr s = m_evt_handler->m_curr->line_contents.rem;
701 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'&'));
702 csubstr anchor = s.range(1, s.first_of(
' '));
// 1u accounts for the leading '&', which is not part of `anchor`.
703 _line_progressed(1u + anchor.len);
704 _maybe_skipchars(
' ');
// Reads a reference at the current position; this variant stops at the first
// of ',', ']', ' ' or ':'.
// Precondition (asserted): the unread part of the line starts with '*'.
// The '*' is kept as part of `ref`.
// NOTE(review): the return statement is not visible in this excerpt.
708 template<
class EventHandler>
709 csubstr ParseEngine<EventHandler>::_scan_ref_seq()
711 csubstr s = m_evt_handler->m_curr->line_contents.rem;
712 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'*'));
713 csubstr ref = s.first(s.first_of(
",] :"));
714 _line_progressed(ref.len);
// Reads a reference at the current position; this variant stops at the first
// of ',', '}' or ' '.
// Precondition (asserted): the unread part of the line starts with '*'.
// The '*' is kept as part of `ref`.
// NOTE(review): the return statement is not visible in this excerpt.
718 template<
class EventHandler>
719 csubstr ParseEngine<EventHandler>::_scan_ref_map()
721 csubstr s = m_evt_handler->m_curr->line_contents.rem;
722 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'*'));
723 csubstr ref = s.first(s.first_of(
",} "));
724 _line_progressed(ref.len);
// Reads a tag at the current position, then consumes it together with the
// whitespace that follows. Handles the forms "!!...", "!<...>", "!h!..." (only
// when RYML_NO_COVERAGE__TO_BE_DELETED is defined) and plain "!...".
// Precondition (asserted): after left-trimming spaces, the text starts with '!'.
// NOTE(review): the declaration of `t`, the conditions that choose between the
// alternative terminator sets (" ," versus ' '), and the return statement are
// not visible in this excerpt.
728 template<
class EventHandler>
729 csubstr ParseEngine<EventHandler>::_scan_tag()
731 csubstr rem = m_evt_handler->m_curr->line_contents.rem.triml(
' ');
732 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.begins_with(
'!'));
734 if(rem.begins_with(
"!!"))
736 _c4dbgp(
"begins with '!!'");
738 t = rem.left_of(rem.first_of(
" ,"));
740 t = rem.left_of(rem.first_of(
' '));
742 else if(rem.begins_with(
"!<"))
744 _c4dbgp(
"begins with '!<'");
// The closing '>' is kept as part of the tag (inclusive left_of).
745 t = rem.left_of(rem.first_of(
'>'),
true);
747 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
748 else if(rem.begins_with(
"!h!"))
750 _c4dbgp(
"begins with '!h!'");
751 t = rem.left_of(rem.first_of(
' '));
756 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.begins_with(
'!'));
757 _c4dbgp(
"begins with '!'");
759 t = rem.left_of(rem.first_of(
" ,"));
761 t = rem.left_of(rem.first_of(
' '));
763 _line_progressed(t.len);
764 _maybe_skip_whitespace_tokens();
// Decides whether `s` (asserted non-empty) can start a plain scalar in flow
// context. Some inputs yield "not a scalar", others raise an error for invalid
// ':'- or '?'-prefixed tokens.
// NOTE(review): the switch/conditions and return statements are not visible in
// this excerpt; only the diagnostics are.
771 template<
class EventHandler>
772 bool ParseEngine<EventHandler>::_is_valid_start_scalar_plain_flow(csubstr s)
774 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.empty());
790 _c4dbgpf(
"not a scalar: found non-scalar token '{}'", _c4prc(s.str[0]));
804 _c4err_(
"invalid token \":{}\"", _c4prc(s.str[1]));
811 _c4dbgpf(
"not a scalar: found non-scalar token '{}{}'", s.str[0], s.str[1]);
833 _c4dbgpf(
"not a scalar: found non-scalar token '?{}'", _c4prc(s.str[1]));
839 _c4err_(
"invalid token \"?{}\"", _c4prc(s.str[1]));
// Scans a plain scalar that is a value inside a flow sequence, possibly
// spanning several lines, and stores the result in `sc`.
// Preconditions (asserted): state is flow, in a sequence (RSEQ or RSEQIMAP),
// reading a value, not in a map or block; the text does not start with a space
// or newline.
// NOTE(review): most of the control flow (the character switch, the line loop,
// the returns and the assignment of sc->scalar) is not visible in this excerpt.
858 template<
class EventHandler>
859 bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_flow(ScannedScalar *C4_RESTRICT sc)
861 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RMAP));
862 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
BLCK));
863 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RSEQ|
RSEQIMAP));
864 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
FLOW));
865 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL));
867 substr s = m_evt_handler->m_curr->line_contents.rem;
868 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(
' '));
869 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(
'\n'));
874 if(!_is_valid_start_scalar_plain_flow(s))
877 _c4dbgp(
"scanning seqflow scalar...");
// Remember where the scalar starts, to tell "terminator at the very beginning"
// apart from "terminator after some scalar text".
879 const size_t start_offset = m_evt_handler->m_curr->pos.offset;
880 bool needs_filter =
false;
883 _c4dbgpf(
"scanning scalar: curr line=[{}]~~~{}~~~", s.len, s);
// Walk the current line character by character looking for a terminator.
884 for(
size_t i = 0; i < s.len; ++i)
886 const char c = s.str[i];
890 _c4dbgpf(
"found terminating character at {}: '{}'", i, c);
892 if(m_evt_handler->m_curr->pos.offset + i > start_offset)
898 _c4dbgp(
"at the beginning. no scalar here.");
903 _c4dbgpf(
"found terminating character at {}: '{}'", i, c);
908 _c4dbgp(
"found suspicious '#'");
911 _c4dbgpf(
"found terminating character at {}: '{}'", i, c);
// A ':' ends the scalar only in some contexts; the character after it is
// inspected to decide.
917 _c4dbgp(
"found suspicious ':'");
920 const char next = s.str[i+1];
921 _c4dbgpf(
"next char is '{}'", _c4prc(next));
924 _c4dbgp(
"map starting!");
925 if(m_evt_handler->m_curr->pos.offset + i > start_offset)
927 _c4dbgp(
"scalar finished!");
933 _c4dbgp(
"at the beginning. no scalar here.");
939 _c4dbgp(
"it's a scalar indeed.");
943 else if(s.len == i+1)
945 _c4dbgp(
"':' at line end. map starting!");
953 _c4err_(
"invalid character: '{}'", c);
// No terminator on this line: consume it and continue on the next one, if any.
958 _line_progressed(s.len);
959 if(!_finished_file())
961 _c4dbgp(
"next line!");
967 _c4dbgp(
"file finished!");
970 s = m_evt_handler->m_curr->line_contents.rem;
977 sc->needs_filter = needs_filter;
979 _c4prscalar(
"scanned plain scalar", sc->scalar,
true);
// Scans a plain scalar (key or value) inside a flow map, possibly spanning
// several lines, and stores the result in `sc`.
// Returns true when a non-empty scalar was scanned.
// Preconditions (asserted): state is flow, in a map (RMAP or RSEQIMAP),
// reading a key, a value or after '?', not in block mode; the text does not
// start with a space.
// NOTE(review): most of the control flow (the character switch, the line loop
// and the assignment of sc->scalar) is not visible in this excerpt.
984 template<
class EventHandler>
985 bool ParseEngine<EventHandler>::_scan_scalar_plain_map_flow(ScannedScalar *C4_RESTRICT sc)
987 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQ) || has_any(
RSEQIMAP));
988 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
BLCK));
989 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RMAP|
RSEQIMAP));
990 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
FLOW));
991 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RVAL|
QMRK));
993 substr s = m_evt_handler->m_curr->line_contents.rem;
994 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(
' '));
999 if(!_is_valid_start_scalar_plain_flow(s))
1002 _c4dbgp(
"scanning scalar...");
1004 const size_t start_offset = m_evt_handler->m_curr->pos.offset;
1005 bool needs_filter =
false;
// Walk the current line character by character looking for a terminator.
1008 for(
size_t i = 0; i < s.len; ++i)
1010 const char c = s.str[i];
1015 _line_progressed(i);
1016 _c4dbgpf(
"found terminating character: '{}'", c);
// This check treats end of line, ' ', ',' or '}' (and tab, when tab tokens are
// enabled) after the current character as terminating the scalar.
1019 if(s.len == i+1 || s.str[i+1] ==
' ' || s.str[i+1] ==
',' || s.str[i+1] ==
'}' _RYML_WITH_TAB_TOKENS(|| s.str[i+1] ==
'\t'))
1021 _line_progressed(i);
1022 _c4dbgpf(
"found terminating character: '{}'", c);
1028 _line_progressed(i);
1029 _c4err_(
"invalid character: '{}'", c);
1032 _line_progressed(i);
1036 _c4err_(
"invalid character: '{}'", c);
1041 _line_progressed(i);
1042 _c4dbgpf(
"found terminating character: '{}'", c);
// No terminator on this line: consume it and continue on the next one, if any.
1050 _c4dbgp(
"next line!");
1051 _line_progressed(s.len);
1052 if(!_finished_file())
1054 _c4dbgp(
"next line!");
1060 _c4dbgp(
"file finished!");
1063 s = m_evt_handler->m_curr->line_contents.rem;
// A scalar that spans more than one line is flagged for filtering.
1064 needs_filter =
true;
1070 sc->needs_filter = needs_filter;
1072 _c4dbgpf(
"scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
1074 return sc->scalar.len > 0u;
// Scans an unquoted JSON scalar inside a flow sequence and stores it in `sc`.
// The literals false/true/null are handled first; otherwise the line is
// scanned up to a terminating character. The scalar is never flagged for
// filtering.
// Preconditions (asserted): state is flow, in a sequence, not in a map or
// block; the text does not start with a space.
// NOTE(review): the conditions, the declaration of `i`, the terminator set and
// the return statements are not visible in this excerpt.
1077 template<
class EventHandler>
1078 bool ParseEngine<EventHandler>::_scan_scalar_seq_json(ScannedScalar *C4_RESTRICT sc)
1080 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RMAP));
1081 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
BLCK));
1082 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RSEQ));
1083 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
FLOW));
1085 substr s = m_evt_handler->m_curr->line_contents.rem;
1086 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(
' '));
1091 _c4dbgp(
"scanning scalar...");
1098 _c4dbgp(
"not a scalar.");
// Literal path: the helper's result is used as the length of the literal.
1103 const size_t len = _is_special_json_scalar(s);
1106 sc->scalar = s.first(len);
1107 sc->needs_filter =
false;
1108 _c4dbgpf(
"special json scalar: '{}'", sc->scalar);
1109 _line_progressed(len);
// Generic path: advance `i` to the first terminating character.
1116 for( ; i < s.len; ++i)
1118 const char c = s.str[i];
1125 _c4dbgpf(
"found terminating character: '{}'", c);
1128 if(!i || s.str[i-1] ==
' ')
1130 _c4dbgpf(
"found terminating character: '{}'", c);
// Only a non-empty prefix counts as a scalar.
1141 if(C4_LIKELY(i > 0))
1143 _line_progressed(i);
1144 sc->scalar = s.first(i);
1145 sc->needs_filter =
false;
1146 _c4dbgpf(
"scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
// Scans an unquoted JSON scalar (key or value) inside a flow map and stores it
// in `sc`. The literals false/true/null are handled first; otherwise the line
// is scanned up to a terminating character. The scalar is never flagged for
// filtering.
// Preconditions (asserted): state is flow, in a map, reading a key or value,
// not in a sequence or block; the text does not start with a space.
// NOTE(review): the conditions, the declaration of `i`, the terminator set and
// the return statements are not visible in this excerpt.
1153 template<
class EventHandler>
1154 bool ParseEngine<EventHandler>::_scan_scalar_map_json(ScannedScalar *C4_RESTRICT sc)
1156 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQ));
1157 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
BLCK));
1158 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RMAP));
1159 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
FLOW));
1160 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RVAL));
1162 substr s = m_evt_handler->m_curr->line_contents.rem;
1163 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(
' '));
1168 _c4dbgp(
"scanning scalar...");
// Literal path: the helper's result is used as the length of the literal.
1171 const size_t len = _is_special_json_scalar(s);
1174 sc->scalar = s.first(len);
1175 sc->needs_filter =
false;
1176 _c4dbgpf(
"special json scalar: '{}'", sc->scalar);
1177 _line_progressed(len);
// Generic path: advance `i` to the first terminating character.
1184 for( ; i < s.len; ++i)
1186 const char c = s.str[i];
1193 _c4dbgpf(
"found terminating character: '{}'", c);
1196 if(!i || s.str[i-1] ==
' ')
1198 _c4dbgpf(
"found terminating character: '{}'", c);
// Only a non-empty prefix counts as a scalar.
1209 if(C4_LIKELY(i > 0))
1211 _line_progressed(i);
1212 sc->scalar = s.first(i);
1213 sc->needs_filter =
false;
1214 _c4dbgpf(
"scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
// True when `s` is a document-begin token located at the very start of an
// unindented line. Precondition (asserted): `s` starts with '-'.
1221 template<
class EventHandler>
1222 bool ParseEngine<EventHandler>::_is_doc_begin(csubstr s)
1224 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s[0] ==
'-');
1225 return (m_evt_handler->m_curr->line_contents.indentation == 0u && _at_line_begin() && _is_doc_begin_token(s));
// True when `s` is a document-end token located at the very start of an
// unindented line. Precondition (asserted): `s` starts with '.'.
1228 template<
class EventHandler>
1229 bool ParseEngine<EventHandler>::_is_doc_end(csubstr s)
1231 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s[0] ==
'.');
1232 return (m_evt_handler->m_curr->line_contents.indentation == 0u && _at_line_begin() && _is_doc_end_token(s));
// Scans a plain scalar in block context, possibly spanning several lines, and
// stores it in `sc`. `indentation` is the minimum indentation a following line
// needs in order to continue the scalar.
// The scalar ends at a ':' (on the start line), at a '#' that is at the line
// start or preceded by a blank, at a less-indented line, or at a
// document-begin / document-end marker on an unindented line.
// Preconditions (asserted): not in flow mode, not RSEQIMAP, and one of
// BLCK/RUNK/USTY set; the text does not start with a space.
// NOTE(review): many conditions, breaks and the return statements are not
// visible in this excerpt; the comments below describe only what is shown.
1235 template<
class EventHandler>
1236 bool ParseEngine<EventHandler>::_scan_scalar_plain_blck(ScannedScalar *C4_RESTRICT sc,
size_t indentation)
1238 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
FLOW));
1239 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQIMAP));
1240 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
BLCK|
RUNK|
USTY));
1242 substr s = m_evt_handler->m_curr->line_contents.rem;
1243 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(
' '));
// Early checks: text that is a block token or a document marker is handled
// here before any scanning (the resulting actions are not visible).
1251 if(_is_blck_token(s))
1255 else if(_is_doc_begin(s))
1257 _c4dbgp(
"token is doc start");
1263 if(_is_blck_token(s))
1276 _c4dbgp(
"token is doc end");
1282 _c4dbgpf(
"plain scalar! indentation={}", indentation);
// Remember where the scalar starts; the final scalar is cut from m_buf
// between this offset and the position where scanning stopped.
1284 const size_t start_offset = m_evt_handler->m_curr->pos.offset;
1285 const size_t start_line = m_evt_handler->m_curr->pos.line;
1287 bool needs_filter =
false;
1290 _c4dbgpf(
"plain scalar line: [{}]~~~{}~~~", s.len, s);
// Scan the current line for ':' and '#'.
1291 for(
size_t i = 0; i < s.len; ++i)
1293 const char curr = s.str[i];
1298 _c4dbgpf(
"[{}]: got suspicious ':'", i);
1302 _c4dbgpf(
"followed by '{}'", i+1 == s.len ? csubstr(
"\\n") : _c4prc(s.str[i+1]));
1303 _line_progressed(i);
1305 if(C4_LIKELY(m_evt_handler->m_curr->pos.line == start_line))
1307 _c4dbgp(
"start line. scalar ends here");
// Runs of consecutive ':' are stepped over as a unit.
1318 while(j + 1 < s.len && s.str[j+1] ==
':')
1320 _c4dbgp(
"skip colon");
1323 i = j > i ? j-1 : i;
1324 _c4dbgp(
"nothing to see here");
1328 _c4dbgp(
"got suspicious '#'");
// A '#' starts a comment only at the line start or after a space/tab.
1329 if(!i || (s.str[i-1] ==
' ' || s.str[i-1] ==
'\t'))
1331 _c4dbgp(
"comment! scalar ends here");
1332 _line_progressed(i);
1337 _c4dbgp(
"nothing to see here");
// The line was consumed without finding an end: peek at the next line to see
// whether it continues the scalar.
1342 _line_progressed(s.len);
1343 csubstr next_peeked = _peek_next_line(m_evt_handler->m_curr->pos.offset);
1344 next_peeked = next_peeked.trimr(
"\n\r");
1345 const size_t next_indentation = next_peeked.first_not_of(
' ');
1346 _c4dbgpf(
"indentation curr={} next={}", indentation, next_indentation);
1347 if(next_indentation < indentation)
1349 _c4dbgp(
"smaller indentation! scalar ended");
// Unindented next line: check for a document-begin or document-end marker.
1352 else if(next_indentation == 0 && next_peeked.len > 0)
1354 const char first = next_peeked.str[0];
1358 next_peeked = next_peeked.trimr(
"\n\r");
1359 _c4dbgpf(
"doc begin? peeked=[{}]~~~{}{}~~~", next_peeked.len, next_peeked.len >= 3 ? next_peeked.first(3) : next_peeked, next_peeked.len > 3 ?
"..." :
"");
1360 if(_is_doc_begin_token(next_peeked))
1362 _c4dbgp(
"doc begin! scalar ended");
1367 next_peeked = next_peeked.trimr(
"\n\r");
1368 _c4dbgpf(
"doc end? peeked=[{}]~~~{}{}~~~", next_peeked.len, next_peeked.len >= 3 ? next_peeked.first(3) : next_peeked, next_peeked.len > 3 ?
"..." :
"");
1369 if(_is_doc_end_token(next_peeked))
1371 _c4dbgp(
"doc end! scalar ended");
1378 _c4dbgp(
"next line!");
1379 if(!_finished_file())
1381 _c4dbgp(
"next line!");
1387 _c4dbgp(
"file finished!");
1390 s = m_evt_handler->m_curr->line_contents.rem;
// A scalar that spans more than one line is flagged for filtering.
1391 needs_filter =
true;
// Trailing blanks and line breaks are not part of the scalar.
1396 sc->scalar = m_buf.range(start_offset, m_evt_handler->m_curr->pos.offset).trimr(
" \n\r\t");
1397 sc->needs_filter = needs_filter;
1399 _c4dbgpf(
"scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
// Scans a plain scalar that is a value of a block sequence.
// Asserts the expected state (block sequence, reading a value; no map, flow or
// RSEQIMAP) and delegates to _scan_scalar_plain_blck() with a minimum
// indentation one more than the current reference indentation.
1404 template<
class EventHandler>
1405 bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_blck(ScannedScalar *C4_RESTRICT sc)
1407 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RMAP));
1408 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
FLOW));
1409 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQIMAP));
1410 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RSEQ));
1411 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
BLCK));
1412 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL));
1413 return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref + 1u);
// Scans a plain scalar (key or value) of a block map.
// Asserts the expected state (block map, reading a key, a value or after '?';
// no sequence or flow) and delegates to _scan_scalar_plain_blck() with a
// minimum indentation one more than the current reference indentation.
1416 template<
class EventHandler>
1417 bool ParseEngine<EventHandler>::_scan_scalar_plain_map_blck(ScannedScalar *C4_RESTRICT sc)
1419 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQ));
1420 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
FLOW));
1421 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RMAP));
1422 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
BLCK));
1423 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RVAL|
QMRK));
1424 return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref + 1u);
// Scans a plain scalar while the container type is still unknown (RUNK or
// USTY, asserted). Unlike the seq/map variants, the minimum indentation passed
// on is the reference indentation itself, without the +1.
1427 template<
class EventHandler>
1428 bool ParseEngine<EventHandler>::_scan_scalar_plain_unk(ScannedScalar *C4_RESTRICT sc)
1430 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RUNK|
USTY));
1431 return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref);
// Returns the line that follows the one containing offset `pos`, without
// changing the parser position (the function is const). Passing npos means
// "use the current offset".
// NOTE(review): the declarations of `rem` / `nlpos`, the guards and the return
// statements are not visible in this excerpt.
1437 template<
class EventHandler>
1438 substr ParseEngine<EventHandler>::_peek_next_line(
size_t pos)
const
1442 pos = pos ==
npos ? m_evt_handler->m_curr->pos.offset : pos;
// Nothing to peek at once the position is at or past the end of the buffer.
1443 if(pos >= m_buf.len)
// Skip the rest of the current line, including its line break.
1447 rem = from_next_line(m_buf.sub(pos));
// Cut at the end of that next line; a two-character line break is kept whole
// (left_of with `true` includes the character at nlpos).
1452 nlpos = rem.first_of(
"\r\n");
1454 nlpos += _extend_from_combined_newline(rem[nlpos], rem[nlpos+1]);
1455 rem = rem.left_of(nlpos,
true);
1457 _c4dbgpf(
"peek next line @ {}: (len={})'{}'", pos, rem.len, rem.trimr(
"\r\n"));
1461 _c4dbgpf(
"peek next line @ {}: (len=0)''", pos);
// Loads the line starting at the current offset into the state's
// line_contents. At or past the end of the buffer, line_contents is reset to
// an empty range located at the end of the buffer.
1467 template<
class EventHandler>
1468 void ParseEngine<EventHandler>::_scan_line()
1470 if(C4_LIKELY(m_evt_handler->m_curr->pos.offset < m_buf.len))
1471 m_evt_handler->m_curr->line_contents.reset_with_next_line(m_buf, m_evt_handler->m_curr->pos.offset);
1473 m_evt_handler->m_curr->line_contents.reset(m_buf.last(0), m_buf.last(0));
// Consumes `ahead` characters of the current line: advances the byte offset
// and the column, and drops that many characters from the unread remainder.
1476 template<
class EventHandler>
1477 void ParseEngine<EventHandler>::_line_progressed(
size_t ahead)
1479 _c4dbgpf(
"line[{}] ({} cols) progressed by {}: col {}-->{} offset {}-->{}", m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->line_contents.full.len, ahead, m_evt_handler->m_curr->pos.col, m_evt_handler->m_curr->pos.col+ahead, m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset+ahead);
1480 m_evt_handler->m_curr->pos.offset += ahead;
1481 m_evt_handler->m_curr->pos.col += ahead;
// Columns are 1-based, so the furthest valid column is stripped.len + 1.
1482 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col <= m_evt_handler->m_curr->line_contents.stripped.len+1);
1483 m_evt_handler->m_curr->line_contents.rem = m_evt_handler->m_curr->line_contents.rem.sub(ahead);
// Moves the position from the end of the current line to the start of the
// next one: the offset advances by the difference between the full and the
// stripped line lengths, the line number is incremented and the column is
// reset to 1.
// Precondition (asserted): the column is just past the stripped contents.
1486 template<
class EventHandler>
1487 void ParseEngine<EventHandler>::_line_ended()
1489 _c4dbgpf(
"line[{}] ({} cols) ended! offset {}-->{} / col {}-->{}",
1490 m_evt_handler->m_curr->pos.line,
1491 m_evt_handler->m_curr->line_contents.full.len,
1492 m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset + m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.stripped.len,
1493 m_evt_handler->m_curr->pos.col, 1);
1494 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col == m_evt_handler->m_curr->line_contents.stripped.len + 1);
1495 m_evt_handler->m_curr->pos.offset += m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.stripped.len;
1496 ++m_evt_handler->m_curr->pos.line;
1497 m_evt_handler->m_curr->pos.col = 1;
// Reverts the position changes made by _line_ended(): steps the offset back by
// the full/stripped length difference, decrements the line number, puts the
// column just past the stripped contents, and leaves an empty remainder at
// that offset.
// Preconditions (asserted): column is 1, line is greater than 0, and the
// offset is large enough to step back.
1500 template<
class EventHandler>
1501 void ParseEngine<EventHandler>::_line_ended_undo()
1503 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col == 1u);
1504 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.line > 0u);
1505 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.offset >= m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.stripped.len);
1506 const size_t delta = m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.stripped.len;
1507 _c4dbgpf(
"line[{}] undo ended! line {}-->{}, offset {}-->{}", m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->pos.line - 1, m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset - delta);
1508 m_evt_handler->m_curr->pos.offset -= delta;
1509 --m_evt_handler->m_curr->pos.line;
1510 m_evt_handler->m_curr->pos.col = m_evt_handler->m_curr->line_contents.stripped.len + 1u;
// Zero-length view into the buffer at the restored offset.
1513 m_evt_handler->m_curr->line_contents.rem = m_buf.sub(m_evt_handler->m_curr->pos.offset, 0);
// Stores `indentation` as the reference indentation (indref) of the current
// parser state.
1518 template<
class EventHandler>
1519 void ParseEngine<EventHandler>::_set_indentation(
size_t indentation)
1521 m_evt_handler->m_curr->indref = indentation;
1522 _c4dbgpf(
"state[{}]: saving indentation: {}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
// Store the current column of the line being read (as reported by
// line_contents.current_col()) as the reference indentation (indref) of the
// current parser state.
1525 template<
class EventHandler>
1526 void ParseEngine<EventHandler>::_save_indentation()
// the unread remainder must not start before the full line
1528 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.begin() >= m_evt_handler->m_curr->line_contents.full.begin());
1529 m_evt_handler->m_curr->indref = m_evt_handler->m_curr->line_contents.current_col();
1530 _c4dbgpf(
"state[{}]: saving indentation: {}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
// End a block map. Before calling end_map() on the event handler, any
// pending entry is completed with empty plain scalars so that the handler
// always receives a full key/val pair.
1536 template<
class EventHandler>
1537 void ParseEngine<EventHandler>::_end_map_blck()
1539 _c4dbgp(
"mapblck: end");
// a key was read but its val is missing: flush pending annotations
// and emit an empty val
1542 _c4dbgp(
"mapblck: set missing val");
1543 _handle_annotations_before_blck_val_scalar();
1544 m_evt_handler->set_val_scalar_plain_empty();
// QMRK is set but neither key nor val was read: emit an empty key
// followed by an empty val
1546 else if(has_any(
QMRK))
1548 _c4dbgp(
"mapblck: set missing keyval");
1549 _handle_annotations_before_blck_key_scalar();
1550 m_evt_handler->set_key_scalar_plain_empty();
1551 _handle_annotations_before_blck_val_scalar();
1552 m_evt_handler->set_val_scalar_plain_empty();
1554 m_evt_handler->end_map();
// End a block sequence. When a val is still missing (see the debug
// message), pending annotations are flushed and an empty plain scalar is
// emitted as the val before end_seq() is called on the event handler.
1557 template<
class EventHandler>
1558 void ParseEngine<EventHandler>::_end_seq_blck()
1562 _c4dbgp(
"seqblck: set missing val");
1563 _handle_annotations_before_blck_val_scalar();
1564 m_evt_handler->set_val_scalar_plain_empty();
1566 m_evt_handler->end_seq();
// End the map of the current state and pop that state from the event
// handler.
1569 template<
class EventHandler>
1570 void ParseEngine<EventHandler>::_end2_map()
1572 _c4dbgp(
"map: end");
// must be called while reading a map
1573 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RMAP));
// at this point the state must not be flagged FLOW and must be flagged
// USTY before it is popped
1580 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
FLOW));
1581 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
USTY));
1582 m_evt_handler->_pop();
// End the sequence of the current state and pop that state from the event
// handler. Mirrors _end2_map(), but for RSEQ states.
1586 template<
class EventHandler>
1587 void ParseEngine<EventHandler>::_end2_seq()
1589 _c4dbgp(
"seq: end");
// must be called while reading a sequence
1590 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RSEQ));
// at this point the state must not be flagged FLOW and must be flagged
// USTY before it is popped
1597 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
FLOW));
1598 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
USTY));
1599 m_evt_handler->_pop();
// Begin an implicit document: notify the event handler through begin_doc()
// and reset the reference indentation of the current state to 0.
1603 template<
class EventHandler>
1604 void ParseEngine<EventHandler>::_begin2_doc()
1608 m_evt_handler->begin_doc();
1609 m_evt_handler->m_curr->indref = 0;
// Begin an explicit document: notify the event handler through
// begin_doc_expl() and reset the reference indentation of the current state
// to 0.
1612 template<
class EventHandler>
1613 void ParseEngine<EventHandler>::_begin2_doc_expl()
1617 m_evt_handler->begin_doc_expl();
1618 m_evt_handler->m_curr->indref = 0;
// End the current document through end_doc(). If the document is empty, or
// if tags/anchors are still pending, an empty plain scalar is emitted first
// so that the pending annotations have a node to attach to.
1621 template<
class EventHandler>
1622 void ParseEngine<EventHandler>::_end2_doc()
1624 _c4dbgp(
"doc: end");
// must be called while reading a document
1625 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RDOC));
1626 if(m_doc_empty || (m_pending_tags.num_entries || m_pending_anchors.num_entries))
1628 _c4dbgp(
"doc was empty; add empty val");
1629 _handle_annotations_before_blck_val_scalar();
1630 m_evt_handler->set_val_scalar_plain_empty();
1632 m_evt_handler->end_doc();
// End the current document through end_doc_expl(). If the document is
// empty, or if tags/anchors are still pending, an empty plain scalar is
// emitted first so that the pending annotations have a node to attach to.
1635 template<
class EventHandler>
1636 void ParseEngine<EventHandler>::_end2_doc_expl()
1638 _c4dbgp(
"doc: end");
1639 if(m_doc_empty || (m_pending_tags.num_entries || m_pending_anchors.num_entries))
1641 _c4dbgp(
"doc: no children; add empty val");
1642 _handle_annotations_before_blck_val_scalar();
1643 m_evt_handler->set_val_scalar_plain_empty();
1645 m_evt_handler->end_doc_expl();
// Begin a document when one must be started (see the debug message).
// NOTE(review): presumably a no-op when a document is already being read --
// confirm the guarding condition.
1648 template<
class EventHandler>
1649 void ParseEngine<EventHandler>::_maybe_begin_doc()
1653 _c4dbgp(
"doc must be started");
// End the document if one must be finished. Otherwise, if no document
// content was read but tags/anchors are pending, emit a complete document
// holding a single empty plain scalar, so that the annotations are not lost.
1657 template<
class EventHandler>
1658 void ParseEngine<EventHandler>::_maybe_end_doc()
1662 _c4dbgp(
"doc must be finished");
1665 else if(m_doc_empty && (m_pending_tags.num_entries || m_pending_anchors.num_entries))
1667 _c4dbgp(
"no doc to finish, but pending annotations");
// begin_doc / empty val / end_doc: a whole document for the annotations
1668 m_evt_handler->begin_doc();
1669 _handle_annotations_before_blck_val_scalar();
1670 m_evt_handler->set_val_scalar_plain_empty();
1671 m_evt_handler->end_doc();
// Pop all nested states down to the state of the enclosing document. The
// document state is looked for at the bottom of the stack: at index 0 when
// the root is a document, or at index 1 when the root is a stream. Any other
// stack layout is reported as an internal error.
1675 template<
class EventHandler>
1676 void ParseEngine<EventHandler>::_end_doc_suddenly__pop()
1678 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
1679 if(m_evt_handler->m_stack[0].flags &
RDOC)
1681 _c4dbgp(
"root is RDOC");
// pop only when we are nested below the document state
1682 if(m_evt_handler->m_curr->level != 0)
1683 _handle_indentation_pop(&m_evt_handler->m_stack[0]);
1685 else if((m_evt_handler->m_stack.size() > 1) && (m_evt_handler->m_stack[1].flags &
RDOC))
1687 _c4dbgp(
"root is STREAM");
// pop only when we are nested below the document state
1688 if(m_evt_handler->m_curr->level != 1)
1689 _handle_indentation_pop(&m_evt_handler->m_stack[1]);
1693 _c4err(
"internal error");
// postcondition: the current state is a document state
1695 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RDOC));
// Handle a document that ends while nested containers are still open: the
// nested states are first popped down to the document state through
// _end_doc_suddenly__pop().
1698 template<
class EventHandler>
1699 void ParseEngine<EventHandler>::_end_doc_suddenly()
1701 _c4dbgp(
"end doc suddenly");
1702 _end_doc_suddenly__pop();
// Handle a new document that starts while nested containers are still open:
// the nested states are first popped down to the document state through
// _end_doc_suddenly__pop().
1707 template<
class EventHandler>
1708 void ParseEngine<EventHandler>::_start_doc_suddenly()
1710 _c4dbgp(
"start doc suddenly");
1711 _end_doc_suddenly__pop();
// Finish the stream: report unterminated flow containers, pop every state
// still nested above the bottom of the stack, flush pending anchors/tags
// into an empty document, and finally call end_stream() on the event
// handler.
1716 template<
class EventHandler>
1717 void ParseEngine<EventHandler>::_end_stream()
1719 _c4dbgpf(
"end_stream, level={} node_id={}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->node_id);
// NOTE(review): these two errors are presumably raised only when a flow
// seq / flow map is still open -- confirm the guarding conditions
1721 _c4err(
"missing terminating ]");
1723 _c4err(
"missing terminating }");
// unwind to the bottom-most state of the stack
1724 if(m_evt_handler->m_stack.size() > 1)
1725 _handle_indentation_pop(m_evt_handler->m_stack.begin());
// annotations without a node: give them a document with an empty val
1732 if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
1736 m_evt_handler->begin_doc();
1737 _handle_annotations_before_blck_val_scalar();
1738 m_evt_handler->set_val_scalar_plain_empty();
1739 m_evt_handler->end_doc();
1743 m_evt_handler->end_stream();
// Pop parser states until the current state is `popto`.
// @param popto the state to return to; the loop runs until m_curr equals
//        this pointer. The debug message computes
//        m_curr->level - popto->level, so popto is expected to be at a
//        level not above the current one.
// NOTE(review): the debug messages indicate that each iteration ends either
// a seq or a map (chosen from the RSEQ/RMAP flags) -- confirm the calls.
1747 template<
class EventHandler>
1748 void ParseEngine<EventHandler>::_handle_indentation_pop(ParserState
const* popto)
1750 _c4dbgpf(
"popping {} level{}: from level {}(@ind={}) to level {}(@ind={})", m_evt_handler->m_curr->level - popto->level, (((m_evt_handler->m_curr->level - popto->level) > 1) ?
"s" :
""), m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, popto->level, popto->indref);
1751 while(m_evt_handler->m_curr != popto)
1755 _c4dbgpf(
"popping seq at level {} (indentation={},addr={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, m_evt_handler->m_curr);
1758 else if(has_any(
RMAP))
1760 _c4dbgpf(
"popping map at level {} (indentation={},addr={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, m_evt_handler->m_curr);
1768 _c4dbgpf(
"current level is {} (indentation={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
// Called from a block seq when the indentation of the current line has
// decreased: search the state stack, from the state just below the current
// one down to the bottom, for a state whose reference indentation equals the
// indentation of the current line, and pop down to it. It is a parse error
// if no suitable state is found.
1771 template<
class EventHandler>
1772 void ParseEngine<EventHandler>::_handle_indentation_pop_from_block_seq()
1775 using state_type =
typename EventHandler::state;
// the state to pop to; stays null when the search fails
1776 state_type
const* popto =
nullptr;
1777 auto &stack = m_evt_handler->m_stack;
// the search below walks the stack with raw pointers, so the storage
// must be contiguous and m_curr must point into it
1778 _RYML_CB_ASSERT(stack.m_callbacks, stack.is_contiguous());
1779 _RYML_CB_ASSERT(stack.m_callbacks, m_evt_handler->m_curr >= stack.begin() && m_evt_handler->m_curr < stack.end());
1780 const size_t ind = m_evt_handler->m_curr->line_contents.indentation;
// debug dump of the whole stack
1785 for(state_type
const& s : stack)
1786 _dbg_printf(
"state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(flagbuf_, s.flags));
// NOTE(review): when s reaches stack.begin(), the final --s forms a
// pointer before the start of the storage before the loop test fails --
// confirm this is acceptable here (the block-map variant stops with
// s > stack.begin() instead)
1789 for(state_type
const* s = m_evt_handler->m_curr-1; s >= stack.begin(); --s)
1791 _c4dbgpf(
"searching for state with indentation {}. curr={} (level={},node={})", ind, s->indref, s->level, s->node_id);
1792 if(s->indref == ind)
1794 _c4dbgpf(
"gotit!!! level={} node={}", s->level, s->node_id);
// the target must exist and must be strictly below the current state,
// both in stack position and in level
1799 if(!popto || popto >= m_evt_handler->m_curr || popto->level >= m_evt_handler->m_curr->level)
1801 _c4err(
"parse error: incorrect indentation?");
1803 _handle_indentation_pop(popto);
// Called from a block map when the indentation of the current line has
// decreased: search the state stack below the current state for the state
// to return to, and pop down to it. States with the same reference
// indentation as the current line get special treatment, including the case
// where the line starts with a '-' block token, which the debug message
// describes as an indentless seq parent. It is a parse error if no suitable
// state is found.
1806 template<
class EventHandler>
1807 void ParseEngine<EventHandler>::_handle_indentation_pop_from_block_map()
1810 using state_type =
typename EventHandler::state;
1811 auto &stack = m_evt_handler->m_stack;
// the search below walks the stack with raw pointers, so the storage
// must be contiguous and m_curr must point into it
1812 _RYML_CB_ASSERT(stack.m_callbacks, stack.is_contiguous());
1813 _RYML_CB_ASSERT(stack.m_callbacks, m_evt_handler->m_curr >= stack.begin() && m_evt_handler->m_curr < stack.end());
1814 const size_t ind = m_evt_handler->m_curr->line_contents.indentation;
// the state to pop to; stays null when the search fails
1815 state_type
const* popto =
nullptr;
// debug dump of the whole stack
1820 for(state_type
const& s : stack)
1821 _dbg_printf(
"state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(flagbuf_, s.flags));
// walk down from the state below the current one; the bottom-most
// state (stack.begin()) is not visited by this loop
1824 for(state_type
const* s = m_evt_handler->m_curr-1; s > stack.begin(); --s)
1826 _c4dbgpf(
"searching for state with indentation {}. current: ind={},level={},node={},flags={}", ind, s->indref, s->level, s->node_id, detail::_parser_flags_to_str(flagbuf_, s->flags));
1831 else if(s->indref == ind)
1833 _c4dbgpf(
"same indentation!!! level={} node={}", s->level, s->node_id);
// a candidate was already found, and s is a top-level state which
// is neither map nor seq
1834 if(popto && has_any(
RTOP, s) && has_none(
RMAP|
RSEQ, s))
// look at what follows the leading spaces of the current line
1841 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
1842 const size_t first = rem.first_not_of(
' ');
1843 _RYML_CB_ASSERT(stack.m_callbacks, first == ind || first ==
npos);
1844 rem = rem.right_of(first,
true);
1845 _c4dbgpf(
"indentless? rem='{}' first={}", rem, first);
// a '-' block token at the same indentation
1846 if(rem.begins_with(
'-') && _is_blck_token(rem))
1848 _c4dbgp(
"parent was indentless seq");
// the target must exist and must be strictly below the current state,
// both in stack position and in level
1854 if(!popto || popto >= m_evt_handler->m_curr || popto->level >= m_evt_handler->m_curr->level)
1856 _c4err(
"parse error: incorrect indentation?");
1858 _handle_indentation_pop(popto);
// Scan a single-quoted scalar starting at the current position, possibly
// spanning several lines, and advance the parser position past the closing
// quote.
// @return a ScannedScalar with the scalar range (quotes excluded) and a flag
//         telling whether the scalar still needs to be filtered
// Reaching the end of the file without finding the closing quote is an
// error.
1863 template<
class EventHandler>
1864 typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_scan_scalar_squot()
// b: offset in the buffer where the scalar starts; s: buffer from there on
1870 size_t b = m_evt_handler->m_curr->pos.offset;
1871 substr s = m_buf.sub(b);
// skip any spaces before the opening quote
1872 if(s.begins_with(
' '))
1875 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.sub(b).is_super(s));
1876 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begin() >= m_buf.sub(b).begin());
// advance by the number of characters between the old and new start of s
1877 _line_progressed((
size_t)(s.begin() - m_buf.sub(b).begin()));
// re-read the start offset after the advance, and require the opening quote
1879 b = m_evt_handler->m_curr->pos.offset;
1880 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'\''));
// advance past the opening quote
1883 _line_progressed(1);
1886 bool needs_filter =
false;
1888 size_t numlines = 1;
// read line by line until the closing quote or the end of the file
1890 while( ! _finished_file())
1892 const csubstr line = m_evt_handler->m_curr->line_contents.rem;
1893 bool line_is_blank =
true;
1894 _c4dbgpf(
"scanning single quoted scalar @ line[{}]: ~~~{}~~~", m_evt_handler->m_curr->pos.line, line);
1895 for(
size_t i = 0; i < line.len; ++i)
1897 const char curr = line.str[i];
// one-character lookahead; '~' stands in when curr is the last
// character of the line
1900 const char next = i+1 < line.len ? line.str[i+1] :
'~';
// NOTE(review): presumably set when a doubled quote ('') is found,
// as that escape has to be reduced by the filter -- confirm
1908 needs_filter =
true;
1912 else if(curr !=
' ')
1914 line_is_blank =
false;
// the expression is continued with additional conditions; the visible
// one is a line which begins with a space at the start of the line
1919 needs_filter = needs_filter
1922 || (_at_line_begin() && line.begins_with(
' '));
// consume the whole line
1926 _line_progressed(line.len);
// the closing quote must be inside the buffer
// NOTE(review): `pos >= 0` is always true if pos is an unsigned type
1931 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, pos >= 0 && pos < m_buf.len);
1932 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf[m_evt_handler->m_curr->pos.offset + pos] ==
'\'');
// advance past the closing quote, then make pos relative to the start
// of the scalar (b)
1933 _line_progressed(pos + 1);
1934 pos = m_evt_handler->m_curr->pos.offset - b - 1;
1944 _c4err(
"reached end of file while looking for closing quote");
1948 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, pos > 0);
// the end of s must be inside the buffer, and be either the buffer end
// or a quote
1949 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.end() >= m_buf.begin() && s.end() <= m_buf.end());
1950 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.end() == m_buf.end() || *s.end() ==
'\'');
1951 s = s.sub(0, pos-1);
1954 _c4prscalar(
"scanned squoted scalar", s,
true);
1956 return ScannedScalar { s, needs_filter };
// Scan a double-quoted scalar starting at the current position, possibly
// spanning several lines, and advance the parser position past the closing
// quote.
// @return a ScannedScalar with the scalar range (quotes excluded) and a flag
//         telling whether the scalar still needs to be filtered
// Reaching the end of the file without finding the closing quote is an
// error.
1961 template<
class EventHandler>
1962 typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_scan_scalar_dquot()
// b: offset in the buffer where the scalar starts; s: buffer from there on
1968 size_t b = m_evt_handler->m_curr->pos.offset;
1969 substr s = m_buf.sub(b);
// skip any spaces before the opening quote
1970 if(s.begins_with(
' '))
1973 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.sub(b).is_super(s));
1974 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begin() >= m_buf.sub(b).begin());
// advance by the number of characters between the old and new start of s
1975 _line_progressed((
size_t)(s.begin() - m_buf.sub(b).begin()));
// re-read the start offset after the advance, and require the opening quote
1977 b = m_evt_handler->m_curr->pos.offset;
1978 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'"'));
// advance past the opening quote
1981 _line_progressed(1);
1984 bool needs_filter =
false;
1986 size_t numlines = 1;
// read line by line until the closing quote or the end of the file
1988 while( ! _finished_file())
1990 const csubstr line = m_evt_handler->m_curr->line_contents.rem;
// applied only when building with GCC 11
// NOTE(review): presumably a workaround for an optimizer problem -- confirm
1991 #if defined(__GNUC__) && __GNUC__ == 11
1992 C4_DONT_OPTIMIZE(line);
1994 bool line_is_blank =
true;
1995 _c4dbgpf(
"scanning double quoted scalar @ line[{}]: line='{}'", m_evt_handler->m_curr->pos.line, line);
1996 for(
size_t i = 0; i < line.len; ++i)
1998 const char curr = line.str[i];
2000 line_is_blank =
false;
// one-character lookahead; '~' stands in when curr is the last
// character of the line
2004 const char next = i+1 < line.len ? line.str[i+1] :
'~';
// NOTE(review): presumably reached on a backslash, as escapes have
// to be resolved by the filter -- confirm
2005 needs_filter =
true;
// an escaped quote or an escaped backslash
2006 if(next ==
'"' || next ==
'\\')
2009 else if(curr ==
'"')
// the expression is continued with additional conditions; the visible
// one is a line which begins with a space at the start of the line
2017 needs_filter = needs_filter
2020 || (_at_line_begin() && line.begins_with(
' '));
// consume the whole line
2024 _line_progressed(line.len);
// the closing quote must be inside the buffer
// NOTE(review): `pos >= 0` is always true if pos is an unsigned type
2029 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, pos >= 0 && pos < m_buf.len);
2030 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf[m_evt_handler->m_curr->pos.offset + pos] ==
'"');
// advance past the closing quote, then make pos relative to the start
// of the scalar (b)
2031 _line_progressed(pos + 1);
2032 pos = m_evt_handler->m_curr->pos.offset - b - 1;
2042 _c4err(
"reached end of file looking for closing quote");
2046 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, pos > 0);
// the end of s must be either the buffer end or a quote, and must be
// inside the buffer
2047 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.end() == m_buf.end() || *s.end() ==
'"');
2048 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.end() >= m_buf.begin() && s.end() <= m_buf.end());
2049 s = s.sub(0, pos-1);
2052 _c4prscalar(
"scanned dquoted scalar", s,
true);
2054 return ScannedScalar { s, needs_filter };
// Scan a block scalar (introduced by '|' or '>'): parse the header (chomp
// indicator and optional explicit indentation digit), then gather the
// following lines into a raw block until the block is terminated by a
// decrease in indentation, by a document token at indentation 0, or by the
// end of the file.
// @param sb receives the raw scalar range and the indentation of the block
// @param indref the reference indentation of the container; must not be npos
2059 template<
class EventHandler>
2060 void ParseEngine<EventHandler>::_scan_block(ScannedBlock *C4_RESTRICT sb,
size_t indref)
2062 _c4dbgpf(
"blck: indref={}", indref);
2063 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, indref !=
npos);
// skip leading spaces in the remainder of the current line
2066 csubstr s = m_evt_handler->m_curr->line_contents.rem;
2067 csubstr trimmed = s.triml(
' ');
2068 if(trimmed.str > s.str)
2070 _c4dbgp(
"skipping whitespace");
2071 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, trimmed.str >= s.str);
2072 _line_progressed(
static_cast<size_t>(trimmed.str - s.str));
// the header must start with the block style indicator
2075 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'|') || s.begins_with(
'>'));
2077 _c4dbgpf(
"blck: specs=[{}]~~~{}~~~", s.len, s);
// header defaults: clip chomping, and indentation yet to be determined
2080 BlockChomp_e chomp = CHOMP_CLIP;
2081 size_t indentation =
npos;
// header with more than the style indicator: t is what follows it
2085 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with_any(
"|>"));
2086 csubstr t = s.sub(1);
2087 _c4dbgpf(
"blck: spec is multichar: '{}'", t);
2088 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, t.len >= 1);
// chomp indicator: '-' or '+'
2089 size_t pos = t.first_of(
"-+");
2090 _c4dbgpf(
"blck: spec chomp char at {}", pos);
2094 chomp = CHOMP_STRIP;
2095 else if(t[pos] ==
'+')
// explicit indentation indicator: the leading decimal digits of t
2103 digits = t.left_of(t.first_not_of(
"0123456789"));
2104 if( ! digits.empty())
// only a single, nonzero digit is accepted
2106 if(C4_UNLIKELY(digits.len > 1))
2107 _c4err(
"parse error: invalid indentation");
2108 _c4dbgpf(
"blck: parse indentation digits: [{}]~~~{}~~~", digits.len, digits);
2109 if(C4_UNLIKELY( !
c4::atou(digits, &indentation)))
2110 _c4err(
"parse error: could not read indentation as decimal");
2111 if(C4_UNLIKELY( ! indentation))
2112 _c4err(
"parse error: null indentation");
// the explicit indentation is relative to the indentation of the
// current state
// NOTE(review): the debug message reports indentation+indref (the
// parameter), while the code adds m_curr->indref -- confirm that both
// are always equal here
2113 _c4dbgpf(
"blck: indentation specified: {}. add {} from curr state -> {}", indentation, m_evt_handler->m_curr->indref, indentation+indref);
2114 indentation += m_evt_handler->m_curr->indref;
2118 _c4dbgpf(
"blck: style={} chomp={} indentation={}", s.begins_with(
'>') ?
"fold" :
"literal", chomp==CHOMP_CLIP ?
"clip" : (chomp==CHOMP_STRIP ?
"strip" :
"keep"), indentation);
// finish the header line
2121 _line_progressed(s.len);
// the raw block starts empty at the current offset, and grows by whole
// lines in the loop below
2126 substr raw_block(m_buf.data() + m_evt_handler->m_curr->pos.offset,
size_t(0));
2127 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, raw_block.begin() == m_evt_handler->m_curr->line_contents.full.begin());
// num_lines: count of lines in the block; first: line where the block
// starts; provisional_indentation: candidate indentation gathered from
// leading empty lines, used while no content line has fixed the
// indentation yet
2135 size_t num_lines = 0;
2136 size_t first = m_evt_handler->m_curr->pos.line;
2137 size_t provisional_indentation =
npos;
2139 while(( ! _finished_file()))
// peek at the next line through lc, without committing to it
2142 lc.reset_with_next_line(m_buf, m_evt_handler->m_curr->pos.offset);
// applied only when building with GCC 12 or 13
// NOTE(review): presumably a workaround for an optimizer problem -- confirm
2143 #if defined(__GNUC__) && (__GNUC__ == 12 || __GNUC__ == 13)
2144 C4_DONT_OPTIMIZE(lc.rem);
2146 _c4dbgpf(
"blck: peeking at [{}]~~~{}~~~", lc.stripped.len, lc.stripped);
// case 1: the block indentation is already known
2148 if(indentation !=
npos)
2150 _c4dbgpf(
"blck: indentation={}", indentation);
// a line with content and with less indentation than the block
2152 if(lc.indentation < indentation && ( ! lc.rem.trim(
" \t").empty()))
2156 _c4dbgpf(
"blck: indentation decreased ref={} thisline={}", indentation, lc.indentation);
2160 _c4err(
"indentation decreased without any scalar");
// at indentation 0, a document token also terminates the block
2164 else if(indentation == 0)
2166 _c4dbgpf(
"blck: noindent. lc.rem=[{}]~~~{}~~~", lc.rem.len, lc.rem);
2167 if(_is_doc_token(lc.rem))
2169 _c4dbgp(
"blck: stop. indentation=0 and doc ended");
// case 2: the block indentation is not known yet
// fns: position of the first character which is not a space
2176 const size_t fns = lc.stripped.first_not_of(
' ');
2177 _c4dbgpf(
"blck: indentation ref not set. firstnonws={}", fns);
2181 if(C4_UNLIKELY(lc.stripped.begins_with(
'\t')))
// the line has content
2184 _c4dbgpf(
"blck: line not empty. indref={} indprov={} indentation={}", indref, provisional_indentation, lc.indentation);
2185 if(provisional_indentation ==
npos)
// less indented than the container: the block is over
2187 if(lc.indentation < indref)
2189 _c4dbgpf(
"blck: block terminated indentation={} < indref={}", lc.indentation, indref);
2190 if(raw_block.len == 0)
2192 _c4dbgp(
"blck: was empty, undo next line");
2197 else if(lc.indentation == m_evt_handler->m_curr->indref)
2201 _c4dbgpf(
"blck: block terminated. reading container and indentation={}==indref={}", lc.indentation, m_evt_handler->m_curr->indref);
// first content line: it fixes the indentation of the block
2205 _c4dbgpf(
"blck: set indentation ref from this line: ref={}", lc.indentation);
2206 indentation = lc.indentation;
// a provisional indentation exists: the content line fixes the
// indentation when it is at least as indented
2210 if(lc.indentation >= provisional_indentation)
2212 _c4dbgpf(
"blck: set indentation ref from provisional indentation: provisional_ref={}, thisline={}", provisional_indentation, lc.indentation);
2214 indentation = lc.indentation;
// the line is empty or has only spaces: update the provisional
// indentation
2225 _c4dbgpf(
"blck: line empty or {} spaces. line_indentation={} prov_indentation={}", lc.stripped.len, lc.indentation, provisional_indentation);
2226 if(provisional_indentation !=
npos)
// a longer all-space line increases the provisional indentation
2228 if(lc.stripped.len >= provisional_indentation)
2230 _c4dbgpf(
"blck: increase provisional_ref {} -> {}", provisional_indentation, lc.stripped.len);
2231 provisional_indentation = lc.stripped.len;
// compiled only when RYML_NO_COVERAGE__TO_BE_DELETED is defined
2233 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
2234 else if(lc.indentation >= provisional_indentation && lc.indentation !=
npos)
2236 _c4dbgpf(
"blck: increase provisional_ref {} -> {}", provisional_indentation, lc.indentation);
2237 provisional_indentation = lc.indentation;
// first empty line: initialize from the line indentation; when that
// is 0, the result of has_any(RSEQ|RVAL) is used as the value
2243 provisional_indentation = lc.indentation ? lc.indentation : has_any(
RSEQ|
RVAL);
2244 _c4dbgpf(
"blck: initialize provisional_ref={}", provisional_indentation);
2245 if(provisional_indentation ==
npos)
2247 provisional_indentation = lc.stripped.len ? lc.stripped.len : has_any(
RSEQ|
RVAL);
2248 _c4dbgpf(
"blck: initialize provisional_ref={}", provisional_indentation);
// never below the indentation of the container
2250 if(provisional_indentation < indref)
2252 provisional_indentation = indref;
2253 _c4dbgpf(
"blck: initialize provisional_ref={}", provisional_indentation);
// the peeked line belongs to the block: commit to it, append the full
// line to the raw block, and consume it
2259 m_evt_handler->m_curr->line_contents = lc;
2260 _c4dbgpf(
"blck: append '{}'", m_evt_handler->m_curr->line_contents.rem);
2261 raw_block.len += m_evt_handler->m_curr->line_contents.full.len;
2262 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
// sanity check on the number of lines consumed; num_lines is otherwise
// only used for this check
2266 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.line == (first + num_lines) || (raw_block.len == 0));
2267 C4_UNUSED(num_lines);
// no content line fixed the indentation: use the provisional one
2270 if(indentation ==
npos)
2272 _c4dbgpf(
"blck: set indentation from provisional: {}", provisional_indentation);
2273 indentation = provisional_indentation;
2279 _c4prscalar(
"scanned block", raw_block,
true);
// report the result to the caller
2281 sb->scalar = raw_block;
2282 sb->indentation = indentation;
// Debug-print helper for the whitespace filters: the message is prefixed
// with the read and write positions of the filter processor `proc`, which
// must be in scope at the point of use. The second definition expands to
// nothing.
// NOTE(review): the two definitions are presumably the alternatives of a
// debug-print conditional -- confirm.
2294 #define _c4dbgfws(fmt, ...) _c4dbgpf("filt_ws[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2296 #define _c4dbgfws(...)
// Handle a run of whitespace at the read position of the filter processor:
// find the first character after the run, and decide from it whether the
// whitespace is trailing on its line (it is followed by '\n' or '\r', or
// nothing follows it) or is legitimate content.
// @param proc the filter processor; proc.curr() must be a space or a tab
// @return a bool; the callers in this file treat a false result as
//         "everything else is trailing whitespace"
2299 template<
class EventHandler>
2300 template<
class FilterProcessor>
2301 bool ParseEngine<EventHandler>::_filter_ws_handle_to_first_non_space(FilterProcessor &proc)
2303 _c4dbgfws(
"found whitespace '{}'", _c4prc(proc.curr()));
2304 _RYML_CB_ASSERT(this->callbacks(), proc.curr() ==
' ' || proc.curr() ==
'\t');
// at the very start of the source (rpos == 0) only spaces are searched
// over; elsewhere both spaces and tabs are
2306 const size_t first_pos = proc.rpos > 0 ? proc.src.first_not_of(
" \t", proc.rpos) : proc.src.first_not_of(
' ', proc.rpos);
2307 if(first_pos !=
npos)
2309 const char first_char = proc.src[first_pos];
2310 _c4dbgfws(
"firstnonws='{}'@{}", _c4prc(first_char), first_pos);
// whitespace followed by a line break: drop the whitespace
2311 if(first_char ==
'\n' || first_char ==
'\r')
2313 _c4dbgfws(
"whitespace is trailing on line",
"");
2314 proc.skip(first_pos - proc.rpos);
2319 _c4dbgfws(
"legit whitespace. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
// nothing but whitespace until the end of the source
2323 _c4dbgfws(
"whitespace is trailing on line",
"");
// Filter a run of whitespace, keeping trailing whitespace at the end of the
// scalar: when _filter_ws_handle_to_first_non_space() returns false, all
// the remaining characters of the source are copied to the output.
// @param proc the filter processor, positioned at a whitespace character
2327 template<
class EventHandler>
2328 template<
class FilterProcessor>
2329 void ParseEngine<EventHandler>::_filter_ws_copy_trailing(FilterProcessor &proc)
2331 if(!_filter_ws_handle_to_first_non_space(proc))
2333 _c4dbgfws(
"... everything else is trailing whitespace - copy {} chars", proc.src.len - proc.rpos);
2334 proc.copy(proc.src.len - proc.rpos);
// Filter a run of whitespace, dropping trailing whitespace at the end of
// the scalar: when _filter_ws_handle_to_first_non_space() returns false,
// all the remaining characters of the source are skipped.
// @param proc the filter processor, positioned at a whitespace character
2338 template<
class EventHandler>
2339 template<
class FilterProcessor>
2340 void ParseEngine<EventHandler>::_filter_ws_skip_trailing(FilterProcessor &proc)
2342 if(!_filter_ws_handle_to_first_non_space(proc))
2344 _c4dbgfws(
"... everything else is trailing whitespace - skip {} chars", proc.src.len - proc.rpos);
2345 proc.skip(proc.src.len - proc.rpos);
// Debug-print helper for the plain scalar filter: the message is prefixed
// with the read and write positions of the filter processor `proc`, which
// must be in scope at the point of use. The second definition expands to
// nothing.
// NOTE(review): the two definitions are presumably the alternatives of a
// debug-print conditional -- confirm.
2359 #define _c4dbgfps(fmt, ...) _c4dbgpf("filt_plain[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2361 #define _c4dbgfps(fmt, ...)
// Filter a newline found in a plain scalar. The number of newlines which
// follow is counted; when there are any, that many '\n' are written to the
// output. Per the debug messages, a single newline is converted to a space,
// unless only whitespace follows it.
// @param proc the filter processor; proc.curr() must be '\n'
// @param indentation forwarded to _count_following_newlines()
2364 template<
class EventHandler>
2365 template<
class FilterProcessor>
2366 void ParseEngine<EventHandler>::_filter_nl_plain(FilterProcessor &C4_RESTRICT proc,
size_t indentation)
2368 _RYML_CB_ASSERT(this->callbacks(), proc.curr() ==
'\n');
2370 _c4dbgfps(
"found newline. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
// ii is passed by address to _count_following_newlines()
// NOTE(review): presumably left at the position after the counted
// newlines -- confirm
2371 size_t ii = proc.rpos;
2372 const size_t numnl_following = _count_following_newlines(proc.src, &ii, indentation);
// several consecutive newlines: write the following ones
2375 proc.set(
'\n', numnl_following);
2376 _c4dbgfps(
"{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ?
"in the middle" :
"at the end", proc.rpos-ii);
// single newline: look for the first character after the newline which
// is neither space nor tab
2380 const size_t ret = proc.src.first_not_of(
" \t", proc.rpos+1);
// NOTE(review): the message is labelled `ret=` but the value passed is ii
2384 _c4dbgfps(
"single newline. convert to space. ret={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2388 _c4dbgfps(
"last newline, everything else is whitespace. ii={}/{}", ii, proc.src.len);
// Filter a plain scalar: iterate over the characters of the processor, with
// special handling of whitespace (trailing whitespace is skipped), of
// newlines (see _filter_nl_plain()) and of carriage returns (ignored, per
// the debug message).
// @param proc the filter processor holding the source and the destination
// @param indentation the indentation of the scalar; must not be npos
// @return the result of the processor, proc.result()
2395 template<
class EventHandler>
2396 template<
class FilterProcessor>
2397 auto ParseEngine<EventHandler>::_filter_plain(FilterProcessor &C4_RESTRICT proc,
size_t indentation) -> decltype(proc.result())
2399 _RYML_CB_ASSERT(this->callbacks(), indentation !=
npos);
2400 _c4dbgfps(
"before=[{}]~~~{}~~~", proc.src.len, proc.src);
2402 while(proc.has_more_chars())
2404 const char curr = proc.curr();
2405 _c4dbgfps(
"'{}', sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
// whitespace: drop it when it is trailing
2410 _c4dbgfps(
"whitespace", curr);
2411 _filter_ws_skip_trailing(proc);
// newline: folded by the dedicated helper
2414 _c4dbgfps(
"newline", curr);
2415 _filter_nl_plain(proc, indentation);
2418 _c4dbgfps(
"carriage return, ignore", curr);
2427 _c4dbgfps(
"after[{}]=~~~{}~~~", proc.wpos, proc.sofar());
2429 return proc.result();
// Filter a plain scalar from a source range into a separate destination
// range: runs _filter_plain() over a FilterProcessorSrcDst built from
// `scalar` and `dst`, and returns its result.
2435 template<
class EventHandler>
2438 FilterProcessorSrcDst proc(scalar, dst);
2439 return _filter_plain(proc, indentation);
// Filter a plain scalar in place: runs _filter_plain() over a
// FilterProcessorInplaceEndExtending built from `dst` and `cap`, and
// returns its result.
// NOTE(review): `cap` is presumably the capacity available in dst -- confirm.
2442 template<
class EventHandler>
2445 FilterProcessorInplaceEndExtending proc(dst, cap);
2446 return _filter_plain(proc, indentation);
// Debug-print helper for the single-quoted scalar filter: the message is
// prefixed with the read and write positions of the filter processor
// `proc`, which must be in scope at the point of use. The second definition
// expands to nothing.
// NOTE(review): the two definitions are presumably the alternatives of a
// debug-print conditional -- confirm.
2457 #define _c4dbgfsq(fmt, ...) _c4dbgpf("filt_squo[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2459 #define _c4dbgfsq(fmt, ...)
// Filter a newline found in a single-quoted scalar. The number of newlines
// which follow is counted; when there are any, that many '\n' are written
// to the output. Per the debug messages, a single newline is converted to a
// space.
// @param proc the filter processor; proc.curr() must be '\n'
2462 template<
class EventHandler>
2463 template<
class FilterProcessor>
2464 void ParseEngine<EventHandler>::_filter_nl_squoted(FilterProcessor &C4_RESTRICT proc)
2466 _RYML_CB_ASSERT(this->callbacks(), proc.curr() ==
'\n');
2468 _c4dbgfsq(
"found newline. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
// ii is passed by address to _count_following_newlines()
// NOTE(review): presumably left at the position after the counted
// newlines -- confirm
2469 size_t ii = proc.rpos;
2470 const size_t numnl_following = _count_following_newlines(proc.src, &ii);
// several consecutive newlines: write the following ones
2473 proc.set(
'\n', numnl_following);
2474 _c4dbgfsq(
"{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ?
"in the middle" :
"at the end", proc.rpos-ii);
// single newline: look for the first character after the newline which
// is neither space nor tab
2478 const size_t ret = proc.src.first_not_of(
" \t", proc.rpos+1);
// NOTE(review): the message is labelled `ret=` but the value passed is ii
2482 _c4dbgfsq(
"single newline. convert to space. ret={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2487 _c4dbgfsq(
"single newline. convert to space. ii={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
// Filter a single-quoted scalar: iterate over the characters of the
// processor, with special handling of whitespace (trailing whitespace is
// kept), of newlines (see _filter_nl_squoted()), of carriage returns
// (skipped, per the debug message) and of single quotes, where a pair of
// consecutive quotes is detected.
// @param proc the filter processor holding the source and the destination
// @return the result of the processor, proc.result()
2493 template<
class EventHandler>
2494 template<
class FilterProcessor>
2495 auto ParseEngine<EventHandler>::_filter_squoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result())
2497 _c4dbgfsq(
"before=[{}]~~~{}~~~", proc.src.len, proc.src);
2501 while(proc.has_more_chars())
2503 const char curr = proc.curr();
2504 _c4dbgfsq(
"'{}', sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
// whitespace: copied, including when it is trailing
2509 _c4dbgfsq(
"whitespace", curr);
2510 _filter_ws_copy_trailing(proc);
// newline: folded by the dedicated helper
2513 _c4dbgfsq(
"newline", curr);
2514 _filter_nl_squoted(proc);
2517 _c4dbgfsq(
"skip cr", curr);
// quote: check whether it is the first of a pair
2521 _c4dbgfsq(
"squote", curr);
2522 if(proc.next() ==
'\'')
2524 _c4dbgfsq(
"two consecutive squotes", curr);
2539 _c4dbgfsq(
": #filteredchars={} after=~~~[{}]{}~~~", proc.src.len-proc.sofar().len, proc.sofar().len, proc.sofar());
2541 return proc.result();
// Filter a single-quoted scalar from a source range into a separate
// destination range: runs _filter_squoted() over a FilterProcessorSrcDst
// built from `scalar` and `dst`, and returns its result.
2546 template<
class EventHandler>
2549 FilterProcessorSrcDst proc(scalar, dst);
2550 return _filter_squoted(proc);
// Filter a single-quoted scalar in place: runs _filter_squoted() over a
// FilterProcessorInplaceEndExtending built from `dst` and `cap`, and
// returns its result.
// NOTE(review): `cap` is presumably the capacity available in dst -- confirm.
2553 template<
class EventHandler>
2556 FilterProcessorInplaceEndExtending proc(dst, cap);
2557 return _filter_squoted(proc);
// Debug-print helper for the double-quoted scalar filter: the message is
// prefixed with the read and write positions of the filter processor
// `proc`, which must be in scope at the point of use. The second definition
// expands to nothing.
// NOTE(review): the two definitions are presumably the alternatives of a
// debug-print conditional -- confirm.
2568 #define _c4dbgfdq(fmt, ...) _c4dbgpf("filt_dquo[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2570 #define _c4dbgfdq(...)
// Filter a newline found in a double-quoted scalar. The number of newlines
// which follow is counted; when there are any, that many '\n' are written
// to the output. Per the debug messages, a single newline is converted to a
// space. Afterwards, a backslash found at position ii and followed by a
// space or tab gets special treatment ("extend skip to backslash").
// @param proc the filter processor; proc.curr() must be '\n'
2573 template<
class EventHandler>
2574 template<
class FilterProcessor>
2575 void ParseEngine<EventHandler>::_filter_nl_dquoted(FilterProcessor &C4_RESTRICT proc)
2577 _RYML_CB_ASSERT(this->callbacks(), proc.curr() ==
'\n');
2579 _c4dbgfdq(
"found newline. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
// ii is passed by address to _count_following_newlines()
// NOTE(review): presumably left at the position after the counted
// newlines -- confirm
2580 size_t ii = proc.rpos;
2581 const size_t numnl_following = _count_following_newlines(proc.src, &ii);
// several consecutive newlines: write the following ones
2584 proc.set(
'\n', numnl_following);
2585 _c4dbgfdq(
"{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ?
"in the middle" :
"at the end", proc.rpos-ii);
// single newline: look for the first character after the newline which
// is neither space nor tab
2589 const size_t ret = proc.src.first_not_of(
" \t", proc.rpos+1);
// NOTE(review): the message is labelled `ret=` but the value passed is ii
2593 _c4dbgfdq(
"single newline. convert to space. ret={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
2598 _c4dbgfdq(
"single newline. convert to space. ii={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
// a backslash right at ii
2600 if(ii < proc.src.len && proc.src.str[ii] ==
'\\')
2602 _c4dbgfdq(
"backslash at [{}]", ii);
// lookahead past the backslash; '\0' stands in at the end of the source
2603 const char next = ii+1 < proc.src.len ? proc.src.str[ii+1] :
'\0';
// the backslash escapes a space or a tab
2604 if(next ==
' ' || next ==
'\t')
2606 _c4dbgfdq(
"extend skip to backslash",
"");
// Resolve the escape sequence which starts at the backslash at the read
// position of the filter processor, writing the character(s) it stands for
// to the output. The character after the backslash selects the escape; an
// unknown character is reported as an error.
// @param proc the filter processor, positioned at the backslash
// NOTE(review): the last argument given to translate_esc_bulk() and
// translate_esc_extending() matches the count of source characters after
// the backslash (1 for a letter, 3/5/9 for x/u/U plus their hex digits) --
// presumably the number of characters to consume; confirm.
2614 template<
class EventHandler>
2615 template<
class FilterProcessor>
2616 void ParseEngine<EventHandler>::_filter_dquoted_backslash(FilterProcessor &C4_RESTRICT proc)
2618 char next = proc.next();
2619 _c4dbgfdq(
"backslash, next='{}'", _c4prc(next));
// backslash followed by \r\n: treated as if followed by \n (see the
// debug message)
2622 if(proc.rpos+2 < proc.src.len && proc.src.str[proc.rpos+2] ==
'\n')
2626 _c4dbgfdq(
"[{}]: was \\r\\n, now next='\\n'", proc.rpos);
// scan forward from the character after the escaped one, looking at
// spaces and tabs, then skip everything up to position ii
2632 size_t ii = proc.rpos + 2;
2633 for( ; ii < proc.src.len; ++ii)
2636 if(proc.src.str[ii] ==
' ' || proc.src.str[ii] ==
'\t')
2641 proc.skip(ii - proc.rpos);
// escapes which stand for the escaped character itself
2643 else if(next ==
'"' || next ==
'/' || next ==
' ' || next ==
'\t')
2646 proc.translate_esc(next);
2647 _c4dbgfdq(
"here, used '{}'", _c4prc(next));
2649 else if(next ==
'\r')
// single-character escapes: \n \r \t and backslash
2653 else if(next ==
'n')
2655 proc.translate_esc(
'\n');
2657 else if(next ==
'r')
2659 proc.translate_esc(
'\r');
2661 else if(next ==
't')
2663 proc.translate_esc(
'\t');
2665 else if(next ==
'\\')
2667 proc.translate_esc(
'\\');
// \xHH: code point given by 2 hex digits
2669 else if(next ==
'x')
2671 if(C4_UNLIKELY(proc.rpos + 1u + 2u >= proc.src.len))
2672 _c4err_(
"\\x requires 2 hex digits. scalar pos={}", proc.rpos);
// the digits start 2 characters after the backslash
2674 csubstr codepoint = proc.src.sub(proc.rpos + 2u, 2u);
2675 _c4dbgfdq(
"utf8 ~~~{}~~~ rpos={} rem=~~~{}~~~", codepoint, proc.rpos, proc.src.sub(proc.rpos));
2676 uint32_t codepoint_val = {};
2677 if(C4_UNLIKELY(!
read_hex(codepoint, &codepoint_val)))
2678 _c4err_(
"failed to read \\x codepoint. scalar pos={}", proc.rpos);
// encode the code point into readbuf; 0 bytes means failure
2679 const size_t numbytes =
decode_code_point((uint8_t*)readbuf,
sizeof(readbuf), codepoint_val);
// NOTE(review): the message is labelled `code point=` but the value
// passed is proc.rpos
2680 if(C4_UNLIKELY(numbytes == 0))
2681 _c4err_(
"failed to decode code point={}", proc.rpos);
2682 _RYML_CB_ASSERT(callbacks(), numbytes <= 4);
2683 proc.translate_esc_bulk(readbuf, numbytes, 3u);
2684 _c4dbgfdq(
"utf8 after rpos={} rem=~~~{}~~~", proc.rpos, proc.src.sub(proc.rpos));
// \uHHHH: code point given by 4 hex digits
2686 else if(next ==
'u')
2688 if(C4_UNLIKELY(proc.rpos + 1u + 4u >= proc.src.len))
2689 _c4err_(
"\\u requires 4 hex digits. scalar pos={}", proc.rpos);
2691 csubstr codepoint = proc.src.sub(proc.rpos + 2u, 4u);
2692 uint32_t codepoint_val = {};
2693 if(C4_UNLIKELY(!
read_hex(codepoint, &codepoint_val)))
2694 _c4err_(
"failed to parse \\u codepoint. scalar pos={}", proc.rpos);
2695 const size_t numbytes =
decode_code_point((uint8_t*)readbuf,
sizeof(readbuf), codepoint_val);
// NOTE(review): the message is labelled `code point=` but the value
// passed is proc.rpos
2696 if(C4_UNLIKELY(numbytes == 0))
2697 _c4err_(
"failed to decode code point={}", proc.rpos);
2698 _RYML_CB_ASSERT(callbacks(), numbytes <= 4);
2699 proc.translate_esc_bulk(readbuf, numbytes, 5u);
// \UHHHHHHHH: code point given by 8 hex digits
2701 else if(next ==
'U')
2703 if(C4_UNLIKELY(proc.rpos + 1u + 8u >= proc.src.len))
2704 _c4err_(
"\\U requires 8 hex digits. scalar pos={}", proc.rpos);
2706 csubstr codepoint = proc.src.sub(proc.rpos + 2u, 8u);
2707 uint32_t codepoint_val = {};
2708 if(C4_UNLIKELY(!
read_hex(codepoint, &codepoint_val)))
2709 _c4err_(
"failed to parse \\U codepoint. scalar pos={}", proc.rpos);
2710 const size_t numbytes =
decode_code_point((uint8_t*)readbuf,
sizeof(readbuf), codepoint_val);
// NOTE(review): the message is labelled `code point=` but the value
// passed is proc.rpos
2711 if(C4_UNLIKELY(numbytes == 0))
2712 _c4err_(
"failed to decode code point={}", proc.rpos);
2713 _RYML_CB_ASSERT(callbacks(), numbytes <= 4);
2714 proc.translate_esc_bulk(readbuf, numbytes, 9u);
// single-character escapes for control characters:
// \0 (null), \b, \f, \a, \v, and \e (escape, 0x1b)
2717 else if(next ==
'0')
2719 proc.translate_esc(
'\0');
2721 else if(next ==
'b')
2723 proc.translate_esc(
'\b');
2725 else if(next ==
'f')
2727 proc.translate_esc(
'\f');
2729 else if(next ==
'a')
2731 proc.translate_esc(
'\a');
2733 else if(next ==
'v')
2735 proc.translate_esc(
'\v');
2737 else if(next ==
'e')
2739 proc.translate_esc(
'\x1b');
// escapes which expand to a multi-byte UTF-8 sequence. Each byte is
// given through _RYML_CHCONST in two forms
// NOTE(review): presumably the signed and unsigned char values of the
// same byte -- confirm the macro
// \_ : bytes c2 a0, the UTF-8 encoding of U+00A0
2741 else if(next ==
'_')
2744 const char payload[] = {
2745 _RYML_CHCONST(-0x3e, 0xc2),
2746 _RYML_CHCONST(-0x60, 0xa0),
2748 proc.translate_esc_bulk(payload, 2, 1);
// \N : bytes c2 85, the UTF-8 encoding of U+0085
2750 else if(next ==
'N')
2753 const char payload[] = {
2754 _RYML_CHCONST(-0x3e, 0xc2),
2755 _RYML_CHCONST(-0x7b, 0x85),
2757 proc.translate_esc_bulk(payload, 2, 1);
// \L : bytes e2 80 a8, the UTF-8 encoding of U+2028. The 3-byte
// payload is longer than the 2-character escape, and a different
// processor method is used than for the 2-byte payloads above
2759 else if(next ==
'L')
2762 const char payload[] = {
2763 _RYML_CHCONST(-0x1e, 0xe2),
2764 _RYML_CHCONST(-0x80, 0x80),
2765 _RYML_CHCONST(-0x58, 0xa8),
2767 proc.translate_esc_extending(payload, 3, 1);
// \P : bytes e2 80 a9, the UTF-8 encoding of U+2029; handled like \L
2769 else if(next ==
'P')
2772 const char payload[] = {
2773 _RYML_CHCONST(-0x1e, 0xe2),
2774 _RYML_CHCONST(-0x80, 0x80),
2775 _RYML_CHCONST(-0x57, 0xa9),
2777 proc.translate_esc_extending(payload, 3, 1);
// a null character after the backslash
2779 else if(next ==
'\0')
// anything else is not a known escape
2785 _c4err_(
"unknown character '{}' after '\\' pos={}", _c4prc(next), proc.rpos);
2787 _c4dbgfdq(
"backslash...sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
// Filter a double-quoted scalar: iterate over the characters of the
// processor, with special handling of whitespace (trailing whitespace is
// kept), of newlines (see _filter_nl_dquoted()), of carriage returns
// (ignored, per the debug message) and of backslash escapes (see
// _filter_dquoted_backslash()).
// @param proc the filter processor holding the source and the destination
// @return the result of the processor, proc.result()
2791 template<
class EventHandler>
2792 template<
class FilterProcessor>
2793 auto ParseEngine<EventHandler>::_filter_dquoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result())
2795 _c4dbgfdq(
"before=[{}]~~~{}~~~", proc.src.len, proc.src);
2798 while(proc.has_more_chars())
2800 const char curr = proc.curr();
2801 _c4dbgfdq(
"'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
// whitespace: copied, including when it is trailing
2807 _c4dbgfdq(
"whitespace", curr);
2808 _filter_ws_copy_trailing(proc);
// newline: folded by the dedicated helper
2813 _c4dbgfdq(
"newline", curr);
2814 _filter_nl_dquoted(proc);
2819 _c4dbgfdq(
"carriage return, ignore", curr);
// escape sequence
2825 _filter_dquoted_backslash(proc);
2835 _c4dbgfdq(
"after[{}]=~~~{}~~~", proc.wpos, proc.sofar());
2836 return proc.result();
// Filter a double-quoted scalar from a source range into a separate
// destination range: runs _filter_dquoted() over a FilterProcessorSrcDst
// built from `scalar` and `dst`, and returns its result.
2842 template<
class EventHandler>
2845 FilterProcessorSrcDst proc(scalar, dst);
2846 return _filter_dquoted(proc);
// In-place entry point for double-quoted filtering: builds a
// FilterProcessorInplaceMidExtending from `dst` and `cap` and delegates to
// _filter_dquoted().
// NOTE(review): the signature line is not visible in this excerpt;
// presumably this is filter_scalar_dquoted_in_place(dst, cap) -- confirm.
2849 template<
class EventHandler>
2852 FilterProcessorInplaceMidExtending proc(dst, cap);
2853 return _filter_dquoted(proc);
// Scans `s` backwards, starting at index s.len-indentation-1, for a '\n'
// whose following line begins with more than `indentation` spaces.
// The return statements are not visible in this excerpt; the caller
// (_filter_chomp) treats the result as a position within `s`.
2862 C4_NO_INLINE
inline size_t _find_last_newline_and_larger_indentation(csubstr s,
size_t indentation) noexcept
// guard: `s` is too short to hold a newline followed by `indentation` chars
2864 if(indentation + 1 > s.len)
// unsigned countdown: the loop ends when `i` wraps around to size_t(-1)
2866 for(
size_t i = s.len-indentation-1; i !=
size_t(-1); --i)
2868 if(s.str[i] ==
'\n')
2870 csubstr rem = s.sub(i + 1);
2871 size_t first = rem.first_not_of(
' ');
// a line made only of spaces counts as having rem.len leading spaces
2872 first = (first !=
npos) ? first : rem.len;
2873 if(first > indentation)
// Processes the trailing part of a block scalar according to `chomp`
// (CHOMP_CLIP, CHOMP_KEEP or CHOMP_STRIP).
// On entry the remaining input must consist only of spaces, '\n' and '\r'
// (asserted below). Several branch conditions are not visible in this
// excerpt; the notes below rely on the debug messages where that is the case.
2880 template<
class EventHandler>
2881 template<
class FilterProcessor>
2882 void ParseEngine<EventHandler>::_filter_chomp(FilterProcessor &C4_RESTRICT proc, BlockChomp_e chomp,
size_t indentation)
2884 _RYML_CB_ASSERT(this->callbacks(), chomp == CHOMP_CLIP || chomp == CHOMP_KEEP || chomp == CHOMP_STRIP);
2885 _RYML_CB_ASSERT(this->callbacks(), proc.rem().first_not_of(
" \n\r") ==
npos);
// local debug helper; the second definition is the no-op variant
2889 #define _c4dbgchomp(fmt, ...) _c4dbgpf("chomp[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2891 #define _c4dbgchomp(...)
// look for a trailing line indented deeper than `indentation`
2896 size_t last = _find_last_newline_and_larger_indentation(proc.rem(), indentation);
2899 _c4dbgchomp(
"found newline and larger indentation. last={}", last);
// convert the position relative to rem() into an absolute read position
2900 last = proc.rpos + last + size_t(1) + indentation;
2901 _RYML_CB_ASSERT(this->callbacks(), last <= proc.src.len);
// consume input up to `last`
2903 while((proc.rpos < last) && proc.has_more_chars())
2905 const char curr = proc.curr();
2906 _c4dbgchomp(
"curr='{}'", _c4prc(curr));
2911 _c4dbgchomp(
"newline! remlen={}", proc.rem().len);
2914 csubstr at_next_line = proc.rem();
2915 if(at_next_line.begins_with(
' '))
2917 _c4dbgchomp(
"next line begins with spaces. indentation={}", indentation);
2919 size_t first_non_space = at_next_line.first_not_of(
' ');
2920 _c4dbgchomp(
"first_non_space={}", first_non_space);
2921 if(first_non_space ==
npos)
2923 _c4dbgchomp(
"{} spaces, to the end", at_next_line.len);
2924 first_non_space = at_next_line.len;
// spaces up to `indentation` are skipped; any beyond that are copied
2926 if(first_non_space <= indentation)
2928 _c4dbgchomp(
"skip spaces={}<=indentation={}", first_non_space, indentation);
2929 proc.skip(first_non_space);
2933 _c4dbgchomp(
"skip indentation={}<spaces={}", indentation, first_non_space);
2934 proc.skip(indentation);
2936 _c4dbgchomp(
"copy {}={}-{} spaces", first_non_space - indentation, first_non_space, indentation);
2937 proc.copy(first_non_space - indentation);
// CLIP handling (per the debug messages below)
2958 bool had_one =
false;
2959 while(proc.has_more_chars())
2961 const char curr = proc.curr();
2962 _c4dbgchomp(
"CLIP: '{}'", _c4prc(curr));
2967 _c4dbgchomp(
"copy newline!", curr);
2975 _c4dbgchomp(
"skip!", curr);
2982 _c4dbgchomp(
"chomp=CLIP: add missing newline @{}", proc.wpos);
// KEEP handling (per the debug messages below)
2989 _c4dbgchomp(
"chomp=KEEP: copy all remaining new lines of {} characters", proc.rem().len);
2990 while(proc.has_more_chars())
2992 const char curr = proc.curr();
2993 _c4dbgchomp(
"KEEP: '{}'", _c4prc(curr));
2997 _c4dbgchomp(
"copy newline!", curr);
3002 _c4dbgchomp(
"skip!", curr);
// STRIP handling (per the debug message below)
3011 _c4dbgchomp(
"chomp=STRIP: strip {} characters", proc.rem().len);
3023 #define _c4dbgfb(fmt, ...) _c4dbgpf("filt_block[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3025 #define _c4dbgfb(...)
// Skips indentation spaces at the current read position of `proc`.
// `first` is the index of the first non-space character in the remaining
// input. Some branch conditions are not visible in this excerpt; where at
// least `indentation` spaces are available, exactly `indentation` are skipped.
3028 template<
class EventHandler>
3029 template<
class FilterProcessor>
3030 void ParseEngine<EventHandler>::_filter_block_indentation(FilterProcessor &C4_RESTRICT proc,
size_t indentation)
3032 csubstr rem = proc.rem();
3035 size_t first = rem.first_not_of(
' ');
3038 _c4dbgfb(
"{} spaces follow before next nonws character", first);
3039 if(first < indentation)
3041 _c4dbgfb(
"skip {}<{} spaces from indentation", first, indentation);
3046 _c4dbgfb(
"skip {} spaces from indentation", indentation);
3047 proc.skip(indentation);
// the code from here on is compiled only when the macro below is defined
3050 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
3053 _c4dbgfb(
"all spaces to the end: {} spaces", first);
3057 if(first < indentation)
3059 _c4dbgfb(
"skip everything", first);
// skip to the end of the source
3060 proc.skip(proc.src.len - proc.rpos);
3064 _c4dbgfb(
"skip {} spaces from indentation", indentation);
3065 proc.skip(indentation);
// Measures the non-whitespace contents of the block scalar in `proc`.
// Returns the length of proc.src after right-trimming " \n\r".
// The condition guarding the all-whitespace path is not visible in this
// excerpt; on that path, CHOMP_KEEP with a non-empty source walks the
// remaining characters (loop body not visible here).
3073 template<
class EventHandler>
3074 template<
class FilterProcessor>
3075 size_t ParseEngine<EventHandler>::_handle_all_whitespace(FilterProcessor &C4_RESTRICT proc, BlockChomp_e chomp)
3077 csubstr contents = proc.src.trimr(
" \n\r");
3078 _c4dbgfb(
"ws: contents_len={} wslen={}", contents.len, proc.src.len-contents.len);
3081 _c4dbgfb(
"ws: all whitespace: len={}", proc.src.len);
3082 if(chomp == CHOMP_KEEP && proc.src.len)
3084 _c4dbgfb(
"ws: chomp=KEEP all {} newlines", proc.src.count(
'\n'));
3085 while(proc.has_more_chars())
3087 const char curr = proc.curr();
3099 return contents.len;
// Extends `contents_len` so the contents run up to the first '\n' found by
// proc.src.first_of('\n', contents_len), or to the end of proc.src when no
// such newline exists. Returns the extended length.
// Precondition (asserted): contents_len > 0.
3102 template<
class EventHandler>
3103 template<
class FilterProcessor>
3104 size_t ParseEngine<EventHandler>::_extend_to_chomp(FilterProcessor &C4_RESTRICT proc,
size_t contents_len)
3106 _c4dbgfb(
"contents_len={}", contents_len);
3108 _RYML_CB_ASSERT(this->callbacks(), contents_len > 0u);
3112 size_t firstnewl = proc.src.first_of(
'\n', contents_len);
3113 if(firstnewl !=
npos)
// newline found: the contents stop at that newline
3115 contents_len = firstnewl;
3116 _c4dbgfb(
"contents_len={} <--- firstnewl={}", contents_len, firstnewl);
// no newline found: the contents span the whole source
3120 contents_len = proc.src.len;
3121 _c4dbgfb(
"contents_len={} <--- src.len={}", contents_len, proc.src.len);
3124 return contents_len;
3136 #define _c4dbgfbl(fmt, ...) _c4dbgpf("filt_block_lit[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3138 #define _c4dbgfbl(...)
// Filters a literal block scalar through `proc`; returns proc.result().
// Steps visible here:
//  1. _handle_all_whitespace() measures the contents; an early return
//     follows (its condition is not visible in this excerpt).
//  2. _extend_to_chomp() extends the contents length.
//  3. The leading indentation is skipped, and again after each newline.
//  4. _filter_chomp() processes whatever remains after the contents.
3141 template<
class EventHandler>
3142 template<
class FilterProcessor>
3143 auto ParseEngine<EventHandler>::_filter_block_literal(FilterProcessor &C4_RESTRICT proc,
size_t indentation, BlockChomp_e chomp) -> decltype(proc.result())
3145 _c4dbgfbl(
"indentation={} before=[{}]~~~{}~~~", indentation, proc.src.len, proc.src);
3147 size_t contents_len = _handle_all_whitespace(proc, chomp);
3149 return proc.result();
3151 contents_len = _extend_to_chomp(proc, contents_len);
3153 _c4dbgfbl(
"to filter=[{}]~~~{}~~~", contents_len, proc.src.first(contents_len));
3155 _filter_block_indentation(proc, indentation);
// process the contents; the chomp part is handled after this loop
3158 while(proc.has_more_chars(contents_len))
3160 const char curr = proc.curr();
3161 _c4dbgfbl(
"'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3166 _c4dbgfbl(
"found newline. skip indentation on the next line", curr);
3168 _filter_block_indentation(proc, indentation);
3180 _c4dbgfbl(
"before chomp: #tochomp={} sofar=[{}]~~~{}~~~", proc.rem().len, proc.sofar().len, proc.sofar());
3182 _filter_chomp(proc, chomp, indentation);
3184 _c4dbgfbl(
"final=[{}]~~~{}~~~", proc.sofar().len, proc.sofar());
3186 return proc.result();
// Source/destination entry point for literal block filtering: wraps `scalar`
// and `dst` in a FilterProcessorSrcDst and delegates to
// _filter_block_literal().
// NOTE(review): the signature line is not visible in this excerpt;
// presumably this is filter_scalar_block_literal(...) -- confirm.
3191 template<
class EventHandler>
3194 FilterProcessorSrcDst proc(scalar, dst);
3195 return _filter_block_literal(proc, indentation, chomp);
// In-place entry point for literal block filtering: builds a
// FilterProcessorInplaceEndExtending from `scalar` and `cap` and delegates
// to _filter_block_literal().
// NOTE(review): the signature line is not visible in this excerpt;
// presumably this is filter_scalar_block_literal_in_place(...) -- confirm.
3198 template<
class EventHandler>
3201 FilterProcessorInplaceEndExtending proc(scalar, cap);
3202 return _filter_block_literal(proc, indentation, chomp);
3212 #define _c4dbgfbf(fmt, ...) _c4dbgpf("filt_block_folded[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3214 #define _c4dbgfbf(...)
// Handles the leading part of a folded block scalar, up to read position
// `len`. Skips the indentation first, then loops over characters.
// Per the debug messages: newlines are followed by another indentation
// skip, a run of spaces/tabs hands over to
// _filter_block_folded_indented_block(), and any other character stops the
// loop. The branch conditions themselves are not visible in this excerpt.
3218 template<
class EventHandler>
3219 template<
class FilterProcessor>
3220 void ParseEngine<EventHandler>::_filter_block_folded_newlines_leading(FilterProcessor &C4_RESTRICT proc,
size_t indentation,
size_t len)
3222 _filter_block_indentation(proc, indentation);
3223 while(proc.has_more_chars(len))
3225 const char curr = proc.curr();
3226 _c4dbgfbf(
"'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3230 _c4dbgfbf(
"newline.", curr);
3232 _filter_block_indentation(proc, indentation);
// `first` is the index of the first character that is not a space or tab
3240 size_t first = proc.rem().first_not_of(
" \t");
3241 _c4dbgfbf(
"space. first={}", first);
3243 first = proc.rem().len;
3244 _c4dbgfbf(
"... indentation increased to {}", first);
3245 _filter_block_folded_indented_block(proc, indentation, len, first);
3249 _c4dbgfbf(
"newl leading: not space, not newline. stop.", 0);
// Applies the folding rule to one newline of a run of newlines.
// `num_newl` is the 1-based count within the run (the caller passes
// ++num_newl). Returns the write position recorded for the first newline so
// the caller can carry it to the next call.
// Per the debug messages: the first newline becomes a space, the second
// turns that space back into '\n', and later newlines are copied. The
// branch conditions are not visible in this excerpt.
3255 template<
class EventHandler>
3256 template<
class FilterProcessor>
3257 size_t ParseEngine<EventHandler>::_filter_block_folded_newlines_compress(FilterProcessor &C4_RESTRICT proc,
size_t num_newl,
size_t wpos_at_first_newl)
3262 _c4dbgfbf(
"... this is the first newline. turn into space. wpos={}", proc.wpos);
// remember where the space is written so it can be patched later
3263 wpos_at_first_newl = proc.wpos;
3268 _c4dbgfbf(
"... this is the second newline. prev space (at wpos={}) must be newline", wpos_at_first_newl);
3269 _RYML_CB_ASSERT(this->callbacks(), wpos_at_first_newl !=
npos);
3270 _RYML_CB_ASSERT(this->callbacks(), proc.sofar()[wpos_at_first_newl] ==
' ');
3271 _RYML_CB_ASSERT(this->callbacks(), wpos_at_first_newl + 1u == proc.wpos);
// patch the previously written space into a newline
3273 proc.set_at(wpos_at_first_newl,
'\n');
3274 _RYML_CB_ASSERT(this->callbacks(), proc.sofar()[wpos_at_first_newl] ==
'\n');
3277 _c4dbgfbf(
"... subsequent newline (num_newl={}). copy", num_newl);
3281 return wpos_at_first_newl;
// Handles a run of newlines inside a folded block scalar, up to read
// position `len`. Precondition (asserted): the current character is '\n'.
// Each newline goes through _filter_block_folded_newlines_compress() and is
// followed by an indentation skip. Per the debug messages, a run of
// spaces/tabs starts a more-indented block, and any other character stops
// the loop. The branch conditions are not visible in this excerpt.
3284 template<
class EventHandler>
3285 template<
class FilterProcessor>
3286 void ParseEngine<EventHandler>::_filter_block_folded_newlines(FilterProcessor &C4_RESTRICT proc,
size_t indentation,
size_t len)
3288 _RYML_CB_ASSERT(this->callbacks(), proc.curr() ==
'\n');
// number of newlines seen so far in this run
3289 size_t num_newl = 0;
// write position of the first newline of the run; npos while unset
3290 size_t wpos_at_first_newl =
npos;
3291 while(proc.has_more_chars(len))
3293 const char curr = proc.curr();
3294 _c4dbgfbf(
"'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3299 _c4dbgfbf(
"newline. sofar={}", num_newl);
3335 wpos_at_first_newl = _filter_block_folded_newlines_compress(proc, ++num_newl, wpos_at_first_newl);
3336 _filter_block_indentation(proc, indentation);
3342 size_t first = proc.rem().first_not_of(
" \t");
3343 _c4dbgfbf(
"space. first={}", first);
3345 first = proc.rem().len;
3346 _c4dbgfbf(
"... indentation increased to {}", first);
3349 _c4dbgfbf(
"... prev space (at wpos={}) must be newline", wpos_at_first_newl);
3350 proc.set_at(wpos_at_first_newl,
'\n');
3354 _c4dbgfbf(
"... add missing newline", wpos_at_first_newl);
3357 _filter_block_folded_indented_block(proc, indentation, len, first);
// reset: the recorded position is cleared after the indented block
3359 wpos_at_first_newl =
npos;
3366 _c4dbgfbf(
"not space, not newline. stop.", 0);
// Processes a more-indented block inside a folded scalar, up to read
// position `len`. `curr_indentation` is the number of leading spaces/tabs
// at the current position (asserted below, allowing for an all-whitespace
// remainder); these are copied to the output first.
// Most branch conditions of the loop are not visible in this excerpt; per
// the debug messages, the block ends at "done with indented block".
3373 template<
class EventHandler>
3374 template<
class FilterProcessor>
3375 void ParseEngine<EventHandler>::_filter_block_folded_indented_block(FilterProcessor &C4_RESTRICT proc,
size_t indentation,
size_t len,
size_t curr_indentation) noexcept
3377 _RYML_CB_ASSERT(this->callbacks(), (proc.rem().first_not_of(
" \t") == curr_indentation) || (proc.rem().first_not_of(
" \t") ==
npos));
// copy the extra indentation
3378 if(curr_indentation)
3379 proc.copy(curr_indentation);
3380 while(proc.has_more_chars(len))
3382 const char curr = proc.curr();
3383 _c4dbgfbf(
"'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3389 _filter_block_indentation(proc, indentation);
// inspect the first non-space character of the next line
3390 csubstr rem = proc.rem();
3391 const size_t first = rem.first_not_of(
' ');
3392 _c4dbgfbf(
"newline. firstns={}", first);
3395 const char c = rem[first];
3396 _c4dbgfbf(
"firstns={}='{}'", first, _c4prc(c));
3397 if(c ==
'\n' || c ==
'\r')
3403 _c4dbgfbf(
"done with indented block", first);
3407 else if(first !=
npos)
3410 _c4dbgfbf(
"copy all {} spaces", first);
// Filters a folded block scalar through `proc`; returns proc.result().
// Steps visible here:
//  1. _handle_all_whitespace() measures the contents; an early return
//     follows (its condition is not visible in this excerpt).
//  2. _extend_to_chomp() extends the contents length.
//  3. _filter_block_folded_newlines_leading() handles the leading part.
//  4. The main loop sends newlines to _filter_block_folded_newlines().
//  5. _filter_chomp() processes whatever remains after the contents.
3428 template<
class EventHandler>
3429 template<
class FilterProcessor>
3430 auto ParseEngine<EventHandler>::_filter_block_folded(FilterProcessor &C4_RESTRICT proc,
size_t indentation, BlockChomp_e chomp) -> decltype(proc.result())
3432 _c4dbgfbf(
"indentation={} before=[{}]~~~{}~~~", indentation, proc.src.len, proc.src);
3434 size_t contents_len = _handle_all_whitespace(proc, chomp);
3436 return proc.result();
3438 contents_len = _extend_to_chomp(proc, contents_len);
3440 _c4dbgfbf(
"to filter=[{}]~~~{}~~~", contents_len, proc.src.first(contents_len));
3442 _filter_block_folded_newlines_leading(proc, indentation, contents_len);
// process the contents; the chomp part is handled after this loop
3445 while(proc.has_more_chars(contents_len))
3447 const char curr = proc.curr();
3448 _c4dbgfbf(
"'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
3453 _c4dbgfbf(
"found newline", curr);
3454 _filter_block_folded_newlines(proc, indentation, contents_len);
3466 _c4dbgfbf(
"before chomp: #tochomp={} sofar=[{}]~~~{}~~~", proc.rem().len, proc.sofar().len, proc.sofar());
3468 _filter_chomp(proc, chomp, indentation);
3470 _c4dbgfbf(
"final=[{}]~~~{}~~~", proc.sofar().len, proc.sofar());
3472 return proc.result();
// Source/destination entry point for folded block filtering: wraps `scalar`
// and `dst` in a FilterProcessorSrcDst and delegates to
// _filter_block_folded().
// NOTE(review): the signature line is not visible in this excerpt;
// presumably this is filter_scalar_block_folded(...) -- confirm.
3477 template<
class EventHandler>
3480 FilterProcessorSrcDst proc(scalar, dst);
3481 return _filter_block_folded(proc, indentation, chomp);
// In-place entry point for folded block filtering: builds a
// FilterProcessorInplaceEndExtending from `scalar` and `cap` and delegates
// to _filter_block_folded().
// NOTE(review): the signature line is not visible in this excerpt;
// presumably this is filter_scalar_block_folded_in_place(...) -- confirm.
3484 template<
class EventHandler>
3487 FilterProcessorInplaceEndExtending proc(scalar, cap);
3488 return _filter_block_folded(proc, indentation, chomp);
// Filters the plain scalar `s` in place, using s.len as the capacity, and
// asserts that the result is valid.
// The return statement is not visible in this excerpt.
3496 template<
class EventHandler>
3497 csubstr ParseEngine<EventHandler>::_filter_scalar_plain(substr s,
size_t indentation)
3499 _c4dbgpf(
"filtering plain scalar: s=[{}]~~~{}~~~", s.len, s);
3500 FilterResult r = this->filter_scalar_plain_in_place(s, s.len, indentation);
3501 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, r.valid());
3502 _c4dbgpf(
"filtering plain scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
// Filters the single-quoted scalar `s` in place, using s.len as the
// capacity, and asserts that the result is valid.
// The return statement is not visible in this excerpt.
3508 template<
class EventHandler>
3509 csubstr ParseEngine<EventHandler>::_filter_scalar_squot(substr s)
3511 _c4dbgpf(
"filtering squo scalar: s=[{}]~~~{}~~~", s.len, s);
3512 FilterResult r = this->filter_scalar_squoted_in_place(s, s.len);
3513 _RYML_CB_ASSERT(this->callbacks(), r.valid());
3514 _c4dbgpf(
"filtering squo scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
// Filters the double-quoted scalar `s`. It first tries in place with s.len
// as the capacity. If that result is not valid, it requests
// r.required_len() bytes from the event handler's arena and filters again
// from `s` into that buffer.
// The return statements are not visible in this excerpt.
3521 template<
class EventHandler>
3522 csubstr ParseEngine<EventHandler>::_filter_scalar_dquot(substr s)
3524 _c4dbgpf(
"filtering dquo scalar: s=[{}]~~~{}~~~", s.len, s);
3525 FilterResultExtending r = this->filter_scalar_dquoted_in_place(s, s.len);
// common case: the filtered result fits in place
3526 if(C4_LIKELY(r.valid()))
3528 _c4dbgpf(
"filtering dquo scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
// fallback: the result needs more room than `s` provides
3533 const size_t len = r.required_len();
3534 _c4dbgpf(
"filtering dquo scalar: not enough space: needs {}, have {}", len, s.len);
// NOTE(review): `&s` is presumably passed so the arena can relocate `s` if
// it has to grow -- confirm against alloc_arena()
3535 substr dst = m_evt_handler->alloc_arena(len, &s);
3536 _c4dbgpf(
"filtering dquo scalar: dst.len={}", dst.len);
3539 _RYML_CB_ASSERT(this->callbacks(), dst.len == len);
3540 FilterResult rsd = this->filter_scalar_dquoted(s, dst);
3541 _c4dbgpf(
"filtering dquo scalar: ... result now needs {} was {}", rsd.required_len(), len);
3542 _RYML_CB_ASSERT(this->callbacks(), rsd.required_len() <= len);
3543 _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, rsd.valid());
3544 _c4dbgpf(
"filtering dquo scalar: success! s=[{}]~~~{}~~~", rsd.get().len, rsd.get());
// Produces a copy of `s` with a trailing '\n' appended.
// Two paths are visible: one shifts the bytes of `s` one position to the
// left inside m_buf (the asserts require room before s.str), the other
// copies `s` into an arena buffer of s.len + 1 bytes.
// The condition selecting the path and the return statements are not
// visible in this excerpt.
3554 template<
class EventHandler>
3555 csubstr ParseEngine<EventHandler>::_move_scalar_left_and_add_newline(substr s)
3559 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.str > m_buf.str);
3560 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.str-1 >= m_buf.str);
// overlapping ranges, hence memmove rather than memcpy
3562 memmove(s.str - 1, s.str, s.len);
3564 s.str[s.len] =
'\n';
3570 substr dst = m_evt_handler->alloc_arena(s.len + 1);
3572 memcpy(dst.str, s.str, s.len);
// Filters the literal block scalar `s` in place, using s.len as the
// capacity. When the in-place result is not valid, the required length must
// be exactly s.len + 1 (asserted), and the result is built by
// _move_scalar_left_and_add_newline().
// The declaration of `result` and the return statement are not visible in
// this excerpt.
3578 template<
class EventHandler>
3579 csubstr ParseEngine<EventHandler>::_filter_scalar_literal(substr s,
size_t indentation, BlockChomp_e chomp)
3581 _c4dbgpf(
"filtering block literal scalar: s=[{}]~~~{}~~~", s.len, s);
3582 FilterResult r = this->filter_scalar_block_literal_in_place(s, s.len, indentation, chomp);
3584 if(C4_LIKELY(r.valid()))
3590 _c4dbgpf(
"filtering block literal scalar: not enough space: needs {}, have {}", r.required_len(), s.len);
3591 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, r.required_len() == s.len + 1);
3594 result = _move_scalar_left_and_add_newline(s);
3596 _c4dbgpf(
"filtering block literal scalar: success! s=[{}]~~~{}~~~", result.len, result);
// Filters the folded block scalar `s` in place, using s.len as the
// capacity. When the in-place result is not valid, the required length must
// be exactly s.len + 1 (asserted), and the result is built by
// _move_scalar_left_and_add_newline().
// The declaration of `result` and the return statement are not visible in
// this excerpt.
3602 template<
class EventHandler>
3603 csubstr ParseEngine<EventHandler>::_filter_scalar_folded(substr s,
size_t indentation, BlockChomp_e chomp)
3605 _c4dbgpf(
"filtering block folded scalar: s=[{}]~~~{}~~~", s.len, s);
3606 FilterResult r = this->filter_scalar_block_folded_in_place(s, s.len, indentation, chomp);
3608 if(C4_LIKELY(r.valid()))
3614 _c4dbgpf(
"filtering block folded scalar: not enough space: needs {}, have {}", r.required_len(), s.len);
3615 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, r.required_len() == s.len + 1);
3618 result = _move_scalar_left_and_add_newline(s);
3620 _c4dbgpf(
"filtering block folded scalar: success! s=[{}]~~~{}~~~", result.len, result);
// Key variant: filters the scanned plain scalar via _filter_scalar_plain()
// when m_options.scalar_filtering() is on. On the "left unfiltered" path the
// event handler is notified through mark_key_scalar_unfiltered().
// The outer condition separating the "needs filtering" and "doesn't need
// filtering" paths is not visible in this excerpt.
3627 template<
class EventHandler>
3628 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_plain(ScannedScalar
const& C4_RESTRICT sc,
size_t indentation)
3632 if(m_options.scalar_filtering())
3634 return _filter_scalar_plain(sc.scalar, indentation);
3638 _c4dbgp(
"plain scalar left unfiltered");
3639 m_evt_handler->mark_key_scalar_unfiltered();
3644 _c4dbgp(
"plain scalar doesn't need filtering");
// Value variant: filters the scanned plain scalar via _filter_scalar_plain()
// when m_options.scalar_filtering() is on. On the "left unfiltered" path the
// event handler is notified through mark_val_scalar_unfiltered().
// The outer condition separating the "needs filtering" and "doesn't need
// filtering" paths is not visible in this excerpt.
3649 template<
class EventHandler>
3650 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_plain(ScannedScalar
const& C4_RESTRICT sc,
size_t indentation)
3654 if(m_options.scalar_filtering())
3656 return _filter_scalar_plain(sc.scalar, indentation);
3660 _c4dbgp(
"plain scalar left unfiltered");
3661 m_evt_handler->mark_val_scalar_unfiltered();
3666 _c4dbgp(
"plain scalar doesn't need filtering");
// Key variant: filters the scanned single-quoted scalar via
// _filter_scalar_squot() when m_options.scalar_filtering() is on. On the
// "left unfiltered" path the event handler is notified through
// mark_key_scalar_unfiltered().
// The outer condition separating the "needs filtering" and "doesn't need
// filtering" paths is not visible in this excerpt.
3674 template<
class EventHandler>
3675 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_squot(ScannedScalar
const& C4_RESTRICT sc)
3679 if(m_options.scalar_filtering())
3681 return _filter_scalar_squot(sc.scalar);
3685 _c4dbgp(
"squo key scalar left unfiltered");
3686 m_evt_handler->mark_key_scalar_unfiltered();
3691 _c4dbgp(
"squo key scalar doesn't need filtering");
// Value variant: filters the scanned single-quoted scalar via
// _filter_scalar_squot() when m_options.scalar_filtering() is on. On the
// "left unfiltered" path the event handler is notified through
// mark_val_scalar_unfiltered().
// The outer condition separating the "needs filtering" and "doesn't need
// filtering" paths is not visible in this excerpt.
3696 template<
class EventHandler>
3697 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_squot(ScannedScalar
const& C4_RESTRICT sc)
3701 if(m_options.scalar_filtering())
3703 return _filter_scalar_squot(sc.scalar);
3707 _c4dbgp(
"squo val scalar left unfiltered");
3708 m_evt_handler->mark_val_scalar_unfiltered();
3713 _c4dbgp(
"squo val scalar doesn't need filtering");
// Key variant: filters the scanned double-quoted scalar via
// _filter_scalar_dquot() when m_options.scalar_filtering() is on. On the
// "left unfiltered" path the event handler is notified through
// mark_key_scalar_unfiltered().
// The outer condition separating the "needs filtering" and "doesn't need
// filtering" paths is not visible in this excerpt.
3721 template<
class EventHandler>
3722 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_dquot(ScannedScalar
const& C4_RESTRICT sc)
3726 if(m_options.scalar_filtering())
3728 return _filter_scalar_dquot(sc.scalar);
3732 _c4dbgp(
"dquo scalar left unfiltered");
3733 m_evt_handler->mark_key_scalar_unfiltered();
3738 _c4dbgp(
"dquo scalar doesn't need filtering");
// Value variant: filters the scanned double-quoted scalar via
// _filter_scalar_dquot() when m_options.scalar_filtering() is on. On the
// "left unfiltered" path the event handler is notified through
// mark_val_scalar_unfiltered().
// The outer condition separating the "needs filtering" and "doesn't need
// filtering" paths is not visible in this excerpt.
3743 template<
class EventHandler>
3744 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_dquot(ScannedScalar
const& C4_RESTRICT sc)
3748 if(m_options.scalar_filtering())
3750 return _filter_scalar_dquot(sc.scalar);
3754 _c4dbgp(
"dquo scalar left unfiltered");
3755 m_evt_handler->mark_val_scalar_unfiltered();
3760 _c4dbgp(
"dquo scalar doesn't need filtering");
// Key variant: filters the scanned literal block via
// _filter_scalar_literal() when m_options.scalar_filtering() is on;
// otherwise the event handler is notified through
// mark_key_scalar_unfiltered(). The final return is not visible in this
// excerpt.
3768 template<
class EventHandler>
3769 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_literal(ScannedBlock
const& C4_RESTRICT sb)
3771 if(m_options.scalar_filtering())
3773 return _filter_scalar_literal(sb.scalar, sb.indentation, sb.chomp);
3777 _c4dbgp(
"literal scalar left unfiltered");
3778 m_evt_handler->mark_key_scalar_unfiltered();
// Value variant: filters the scanned literal block via
// _filter_scalar_literal() when m_options.scalar_filtering() is on;
// otherwise the event handler is notified through
// mark_val_scalar_unfiltered(). The final return is not visible in this
// excerpt.
3783 template<
class EventHandler>
3784 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_literal(ScannedBlock
const& C4_RESTRICT sb)
3786 if(m_options.scalar_filtering())
3788 return _filter_scalar_literal(sb.scalar, sb.indentation, sb.chomp);
3792 _c4dbgp(
"literal scalar left unfiltered");
3793 m_evt_handler->mark_val_scalar_unfiltered();
// Key variant: filters the scanned folded block via _filter_scalar_folded()
// when m_options.scalar_filtering() is on; otherwise the event handler is
// notified through mark_key_scalar_unfiltered(). The final return is not
// visible in this excerpt.
3801 template<
class EventHandler>
3802 csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_folded(ScannedBlock
const& C4_RESTRICT sb)
3804 if(m_options.scalar_filtering())
3806 return _filter_scalar_folded(sb.scalar, sb.indentation, sb.chomp);
3810 _c4dbgp(
"folded scalar left unfiltered");
3811 m_evt_handler->mark_key_scalar_unfiltered();
// Value variant: filters the scanned folded block via
// _filter_scalar_folded() when m_options.scalar_filtering() is on;
// otherwise the event handler is notified through
// mark_val_scalar_unfiltered(). The final return is not visible in this
// excerpt.
3816 template<
class EventHandler>
3817 csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_folded(ScannedBlock
const& C4_RESTRICT sb)
3819 if(m_options.scalar_filtering())
3821 return _filter_scalar_folded(sb.scalar, sb.indentation, sb.chomp);
3825 _c4dbgp(
"folded scalar left unfiltered");
3826 m_evt_handler->mark_val_scalar_unfiltered();
// Debug-logging portion of add_flags(): renders the flags being added, the
// state's flags before, and the flags after (s->flags|on) into 64-byte
// stack buffers, then prints them with the state's level.
// The statement that actually updates s->flags is not visible in this
// excerpt.
3838 template<
class EventHandler>
3839 void ParseEngine<EventHandler>::add_flags(
ParserFlag_t on, ParserState * s)
3841 char buf1_[64], buf2_[64], buf3_[64];
3842 csubstr buf1 = detail::_parser_flags_to_str(buf1_, on);
3843 csubstr buf2 = detail::_parser_flags_to_str(buf2_, s->flags);
3844 csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags|on);
3845 _c4dbgpf(
"state[{}]: add {}: before={} after={}", s->level, buf1, buf2, buf3);
// Debug-logging portion of the combined add/remove flags operation: renders
// `on`, `off`, the flags before, and the flags after ((s->flags|on)&(~off)).
// NOTE(review): the signature line is not visible in this excerpt;
// presumably this is addrem_flags(on, off, s) -- confirm. The statement that
// updates s->flags is not visible either.
3849 template<
class EventHandler>
3852 char buf1_[64], buf2_[64], buf3_[64], buf4_[64];
3853 csubstr buf1 = detail::_parser_flags_to_str(buf1_, on);
3854 csubstr buf2 = detail::_parser_flags_to_str(buf2_, off);
3855 csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags);
3856 csubstr buf4 = detail::_parser_flags_to_str(buf4_, ((s->flags|on)&(~off)));
3857 _c4dbgpf(
"state[{}]: add {} / rem {}: before={} after={}", s->level, buf1, buf2, buf3, buf4);
// Debug-logging portion of rem_flags(): renders the flags being removed, the
// state's flags before, and the flags after (s->flags&(~off)) into 64-byte
// stack buffers, then prints them with the state's level.
// The statement that actually updates s->flags is not visible in this
// excerpt.
3862 template<
class EventHandler>
3863 void ParseEngine<EventHandler>::rem_flags(
ParserFlag_t off, ParserState * s)
3865 char buf1_[64], buf2_[64], buf3_[64];
3866 csubstr buf1 = detail::_parser_flags_to_str(buf1_, off);
3867 csubstr buf2 = detail::_parser_flags_to_str(buf2_, s->flags);
3868 csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags&(~off));
3869 _c4dbgpf(
"state[{}]: rem {}: before={} after={}", s->level, buf1, buf2, buf3);
// Writes a textual list of the flags set in `flags` into `buf` and returns
// the written prefix, buf.first(pos).
// The local _prflag(fl) macro handles one flag: when all bits of `fl` are
// set it appends the stringized flag name (#fl), and copies the text only
// if it fits in `buf`. The macro invocations and the declaration of `pos`
// are not visible in this excerpt.
// RYML_CHECK guards against `pos` ending up beyond buf.len.
3873 inline C4_NO_INLINE csubstr detail::_parser_flags_to_str(substr buf,
ParserFlag_t flags)
3876 bool gotone =
false;
3878 #define _prflag(fl) \
3879 if((flags & fl) == (fl)) \
3883 if(pos + 1 < buf.len) \
3887 csubstr fltxt = #fl; \
3888 if(pos + fltxt.len <= buf.len) \
3889 memcpy(buf.str + pos, fltxt.str, fltxt.len); \
3919 RYML_CHECK(pos <= buf.len);
3921 return buf.first(pos);
// Returns the part of the parsed buffer that starts at loc.offset.
// Precondition (asserted): loc.offset < m_buf.len.
// NOTE(review): the signature line is not visible in this excerpt;
// presumably this is location_contents(Location const& loc) -- confirm.
3931 template<
class EventHandler>
3934 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, loc.offset < m_buf.len);
3935 return m_buf.sub(loc.offset);
// Maps a pointer `val` into the parsed buffer to a location (offset, line,
// column), using the table of newline offsets.
// A null `val` yields {m_file, 0, 0, 0}. Otherwise locations must be enabled
// in the options (checked), and `val` must lie within [m_buf.begin(),
// m_buf.end()] (checked).
// NOTE(review): the signature line is not visible in this excerpt
// (presumably val_location(const char *val) -- confirm). Large parts of the
// search code, the declaration of `loc`, and the final return are also not
// visible; both a linear scan and a stepped search over m_newline_offsets
// appear below.
3938 template<
class EventHandler>
3941 if(C4_UNLIKELY(val ==
nullptr))
3942 return {m_file, 0, 0, 0};
3943 _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_options.locations());
// the newline table must have been built for the current buffer
3946 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.str == m_newline_offsets_buf.str);
3947 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.len == m_newline_offsets_buf.len);
3948 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_options.locations());
3949 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !_locations_dirty());
3950 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_newline_offsets !=
nullptr);
3951 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_size > 0);
3953 csubstr src = m_buf;
3954 _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, val !=
nullptr || src.str ==
nullptr);
3955 _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, (val >= src.begin() && val <= src.end()) || (src.str ==
nullptr && val ==
nullptr));
3957 using lineptr_type =
size_t const* C4_RESTRICT;
3958 lineptr_type lineptr =
nullptr;
// byte offset of `val` from the start of the buffer
3959 size_t offset = (size_t)(val - src.begin());
3963 for(lineptr_type curr = m_newline_offsets, last = m_newline_offsets + m_newline_offsets_size; curr < last; ++curr)
3978 size_t count = m_newline_offsets_size;
3981 lineptr = m_newline_offsets;
3985 it = lineptr + step;
// after the search, lineptr refers to a table entry greater than `offset`
3997 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, lineptr >= m_newline_offsets);
3998 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, lineptr <= m_newline_offsets + m_newline_offsets_size);
3999 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, *lineptr > offset);
4002 loc.offset = offset;
// the line number is the index of that entry in the table
4003 loc.line = (size_t)(lineptr - m_newline_offsets);
// the column is counted from the character after the previous newline
4004 if(lineptr > m_newline_offsets)
4005 loc.col = (offset - *(lineptr-1) - 1u);
// Builds the newline-offset table for the current buffer.
// Records the buffer the table belongs to, sizes the table to
// (number of '\n' in m_buf) + 1, stores the offset of every '\n', and
// appends m_buf.len as the last entry.
4011 template<
class EventHandler>
4012 void ParseEngine<EventHandler>::_prepare_locations()
4014 m_newline_offsets_buf = m_buf;
4015 size_t numnewlines = 1u + m_buf.count(
'\n');
4016 _resize_locations(numnewlines);
// refill the table from scratch
4017 m_newline_offsets_size = 0;
4018 for(
size_t i = 0; i < m_buf.len; i++)
4019 if(m_buf[i] ==
'\n')
4020 m_newline_offsets[m_newline_offsets_size++] = i;
// final entry: the end of the buffer
4021 m_newline_offsets[m_newline_offsets_size++] = m_buf.len;
4022 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_size == numnewlines);
// Ensures the newline-offset table can hold `numnewlines` entries.
// Does nothing when the current capacity suffices. Otherwise the old array
// is freed and a new one allocated; the old contents are not copied.
// NOTE(review): the allocation hint passed below is m_newline_offsets,
// which has just been freed when it was non-null -- confirm that the
// allocator treats the hint as advisory only.
4025 template<
class EventHandler>
4026 void ParseEngine<EventHandler>::_resize_locations(
size_t numnewlines)
4028 if(numnewlines > m_newline_offsets_capacity)
4030 if(m_newline_offsets)
4031 _RYML_CB_FREE(m_evt_handler->m_stack.m_callbacks, m_newline_offsets,
size_t, m_newline_offsets_capacity);
4032 m_newline_offsets = _RYML_CB_ALLOC_HINT(m_evt_handler->m_stack.m_callbacks,
size_t, numnewlines, m_newline_offsets);
4033 m_newline_offsets_capacity = numnewlines;
// Returns true when the newline-offset table holds no entries
// (m_newline_offsets_size == 0).
4037 template<
class EventHandler>
4038 bool ParseEngine<EventHandler>::_locations_dirty()
const
4040 return !m_newline_offsets_size;
// Looks at the unread remainder of the current line (line_contents.rem).
// When it starts with a space or tab, the whitespace path is taken (its
// skipping code is not visible in this excerpt). When the remainder begins
// with '#', the rest of the line is a comment and is consumed whole via
// _line_progressed().
4048 template<
class EventHandler>
4049 void ParseEngine<EventHandler>::_handle_flow_skip_whitespace()
4052 if(m_evt_handler->m_curr->line_contents.rem.len > 0)
4054 if(m_evt_handler->m_curr->line_contents.rem.str[0] ==
' ' || m_evt_handler->m_curr->line_contents.rem.str[0] ==
'\t')
4056 _c4dbgpf(
"starts with whitespace: '{}'", _c4prc(m_evt_handler->m_curr->line_contents.rem.str[0]));
4060 if(m_evt_handler->m_curr->line_contents.rem.begins_with(
'#'))
4062 _c4dbgpf(
"it's a comment: {}", m_evt_handler->m_curr->line_contents.rem);
4063 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
// Records the line of the colon being handled and raises the error
// "two colons on same line" when the previously recorded colon was on the
// same line. m_prev_colon == npos means no colon has been recorded.
4072 template<
class EventHandler>
4073 void ParseEngine<EventHandler>::_handle_colon()
4075 size_t curr = m_evt_handler->m_curr->pos.line;
4076 if(m_prev_colon !=
npos)
4078 if(curr == m_prev_colon)
4079 _c4err(
"two colons on same line");
// remember this line for the next call
4081 m_prev_colon = curr;
// Stores an annotation (text, indentation, line) in the slot
// dst->annotations[dst->num_entries]. Raises "too many annotations" when
// the fixed-size array is already full.
// The statement that advances dst->num_entries is not visible in this
// excerpt.
4084 template<
class EventHandler>
4085 void ParseEngine<EventHandler>::_add_annotation(Annotation *C4_RESTRICT dst, csubstr str,
size_t indentation,
size_t line)
4087 _c4dbgpf(
"store annotation[{}]: '{}' indentation={} line={}", dst->num_entries, str, indentation, line);
// capacity check against the array's compile-time size
4088 if(C4_UNLIKELY(dst->num_entries >= C4_COUNTOF(dst->annotations)))
4089 _c4err(
"too many annotations");
4090 dst->annotations[dst->num_entries].str = str;
4091 dst->annotations[dst->num_entries].indentation = indentation;
4092 dst->annotations[dst->num_entries].line = line;
// Empties an annotation set by resetting its entry count to zero; the
// stored entries themselves are not touched.
4096 template<
class EventHandler>
4097 void ParseEngine<EventHandler>::_clear_annotations(Annotation *C4_RESTRICT dst)
4099 dst->num_entries = 0;
4102 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
// Compiled only under RYML_NO_COVERAGE__TO_BE_DELETED (see the #ifdef on
// the preceding line).
// When exactly one anchor or one tag is pending, computes `to_skip` as the
// largest of the current indref and the pending annotations' indentation,
// then skips spaces up to that column via _maybe_skipchars_up_to().
// The return statements are not visible in this excerpt.
4103 template<
class EventHandler>
4104 bool ParseEngine<EventHandler>::_handle_indentation_from_annotations()
4106 if(m_pending_anchors.num_entries == 1u || m_pending_tags.num_entries == 1u)
4108 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.num_entries < 2u && m_pending_tags.num_entries < 2u);
4109 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.annotations[0].line < m_evt_handler->m_curr->pos.line);
// NOTE(review): this assert reads annotations[1] although the assert above
// requires fewer than 2 pending tags; index 0 looks intended -- confirm.
4110 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_tags.annotations[1].line < m_evt_handler->m_curr->pos.line);
4111 size_t to_skip = m_evt_handler->m_curr->indref;
4112 if(m_pending_anchors.num_entries)
4113 to_skip = m_pending_anchors.annotations[0].indentation > to_skip ? m_pending_anchors.annotations[0].indentation : to_skip;
4114 if(m_pending_tags.num_entries)
4115 to_skip = m_pending_tags.annotations[0].indentation > to_skip ? m_pending_tags.annotations[0].indentation : to_skip;
4116 _c4dbgpf(
"annotations pending, skip indentation up to {}!", to_skip);
4117 _maybe_skipchars_up_to(
' ', to_skip);
// Returns true when more than one tag or more than one anchor is pending.
4124 template<
class EventHandler>
4125 bool ParseEngine<EventHandler>::_annotations_require_key_container()
const
4127 return m_pending_tags.num_entries > 1 || m_pending_anchors.num_entries > 1;
// Validates the text of a tag and reports an error at the current position
// when it is malformed:
//  - a tag that does not begin with "!<" must not contain any of "[]{},";
//  - a tag must end with '>' in the second check. That check is presumably
//    the else branch, i.e. for tags that do begin with "!<"; the `else` line
//    is not visible in this excerpt.
4130 template<
class EventHandler>
4131 void ParseEngine<EventHandler>::_check_tag(csubstr tag)
4133 if(!tag.begins_with(
"!<"))
4135 if(C4_UNLIKELY(tag.first_of(
"[]{},") !=
npos))
4136 _RYML_CB_ERR_(m_evt_handler->m_stack.m_callbacks,
"tags must not contain any of '[]{},'", m_evt_handler->m_curr->pos);
4140 if(C4_UNLIKELY(!tag.ends_with(
'>')))
4141 _RYML_CB_ERR_(m_evt_handler->m_stack.m_callbacks,
"malformed tag", m_evt_handler->m_curr->pos);
// Applies pending annotations to a block key scalar.
// With exactly one pending tag: validates it, sets it as the key tag, and
// clears the pending tags. With exactly one pending anchor: sets it as the
// key anchor and clears the pending anchors. More than one anchor raises
// "too many anchors". The handling of more than one tag is not visible in
// this excerpt.
4145 template<
class EventHandler>
4146 void ParseEngine<EventHandler>::_handle_annotations_before_blck_key_scalar()
4148 _c4dbgpf(
"annotations_before_blck_key_scalar, node={}", m_evt_handler->m_curr->node_id);
4149 if(m_pending_tags.num_entries)
4151 _c4dbgpf(
"annotations_before_blck_key_scalar, #tags={}", m_pending_tags.num_entries);
4152 if(C4_LIKELY(m_pending_tags.num_entries == 1))
4154 _check_tag(m_pending_tags.annotations[0].str);
4155 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4156 _clear_annotations(&m_pending_tags);
4163 if(m_pending_anchors.num_entries)
4165 _c4dbgpf(
"annotations_before_blck_key_scalar, #anchors={}", m_pending_anchors.num_entries);
4166 if(C4_LIKELY(m_pending_anchors.num_entries == 1))
4168 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4169 _clear_annotations(&m_pending_anchors);
4173 _c4err(
"too many anchors");
// Applies pending annotations to a block value scalar.
// With exactly one pending tag: validates it, sets it as the value tag, and
// clears the pending tags. With exactly one pending anchor: sets it as the
// value anchor and clears the pending anchors. More than one anchor raises
// "too many anchors". The handling of more than one tag is not visible in
// this excerpt.
4178 template<
class EventHandler>
4179 void ParseEngine<EventHandler>::_handle_annotations_before_blck_val_scalar()
4181 _c4dbgpf(
"annotations_before_blck_val_scalar, node={}", m_evt_handler->m_curr->node_id);
4182 if(m_pending_tags.num_entries)
4184 _c4dbgpf(
"annotations_before_blck_val_scalar, #tags={}", m_pending_tags.num_entries);
4185 if(C4_LIKELY(m_pending_tags.num_entries == 1))
4187 _check_tag(m_pending_tags.annotations[0].str);
4188 m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4189 _clear_annotations(&m_pending_tags);
4196 if(m_pending_anchors.num_entries)
4198 _c4dbgpf(
"annotations_before_blck_val_scalar, #anchors={}", m_pending_anchors.num_entries);
4199 if(C4_LIKELY(m_pending_anchors.num_entries == 1))
4201 m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4202 _clear_annotations(&m_pending_anchors);
4206 _c4err(
"too many anchors");
// Applies pending annotations to a block map that is about to start.
// `current_line` is the line against which annotation lines are compared.
// Tags and anchors follow the same rule:
//  - two pending entries: entry 0 is set on the map as its val tag/anchor
//    (the pending set is not cleared here);
//  - one pending entry: it is set on the map only when it was stored on a
//    line before `current_line`, and the pending set is then cleared.
4211 template<
class EventHandler>
4212 void ParseEngine<EventHandler>::_handle_annotations_before_start_mapblck(
size_t current_line)
4214 _c4dbgpf(
"annotations_before_start_mapblck, current_line={}", current_line);
4215 if(m_pending_tags.num_entries == 2)
4217 _c4dbgp(
"2 tags, setting entry 0");
4218 _check_tag(m_pending_tags.annotations[0].str);
4219 m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4221 else if(m_pending_tags.num_entries == 1)
// the format string has two placeholders: pass the current line as well
4223 _c4dbgpf(
"1 tag. line={}, curr={}", m_pending_tags.annotations[0].line, current_line);
4224 if(m_pending_tags.annotations[0].line < current_line)
4226 _c4dbgp(
"...tag is for the map. setting it.");
4227 _check_tag(m_pending_tags.annotations[0].str);
4228 m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4229 _clear_annotations(&m_pending_tags);
4233 if(m_pending_anchors.num_entries == 2)
4235 _c4dbgp(
"2 anchors, setting entry 0");
4236 m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4238 else if(m_pending_anchors.num_entries == 1)
// the format string has two placeholders: pass the current line as well
4240 _c4dbgpf(
"1 anchor. line={}, curr={}", m_pending_anchors.annotations[0].line, current_line);
4241 if(m_pending_anchors.annotations[0].line < current_line)
4243 _c4dbgp(
"...anchor is for the map. setting it.");
4244 m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4245 _clear_annotations(&m_pending_anchors);
// Applies pending annotations to a block map that is itself used as a key.
// Only sets of exactly two entries are handled here: entry 0 of the pending
// tags (after validation) becomes the key tag, and entry 0 of the pending
// anchors becomes the key anchor. Nothing is cleared in the visible code.
4250 template<
class EventHandler>
4251 void ParseEngine<EventHandler>::_handle_annotations_before_start_mapblck_as_key()
4253 _c4dbgp(
"annotations_before_start_mapblck_as_key");
4254 if(m_pending_tags.num_entries == 2)
4256 _check_tag(m_pending_tags.annotations[0].str);
4257 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4259 if(m_pending_anchors.num_entries == 2)
4261 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
// After a block map has started: assigns the remaining pending tag/anchor
// to the first key and sets the indentation for the map.
// At most two tags and two anchors may be pending (asserted). When anything
// is pending, the key indentation is re-derived from the annotations via
// _select_indentation_from_annotations().
// The `case` labels of both switches are not visible in this excerpt;
// presumably one pending entry uses annotations[0] and two pending entries
// use annotations[1] -- confirm. Each visible case clears the pending set.
4265 template<
class EventHandler>
4266 void ParseEngine<EventHandler>::_handle_annotations_and_indentation_after_start_mapblck(
size_t key_indentation,
size_t key_line)
4268 _c4dbgp(
"annotations_after_start_mapblck");
4269 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries <= 2);
4270 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.num_entries <= 2);
4271 if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
4273 key_indentation = _select_indentation_from_annotations(key_indentation, key_line);
4274 switch(m_pending_tags.num_entries)
4277 _check_tag(m_pending_tags.annotations[0].str);
4278 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4279 _clear_annotations(&m_pending_tags);
4282 _check_tag(m_pending_tags.annotations[1].str);
4283 m_evt_handler->set_key_tag(m_pending_tags.annotations[1].str);
4284 _clear_annotations(&m_pending_tags);
4287 switch(m_pending_anchors.num_entries)
4290 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4291 _clear_annotations(&m_pending_anchors);
4294 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[1].str);
4295 _clear_annotations(&m_pending_anchors);
// applied whether or not annotations were pending
4299 _set_indentation(key_indentation);
// Chooses the indentation to use for a value that has pending annotations.
// At least one tag or anchor must be pending (asserted). A candidate
// annotation is compared, by line and by indentation, against every pending
// anchor and then every pending tag. The result is val_indentation when the
// final candidate sits on a line before val_line, and the candidate's own
// indentation otherwise.
4302 template<
class EventHandler>
4303 size_t ParseEngine<EventHandler>::_select_indentation_from_annotations(
size_t val_indentation,
size_t val_line)
4305 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries || m_pending_anchors.num_entries);
// initial candidate: the first pending anchor if there is one, else the first pending tag
4307 auto const *C4_RESTRICT curr = m_pending_anchors.num_entries ? &m_pending_anchors.annotations[0] : &m_pending_tags.annotations[0];
// compare the candidate against each pending anchor
4308 for(
size_t i = 0; i < m_pending_anchors.num_entries; ++i)
4310 auto const& C4_RESTRICT ann = m_pending_anchors.annotations[i];
4311 if(ann.line > curr->line)
4313 else if(ann.indentation < curr->indentation)
// compare the candidate against each pending tag, with the same two tests
4316 for(
size_t j = 0; j < m_pending_tags.num_entries; ++j)
4318 auto const& C4_RESTRICT ann = m_pending_tags.annotations[j];
4319 if(ann.line > curr->line)
4321 else if(ann.indentation < curr->indentation)
// an annotation on an earlier line than the value does not override the value's indentation
4324 return curr->line < val_line ? val_indentation : curr->indentation;
// Forwards a directive to the event handler and advances the current line.
// rem must be a sub-range of the current line's remainder (asserted).
// The text is searched for '#'. One path passes rem unchanged to
// add_directive() and consumes rem.len characters; the other passes only the
// text before the '#', with trailing spaces and tabs removed, and consumes
// pos characters.
// NOTE(review): presumably the first path is taken when no '#' is found -- confirm.
4327 template<
class EventHandler>
4328 void ParseEngine<EventHandler>::_handle_directive(csubstr rem)
4330 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.is_sub(m_evt_handler->m_curr->line_contents.rem));
// position of the first '#' in rem
4331 const size_t pos = rem.find(
'#');
4332 _c4dbgpf(
"handle_directive: pos={} rem={}", pos, rem);
// whole remainder is the directive
4335 m_evt_handler->add_directive(rem);
4336 _line_progressed(rem.len);
// directive is only the part before the '#'; trailing blanks are not part of it
4340 csubstr to_comment = rem.first(pos);
4341 csubstr trimmed = to_comment.trimr(
" \t");
4342 m_evt_handler->add_directive(trimmed);
4343 _line_progressed(pos);
// Looks for a byte order mark at the start of the current line's remainder.
// Each encoding is recognized either by its explicit mark or by the pattern
// of NUL bytes around a leading ASCII character. The checks run in the order
// UTF32BE, UTF32LE, UTF16BE, UTF16LE, UTF8, so the 4-byte forms are tested
// before the 2-byte forms that share a prefix with them. On a match the line
// is advanced by 4 bytes (UTF32), 2 bytes (UTF16) or 3 bytes (UTF8).
4348 template<
class EventHandler>
4349 bool ParseEngine<EventHandler>::_handle_bom()
4351 const csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// remainder after the first byte; used by the little-endian NUL-pattern checks
4354 const csubstr rest = rem.sub(1);
// true for a non-NUL character in the 7-bit ASCII range
4356 #define _rymlisascii(c) ((c) > '\0' && (c) <= '\x7f')
// UTF32BE: explicit mark 00 00 FE FF, or three NULs followed by an ASCII character
4357 if(rem.begins_with({
"\x00\x00\xfe\xff", 4}) || (rem.begins_with({
"\x00\x00\x00", 3}) && rem.len >= 4u && _rymlisascii(rem.str[3])))
4359 _c4dbgp(
"byte order mark: UTF32BE");
4361 _line_progressed(4);
// UTF32LE: explicit mark FF FE 00 00, or an ASCII character followed by three NULs
4364 else if(rem.begins_with(
"\xff\xfe\x00\x00") || (rest.begins_with({
"\x00\x00\x00", 3}) && rem.len >= 4u && _rymlisascii(rem.str[0])))
4366 _c4dbgp(
"byte order mark: UTF32LE");
4368 _line_progressed(4);
// UTF16BE: explicit mark FE FF, or a NUL followed by an ASCII character
4371 else if(rem.begins_with(
"\xfe\xff") || (rem.begins_with(
'\x00') && rem.len >= 2u && _rymlisascii(rem.str[1])))
4373 _c4dbgp(
"byte order mark: UTF16BE");
4375 _line_progressed(2);
// UTF16LE: explicit mark FF FE, or an ASCII character followed by a NUL
4378 else if(rem.begins_with(
"\xff\xfe") || (rest.begins_with(
'\x00') && rem.len >= 2u && _rymlisascii(rem.str[0])))
4380 _c4dbgp(
"byte order mark: UTF16LE");
4382 _line_progressed(2);
// UTF8: explicit mark EF BB BF only
4385 else if(rem.begins_with(
"\xef\xbb\xbf"))
4387 _c4dbgp(
"byte order mark: UTF8");
4389 _line_progressed(3);
// Validates a detected byte order mark encoding against m_encoding.
// While m_encoding is still NOBOM, the mark is checked for position: the test
// passes for UTF8 anywhere, and for any encoding when the current line
// remainder starts exactly at the start of the buffer; the alternative is the
// "non-UTF8 byte order mark can appear only at the beginning of the file"
// error. Once an encoding has been recorded, a different enc is an error.
4397 template<
class EventHandler>
4398 void ParseEngine<EventHandler>::_handle_bom(
Encoding_e enc)
4400 if(m_encoding ==
NOBOM)
// beginning of file == the current remainder points at the first byte of the source buffer
4402 const bool is_beginning_of_file = m_evt_handler->m_curr->line_contents.rem.str == m_buf.str;
4403 if(enc ==
UTF8 || is_beginning_of_file)
4406 _c4err(
"non-UTF8 byte order mark can appear only at the beginning of the file");
// an encoding is already recorded: it may not change
4408 else if(enc != m_encoding)
4410 _c4err(
"byte order mark can only be set once");
// Handles the parser state for a flow sequence in JSON mode.
// On entry (asserted): RSEQ and FLOW are set, RKEY is clear, and exactly one
// of RVAL / RNXT is set. Flow whitespace is skipped first, then the first
// character of the remaining line drives the action.
//  - RVAL: set a double-quoted or plain scalar value, open a child flow
//    sequence or flow map, or end the sequence.
//  - RNXT: add a sibling or end the sequence.
// The "missing terminating ]" error sits in the end-of-line / end-of-file
// handling near the bottom.
4417 template<
class EventHandler>
4418 void ParseEngine<EventHandler>::_handle_seq_json()
4421 _c4dbgpf(
"handle2_seq_json: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
// entry-state invariants
4423 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
4424 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RSEQ));
4425 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
FLOW));
4426 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL|
RNXT));
4427 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RVAL) != has_all(
RNXT));
4429 _handle_flow_skip_whitespace();
4430 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// ---- RVAL state ----
4436 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
4437 const char first = rem.str[0];
// NOTE(review): the label below says "mapjson" although this is the sequence handler
4438 _c4dbgpf(
"mapjson[RVAL]: '{}'", first);
// double-quoted scalar value
4443 _c4dbgp(
"seqjson[RVAL]: scanning double-quoted scalar");
4444 ScannedScalar sc = _scan_scalar_dquot();
4445 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
4446 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
// nested flow sequence; one character is consumed
4452 _c4dbgp(
"seqjson[RVAL]: start child seqjson");
4454 m_evt_handler->begin_seq_val_flow();
4456 _line_progressed(1);
// nested flow map; one character is consumed
4461 _c4dbgp(
"seqjson[RVAL]: start child mapjson");
4463 m_evt_handler->begin_map_val_flow();
4465 _line_progressed(1);
4466 goto seqjson_finish;
// end of this sequence
4470 _c4dbgp(
"seqjson[RVAL]: end!");
4472 m_evt_handler->end_seq();
4473 _line_progressed(1);
4475 goto seqjson_finish;
// plain scalar value, filtered against the current indentation reference
4481 if(_scan_scalar_seq_json(&sc))
4483 _c4dbgp(
"seqjson[RVAL]: it's a plain scalar.");
4484 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
4485 m_evt_handler->set_val_scalar_plain(maybe_filtered);
// ---- RNXT state ----
4497 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT));
4498 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
4499 const char first = rem.str[0];
// NOTE(review): the label below says "mapjson" although this is the sequence handler
4500 _c4dbgpf(
"mapjson[RNXT]: '{}'", first);
// another value follows
4505 _c4dbgp(
"seqjson[RNXT]: expect next val");
4507 m_evt_handler->add_sibling();
4508 _line_progressed(1);
// end of this sequence
4513 _c4dbgp(
"seqjson[RNXT]: end!");
4514 m_evt_handler->end_seq();
4515 _line_progressed(1);
4516 goto seqjson_finish;
// ---- continue with more input, or fail if the sequence was never closed ----
4524 _c4dbgt(
"seqjson: go again", 0);
4525 if(_finished_line())
4527 if(C4_LIKELY(!_finished_file()))
4535 _c4err(
"missing terminating ]");
4541 _c4dbgp(
"seqjson: finish");
// Handles the parser state for a flow map in JSON mode.
// On entry (asserted): RMAP and FLOW are set, QMRK is clear, and exactly one
// of RKEY / RKCL / RVAL / RNXT is set. Flow whitespace is skipped first, then
// the first character of the remaining line drives the action.
//  - RKEY: set a double-quoted key, or end the map.
//  - RVAL: set a double-quoted or plain scalar value, or open a child flow
//    sequence or flow map.
//  - RKCL: consume the colon after the key.
//  - RNXT: on ',' add a sibling; on '}' end the map.
// The "missing terminating }" error sits in the end-of-line / end-of-file
// handling near the bottom.
4547 template<
class EventHandler>
4548 void ParseEngine<EventHandler>::_handle_map_json()
4551 _c4dbgpf(
"handle2_map_json: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
// entry-state invariants
4553 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP));
4554 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
FLOW));
4555 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
4556 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RKCL|
RVAL|
RNXT));
4557 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(
RKEY) + has_any(
RKCL) + has_any(
RVAL) + has_any(
RNXT)));
4559 _handle_flow_skip_whitespace();
4560 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// ---- RKEY state ----
4566 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
4567 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
4568 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
4569 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
4570 const char first = rem.str[0];
4571 _c4dbgpf(
"mapjson[RKEY]: '{}'", first);
// double-quoted key
4576 _c4dbgp(
"mapjson[RKEY]: scanning double-quoted scalar");
4577 ScannedScalar sc = _scan_scalar_dquot();
4578 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
4579 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
// end of this map
4585 _c4dbgp(
"mapjson[RKEY]: end!");
4586 m_evt_handler->end_map();
4587 _line_progressed(1);
4588 goto mapjson_finish;
// ---- RVAL state ----
4594 else if(has_any(
RVAL))
4596 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
4597 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
4598 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
4599 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
4600 const char first = rem.str[0];
4601 _c4dbgpf(
"mapjson[RVAL]: '{}'", first);
// double-quoted scalar value
4606 _c4dbgp(
"mapjson[RVAL]: scanning double-quoted scalar");
4607 ScannedScalar sc = _scan_scalar_dquot();
4608 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
4609 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
// value is a nested flow sequence; the child takes the parent's indentation reference
4615 _c4dbgp(
"mapjson[RVAL]: start val seqjson");
4617 m_evt_handler->begin_seq_val_flow();
4618 _set_indentation(m_evt_handler->m_parent->indref);
4620 _line_progressed(1);
4621 goto mapjson_finish;
// value is a nested flow map; the child takes the parent's indentation reference
4625 _c4dbgp(
"mapjson[RVAL]: start val mapjson");
4627 m_evt_handler->begin_map_val_flow();
4628 _set_indentation(m_evt_handler->m_parent->indref);
4630 _line_progressed(1);
// plain scalar value
4637 if(_scan_scalar_map_json(&sc))
4639 _c4dbgp(
"mapjson[RVAL]: plain scalar.");
4640 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
4641 m_evt_handler->set_val_scalar_plain(maybe_filtered);
// ---- RKCL state ----
4652 else if(has_any(
RKCL))
4654 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
4655 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
4656 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
4657 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
4658 const char first = rem.str[0];
4659 _c4dbgpf(
"mapjson[RKCL]: '{}'", first);
// consume the colon
4662 _c4dbgp(
"mapjson[RKCL]: found the colon");
4664 _line_progressed(1);
// ---- RNXT state ----
4671 else if(has_any(
RNXT))
4673 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
4674 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
4675 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
4676 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
4677 _c4dbgpf(
"mapjson[RNXT]: '{}'", rem.str[0]);
// ',' : another key/value pair follows
4678 if(rem.begins_with(
','))
4680 _c4dbgp(
"mapjson[RNXT]: expect next keyval");
4681 m_evt_handler->add_sibling();
4683 _line_progressed(1);
// '}' : end of this map
4685 else if(rem.begins_with(
'}'))
4687 _c4dbgp(
"mapjson[RNXT]: end!");
4688 m_evt_handler->end_map();
4689 _line_progressed(1);
4690 goto mapjson_finish;
// ---- continue with more input, or fail if the map was never closed ----
4699 _c4dbgt(
"mapjson: go again", 0);
4700 if(_finished_line())
4702 if(C4_LIKELY(!_finished_file()))
4710 _c4err(
"missing terminating }");
4716 _c4dbgp(
"mapjson: finish");
// Handles the RSEQIMAP parser state ("seqimap" in the debug output): a
// key/value pair appearing directly as an entry of a flow sequence.
// On entry (asserted): RSEQIMAP is set, RKEY is clear, exactly one of
// RVAL / RNXT / QMRK / RKCL is set, and the handler stack holds at least
// three entries. Flow whitespace is skipped first, then the first character
// of the remaining line drives the action.
//  - RVAL: set the value (quoted scalar, plain scalar, or empty) and end the
//    map; or open a child flow container; or record a value anchor/ref.
//  - RNXT: on ',' or ']' end the map.
//  - QMRK: set the key (quoted, plain or empty), open a child flow container
//    as key, or record a key anchor/ref.
//  - RKCL: consume ':'; on ',' or ']' set an empty value and end the map.
4722 template<
class EventHandler>
4723 void ParseEngine<EventHandler>::_handle_seq_imap()
4726 _c4dbgpf(
"handle2_seq_imap: node_id={} level={} indref={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
// entry-state invariants
4728 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RSEQIMAP));
4729 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
4730 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL|
RNXT|
QMRK|
RKCL));
4731 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == has_all(
RVAL) + has_all(
RNXT) + has_all(
QMRK) + has_all(
RKCL));
4732 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 3);
4734 _handle_flow_skip_whitespace();
4735 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// ---- RVAL state ----
4741 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL));
4742 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
4743 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
4744 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
4745 const char first = rem.str[0];
4746 _c4dbgpf(
"seqimap[RVAL]: '{}'", _c4prc(first));
// single-quoted value: set it and close the implicit map
4750 _c4dbgp(
"seqimap[RVAL]: scanning single-quoted scalar");
4751 sc = _scan_scalar_squot();
4752 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
4753 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
4754 m_evt_handler->end_map();
4755 goto seqimap_finish;
// double-quoted value: set it and close the implicit map
4757 else if(first ==
'"')
4759 _c4dbgp(
"seqimap[RVAL]: scanning double-quoted scalar");
4760 sc = _scan_scalar_dquot();
4761 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
4762 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
4763 m_evt_handler->end_map();
4764 goto seqimap_finish;
// plain scalar value: set it and close the implicit map
4767 else if(_scan_scalar_plain_map_flow(&sc))
4769 _c4dbgp(
"seqimap[RVAL]: it's a scalar.");
4770 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
4771 m_evt_handler->set_val_scalar_plain(maybe_filtered);
4772 m_evt_handler->end_map();
4773 goto seqimap_finish;
// value is a nested flow sequence
4775 else if(first ==
'[')
4777 _c4dbgp(
"seqimap[RVAL]: start child seqflow");
4779 m_evt_handler->begin_seq_val_flow();
4781 _set_indentation(m_evt_handler->m_parent->indref);
4782 _line_progressed(1);
4783 goto seqimap_finish;
// value is a nested flow map
4785 else if(first ==
'{')
4787 _c4dbgp(
"seqimap[RVAL]: start child mapflow");
4789 m_evt_handler->begin_map_val_flow();
4791 _set_indentation(m_evt_handler->m_parent->indref);
4792 _line_progressed(1);
4793 goto seqimap_finish;
// no value before the separator/terminator: empty value, close the implicit map
// (the character itself is not consumed in the visible code)
4795 else if(first ==
',' || first ==
']')
4797 _c4dbgp(
"seqimap[RVAL]: finish without val.");
4798 m_evt_handler->set_val_scalar_plain_empty();
4799 m_evt_handler->end_map();
4800 goto seqimap_finish;
// anchor on the value
4802 else if(first ==
'&')
4804 csubstr anchor = _scan_anchor();
4805 _c4dbgp(
"seqimap[RVAL]: anchor!");
4806 m_evt_handler->set_val_anchor(anchor);
// value is a reference
4808 else if(first ==
'*')
4810 csubstr ref = _scan_ref_seq();
4811 _c4dbgp(
"seqimap[RVAL]: ref!");
4812 m_evt_handler->set_val_ref(ref);
// ---- RNXT state ----
4820 else if(has_any(
RNXT))
4822 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT));
4823 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
4824 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
4825 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
4826 const char first = rem.str[0];
4827 _c4dbgpf(
"seqimap[RNXT]: '{}'", _c4prc(first));
// separator or terminator of the enclosing sequence: the implicit map is done
4828 if(first ==
',' || first ==
']')
4832 _c4dbgp(
"seqimap: done");
4833 m_evt_handler->end_map();
4834 goto seqimap_finish;
// ---- QMRK state ----
4841 else if(has_any(
QMRK))
4843 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
QMRK));
4844 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
4845 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
4846 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
4847 const char first = rem.str[0];
4848 _c4dbgpf(
"seqimap[QMRK]: '{}'", _c4prc(first));
// single-quoted key
4852 _c4dbgp(
"seqimap[QMRK]: scanning single-quoted scalar");
4853 sc = _scan_scalar_squot();
4854 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
4855 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
// double-quoted key
4859 else if(first ==
'"')
4861 _c4dbgp(
"seqimap[QMRK]: scanning double-quoted scalar");
4862 sc = _scan_scalar_dquot();
4863 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
4864 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
// plain scalar key
4869 else if(_scan_scalar_plain_map_flow(&sc))
4871 _c4dbgp(
"seqimap[QMRK]: it's a scalar.");
4872 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
4873 m_evt_handler->set_key_scalar_plain(maybe_filtered);
// key is a nested flow sequence
4877 else if(first ==
'[')
4879 _c4dbgp(
"seqimap[QMRK]: start child seqflow");
4881 m_evt_handler->begin_seq_key_flow();
4883 _set_indentation(m_evt_handler->m_parent->indref);
4884 _line_progressed(1);
4885 goto seqimap_finish;
// key is a nested flow map
4887 else if(first ==
'{')
4889 _c4dbgp(
"seqimap[QMRK]: start child mapflow");
4891 m_evt_handler->begin_map_key_flow();
4893 _set_indentation(m_evt_handler->m_parent->indref);
4894 _line_progressed(1);
4895 goto seqimap_finish;
// neither key nor value given: both are set empty and the implicit map is closed
4897 else if(first ==
',' || first ==
']')
4899 _c4dbgp(
"seqimap[QMRK]: finish without key.");
4900 m_evt_handler->set_key_scalar_plain_empty();
4901 m_evt_handler->set_val_scalar_plain_empty();
4902 m_evt_handler->end_map();
4903 goto seqimap_finish;
// anchor on the key
4905 else if(first ==
'&')
4907 csubstr anchor = _scan_anchor();
4908 _c4dbgp(
"seqimap[QMRK]: anchor!");
4909 m_evt_handler->set_key_anchor(anchor);
// key is a reference
4911 else if(first ==
'*')
4913 csubstr ref = _scan_ref_seq();
4914 _c4dbgp(
"seqimap[QMRK]: ref!");
4915 m_evt_handler->set_key_ref(ref);
// ---- RKCL state ----
4923 else if(has_any(
RKCL))
4925 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
4926 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
4927 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
4928 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RKCL));
4929 const char first = rem.str[0];
4930 _c4dbgpf(
"seqimap[RKCL]: '{}'", _c4prc(first));
// consume the ':'
4933 _c4dbgp(
"seqimap[RKCL]: found ':'");
4935 _line_progressed(1);
// key without ':' and without value: empty value, close the implicit map
4938 else if(first ==
',' || first ==
']')
4940 _c4dbgp(
"seqimap[RKCL]: found ','. finish without val");
4941 m_evt_handler->set_val_scalar_plain_empty();
4942 m_evt_handler->end_map();
4943 goto seqimap_finish;
// ---- continue with more input ----
4952 _c4dbgt(
"seqimap: go again", 0);
4953 if(_finished_line())
4955 if(C4_LIKELY(!_finished_file()))
4969 _c4dbgp(
"seqimap: finish");
// Handles the parser state for a YAML flow sequence ("seqflow").
// On entry (asserted): RSEQ and FLOW are set, RKEY is clear, exactly one of
// RVAL / RNXT is set, and the current indentation reference is not npos.
// Flow whitespace is skipped first, then the first character of the remaining
// line drives the action.
//  - RVAL: set a quoted or plain scalar value; open a child flow sequence or
//    map; end the sequence; record a value ref, anchor or tag; or, on ':' or
//    '?', start a map inside the sequence.
//  - RNXT: add a sibling, end the sequence, or on ':' turn the previous value
//    into the first key of a new flow map.
// The "missing terminating ]" error sits in the end-of-line / end-of-file
// handling near the bottom.
4975 template<
class EventHandler>
4976 void ParseEngine<EventHandler>::_handle_seq_flow()
4979 _c4dbgpf(
"handle2_seq_flow: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
// entry-state invariants
4981 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
4982 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RSEQ));
4983 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
FLOW));
4984 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL|
RNXT));
4985 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RVAL) != has_all(
RNXT));
4986 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref !=
npos);
4988 _handle_flow_skip_whitespace();
// nothing left on this line after skipping whitespace
4990 if(!m_evt_handler->m_curr->line_contents.rem.len)
// ---- RVAL state ----
4995 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
4996 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
// single-quoted scalar value
5000 _c4dbgp(
"seqflow[RVAL]: scanning single-quoted scalar");
5001 sc = _scan_scalar_squot();
5002 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5003 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
// double-quoted scalar value
5006 else if(first ==
'"')
5008 _c4dbgp(
"seqflow[RVAL]: scanning double-quoted scalar");
5009 sc = _scan_scalar_dquot();
5010 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5011 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
// plain scalar value; tried before the single-character tokens below
5015 else if(_scan_scalar_plain_seq_flow(&sc))
5017 _c4dbgp(
"seqflow[RVAL]: it's a scalar.");
5018 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5019 m_evt_handler->set_val_scalar_plain(maybe_filtered);
// nested flow sequence; the child takes the parent's indentation reference
5022 else if(first ==
'[')
5024 _c4dbgp(
"seqflow[RVAL]: start child seqflow");
5026 m_evt_handler->begin_seq_val_flow();
5027 _set_indentation(m_evt_handler->m_parent->indref);
5029 _line_progressed(1);
// nested flow map; the child takes the parent's indentation reference
5031 else if(first ==
'{')
5033 _c4dbgp(
"seqflow[RVAL]: start child mapflow");
5035 m_evt_handler->begin_map_val_flow();
5036 _set_indentation(m_evt_handler->m_parent->indref);
5038 _line_progressed(1);
5039 goto seqflow_finish;
// end of this sequence
5041 else if(first ==
']')
5043 _c4dbgp(
"seqflow[RVAL]: end!");
5044 _line_progressed(1);
5045 m_evt_handler->end_seq();
5046 goto seqflow_finish;
// value is a reference
5048 else if(first ==
'*')
5050 csubstr ref = _scan_ref_seq();
5051 _c4dbgpf(
"seqflow[RVAL]: ref! [{}]~~~{}~~~", ref.len, ref);
5052 m_evt_handler->set_val_ref(ref);
// anchor on the value; if a comma follows directly, the value is an empty scalar
5055 else if(first ==
'&')
5057 csubstr anchor = _scan_anchor();
5058 _c4dbgpf(
"seqflow[RVAL]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
5059 m_evt_handler->set_val_anchor(anchor);
5060 if(_maybe_scan_following_comma())
5062 _c4dbgp(
"seqflow[RVAL]: empty scalar!");
5063 m_evt_handler->set_val_scalar_plain_empty();
5064 m_evt_handler->add_sibling();
// tag on the value; if a comma follows directly, the value is an empty scalar
5067 else if(first ==
'!')
5069 csubstr tag = _scan_tag();
5070 _c4dbgpf(
"seqflow[RVAL]: tag! [{}]~~~{}~~~", tag.len, tag);
5072 m_evt_handler->set_val_tag(tag);
5073 if(_maybe_scan_following_comma())
5075 _c4dbgp(
"seqflow[RVAL]: empty scalar!");
5076 m_evt_handler->set_val_scalar_plain_empty();
5077 m_evt_handler->add_sibling();
// ':' with no preceding key: start a map inside the sequence with an empty key
5080 else if(first ==
':')
5082 _c4dbgpf(
"seqflow[RVAL]: actually seqimap at node[{}], with empty key", m_evt_handler->m_curr->node_id);
5084 m_evt_handler->begin_map_val_flow();
5085 _set_indentation(m_evt_handler->m_parent->indref);
5086 m_evt_handler->set_key_scalar_plain_empty();
5088 _line_progressed(1);
5089 goto seqflow_finish;
// '?': explicit key; start a map inside the sequence and remember we were inside a '?'
5091 else if(first ==
'?')
5093 _c4dbgp(
"seqflow[RVAL]: start child mapflow, explicit key");
5095 m_was_inside_qmrk =
true;
5096 m_evt_handler->begin_map_val_flow();
5097 _set_indentation(m_evt_handler->m_parent->indref);
5099 _line_progressed(1);
5100 _maybe_skip_whitespace_tokens();
5101 goto seqflow_finish;
// ---- RNXT state ----
5110 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT));
5111 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
5112 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
// another value follows
5115 _c4dbgp(
"seqflow[RNXT]: expect next val");
5117 m_evt_handler->add_sibling();
5118 _line_progressed(1);
// end of this sequence
5120 else if(first ==
']')
5122 _c4dbgp(
"seqflow[RNXT]: end!");
5123 m_evt_handler->end_seq();
5124 _line_progressed(1);
5125 goto seqflow_finish;
// ':' after a value: that value was really the first key of a new flow map
5127 else if(first ==
':')
5129 _c4dbgpf(
"seqflow[RNXT]: actually seqimap at node[{}]", m_evt_handler->m_curr->node_id);
5130 m_evt_handler->actually_val_is_first_key_of_new_map_flow();
5131 _set_indentation(m_evt_handler->m_parent->indref);
5132 _line_progressed(1);
5134 goto seqflow_finish;
// ---- continue with more input, or fail if the sequence was never closed ----
5143 _c4dbgt(
"seqflow: go again", 0);
5144 if(_finished_line())
5146 if(C4_LIKELY(!_finished_file()))
5154 _c4err(
"missing terminating ]");
5160 _c4dbgp(
"seqflow: finish");
// Handles the parser state for a YAML flow map ("mapflow").
// On entry (asserted): RMAP and FLOW are set and exactly one of
// RKEY / RKCL / RVAL / RNXT / QMRK is set. Flow whitespace is skipped first,
// then the first character of the remaining line drives the action.
//  - RKEY: set a key (quoted, plain or empty), handle '?', end the map,
//    record a key anchor/ref/tag, or open a flow container as key.
//  - RKCL: consume ':'; on '}' or ',' set an empty value and end the map or
//    add a sibling.
//  - RVAL: set a value (quoted, plain or empty), open a flow container as
//    value, end the map, or record a value ref/anchor/tag.
//  - RNXT: on ',' add a sibling; on '}' end the map.
//  - QMRK: same kinds of key handling as RKEY, for an explicit ('?') key.
// The "missing terminating }" error sits in the end-of-line / end-of-file
// handling near the bottom.
5166 template<
class EventHandler>
5167 void ParseEngine<EventHandler>::_handle_map_flow()
5170 _c4dbgpf(
"handle2_map_flow: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
// entry-state invariants
5172 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP));
5173 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
FLOW));
5175 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(
RKEY) + has_any(
RKCL) + has_any(
RVAL) + has_any(
RNXT) + has_any(
QMRK)));
5177 _handle_flow_skip_whitespace();
5178 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// ---- RKEY state ----
5184 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
5185 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
5186 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
5187 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
5188 const char first = rem.str[0];
5189 _c4dbgpf(
"mapflow[RKEY]: '{}'", first);
// single-quoted key
5193 _c4dbgp(
"mapflow[RKEY]: scanning single-quoted scalar");
5194 sc = _scan_scalar_squot();
5195 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5196 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
// double-quoted key
5199 else if(first ==
'"')
5201 _c4dbgp(
"mapflow[RKEY]: scanning double-quoted scalar");
5202 sc = _scan_scalar_dquot();
5203 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5204 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
// plain scalar key; tried before the single-character tokens below
5208 else if(_scan_scalar_plain_map_flow(&sc))
5210 _c4dbgp(
"mapflow[RKEY]: plain scalar");
5211 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5212 m_evt_handler->set_key_scalar_plain(maybe_filtered);
// '?': explicit key marker
5215 else if(first ==
'?')
5217 _c4dbgp(
"mapflow[RKEY]: explicit key");
5218 _line_progressed(1);
5220 _maybe_skip_whitespace_tokens();
// ':' with no key text: the key is empty
5222 else if(first ==
':')
5224 _c4dbgp(
"mapflow[RKEY]: setting empty key");
5225 m_evt_handler->set_key_scalar_plain_empty();
5227 _line_progressed(1);
5228 _maybe_skip_whitespace_tokens();
// ',' with neither key nor value: both are empty
5230 else if(first ==
',')
5232 _c4dbgp(
"mapflow[RKEY]: empty key+val!");
5233 m_evt_handler->set_key_scalar_plain_empty();
5234 m_evt_handler->set_val_scalar_plain_empty();
// end of this map
5238 else if(first ==
'}')
5240 _c4dbgp(
"mapflow[RKEY]: end!");
5241 m_evt_handler->end_map();
5242 _line_progressed(1);
5243 goto mapflow_finish;
// anchor on the key
5245 else if(first ==
'&')
5247 csubstr anchor = _scan_anchor();
5248 _c4dbgpf(
"mapflow[RKEY]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
5249 m_evt_handler->set_key_anchor(anchor);
// key is a reference
5251 else if(first ==
'*')
5253 csubstr ref = _scan_ref_map();
5254 _c4dbgpf(
"mapflow[RKEY]: key ref! [{}]~~~{}~~~", ref.len, ref);
5255 m_evt_handler->set_key_ref(ref);
// key is a flow sequence (container as key)
5258 else if(first ==
'[')
5263 _c4dbgp(
"mapflow[RKEY]: start child seqflow (!)");
5265 m_evt_handler->begin_seq_key_flow();
5267 _set_indentation(m_evt_handler->m_parent->indref);
5268 _line_progressed(1);
5269 goto mapflow_finish;
// key is a flow map (container as key)
5271 else if(first ==
'{')
5276 _c4dbgp(
"mapflow[RKEY]: start child mapflow (!)");
5278 m_evt_handler->begin_map_key_flow();
5280 _set_indentation(m_evt_handler->m_parent->indref);
5281 _line_progressed(1);
// tag on the key
5284 else if(first ==
'!')
5286 csubstr tag = _scan_tag();
5287 _c4dbgpf(
"mapflow[RKEY]: tag! [{}]~~~{}~~~", tag.len, tag);
5289 m_evt_handler->set_key_tag(tag);
// ---- RKCL state ----
5296 else if(has_any(
RKCL))
5298 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
5299 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
5300 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
5301 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
5302 const char first = rem.str[0];
5303 _c4dbgpf(
"mapflow[RKCL]: '{}'", first);
// consume the colon
5306 _c4dbgp(
"mapflow[RKCL]: found the colon");
5308 _line_progressed(1);
// map ends right after a key: the value is empty
5310 else if(first ==
'}')
5312 _c4dbgp(
"mapflow[RKCL]: end with missing val!");
5314 m_evt_handler->set_val_scalar_plain_empty();
5315 m_evt_handler->end_map();
5316 _line_progressed(1);
5317 goto mapflow_finish;
// comma right after a key: the value is empty and another pair follows
5319 else if(first ==
',')
5321 _c4dbgp(
"mapflow[RKCL]: got comma. val is missing");
5322 m_evt_handler->set_val_scalar_plain_empty();
5323 m_evt_handler->add_sibling();
5325 _line_progressed(1);
// ---- RVAL state ----
5332 else if(has_any(
RVAL))
5334 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
5335 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
5336 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
5337 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
5338 const char first = rem.str[0];
5339 _c4dbgpf(
"mapflow[RVAL]: '{}'", first);
// single-quoted scalar value
5343 _c4dbgp(
"mapflow[RVAL]: scanning single-quoted scalar");
5344 sc = _scan_scalar_squot();
5345 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5346 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
// double-quoted scalar value
5349 else if(first ==
'"')
5351 _c4dbgp(
"mapflow[RVAL]: scanning double-quoted scalar");
5352 sc = _scan_scalar_dquot();
5353 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5354 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
// plain scalar value
5358 else if(_scan_scalar_plain_map_flow(&sc))
5360 _c4dbgp(
"mapflow[RVAL]: plain scalar.");
5361 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5362 m_evt_handler->set_val_scalar_plain(maybe_filtered);
// value is a nested flow sequence
5365 else if(first ==
'[')
5367 _c4dbgp(
"mapflow[RVAL]: start val seqflow");
5369 m_evt_handler->begin_seq_val_flow();
5370 _set_indentation(m_evt_handler->m_parent->indref);
5372 _line_progressed(1);
5373 goto mapflow_finish;
// value is a nested flow map
5375 else if(first ==
'{')
5377 _c4dbgp(
"mapflow[RVAL]: start val mapflow");
5379 m_evt_handler->begin_map_val_flow();
5380 _set_indentation(m_evt_handler->m_parent->indref);
5382 _line_progressed(1);
// map ends where a value was expected: the value is empty
5385 else if(first ==
'}')
5387 _c4dbgp(
"mapflow[RVAL]: end!");
5388 m_evt_handler->set_val_scalar_plain_empty();
5389 m_evt_handler->end_map();
5390 _line_progressed(1);
5391 goto mapflow_finish;
// comma where a value was expected: the value is empty
5393 else if(first ==
',')
5395 _c4dbgp(
"mapflow[RVAL]: empty val!");
5396 m_evt_handler->set_val_scalar_plain_empty();
// value is a reference
// NOTE(review): the debug label says "key ref" but this sets the value ref
5400 else if(first ==
'*')
5402 csubstr ref = _scan_ref_map();
5403 _c4dbgpf(
"mapflow[RVAL]: key ref! [{}]~~~{}~~~", ref.len, ref);
5404 m_evt_handler->set_val_ref(ref);
// anchor on the value
// NOTE(review): the debug label says "key anchor" but this sets the value anchor
5407 else if(first ==
'&')
5409 csubstr anchor = _scan_anchor();
5410 _c4dbgpf(
"mapflow[RVAL]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
5411 m_evt_handler->set_val_anchor(anchor);
// tag on the value
5413 else if(first ==
'!')
5415 csubstr tag = _scan_tag();
5416 _c4dbgpf(
"mapflow[RVAL]: tag! [{}]~~~{}~~~", tag.len, tag);
5418 m_evt_handler->set_val_tag(tag);
// ---- RNXT state ----
5425 else if(has_any(
RNXT))
5427 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
5428 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
5429 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
5430 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
5431 _c4dbgpf(
"mapflow[RNXT]: '{}'", rem.str[0]);
// ',' : another key/value pair follows
5432 if(rem.begins_with(
','))
5434 _c4dbgp(
"mapflow[RNXT]: expect next keyval");
5435 m_evt_handler->add_sibling();
5437 _line_progressed(1);
// '}' : end of this map
5439 else if(rem.begins_with(
'}'))
5441 _c4dbgp(
"mapflow[RNXT]: end!");
5442 m_evt_handler->end_map();
5443 _line_progressed(1);
5444 goto mapflow_finish;
// ---- QMRK state ----
5451 else if(has_any(
QMRK))
5453 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
5454 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
5455 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
5456 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
5457 const char first = rem.str[0];
5458 _c4dbgpf(
"mapflow[QMRK]: '{}'", first);
// single-quoted key
5462 _c4dbgp(
"mapflow[QMRK]: scanning single-quoted scalar");
5463 sc = _scan_scalar_squot();
5464 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5465 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
// double-quoted key
5468 else if(first ==
'"')
5470 _c4dbgp(
"mapflow[QMRK]: scanning double-quoted scalar");
5471 sc = _scan_scalar_dquot();
5472 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5473 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
// plain scalar key
5477 else if(_scan_scalar_plain_map_flow(&sc))
5479 _c4dbgp(
"mapflow[QMRK]: plain scalar");
5480 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5481 m_evt_handler->set_key_scalar_plain(maybe_filtered);
// ':' with no key text: the key is empty
5484 else if(first ==
':')
5486 _c4dbgp(
"mapflow[QMRK]: setting empty key");
5487 m_evt_handler->set_key_scalar_plain_empty();
5489 _line_progressed(1);
5490 _maybe_skip_whitespace_tokens();
// map ends right after '?': key and value are both empty
5492 else if(first ==
'}')
5494 _c4dbgp(
"mapflow[QMRK]: end!");
5495 m_evt_handler->set_key_scalar_plain_empty();
5496 m_evt_handler->set_val_scalar_plain_empty();
5497 m_evt_handler->end_map();
5498 _line_progressed(1);
5499 goto mapflow_finish;
// comma right after '?': key and value are both empty
5501 else if(first ==
',')
5503 _c4dbgp(
"mapflow[QMRK]: empty key+val!");
5504 m_evt_handler->set_key_scalar_plain_empty();
5505 m_evt_handler->set_val_scalar_plain_empty();
// anchor on the key
5508 else if(first ==
'&')
5510 csubstr anchor = _scan_anchor();
5511 _c4dbgpf(
"mapflow[QMRK]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
5512 m_evt_handler->set_key_anchor(anchor);
// key is a reference
5514 else if(first ==
'*')
5516 csubstr ref = _scan_ref_map();
5517 _c4dbgpf(
"mapflow[QMRK]: key ref! [{}]~~~{}~~~", ref.len, ref);
5518 m_evt_handler->set_key_ref(ref);
// key is a flow sequence (container as key)
5521 else if(first ==
'[')
5526 _c4dbgp(
"mapflow[QMRK]: start child seqflow (!)");
5528 m_evt_handler->begin_seq_key_flow();
5530 _set_indentation(m_evt_handler->m_parent->indref);
5531 _line_progressed(1);
5532 goto mapflow_finish;
// key is a flow map (container as key)
5534 else if(first ==
'{')
5539 _c4dbgp(
"mapflow[QMRK]: start child mapflow (!)");
5541 m_evt_handler->begin_map_key_flow();
5542 _set_indentation(m_evt_handler->m_parent->indref);
5544 _line_progressed(1);
// tag on the key
5547 else if(first ==
'!')
5549 csubstr tag = _scan_tag();
5550 _c4dbgpf(
"mapflow[QMRK]: tag! [{}]~~~{}~~~", tag.len, tag);
5552 m_evt_handler->set_key_tag(tag);
// ---- continue with more input, or fail if the map was never closed ----
5561 _c4dbgt(
"mapflow: go again", 0);
5562 if(_finished_line())
5564 if(C4_LIKELY(!_finished_file()))
5572 _c4err(
"missing terminating }");
5578 _c4dbgp(
"mapflow: finish");
// Handles parsing while the current state is a block sequence.
//
// The assertions at the top require the state to have both RSEQ and BLCK
// set, together with exactly one of RVAL or RNXT. The RVAL part dispatches on
// the first character of the remaining line to scan a value (or to open a
// nested container); the RNXT part deals with what follows a completed
// entry. Events are reported through m_evt_handler.
//
// NOTE(review): in this view the brace/label/branch structure of the function
// is not visible, so the comments below describe each statement group by what
// its calls and debug messages show; confirm nesting against the full file.
5584 template<
class EventHandler>
5585 void ParseEngine<EventHandler>::_handle_seq_block()
5588 _c4dbgpf(
"handle2_seq_block: seq_id={} node_id={} level={} indent={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
// Preconditions: block sequence state (RSEQ and BLCK), with exactly one of
// RVAL / RNXT set.
5590 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RSEQ));
5591 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
BLCK));
5592 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL|
RNXT));
5593 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(
RVAL) + has_any(
RNXT)));
// Skip a comment if there is one, then read the remainder of the current line.
5595 _maybe_skip_comment();
5596 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// ---------------------------------------------------------------------------
// RVAL: RNXT must not be set here.
// ---------------------------------------------------------------------------
5602 _c4dbgpf(
"seqblck[RVAL]: col={}", m_evt_handler->m_curr->pos.col);
5603 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
// At the beginning of a line the indentation is examined first.
5604 if(m_evt_handler->m_curr->at_line_beginning())
5606 _c4dbgpf(
"seqblck[RVAL]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
// indentation_ge(): advance past the line's indentation and refresh rem.
5607 if(m_evt_handler->m_curr->indentation_ge())
5609 _c4dbgpf(
"seqblck[RVAL]: skip {} from indentation", m_evt_handler->m_curr->line_contents.indentation);
5610 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
5611 rem = m_evt_handler->m_curr->line_contents.rem;
// indentation_lt(): pop out of the block sequence and finish this call.
5615 else if(m_evt_handler->m_curr->indentation_lt())
5617 _c4dbgp(
"seqblck[RVAL]: smaller indentation!");
5618 _handle_indentation_pop_from_block_seq();
5619 goto seqblck_finish;
// indentation == npos is treated as an empty line: consume all of it.
5621 else if(m_evt_handler->m_curr->line_contents.indentation ==
npos)
5623 _c4dbgp(
"seqblck[RVAL]: empty line!");
5624 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
// NOTE(review): conditional section; its matching #endif is not visible in
// this view.
5628 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
// When rem starts with a space, give pending annotations a chance to set the
// indentation, and re-read rem if they did.
5636 if(rem.str[0] ==
' ')
5638 if(_handle_indentation_from_annotations())
5640 _c4dbgp(
"seqblck[RVAL]: annotations!");
5641 rem = m_evt_handler->m_curr->line_contents.rem;
// rem must be non-empty from here on, since rem.str[0] is read below.
5648 _RYML_CB_ASSERT(callbacks(), rem.len);
5649 _c4dbgpf(
"seqblck[RVAL]: '{}' node_id={}", rem.str[0], m_evt_handler->m_curr->node_id);
// Capture the dispatch character and the line/column where this token starts;
// startline/startindent are passed to the annotation helpers below.
5650 const char first = rem.str[0];
5651 const size_t startline = m_evt_handler->m_curr->pos.line;
5654 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
// Single-quoted scalar: scan it, then check whether a colon follows.
5658 _c4dbgp(
"seqblck[RVAL]: single-quoted scalar");
5659 sc = _scan_scalar_squot();
// No colon follows: the scalar is the value of this sequence entry.
5660 if(!_maybe_scan_following_colon())
5662 _c4dbgp(
"seqblck[RVAL]: set as val");
5663 _handle_annotations_before_blck_val_scalar();
5664 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5665 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
// A colon follows: begin a block map as the value and use the scalar as its
// first key.
5670 _c4dbgp(
"seqblck[RVAL]: start mapblck, set scalar as key");
5672 _handle_annotations_before_start_mapblck(startline);
5674 m_evt_handler->begin_map_val_block();
5675 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5676 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5677 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
5679 _maybe_skip_whitespace_tokens();
5680 goto seqblck_finish;
// Double-quoted scalar: same handling as the single-quoted case, using the
// dquot scan/filter/set functions.
5683 else if(first ==
'"')
5685 _c4dbgp(
"seqblck[RVAL]: double-quoted scalar");
5686 sc = _scan_scalar_dquot();
5687 if(!_maybe_scan_following_colon())
5689 _c4dbgp(
"seqblck[RVAL]: set as val");
5690 _handle_annotations_before_blck_val_scalar();
5691 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5692 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5697 _c4dbgp(
"seqblck[RVAL]: start mapblck, set scalar as key");
5699 _handle_annotations_before_start_mapblck(startline);
5701 m_evt_handler->begin_map_val_block();
5702 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5703 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5704 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5706 _maybe_skip_whitespace_tokens();
5707 goto seqblck_finish;
// '|': block-literal scalar. _scan_block() receives indref + 1 as its second
// argument (presumably the indentation the block must have -- TODO confirm
// against _scan_block).
5713 else if(first ==
'|')
5715 _c4dbgp(
"seqblck[RVAL]: block-literal scalar");
5717 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
5718 _handle_annotations_before_blck_val_scalar();
5719 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
5720 m_evt_handler->set_val_scalar_literal(maybe_filtered);
// '>': block-folded scalar, scanned the same way and filtered as folded.
5723 else if(first ==
'>')
5725 _c4dbgp(
"seqblck[RVAL]: block-folded scalar");
5727 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
5728 _handle_annotations_before_blck_val_scalar();
5729 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
5730 m_evt_handler->set_val_scalar_folded(maybe_filtered);
// Plain scalar: tried after the quoted and block scalar forms.
5733 else if(_scan_scalar_plain_seq_blck(&sc))
5735 _c4dbgp(
"seqblck[RVAL]: plain scalar.");
// No colon follows: the plain scalar is the value of this entry.
5736 if(!_maybe_scan_following_colon())
5738 _c4dbgp(
"seqblck[RVAL]: set as val");
5739 _handle_annotations_before_blck_val_scalar();
5740 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5741 m_evt_handler->set_val_scalar_plain(maybe_filtered);
// Scalar started to the right of indref: begin a nested block map with the
// scalar as its key.
5746 if(startindent > m_evt_handler->m_curr->indref)
5748 _c4dbgp(
"seqblck[RVAL]: start mapblck, set scalar as key");
5750 _handle_annotations_before_start_mapblck(startline);
5752 m_evt_handler->begin_map_val_block();
5753 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5754 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5755 m_evt_handler->set_key_scalar_plain(maybe_filtered);
5757 _maybe_skip_whitespace_tokens();
5758 goto seqblck_finish;
// Scalar starts at the parent's indref: set an empty value, end this
// (indentless) sequence, add a sibling and use the scalar as its key.
// NOTE(review): this tests the parent with has_any(RMAP|BLCK, ...), whereas the
// corresponding indentless-seq check in the RNXT part below uses
// has_all(RMAP|BLCK, ...) -- confirm that has_any is intended here.
5760 else if(m_evt_handler->m_parent && m_evt_handler->m_parent->indref == startindent && has_any(
RMAP|
BLCK, m_evt_handler->m_parent))
5762 _c4dbgp(
"seqblck[RVAL]: empty val + end indentless seq + set key");
5763 m_evt_handler->set_val_scalar_plain_empty();
5764 m_evt_handler->end_seq();
5765 m_evt_handler->add_sibling();
5766 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5767 m_evt_handler->set_key_scalar_plain(maybe_filtered);
5769 _maybe_skip_whitespace_tokens();
5770 goto seqblck_finish;
// '[': the value is a flow sequence. Consume the bracket and set the
// indentation to the parent's indref + 1.
5778 else if(first ==
'[')
5780 _c4dbgp(
"seqblck[RVAL]: start child seqflow");
5782 _handle_annotations_before_blck_val_scalar();
5783 m_evt_handler->begin_seq_val_flow();
5785 _line_progressed(1);
5786 _set_indentation(m_evt_handler->m_parent->indref + 1u);
5787 goto seqblck_finish;
// '{': the value is a flow map; handled like the flow sequence above.
5789 else if(first ==
'{')
5791 _c4dbgp(
"seqblck[RVAL]: start child mapflow");
5793 _handle_annotations_before_blck_val_scalar();
5794 m_evt_handler->begin_map_val_flow();
5796 _line_progressed(1);
5797 _set_indentation(m_evt_handler->m_parent->indref + 1u);
5798 goto seqblck_finish;
// '-': another sequence dash.
5800 else if(first ==
'-')
// Dash at the current indref: the previous entry had no value, so set an empty
// value and add a sibling.
5802 if(startindent == m_evt_handler->m_curr->indref)
5804 _c4dbgp(
"seqblck[RVAL]: prev val was empty");
5805 _handle_annotations_before_blck_val_scalar();
5806 m_evt_handler->set_val_scalar_plain_empty();
5808 m_evt_handler->add_sibling();
// Otherwise the dash must be more indented (asserted): begin a child block
// sequence and save the indentation.
5812 _c4dbgp(
"seqblck[RVAL]: start child seqblck");
5813 _RYML_CB_ASSERT(this->callbacks(), startindent > m_evt_handler->m_curr->indref);
5815 _handle_annotations_before_blck_val_scalar();
5816 m_evt_handler->begin_seq_val_block();
5818 _save_indentation();
// Consume the dash and any whitespace tokens after it.
5821 _line_progressed(1);
5822 _maybe_skip_whitespace_tokens();
// ':' with no preceding scalar: begin a child block map whose first key is
// empty.
5824 else if(first ==
':')
5826 _c4dbgp(
"seqblck[RVAL]: start child mapblck with empty key");
5828 _handle_annotations_before_start_mapblck(startline);
5830 m_evt_handler->begin_map_val_block();
5831 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5832 m_evt_handler->set_key_scalar_plain_empty();
5834 _line_progressed(1);
5835 _maybe_skip_whitespace_tokens();
5836 goto seqblck_finish;
// '&': scan the anchor and queue it in m_pending_anchors along with the
// position where it was found.
5838 else if(first ==
'&')
5840 const csubstr anchor = _scan_anchor();
5841 _c4dbgpf(
"seqblck[RVAL]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
5844 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
// '*': scan a reference; as with scalars, a following colon decides whether it
// is the value or the key of a new block map.
5846 else if(first ==
'*')
5848 csubstr ref = _scan_ref_seq();
5849 _c4dbgpf(
"seqblck[RVAL]: ref! [{}]~~~{}~~~", ref.len, ref);
5850 if(!_maybe_scan_following_colon())
5852 _c4dbgp(
"seqblck[RVAL]: set ref as val!");
5853 _handle_annotations_before_blck_val_scalar();
5854 m_evt_handler->set_val_ref(ref);
// Colon follows: the reference is the first key of a new block map, whose
// indentation is set to startindent.
5859 _c4dbgp(
"seqblck[RVAL]: ref is key of map");
5861 _handle_annotations_before_start_mapblck(startline);
5862 m_evt_handler->begin_map_val_block();
5863 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
5864 m_evt_handler->set_key_ref(ref);
5866 _set_indentation(startindent);
5867 _maybe_skip_whitespace_tokens();
5868 goto seqblck_finish;
// '!': scan the tag and queue it in m_pending_tags along with the position
// where it was found.
5871 else if(first ==
'!')
5873 csubstr tag = _scan_tag();
5874 _c4dbgpf(
"seqblck[RVAL]: val tag! [{}]~~~{}~~~", tag.len, tag);
5877 _add_annotation(&m_pending_tags, tag, startindent, startline);
// '?': explicit key. Set m_was_inside_qmrk, begin a child block map, save the
// indentation and consume the '?'.
5879 else if(first ==
'?')
5881 _c4dbgp(
"seqblck[RVAL]: start child mapblck, explicit key");
5883 m_was_inside_qmrk =
true;
5884 m_evt_handler->begin_map_val_block();
5886 _save_indentation();
5887 _line_progressed(1);
5888 _maybe_skip_whitespace_tokens();
5889 goto seqblck_finish;
// ---------------------------------------------------------------------------
// RNXT: RVAL must not be set here.
// ---------------------------------------------------------------------------
5898 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT));
5899 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
5903 _c4dbgpf(
"seqblck[RNXT]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
// Marked as the likely case: we are at the beginning of a line.
5904 if(C4_LIKELY(_at_line_begin()))
5906 _c4dbgp(
"seqblck[RNXT]: at line begin");
// indentation_ge(): skip indref characters (not the full line indentation),
// then any whitespace tokens, and refresh rem.
5907 if(m_evt_handler->m_curr->indentation_ge())
5909 _c4dbgpf(
"seqblck[RNXT]: skip {} from indref", m_evt_handler->m_curr->indref);
5910 _line_progressed(m_evt_handler->m_curr->indref);
5911 _maybe_skip_whitespace_tokens();
5912 rem = m_evt_handler->m_curr->line_contents.rem;
// indentation_lt(): pop out of this block sequence.
5916 else if(m_evt_handler->m_curr->indentation_lt())
5918 _c4dbgp(
"seqblck[RNXT]: smaller indentation!");
5919 _handle_indentation_pop_from_block_seq();
// After the pop, still in a block sequence (per the debug message): skip the
// line's indentation and carry on in RNXT.
5922 _c4dbgp(
"seqblck[RNXT]: still seqblck!");
5923 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT));
5924 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
5925 rem = m_evt_handler->m_curr->line_contents.rem;
// After the pop, no longer in a block sequence: finish this call.
5931 _c4dbgp(
"seqblck[RNXT]: no longer seqblck!");
5932 goto seqblck_finish;
// indentation == npos is treated as a blank line: consume all of it.
// NOTE(review): the debug message says "len={}" but is given the rem substring
// rather than rem.len -- confirm which was intended.
5935 else if(m_evt_handler->m_curr->line_contents.indentation ==
npos)
5937 _c4dbgpf(
"seqblck[RNXT]: blank line, len={}", m_evt_handler->m_curr->line_contents.rem);
5938 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
5939 rem = m_evt_handler->m_curr->line_contents.rem;
// Not at the beginning of the line: check whether rem starts with a space or
// tab.
5946 _c4dbgp(
"seqblck[RNXT]: NOT at line begin");
5947 if(!rem.begins_with_any(
" \t"))
5954 rem = m_evt_handler->m_curr->line_contents.rem;
5957 _c4dbgp(
"seqblck[RNXT]: again");
// Dispatch on the first character of what remains.
5965 const char first = rem.str[0];
5966 _c4dbgpf(
"seqblck[RNXT]: '{}' node_id={}", first, m_evt_handler->m_curr->node_id);
// Next-entry case: taken when indref or the line indentation is nonzero, or
// when rem is not a document-begin token. Add a sibling and consume one
// character.
5969 if(m_evt_handler->m_curr->indref > 0 || m_evt_handler->m_curr->line_contents.indentation > 0 || !_is_doc_begin_token(rem))
5971 _c4dbgp(
"seqblck[RNXT]: expect next val");
5973 m_evt_handler->add_sibling();
5974 _line_progressed(1);
5975 _maybe_skip_whitespace_tokens();
// Otherwise this is a document-begin token: start a new document and consume
// three characters.
5979 _c4dbgp(
"seqblck[RNXT]: start doc");
5980 _start_doc_suddenly();
5981 _line_progressed(3);
5982 _maybe_skip_whitespace_tokens();
5983 goto seqblck_finish;
// ':' when the parent state has the RMAP flag: this sequence was the '?' key
// of the parent map (per the debug message), so end the sequence.
5986 else if(first ==
':')
5992 auto const *C4_RESTRICT prev_state = m_evt_handler->m_parent;
5993 if(C4_LIKELY(prev_state && (prev_state->flags &
RMAP)))
5995 _c4dbgp(
"seqblck[RNXT]: actually this seq was '?' key of parent map");
5996 m_evt_handler->end_seq();
5997 goto seqblck_finish;
// '.': a document end is recognized when the text after the first '.' is
// exactly ".." or begins with ".. ".
6004 else if(first ==
'.')
6006 _c4dbgp(
"seqblck[RNXT]: maybe doc?");
6007 csubstr rs = rem.sub(1);
6008 if(rs ==
".." || rs.begins_with(
".. "))
6010 _c4dbgp(
"seqblck[RNXT]: end+start doc");
6011 _end_doc_suddenly();
6012 _line_progressed(3);
6013 _maybe_skip_whitespace_tokens();
6014 goto seqblck_finish;
// Print every state in the handler's stack via _dbg_printf.
6027 for(
auto const& s : m_evt_handler->m_stack)
6029 _dbg_printf(
"state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(flagbuf_, s.flags));
// Indentless sequence inside a block map: the parent has both RMAP and BLCK and
// the same indref. Pop to the parent and add a sibling there.
6032 if(m_evt_handler->m_parent && has_all(
RMAP|
BLCK, m_evt_handler->m_parent) && m_evt_handler->m_curr->indref == m_evt_handler->m_parent->indref)
6034 _c4dbgpf(
"seqblck[RNXT]: end indentless seq, go to parent={}. node={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id);
6035 _RYML_CB_ASSERT(this->callbacks(), m_evt_handler->m_curr != m_evt_handler->m_parent);
6036 _handle_indentation_pop(m_evt_handler->m_parent);
6037 _RYML_CB_ASSERT(this->callbacks(), has_all(
RMAP|
BLCK));
6038 m_evt_handler->add_sibling();
6040 goto seqblck_finish;
// "go again" step: checks whether the line, and then the file, are finished.
// NOTE(review): the statements between these checks are not visible in this
// view.
6050 _c4dbgt(
"seqblck: go again", 0);
6051 if(_finished_line())
6055 if(_finished_file())
6057 _c4dbgp(
"seqblck: finish!");
6059 goto seqblck_finish;
// Reached on leaving the function (the seqblck_finish label itself is not
// visible here).
6066 _c4dbgp(
"seqblck: finish");
6072 template<
class EventHandler>
6073 void ParseEngine<EventHandler>::_handle_map_block()
6076 _c4dbgpf(
"handle2_map_block: map_id={} node_id={} level={} indref={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
6079 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP));
6080 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
BLCK));
6082 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(
RKEY) + has_any(
RKCL) + has_any(
RVAL) + has_any(
RNXT) + has_any(
QMRK)));
6084 _maybe_skip_comment();
6085 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
6091 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
6092 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
6093 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
6094 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
6098 if(m_evt_handler->m_curr->at_line_beginning())
6100 if(m_evt_handler->m_curr->indentation_eq())
6102 _c4dbgpf(
"mapblck[RKEY]: skip {} from indref", m_evt_handler->m_curr->indref);
6103 _line_progressed(m_evt_handler->m_curr->indref);
6104 rem = m_evt_handler->m_curr->line_contents.rem;
6108 else if(m_evt_handler->m_curr->indentation_lt())
6110 _c4dbgp(
"mapblck[RKEY]: smaller indentation!");
6111 _handle_indentation_pop_from_block_map();
6112 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6115 _c4dbgp(
"mapblck[RKEY]: still mapblck!");
6116 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY));
6117 rem = m_evt_handler->m_curr->line_contents.rem;
6123 _c4dbgp(
"mapblck[RKEY]: no longer mapblck!");
6124 goto mapblck_finish;
6129 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_gt());
6130 _c4err(
"invalid indentation");
6136 const char first = rem.str[0];
6137 const size_t startline = m_evt_handler->m_curr->pos.line;
6138 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
6139 _c4dbgpf(
"mapblck[RKEY]: '{}'", first);
6143 _c4dbgp(
"mapblck[RKEY]: scanning single-quoted scalar");
6144 sc = _scan_scalar_squot();
6145 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
6146 _handle_annotations_before_blck_key_scalar();
6147 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6149 if(!_maybe_scan_following_colon())
6150 _c4err(
"could not find ':' colon after key");
6151 _maybe_skip_whitespace_tokens();
6153 else if(first ==
'"')
6155 _c4dbgp(
"mapblck[RKEY]: scanning double-quoted scalar");
6156 sc = _scan_scalar_dquot();
6157 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
6158 _handle_annotations_before_blck_key_scalar();
6159 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6161 if(!_maybe_scan_following_colon())
6162 _c4err(
"could not find ':' colon after key");
6163 _maybe_skip_whitespace_tokens();
6167 else if(C4_UNLIKELY(first ==
'|'))
6169 _c4err(
"block literal keys must be enclosed in '?'");
6171 else if(C4_UNLIKELY(first ==
'>'))
6173 _c4err(
"block literal keys must be enclosed in '?'");
6175 else if(_scan_scalar_plain_map_blck(&sc))
6177 _c4dbgp(
"mapblck[RKEY]: plain scalar");
6178 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
6179 _handle_annotations_before_blck_key_scalar();
6180 m_evt_handler->set_key_scalar_plain(maybe_filtered);
6182 if(!_maybe_scan_following_colon())
6183 _c4err(
"could not find ':' colon after key");
6184 _maybe_skip_whitespace_tokens();
6186 else if(first ==
'?')
6188 _c4dbgp(
"mapblck[RKEY]: key token!");
6190 _line_progressed(1);
6191 _maybe_skip_whitespace_tokens();
6192 m_was_inside_qmrk =
true;
6195 else if(first ==
':')
6197 _c4dbgp(
"mapblck[RKEY]: setting empty key");
6198 _handle_annotations_before_blck_key_scalar();
6199 m_evt_handler->set_key_scalar_plain_empty();
6201 _line_progressed(1);
6202 _maybe_skip_whitespace_tokens();
6204 else if(first ==
'*')
6206 csubstr ref = _scan_ref_map();
6207 _c4dbgpf(
"mapblck[RKEY]: key ref! [{}]~~~{}~~~", ref.len, ref);
6208 _handle_annotations_before_blck_key_scalar();
6209 m_evt_handler->set_key_ref(ref);
6211 if(!_maybe_scan_following_colon())
6212 _c4err(
"could not find ':' colon after key");
6213 _maybe_skip_whitespace_tokens();
6215 else if(first ==
'&')
6217 csubstr anchor = _scan_anchor();
6218 _c4dbgpf(
"mapblck[RKEY]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
6219 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
6221 else if(first ==
'!')
6223 csubstr tag = _scan_tag();
6224 _c4dbgpf(
"mapblck[RKEY]: key tag! [{}]~~~{}~~~", tag.len, tag);
6225 _add_annotation(&m_pending_tags, tag, startindent, startline);
6227 else if(first ==
'[')
6232 _c4dbgp(
"mapblck[RKEY]: start child seqflow (!)");
6234 _handle_annotations_before_blck_key_scalar();
6235 m_evt_handler->begin_seq_key_flow();
6237 _line_progressed(1);
6238 _set_indentation(startindent);
6239 goto mapblck_finish;
6241 else if(first ==
'{')
6246 _c4dbgp(
"mapblck[RKEY]: start child mapflow (!)");
6248 _handle_annotations_before_blck_key_scalar();
6249 m_evt_handler->begin_map_key_flow();
6251 _line_progressed(1);
6252 _set_indentation(startindent);
6253 goto mapblck_finish;
6255 else if(first ==
'-')
6257 _c4dbgp(
"mapblck[RKEY]: maybe doc?");
6258 if(m_evt_handler->m_curr->line_contents.indentation == 0 && _is_doc_begin_token(rem))
6260 _c4dbgp(
"mapblck[RKEY]: end+start doc");
6261 _start_doc_suddenly();
6262 _line_progressed(3);
6263 _maybe_skip_whitespace_tokens();
6264 goto mapblck_finish;
6271 else if(first ==
'.')
6273 _c4dbgp(
"mapblck[RKEY]: maybe end doc?");
6274 if(m_evt_handler->m_curr->line_contents.indentation == 0 && _is_doc_end_token(rem))
6276 _c4dbgp(
"mapblck[RKEY]: end doc");
6277 _end_doc_suddenly();
6278 _line_progressed(3);
6279 _maybe_skip_whitespace_tokens();
6280 goto mapblck_finish;
6288 else if(first ==
'\t')
6290 _c4dbgp(
"mapblck[RKEY]: skip tabs");
6291 _maybe_skipchars(
'\t');
6298 else if(has_any(
RKCL))
6300 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
6301 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
6302 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
6303 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
6307 if(m_evt_handler->m_curr->at_line_beginning())
6309 if(m_evt_handler->m_curr->indentation_eq())
6311 _c4dbgpf(
"mapblck[RKCL]: skip {} from indref", m_evt_handler->m_curr->indref);
6312 _line_progressed(m_evt_handler->m_curr->indref);
6313 rem = m_evt_handler->m_curr->line_contents.rem;
6317 else if(C4_UNLIKELY(m_evt_handler->m_curr->indentation_lt()))
6319 _c4err(
"invalid indentation");
6322 const char first = rem.str[0];
6323 _c4dbgpf(
"mapblck[RKCL]: '{}'", first);
6326 _c4dbgp(
"mapblck[RKCL]: found the colon");
6328 _line_progressed(1);
6329 _maybe_skip_whitespace_tokens();
6331 else if(first ==
'?')
6333 _c4dbgp(
"mapblck[RKCL]: got '?'. val was empty");
6334 _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_was_inside_qmrk);
6335 m_evt_handler->set_val_scalar_plain_empty();
6336 m_evt_handler->add_sibling();
6338 _line_progressed(1);
6339 _maybe_skip_whitespace_tokens();
6341 else if(first ==
'-')
6343 if(m_evt_handler->m_curr->indref == 0 || m_evt_handler->m_curr->line_contents.indentation == 0 || _is_doc_begin_token(rem))
6345 _c4dbgp(
"mapblck[RKCL]: end+start doc");
6346 _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, _is_doc_begin_token(rem));
6347 _start_doc_suddenly();
6348 _line_progressed(3);
6349 _maybe_skip_whitespace_tokens();
6350 goto mapblck_finish;
6357 else if(first ==
'.')
6359 _c4dbgp(
"mapblck[RKCL]: maybe end doc?");
6360 csubstr rs = rem.sub(1);
6361 if(rs ==
".." || rs.begins_with(
".. "))
6363 _c4dbgp(
"mapblck[RKCL]: end+start doc");
6364 _end_doc_suddenly();
6365 _line_progressed(3);
6366 goto mapblck_finish;
6373 else if(m_was_inside_qmrk)
6375 _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_eq());
6376 _c4dbgp(
"mapblck[RKCL]: missing :");
6377 m_evt_handler->set_val_scalar_plain_empty();
6378 m_evt_handler->add_sibling();
6379 m_was_inside_qmrk =
false;
6387 else if(has_any(
RVAL))
6389 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
6390 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
6391 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
6392 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
6396 if(m_evt_handler->m_curr->at_line_beginning())
6398 _c4dbgpf(
"mapblck[RVAL]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
6399 m_evt_handler->m_curr->more_indented =
false;
6400 if(m_evt_handler->m_curr->indref ==
npos)
6402 _c4dbgpf(
"mapblck[RVAL]: setting indentation={}", m_evt_handler->m_parent->indref);
6403 _set_indentation(m_evt_handler->m_curr->line_contents.indentation);
6404 _line_progressed(m_evt_handler->m_curr->indref);
6405 rem = m_evt_handler->m_curr->line_contents.rem;
6409 else if(m_evt_handler->m_curr->indentation_eq())
6411 _c4dbgp(
"mapblck[RVAL]: skip indentation!");
6412 _line_progressed(m_evt_handler->m_curr->indref);
6413 rem = m_evt_handler->m_curr->line_contents.rem;
6441 else if(m_evt_handler->m_curr->indentation_gt())
6443 _c4dbgp(
"mapblck[RVAL]: more indented!");
6444 m_evt_handler->m_curr->more_indented =
true;
6445 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6446 rem = m_evt_handler->m_curr->line_contents.rem;
6450 else if(m_evt_handler->m_curr->indentation_lt())
6452 _c4dbgp(
"mapblck[RVAL]: smaller indentation!");
6453 _handle_indentation_pop_from_block_map();
6456 _c4dbgp(
"mapblck[RVAL]: still mapblck!");
6457 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6460 _c4dbgp(
"mapblck[RVAL]: speculatively expect next keyval");
6461 m_evt_handler->add_sibling();
6468 _c4dbgp(
"mapblck[RVAL]: no longer mapblck!");
6469 goto mapblck_finish;
6472 else if(m_evt_handler->m_curr->line_contents.indentation ==
npos)
6474 _c4dbgp(
"mapblck[RVAL]: empty line!");
6475 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
6482 const char first = rem.str[0];
6483 const size_t startline = m_evt_handler->m_curr->pos.line;
6484 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
6485 _c4dbgpf(
"mapblck[RVAL]: '{}'", first);
6489 _c4dbgp(
"mapblck[RVAL]: scanning single-quoted scalar");
6490 sc = _scan_scalar_squot();
6491 if(!_maybe_scan_following_colon())
6493 _c4dbgp(
"mapblck[RVAL]: set as val");
6494 _handle_annotations_before_blck_val_scalar();
6495 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
6496 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
6501 if(startindent != m_evt_handler->m_curr->indref)
6503 _c4dbgp(
"mapblck[RVAL]: start new block map, set scalar as key");
6504 _handle_annotations_before_start_mapblck(startline);
6507 m_evt_handler->begin_map_val_block();
6508 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6509 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
6510 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6511 _maybe_skip_whitespace_tokens();
6517 _c4dbgp(
"mapblck[RVAL]: prev val empty+this is a key");
6518 m_evt_handler->set_val_scalar_plain_empty();
6519 m_evt_handler->add_sibling();
6520 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
6521 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6523 _maybe_skip_whitespace_tokens();
6527 else if(first ==
'"')
6529 _c4dbgp(
"mapblck[RVAL]: scanning double-quoted scalar");
6530 sc = _scan_scalar_dquot();
6531 if(!_maybe_scan_following_colon())
6533 _c4dbgp(
"mapblck[RVAL]: set as val");
6534 _handle_annotations_before_blck_val_scalar();
6535 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
6536 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
6541 if(startindent != m_evt_handler->m_curr->indref)
6543 _c4dbgp(
"mapblck[RVAL]: start new block map, set scalar as key");
6544 _handle_annotations_before_start_mapblck(startline);
6547 m_evt_handler->begin_map_val_block();
6548 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6549 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
6550 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6551 _maybe_skip_whitespace_tokens();
6557 _c4dbgp(
"mapblck[RVAL]: prev val empty+this is a key");
6558 m_evt_handler->set_val_scalar_plain_empty();
6559 m_evt_handler->add_sibling();
6560 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
6561 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6563 _maybe_skip_whitespace_tokens();
6569 else if(first ==
'|')
6571 _c4dbgp(
"mapblck[RVAL]: scanning block-literal scalar");
6573 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6574 _handle_annotations_before_blck_val_scalar();
6575 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
6576 m_evt_handler->set_val_scalar_literal(maybe_filtered);
6579 else if(first ==
'>')
6581 _c4dbgp(
"mapblck[RVAL]: scanning block-folded scalar");
6583 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6584 _handle_annotations_before_blck_val_scalar();
6585 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
6586 m_evt_handler->set_val_scalar_folded(maybe_filtered);
6589 else if(_scan_scalar_plain_map_blck(&sc))
6591 _c4dbgp(
"mapblck[RVAL]: plain scalar.");
6592 if(!_maybe_scan_following_colon())
6594 _c4dbgp(
"mapblck[RVAL]: set as val");
6595 _handle_annotations_before_blck_val_scalar();
6596 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
6597 m_evt_handler->set_val_scalar_plain(maybe_filtered);
6602 if(startindent != m_evt_handler->m_curr->indref)
6604 _c4dbgpf(
"mapblck[RVAL]: start new block map, set scalar as key {}", m_evt_handler->m_curr->indref);
6606 _handle_annotations_before_start_mapblck(startline);
6608 m_evt_handler->begin_map_val_block();
6609 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6610 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
6611 m_evt_handler->set_key_scalar_plain(maybe_filtered);
6612 _maybe_skip_whitespace_tokens();
6618 _c4dbgp(
"mapblck[RVAL]: prev val empty+this is a key");
6619 _handle_annotations_before_blck_val_scalar();
6620 m_evt_handler->set_val_scalar_plain_empty();
6621 m_evt_handler->add_sibling();
6622 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
6623 m_evt_handler->set_key_scalar_plain(maybe_filtered);
6625 _maybe_skip_whitespace_tokens();
6629 else if(first ==
'-')
6633 _c4dbgp(
"mapblck[RVAL]: start val seqblck");
6635 _handle_annotations_before_blck_val_scalar();
6636 m_evt_handler->begin_seq_val_block();
6638 _set_indentation(startindent);
6639 _line_progressed(1);
6640 _maybe_skip_whitespace_tokens();
6641 goto mapblck_finish;
6643 else if(m_evt_handler->m_curr->indref == 0 || m_evt_handler->m_curr->line_contents.indentation == 0 || _is_doc_begin_token(rem))
6645 _c4dbgp(
"mapblck[RVAL]: end+start doc");
6646 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, _is_doc_begin_token(rem));
6647 _start_doc_suddenly();
6648 _line_progressed(3);
6649 _maybe_skip_whitespace_tokens();
6650 goto mapblck_finish;
6657 else if(first ==
'[')
6659 _c4dbgp(
"mapblck[RVAL]: start val seqflow");
6661 _handle_annotations_before_blck_val_scalar();
6662 m_evt_handler->begin_seq_val_flow();
6664 _set_indentation(m_evt_handler->m_curr->indref + 1u);
6665 _line_progressed(1);
6666 goto mapblck_finish;
6668 else if(first ==
'{')
6670 _c4dbgp(
"mapblck[RVAL]: start val mapflow");
6672 _handle_annotations_before_blck_val_scalar();
6673 m_evt_handler->begin_map_val_flow();
6675 m_evt_handler->m_curr->scalar_col = m_evt_handler->m_curr->line_contents.indentation;
6676 _set_indentation(m_evt_handler->m_curr->indref + 1u);
6677 _line_progressed(1);
6678 goto mapblck_finish;
6680 else if(first ==
'*')
6682 csubstr ref = _scan_ref_map();
6683 _c4dbgpf(
"mapblck[RVAL]: ref! [{}]~~~{}~~~", ref.len, ref);
6684 if(startindent == m_evt_handler->m_curr->indref)
6686 _c4dbgpf(
"mapblck[RVAL]: same indentation {}", startindent);
6687 m_evt_handler->set_val_ref(ref);
6692 _c4dbgpf(
"mapblck[RVAL]: larger indentation {}>{}", startindent, m_evt_handler->m_curr->indref);
6693 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, startindent > m_evt_handler->m_curr->indref);
6694 if(_maybe_scan_following_colon())
6696 _c4dbgp(
"mapblck[RVAL]: start child map, block");
6698 _handle_annotations_before_blck_val_scalar();
6699 m_evt_handler->begin_map_val_block();
6700 m_evt_handler->set_key_ref(ref);
6701 _set_indentation(startindent);
6707 _c4dbgp(
"mapblck[RVAL]: was val ref");
6708 _handle_annotations_before_blck_val_scalar();
6709 m_evt_handler->set_val_ref(ref);
6713 _maybe_skip_whitespace_tokens();
6715 else if(first ==
'&')
6717 csubstr anchor = _scan_anchor();
6718 _c4dbgpf(
"mapblck[RVAL]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
6719 if(startindent == m_evt_handler->m_curr->indref)
6721 _c4dbgp(
"mapblck[RVAL]: anchor for next key. val is missing!");
6722 m_evt_handler->set_val_scalar_plain_empty();
6723 m_evt_handler->add_sibling();
6728 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
6730 else if(first ==
'!')
6732 csubstr tag = _scan_tag();
6733 _c4dbgpf(
"mapblck[RVAL]: tag! [{}]~~~{}~~~", tag.len, tag);
6734 if(startindent == m_evt_handler->m_curr->indref)
6736 _c4dbgp(
"mapblck[RVAL]: tag for next key. val is missing!");
6737 _handle_annotations_before_blck_val_scalar();
6738 m_evt_handler->set_val_scalar_plain_empty();
6739 m_evt_handler->add_sibling();
6744 _add_annotation(&m_pending_tags, tag, startindent, startline);
6746 else if(first ==
'?')
6748 if(startindent == m_evt_handler->m_curr->indref)
6750 _c4dbgp(
"mapblck[RVAL]: got '?'. val was empty");
6751 _handle_annotations_before_blck_val_scalar();
6752 m_evt_handler->set_val_scalar_plain_empty();
6753 m_evt_handler->add_sibling();
6756 else if(startindent > m_evt_handler->m_curr->indref)
6758 _c4dbgp(
"mapblck[RVAL]: start val mapblck");
6760 _handle_annotations_before_blck_val_scalar();
6761 m_evt_handler->begin_map_val_block();
6763 _set_indentation(startindent);
6769 m_was_inside_qmrk =
true;
6770 _line_progressed(1);
6771 _maybe_skip_whitespace_tokens();
6774 else if(first ==
':')
6776 if(startindent == m_evt_handler->m_curr->indref)
6778 _c4dbgp(
"mapblck[RVAL]: got ':'. val was empty, next key as well");
6779 m_evt_handler->set_val_scalar_plain_empty();
6780 m_evt_handler->add_sibling();
6781 m_evt_handler->set_key_scalar_plain_empty();
6783 else if(startindent > m_evt_handler->m_curr->indref)
6785 _c4dbgp(
"mapblck[RVAL]: start val mapblck");
6787 _handle_annotations_before_start_mapblck(startline);
6789 m_evt_handler->begin_map_val_block();
6790 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6791 m_evt_handler->set_key_scalar_plain_empty();
6799 _line_progressed(1);
6800 _maybe_skip_whitespace_tokens();
6803 else if(first ==
'.')
6805 _c4dbgp(
"mapblck[RVAL]: maybe doc?");
6806 csubstr rs = rem.sub(1);
6807 if(rs ==
".." || rs.begins_with(
".. "))
6809 _c4dbgp(
"seqblck[RVAL]: end doc expl");
6810 _end_doc_suddenly();
6811 _line_progressed(3);
6812 _maybe_skip_whitespace_tokens();
6813 goto mapblck_finish;
6821 else if(first ==
'\t')
6823 _c4dbgp(
"mapblck[RVAL]: skip tabs");
6824 _maybe_skipchars(
'\t');
6831 else if(has_any(
RNXT))
6833 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
6834 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
6835 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
6836 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK));
6840 if(m_evt_handler->m_curr->at_line_beginning())
6842 _c4dbgpf(
"mapblck[RNXT]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
6843 if(m_evt_handler->m_curr->indentation_eq())
6845 _c4dbgpf(
"mapblck[RNXT]: skip {} from indref", m_evt_handler->m_curr->indref);
6846 _line_progressed(m_evt_handler->m_curr->indref);
6847 _c4dbgp(
"mapblck[RNXT]: speculatively expect next keyval");
6848 m_evt_handler->add_sibling();
6852 else if(m_evt_handler->m_curr->indentation_lt())
6854 _c4dbgp(
"mapblck[RNXT]: smaller indentation!");
6855 _handle_indentation_pop_from_block_map();
6858 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6861 _c4dbgp(
"mapblck[RNXT]: speculatively expect next keyval");
6862 m_evt_handler->add_sibling();
6869 goto mapblck_finish;
6875 _c4dbgp(
"mapblck[RNXT]: NOT at line begin");
6876 if(!rem.begins_with_any(
" \t"))
6883 rem = m_evt_handler->m_curr->line_contents.rem;
6886 _c4dbgp(
"seqblck[RNXT]: again");
6894 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.len > 0);
6895 const char first = rem.str[0];
6896 _c4dbgpf(
"mapblck[RNXT]: '{}'", _c4prc(first));
6899 if(m_evt_handler->m_curr->more_indented)
6901 _c4dbgp(
"mapblck[RNXT]: start child block map");
6902 C4_NOT_IMPLEMENTED();
6904 _line_progressed(1);
6905 _set_indentation(m_evt_handler->m_curr->scalar_col);
6906 m_evt_handler->m_curr->more_indented =
false;
6914 else if(first ==
' ')
6916 _c4dbgp(
"mapblck[RNXT]: skip spaces");
6917 _maybe_skip_whitespace_tokens();
6924 else if(has_any(
QMRK))
6926 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY));
6927 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL));
6928 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL));
6929 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT));
6933 if(m_evt_handler->m_curr->at_line_beginning())
6935 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.indentation !=
npos);
6936 if(m_evt_handler->m_curr->indentation_eq())
6938 _c4dbgpf(
"mapblck[QMRK]: skip {} from indref", m_evt_handler->m_curr->indref);
6939 _line_progressed(m_evt_handler->m_curr->indref);
6940 rem = m_evt_handler->m_curr->line_contents.rem;
6944 else if(m_evt_handler->m_curr->indentation_lt())
6946 _c4dbgp(
"mapblck[QMRK]: smaller indentation!");
6947 _handle_indentation_pop_from_block_map();
6948 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6951 _c4dbgp(
"mapblck[QMRK]: still mapblck!");
6952 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(
QMRK));
6953 rem = m_evt_handler->m_curr->line_contents.rem;
6959 _c4dbgp(
"mapblck[QMRK]: no longer mapblck!");
6960 goto mapblck_finish;
6966 _c4dbgp(
"mapblck[QMRK]: larger indentation !");
6967 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6968 rem = m_evt_handler->m_curr->line_contents.rem;
6976 const char first = rem.str[0];
6977 const size_t startline = m_evt_handler->m_curr->pos.line;
6978 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
6979 _c4dbgpf(
"mapblck[QMRK]: '{}'", first);
6983 _c4dbgp(
"mapblck[QMRK]: scanning single-quoted scalar");
6984 sc = _scan_scalar_squot();
6985 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
6986 if(!_maybe_scan_following_colon())
6988 _c4dbgp(
"mapblck[QMRK]: set as key");
6989 _handle_annotations_before_blck_key_scalar();
6990 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6995 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), set scalar as key");
6997 _handle_annotations_before_start_mapblck_as_key();
6998 m_evt_handler->begin_map_key_block();
6999 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7000 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7001 _maybe_skip_whitespace_tokens();
7002 _set_indentation(startindent);
7007 else if(first ==
'"')
7009 _c4dbgp(
"mapblck[QMRK]: scanning double-quoted scalar");
7010 sc = _scan_scalar_dquot();
7011 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
7012 if(!_maybe_scan_following_colon())
7014 _c4dbgp(
"mapblck[QMRK]: set as key");
7015 _handle_annotations_before_blck_key_scalar();
7016 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7021 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), set scalar as key");
7023 _handle_annotations_before_start_mapblck_as_key();
7024 m_evt_handler->begin_map_key_block();
7025 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7026 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7027 _maybe_skip_whitespace_tokens();
7028 _set_indentation(startindent);
7033 else if(first ==
'|')
7035 _c4dbgp(
"mapblck[QMRK]: scanning block-literal scalar");
7037 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
7038 csubstr maybe_filtered = _maybe_filter_key_scalar_literal(sb);
7039 _handle_annotations_before_blck_key_scalar();
7040 m_evt_handler->set_key_scalar_literal(maybe_filtered);
7043 else if(first ==
'>')
7045 _c4dbgp(
"mapblck[QMRK]: scanning block-literal scalar");
7047 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
7048 csubstr maybe_filtered = _maybe_filter_key_scalar_folded(sb);
7049 _handle_annotations_before_blck_key_scalar();
7050 m_evt_handler->set_key_scalar_folded(maybe_filtered);
7053 else if(_scan_scalar_plain_map_blck(&sc))
7055 _c4dbgp(
"mapblck[QMRK]: plain scalar");
7056 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
7057 if(!_maybe_scan_following_colon())
7059 _c4dbgp(
"mapblck[QMRK]: set as key");
7060 _handle_annotations_before_blck_key_scalar();
7061 m_evt_handler->set_key_scalar_plain(maybe_filtered);
7066 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), set scalar as key");
7068 _handle_annotations_before_start_mapblck_as_key();
7069 m_evt_handler->begin_map_key_block();
7070 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7071 m_evt_handler->set_key_scalar_plain(maybe_filtered);
7072 _maybe_skip_whitespace_tokens();
7073 _set_indentation(startindent);
7078 else if(first ==
':')
7080 if(startindent == m_evt_handler->m_curr->indref)
7082 _c4dbgp(
"mapblck[QMRK]: empty key");
7084 _handle_annotations_before_blck_key_scalar();
7085 m_evt_handler->set_key_scalar_plain_empty();
7086 _line_progressed(1);
7087 _maybe_skip_whitespace_tokens();
7091 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), empty key");
7093 _handle_annotations_before_start_mapblck_as_key();
7094 m_evt_handler->begin_map_key_block();
7095 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7096 m_evt_handler->set_key_scalar_plain_empty();
7097 _line_progressed(1);
7098 _maybe_skip_whitespace_tokens();
7099 _set_indentation(startindent);
7104 else if(first ==
'*')
7106 csubstr ref = _scan_ref_map();
7107 _c4dbgpf(
"mapblck[QMRK]: key ref! [{}]~~~{}~~~", ref.len, ref);
7108 if(!_maybe_scan_following_colon())
7110 _c4dbgp(
"mapblck[QMRK]: set ref as key");
7111 _handle_annotations_before_blck_key_scalar();
7112 m_evt_handler->set_key_ref(ref);
7117 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), set ref as key");
7119 _handle_annotations_before_blck_key_scalar();
7120 m_evt_handler->begin_map_key_block();
7121 m_evt_handler->set_key_ref(ref);
7122 _set_indentation(startindent);
7126 _maybe_skip_whitespace_tokens();
7128 else if(first ==
'&')
7130 csubstr anchor = _scan_anchor();
7131 _c4dbgpf(
"mapblck[QMRK]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
7132 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
7134 else if(first ==
'!')
7136 csubstr tag = _scan_tag();
7137 _c4dbgpf(
"mapblck[QMRK]: key tag! [{}]~~~{}~~~", tag.len, tag);
7138 _add_annotation(&m_pending_tags, tag, startindent, startline);
7140 else if(first ==
'-')
7142 _c4dbgp(
"mapblck[QMRK]: maybe doc?");
7143 csubstr rs = rem.sub(1);
7144 if(rs ==
"--" || rs.begins_with(
"-- "))
7146 _c4dbgp(
"mapblck[QMRK]: end+start doc");
7147 _start_doc_suddenly();
7148 _line_progressed(3);
7152 _c4dbgp(
"mapblck[QMRK]: start child seqblck (!)");
7154 _handle_annotations_before_blck_key_scalar();
7155 m_evt_handler->begin_seq_key_block();
7157 _set_indentation(startindent);
7158 _line_progressed(1);
7160 _maybe_skip_whitespace_tokens();
7161 goto mapblck_finish;
7163 else if(first ==
'[')
7165 _c4dbgp(
"mapblck[QMRK]: start child seqflow (!)");
7167 m_evt_handler->begin_seq_key_flow();
7169 _set_indentation(m_evt_handler->m_parent->indref);
7170 _line_progressed(1);
7171 goto mapblck_finish;
7173 else if(first ==
'{')
7175 _c4dbgp(
"mapblck[QMRK]: start child mapblck (!)");
7177 m_evt_handler->begin_map_key_flow();
7179 _set_indentation(m_evt_handler->m_parent->indref);
7180 _line_progressed(1);
7181 goto mapblck_finish;
7183 else if(first ==
'?')
7185 _c4dbgp(
"mapblck[QMRK]: another QMRK '?'");
7186 m_evt_handler->set_key_scalar_plain_empty();
7187 m_evt_handler->set_val_scalar_plain_empty();
7188 m_evt_handler->add_sibling();
7189 _line_progressed(1);
7191 else if(first ==
'.')
7193 _c4dbgp(
"mapblck[QMRK]: maybe end doc?");
7194 csubstr rs = rem.sub(1);
7195 if(rs ==
".." || rs.begins_with(
".. "))
7197 _c4dbgp(
"mapblck[QMRK]: end+start doc");
7198 _end_doc_suddenly();
7199 _line_progressed(3);
7200 goto mapblck_finish;
7214 _c4dbgt(
"mapblck: again", 0);
7215 if(_finished_line())
7219 if(_finished_file())
7221 _c4dbgp(
"mapblck: file finished!");
7223 goto mapblck_finish;
7230 _c4dbgp(
"mapblck: finish");
// Handles the remainder of the current line for JSON input while the parser
// is at the top level: the asserts below require RTOP to be set and RNXT,
// RSEQ and RMAP to all be clear.
//
// Visible flow: skip a comment, advance past leading spaces/tabs, then
//  - '[' opens a flow sequence as the value,
//  - '{' opens a flow map as the value,
//  - a byte order mark is consumed via _handle_bom(),
//  - otherwise a double-quoted or plain scalar is scanned and, when no colon
//    follows it, set as the value.
//
// NOTE(review): the embedded source line numbers in this listing jump, so
// braces and some branch conditions (e.g. the test that selects the
// double-quoted branch, and what happens when a colon does follow a scalar)
// are not visible here; the comments describe only the statements shown.
7236 template<
class EventHandler>
7237 void ParseEngine<EventHandler>::_handle_unk_json()
7239 _c4dbgpf(
"handle_unk_json indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
7241 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT|
RSEQ|
RMAP));
7242 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RTOP));
// Give the comment skipper a chance first, then take the unparsed remainder
// of the current line.
7244 _maybe_skip_comment();
7245 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// pos is the length of the leading run of spaces/tabs; when the remainder
// holds nothing else (npos) the whole remainder is consumed.
7249 size_t pos = rem.first_not_of(
" \t");
7252 pos = pos !=
npos ? pos : rem.len;
7253 _c4dbgpf(
"skipping indentation of {}", pos);
7254 _line_progressed(pos);
// Re-read the remainder: _line_progressed() moved the line position.
7255 rem = m_evt_handler->m_curr->line_contents.rem;
7258 _c4dbgpf(
"rem is now [{}]~~~{}~~~", rem.len, rem);
// Flow sequence: open it as a value, use the column of rem as the new
// indentation, mark the document as non-empty and consume the '['.
7261 if(rem.begins_with(
'['))
7263 _c4dbgp(
"it's a seq");
7264 m_evt_handler->check_trailing_doc_token();
7266 m_evt_handler->begin_seq_val_flow();
7268 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7269 m_doc_empty =
false;
7270 _line_progressed(1);
// Flow map: same steps as for the sequence, but consuming the '{'.
7272 else if(rem.begins_with(
'{'))
7274 _c4dbgp(
"it's a map");
7275 m_evt_handler->check_trailing_doc_token();
7277 m_evt_handler->begin_map_val_flow();
7279 m_doc_empty =
false;
7280 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7281 _line_progressed(1);
7283 else if(_handle_bom())
7285 _c4dbgp(
"byte order mark");
// Scalar handling: no scalar may already be stored (SSCL must be clear).
7289 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(
SSCL));
7290 _maybe_skip_whitespace_tokens();
7291 csubstr s = m_evt_handler->m_curr->line_contents.rem;
// startindent is the indentation of the line; it is later passed to the
// plain-scalar filter.
7294 const size_t startindent = m_evt_handler->m_curr->line_contents.indentation;
7295 const char first = s.str[0];
7299 _c4dbgp(
"runk_json: scanning double-quoted scalar");
7300 m_evt_handler->check_trailing_doc_token();
7303 m_doc_empty =
false;
7304 sc = _scan_scalar_dquot();
7305 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
// No colon after the scalar: apply pending annotations and set it as the
// value.
7306 if(!_maybe_scan_following_colon())
7308 _c4dbgp(
"runk_json: set as val");
7309 _handle_annotations_before_blck_val_scalar();
7310 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
// Plain scalar: on success _scan_scalar_plain_unk() has filled sc.
7317 else if(_scan_scalar_plain_unk(&sc))
7319 _c4dbgp(
"runk_json: got a plain scalar");
7320 m_evt_handler->check_trailing_doc_token();
7323 m_doc_empty =
false;
7324 if(!_maybe_scan_following_colon())
7326 _c4dbgp(
"runk_json: set as val");
7327 _handle_annotations_before_blck_val_scalar();
7328 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
7329 m_evt_handler->set_val_scalar_plain(maybe_filtered);
// Handles the remainder of the current line while the parser is at the top
// level and the kind of the next node is not yet known: the asserts below
// require RTOP to be set and RNXT, RSEQ and RMAP to all be clear.
//
// Visible flow:
//  - skip a comment and advance past leading spaces/tabs;
//  - on a zero-indented line start, look for a byte order mark, a
//    document-begin token, a document-end token or a '%' directive;
//  - dispatch on the first character of the remainder: flow seq/map,
//    block seq ('-'), complex key ('?'), empty key (':'), anchor ('&'),
//    reference ('*'), tag ('!'), or a scalar (single-quoted, double-quoted,
//    literal '|', folded '>', plain).
// A reference or a quoted/plain scalar followed by a colon starts a new block
// map with that item as its first key; a block literal/folded scalar
// followed by a colon is reported as an error.
//
// NOTE(review): the embedded source line numbers in this listing jump, so
// braces, else-arms and some branch conditions are not visible here; the
// comments describe only the statements that are shown.
7346 template<
class EventHandler>
7347 void ParseEngine<EventHandler>::_handle_unk()
7349 _c4dbgpf(
"handle_unk indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
7351 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT|
RSEQ|
RMAP));
7352 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RTOP));
// Give the comment skipper a chance first, then take the unparsed remainder
// of the current line.
7354 _maybe_skip_comment();
7355 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// pos is the length of the leading run of spaces/tabs; when the remainder
// holds nothing else (npos) the whole remainder is consumed.
7359 size_t pos = rem.first_not_of(
" \t");
7362 pos = pos !=
npos ? pos : rem.len;
7363 _c4dbgpf(
"skipping {} whitespace characters", pos);
7364 _line_progressed(pos);
7365 rem = m_evt_handler->m_curr->line_contents.rem;
7368 _c4dbgpf(
"rem is now [{}]~~~{}~~~", rem.len, rem);
// Checked only for a line with zero indentation while at the line beginning.
// NOTE(review): the BOM / doc-token / directive statements that follow are
// presumably nested under this condition — braces are not visible; confirm.
7371 if(m_evt_handler->m_curr->line_contents.indentation == 0u && _at_line_begin())
7373 _c4dbgp(
"rtop: zero indent + at line begin");
7376 _c4dbgp(
"byte order mark!");
7377 rem = m_evt_handler->m_curr->line_contents.rem;
7381 const char first = rem.str[0];
7384 _c4dbgp(
"rtop: suspecting doc");
// Document-begin token: indentation is reset to 0, then the 3-character token
// and any whitespace after it are consumed.
7385 if(_is_doc_begin_token(rem))
7387 _c4dbgp(
"rtop: begin doc");
7390 _set_indentation(0);
7392 _line_progressed(3u);
7393 _maybe_skip_whitespace_tokens();
7397 else if(first ==
'.')
7399 _c4dbgp(
"rtop: suspecting doc end");
7400 if(_is_doc_end_token(rem))
7402 _c4dbgp(
"rtop: end doc");
7409 _c4dbgp(
"rtop: ignore end doc");
7412 _line_progressed(3u);
7413 _maybe_skip_whitespace_tokens();
7417 else if(first ==
'%')
7419 _c4dbgpf(
"directive: {}", rem);
// A directive is rejected when the document already has content
// (m_doc_empty is false) and the NDOC flag is not set.
7420 if(C4_UNLIKELY(!m_doc_empty && has_none(
NDOC)))
7421 _RYML_CB_ERR(m_evt_handler->m_stack.m_callbacks,
"need document footer before directives");
7422 _handle_directive(rem);
// Dispatch on the first character of the remainder.
7428 char first = rem.str[0];
7432 m_evt_handler->check_trailing_doc_token();
7434 m_doc_empty =
false;
7435 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col(rem);
// Common case: the flow seq is opened as a value. When pending annotations
// require a key container, a block map is opened first and the flow seq is
// opened as its key (see the second debug message below).
7436 if(C4_LIKELY( ! _annotations_require_key_container()))
7438 _c4dbgp(
"it's a seq, flow");
7439 _handle_annotations_before_blck_val_scalar();
7440 m_evt_handler->begin_seq_val_flow();
7442 _set_indentation(startindent);
7446 _c4dbgp(
"start new block map, set flow seq as key (!)");
7447 _handle_annotations_before_start_mapblck(m_evt_handler->m_curr->pos.line);
7448 m_evt_handler->begin_map_val_block();
7450 _handle_annotations_and_indentation_after_start_mapblck(startindent, m_evt_handler->m_curr->pos.line);
7451 m_evt_handler->begin_seq_key_flow();
7453 _set_indentation(startindent);
7455 _line_progressed(1);
// Flow map: mirrors the flow seq handling above.
7457 else if(first ==
'{')
7459 m_evt_handler->check_trailing_doc_token();
7461 m_doc_empty =
false;
7462 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col(rem);
7463 if(C4_LIKELY( ! _annotations_require_key_container()))
7465 _c4dbgp(
"it's a map, flow");
7466 _handle_annotations_before_blck_val_scalar();
7467 m_evt_handler->begin_map_val_flow();
7469 _set_indentation(startindent);
7473 _c4dbgp(
"start new block map, set flow map as key (!)");
7474 _handle_annotations_before_start_mapblck(m_evt_handler->m_curr->pos.line);
7475 m_evt_handler->begin_map_val_block();
7477 _handle_annotations_and_indentation_after_start_mapblck(startindent, m_evt_handler->m_curr->pos.line);
7478 m_evt_handler->begin_map_key_flow();
7480 _set_indentation(startindent);
7482 _line_progressed(1);
// Block sequence entry: open a block seq as the value and consume the '-'.
7484 else if(first ==
'-' && _is_blck_token(rem))
7486 _c4dbgp(
"it's a seq, block");
7487 m_evt_handler->check_trailing_doc_token();
7489 _handle_annotations_before_blck_val_scalar();
7490 m_evt_handler->begin_seq_val_block();
7492 m_doc_empty =
false;
7493 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7494 _line_progressed(1);
7495 _maybe_skip_whitespace_tokens();
// Complex key: open a block map, record that a '?' was seen and consume it.
7497 else if(first ==
'?' && _is_blck_token(rem))
7499 _c4dbgp(
"it's a map + this key is complex");
7500 m_evt_handler->check_trailing_doc_token();
7502 _handle_annotations_before_blck_val_scalar();
7503 m_evt_handler->begin_map_val_block();
7505 m_doc_empty =
false;
7506 m_was_inside_qmrk =
true;
7507 _save_indentation();
7508 _line_progressed(1);
7509 _maybe_skip_whitespace_tokens();
7511 else if(first ==
':' && _is_blck_token(rem))
7515 _c4dbgp(
"it's a map with an empty key");
7516 const size_t startindent = m_evt_handler->m_curr->line_contents.indentation;
7517 const size_t startline = m_evt_handler->m_curr->pos.line;
7518 m_evt_handler->check_trailing_doc_token();
7520 _handle_annotations_before_start_mapblck(startline);
7522 m_evt_handler->begin_map_val_block();
7523 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7524 m_evt_handler->set_key_scalar_plain_empty();
7525 m_doc_empty =
false;
7526 _set_indentation(startindent);
7530 _c4dbgp(
"actually prev val is a key!");
// Keep the current indref so it can be re-applied after the handler turns
// the previous value into the first key of a new block map.
7531 size_t prev_indentation = m_evt_handler->m_curr->indref;
7532 m_evt_handler->actually_val_is_first_key_of_new_map_block();
7533 _set_indentation(prev_indentation);
7536 _line_progressed(1);
7537 _maybe_skip_whitespace_tokens();
7539 else if(first ==
'&')
7541 csubstr anchor = _scan_anchor();
7542 _c4dbgpf(
"anchor! [{}]~~~{}~~~", anchor.len, anchor);
7543 m_evt_handler->check_trailing_doc_token();
// Queue the anchor in m_pending_anchors together with the column and line
// computed here; the same column value is then set as the indentation.
7545 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
7546 const size_t line = m_evt_handler->m_curr->pos.line;
7547 _add_annotation(&m_pending_anchors, anchor, indentation, line);
7548 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7549 m_doc_empty =
false;
7551 else if(first ==
'*')
7553 csubstr ref = _scan_ref_map();
7554 _c4dbgpf(
"ref! [{}]~~~{}~~~", ref.len, ref);
7555 m_evt_handler->check_trailing_doc_token();
7557 m_doc_empty =
false;
// No colon after the reference: it is the value. Otherwise (second debug
// message below) a new block map is started with the reference as its key.
7558 if(!_maybe_scan_following_colon())
7560 _c4dbgp(
"runk: set val ref");
7561 _handle_annotations_before_blck_val_scalar();
7562 m_evt_handler->set_val_ref(ref);
7566 _c4dbgp(
"runk: start new block map, set ref as key");
7567 const size_t startindent = m_evt_handler->m_curr->line_contents.indentation;
7568 const size_t startline = m_evt_handler->m_curr->pos.line;
7569 _handle_annotations_before_start_mapblck(startline);
7570 m_evt_handler->begin_map_val_block();
7571 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7572 m_evt_handler->set_key_ref(ref);
7573 _maybe_skip_whitespace_tokens();
7574 _set_indentation(startindent);
7578 else if(first ==
'!')
7580 csubstr tag = _scan_tag();
7581 _c4dbgpf(
"unk: val tag! [{}]~~~{}~~~", tag.len, tag);
// Queue the tag in m_pending_tags together with its column and line.
7584 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
7585 const size_t line = m_evt_handler->m_curr->pos.line;
7586 _add_annotation(&m_pending_tags, tag, indentation, line);
// Scalar handling: no scalar may already be stored (SSCL must be clear).
7590 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(
SSCL));
7591 _maybe_skip_whitespace_tokens();
7592 csubstr s = m_evt_handler->m_curr->line_contents.rem;
// Indentation and line at which the scalar starts; both are passed to the
// annotation helpers if the scalar turns out to be a key.
7595 const size_t startindent = m_evt_handler->m_curr->line_contents.indentation;
7596 const size_t startline = m_evt_handler->m_curr->pos.line;
7601 _c4dbgp(
"runk: scanning single-quoted scalar");
7602 m_evt_handler->check_trailing_doc_token();
7605 m_doc_empty =
false;
7606 sc = _scan_scalar_squot();
// The scalar is filtered as a value or as a key depending on whether a colon
// follows it.
7607 if(!_maybe_scan_following_colon())
7609 _c4dbgp(
"runk: set as val");
7610 _handle_annotations_before_blck_val_scalar();
7611 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
7612 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
7616 _c4dbgp(
"runk: start new block map, set scalar as key");
7617 _handle_annotations_before_start_mapblck(startline);
7619 m_evt_handler->begin_map_val_block();
7620 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7621 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
7622 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7623 _maybe_skip_whitespace_tokens();
7624 _set_indentation(startindent);
7628 else if(first ==
'"')
7630 _c4dbgp(
"runk: scanning double-quoted scalar");
7631 m_evt_handler->check_trailing_doc_token();
7634 m_doc_empty =
false;
7635 sc = _scan_scalar_dquot();
7636 if(!_maybe_scan_following_colon())
7638 _c4dbgp(
"runk: set as val");
7639 _handle_annotations_before_blck_val_scalar();
7640 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
7641 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
7645 _c4dbgp(
"runk: start new block map, set double-quoted scalar as key");
7646 _handle_annotations_before_start_mapblck(startline);
7647 m_evt_handler->begin_map_val_block();
7649 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7650 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
7651 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7652 _maybe_skip_whitespace_tokens();
7653 _set_indentation(startindent);
7657 else if(first ==
'|')
7659 _c4dbgp(
"runk: scanning block-literal scalar");
7660 m_evt_handler->check_trailing_doc_token();
7663 m_doc_empty =
false;
7665 _scan_block(&sb, startindent);
// A block literal is expected to be a value; the error below covers the
// case where it is used as a key without '?'.
7666 if(C4_LIKELY(!_maybe_scan_following_colon()))
7668 _c4dbgp(
"runk: set as val");
7669 _handle_annotations_before_blck_val_scalar();
7670 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
7671 m_evt_handler->set_val_scalar_literal(maybe_filtered);
7675 _c4err(
"block literal keys must be enclosed in '?'");
7678 else if(first ==
'>')
7680 _c4dbgp(
"runk: scanning block-folded scalar");
7681 m_evt_handler->check_trailing_doc_token();
7684 m_doc_empty =
false;
7686 _scan_block(&sb, startindent);
// Same rule as for block literals: a folded scalar used as a key without
// '?' is an error.
7687 if(C4_LIKELY(!_maybe_scan_following_colon()))
7689 _c4dbgp(
"runk: set as val");
7690 _handle_annotations_before_blck_val_scalar();
7691 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
7692 m_evt_handler->set_val_scalar_folded(maybe_filtered);
7696 _c4err(
"block folded keys must be enclosed in '?'");
// Plain scalar: on success _scan_scalar_plain_unk() has filled sc.
7699 else if(_scan_scalar_plain_unk(&sc))
7701 _c4dbgp(
"runk: got a plain scalar");
7702 m_evt_handler->check_trailing_doc_token();
7705 m_doc_empty =
false;
7706 if(!_maybe_scan_following_colon())
7708 _c4dbgp(
"runk: set as val");
7709 _handle_annotations_before_blck_val_scalar();
7710 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
7711 m_evt_handler->set_val_scalar_plain(maybe_filtered);
7715 _c4dbgp(
"runk: start new block map, set scalar as key");
7716 _handle_annotations_before_start_mapblck(startline);
7718 m_evt_handler->begin_map_val_block();
7719 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7720 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
7721 m_evt_handler->set_key_scalar_plain(maybe_filtered);
7722 _maybe_skip_whitespace_tokens();
7723 _set_indentation(startindent);
7733 template<
class EventHandler>
7734 C4_COLD
void ParseEngine<EventHandler>::_handle_usty()
7736 _c4dbgpf(
"handle_usty target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
7738 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(
BLCK|
FLOW));
7740 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
7743 _c4dbgp(
"usty[RNXT]: finishing!");
7748 _maybe_skip_comment();
7749 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
7753 size_t pos = rem.first_not_of(
" \t");
7756 pos = pos !=
npos ? pos : rem.len;
7757 _c4dbgpf(
"skipping indentation of {}", pos);
7758 _line_progressed(pos);
7759 rem = m_evt_handler->m_curr->line_contents.rem;
7762 _c4dbgpf(
"rem is now [{}]~~~{}~~~", rem.len, rem);
7765 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.len > 0);
7766 size_t startindent = m_evt_handler->m_curr->line_contents.indentation;
7767 char first = rem.str[0];
7770 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(
RMAP));
7771 _c4dbgpf(
"usty[RSEQ]: first='{}'", _c4prc(first));
7774 _c4dbgp(
"usty[RSEQ]: it's a flow seq. merging it");
7776 m_evt_handler->_push();
7778 _set_indentation(startindent);
7779 _line_progressed(1);
7780 _maybe_skip_whitespace_tokens();
7782 else if(first ==
'-' && _is_blck_token(rem))
7784 _c4dbgp(
"usty[RSEQ]: it's a block seq. merging it");
7786 m_evt_handler->_push();
7788 _set_indentation(startindent);
7789 _line_progressed(1);
7790 _maybe_skip_whitespace_tokens();
7794 _c4err(
"can only parse a seq into an existing seq");
7797 else if(has_any(
RMAP))
7799 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(
RSEQ));
7800 _c4dbgpf(
"usty[RMAP]: first='{}'", _c4prc(first));
7803 _c4dbgp(
"usty[RMAP]: it's a flow map. merging it");
7805 _handle_annotations_before_blck_val_scalar();
7806 m_evt_handler->_push();
7808 _set_indentation(startindent);
7809 _line_progressed(1);
7810 _maybe_skip_whitespace_tokens();
7812 else if(first ==
'?' && _is_blck_token(rem))
7814 _c4dbgp(
"usty[RMAP]: it's a block map + this key is complex");
7816 _handle_annotations_before_blck_val_scalar();
7817 m_evt_handler->_push();
7819 m_was_inside_qmrk =
true;
7820 _save_indentation();
7821 _line_progressed(1);
7822 _maybe_skip_whitespace_tokens();
7824 else if(first ==
':' && _is_blck_token(rem))
7826 _c4dbgp(
"usty[RMAP]: it's a map with an empty key");
7828 _handle_annotations_before_blck_val_scalar();
7829 m_evt_handler->_push();
7830 m_evt_handler->set_key_scalar_plain_empty();
7832 _save_indentation();
7833 _line_progressed(1);
7834 _maybe_skip_whitespace_tokens();
7836 else if(rem.begins_with(
'&'))
7838 csubstr anchor = _scan_anchor();
7839 _c4dbgpf(
"usty[RMAP]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
7840 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
7841 const size_t line = m_evt_handler->m_curr->pos.line;
7842 _add_annotation(&m_pending_anchors, anchor, indentation, line);
7843 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7845 else if(first ==
'*')
7847 csubstr ref = _scan_ref_map();
7848 _c4dbgpf(
"usty[RMAP]: ref! [{}]~~~{}~~~", ref.len, ref);
7849 if(!_maybe_scan_following_colon())
7851 _c4err(
"cannot read a VAL to a map");
7855 _c4dbgp(
"usty[RMAP]: start new block map, set ref as key");
7856 const size_t startline = m_evt_handler->m_curr->pos.line;
7858 _handle_annotations_before_start_mapblck(startline);
7859 m_evt_handler->_push();
7860 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7861 m_evt_handler->set_key_ref(ref);
7862 _maybe_skip_whitespace_tokens();
7863 _set_indentation(startindent);
7867 else if(first ==
'!')
7869 csubstr tag = _scan_tag();
7870 _c4dbgpf(
"usty[RMAP]: val tag! [{}]~~~{}~~~", tag.len, tag);
7873 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
7874 const size_t line = m_evt_handler->m_curr->pos.line;
7875 _add_annotation(&m_pending_tags, tag, indentation, line);
7877 else if(first ==
'[' || (first ==
'-' && _is_blck_token(rem)))
7879 _c4err(
"cannot parse a seq into an existing map");
7883 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(
SSCL));
7884 startindent = m_evt_handler->m_curr->line_contents.indentation;
7885 const size_t startline = m_evt_handler->m_curr->pos.line;
7887 _c4dbgpf(
"usty[RMAP]: maybe scalar. first='{}'", _c4prc(first));
7890 _c4dbgp(
"usty[RMAP]: scanning single-quoted scalar");
7891 sc = _scan_scalar_squot();
7892 if(!_maybe_scan_following_colon())
7894 _c4err(
"cannot read a VAL to a map");
7898 _c4dbgp(
"usty[RMAP]: start new block map, set scalar as key");
7900 _handle_annotations_before_start_mapblck(startline);
7901 m_evt_handler->_push();
7902 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7903 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
7904 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7905 _set_indentation(startindent);
7907 _maybe_skip_whitespace_tokens();
7910 else if(first ==
'"')
7912 _c4dbgp(
"usty[RMAP]: scanning double-quoted scalar");
7913 sc = _scan_scalar_dquot();
7914 if(!_maybe_scan_following_colon())
7916 _c4err(
"cannot read a VAL to a map");
7920 _c4dbgp(
"usty[RMAP]: start new block map, set double-quoted scalar as key");
7922 _handle_annotations_before_start_mapblck(startline);
7923 m_evt_handler->_push();
7924 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7925 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
7926 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7927 _set_indentation(startindent);
7929 _maybe_skip_whitespace_tokens();
7932 else if(first ==
'|')
7934 _c4err(
"block literal keys must be enclosed in '?'");
7936 else if(first ==
'>')
7938 _c4err(
"block literal keys must be enclosed in '?'");
7940 else if(_scan_scalar_plain_unk(&sc))
7942 _c4dbgp(
"usty[RMAP]: got a plain scalar");
7943 if(!_maybe_scan_following_colon())
7945 _c4err(
"cannot read a VAL to a map");
7949 _c4dbgp(
"usty[RMAP]: start new block map, set scalar as key");
7951 _handle_annotations_before_start_mapblck(startline);
7952 m_evt_handler->_push();
7953 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7954 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
7955 m_evt_handler->set_key_scalar_plain(maybe_filtered);
7956 _set_indentation(startindent);
7958 _maybe_skip_whitespace_tokens();
7969 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(
RSEQ));
7970 _c4dbgpf(
"usty[UNK]: first='{}'", _c4prc(first));
7973 _c4dbgp(
"usty[UNK]: it's a flow seq");
7975 _handle_annotations_before_blck_val_scalar();
7976 m_evt_handler->begin_seq_val_flow();
7978 _set_indentation(startindent);
7979 _line_progressed(1);
7980 _maybe_skip_whitespace_tokens();
7982 else if(first ==
'-' && _is_blck_token(rem))
7984 _c4dbgp(
"usty[UNK]: it's a block seq");
7986 _handle_annotations_before_blck_val_scalar();
7987 m_evt_handler->begin_seq_val_block();
7989 _set_indentation(startindent);
7990 _line_progressed(1);
7991 _maybe_skip_whitespace_tokens();
7993 else if(first ==
'{')
7995 _c4dbgp(
"usty[UNK]: it's a flow map");
7997 _handle_annotations_before_blck_val_scalar();
7998 m_evt_handler->begin_map_val_flow();
8000 _set_indentation(startindent);
8001 _line_progressed(1);
8002 _maybe_skip_whitespace_tokens();
8004 else if(first ==
'?' && _is_blck_token(rem))
8006 _c4dbgp(
"usty[UNK]: it's a map + this key is complex");
8008 _handle_annotations_before_blck_val_scalar();
8009 m_evt_handler->begin_map_val_block();
8011 m_was_inside_qmrk =
true;
8012 _save_indentation();
8013 _line_progressed(1);
8014 _maybe_skip_whitespace_tokens();
8016 else if(first ==
':' && _is_blck_token(rem))
8018 _c4dbgp(
"usty[UNK]: it's a map with an empty key");
8020 _handle_annotations_before_blck_val_scalar();
8021 m_evt_handler->begin_map_val_block();
8022 m_evt_handler->set_key_scalar_plain_empty();
8024 _save_indentation();
8025 _line_progressed(1);
8026 _maybe_skip_whitespace_tokens();
8028 else if(first ==
'&')
8030 csubstr anchor = _scan_anchor();
8031 _c4dbgpf(
"usty[UNK]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
8032 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8033 const size_t line = m_evt_handler->m_curr->pos.line;
8034 _add_annotation(&m_pending_anchors, anchor, indentation, line);
8035 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
8037 else if(first ==
'*')
8039 csubstr ref = _scan_ref_map();
8040 _c4dbgpf(
"usty[UNK]: ref! [{}]~~~{}~~~", ref.len, ref);
8041 if(!_maybe_scan_following_colon())
8043 _c4dbgp(
"usty[UNK]: set val ref");
8044 _handle_annotations_before_blck_val_scalar();
8045 m_evt_handler->set_val_ref(ref);
8049 _c4dbgp(
"usty[UNK]: start new block map, set ref as key");
8050 const size_t startline = m_evt_handler->m_curr->pos.line;
8052 _handle_annotations_before_start_mapblck(startline);
8053 m_evt_handler->begin_map_val_block();
8054 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8055 m_evt_handler->set_key_ref(ref);
8056 _maybe_skip_whitespace_tokens();
8057 _set_indentation(startindent);
8061 else if(first ==
'!')
8063 csubstr tag = _scan_tag();
8064 _c4dbgpf(
"usty[UNK]: val tag! [{}]~~~{}~~~", tag.len, tag);
8067 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8068 const size_t line = m_evt_handler->m_curr->pos.line;
8069 _add_annotation(&m_pending_tags, tag, indentation, line);
8073 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! has_any(
SSCL));
8074 startindent = m_evt_handler->m_curr->line_contents.indentation;
8075 const size_t startline = m_evt_handler->m_curr->pos.line;
8078 _c4dbgpf(
"usty[UNK]: maybe scalar. first='{}'", _c4prc(first));
8081 _c4dbgp(
"usty[UNK]: scanning single-quoted scalar");
8082 sc = _scan_scalar_squot();
8083 if(!_maybe_scan_following_colon())
8085 _c4dbgp(
"usty[UNK]: set as val");
8086 _handle_annotations_before_blck_val_scalar();
8087 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
8088 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
8093 _c4dbgp(
"usty[UNK]: start new block map, set scalar as key");
8095 _handle_annotations_before_start_mapblck(startline);
8096 m_evt_handler->begin_map_val_block();
8097 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8098 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
8099 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
8100 _set_indentation(startindent);
8102 _maybe_skip_whitespace_tokens();
8105 else if(first ==
'"')
8107 _c4dbgp(
"usty[UNK]: scanning double-quoted scalar");
8108 sc = _scan_scalar_dquot();
8109 if(!_maybe_scan_following_colon())
8111 _c4dbgp(
"usty[UNK]: set as val");
8112 _handle_annotations_before_blck_val_scalar();
8113 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
8114 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
8119 _c4dbgp(
"usty[UNK]: start new block map, set double-quoted scalar as key");
8121 _handle_annotations_before_start_mapblck(startline);
8122 m_evt_handler->begin_map_val_block();
8123 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8124 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
8125 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
8126 _set_indentation(startindent);
8128 _maybe_skip_whitespace_tokens();
8131 else if(first ==
'|')
8133 _c4dbgp(
"usty[UNK]: scanning block-literal scalar");
8135 _scan_block(&sb, startindent);
8136 _c4dbgp(
"usty[UNK]: set as val");
8137 _handle_annotations_before_blck_val_scalar();
8138 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
8139 m_evt_handler->set_val_scalar_literal(maybe_filtered);
8142 else if(first ==
'>')
8144 _c4dbgp(
"usty[UNK]: scanning block-folded scalar");
8146 _scan_block(&sb, startindent);
8147 _c4dbgp(
"usty[UNK]: set as val");
8148 _handle_annotations_before_blck_val_scalar();
8149 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
8150 m_evt_handler->set_val_scalar_folded(maybe_filtered);
8153 else if(_scan_scalar_plain_unk(&sc))
8155 _c4dbgp(
"usty[UNK]: got a plain scalar");
8156 if(!_maybe_scan_following_colon())
8158 _c4dbgp(
"usty[UNK]: set as val");
8159 _handle_annotations_before_blck_val_scalar();
8160 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
8161 m_evt_handler->set_val_scalar_plain(maybe_filtered);
8166 _c4dbgp(
"usty[UNK]: start new block map, set scalar as key");
8168 _handle_annotations_before_start_mapblck(startline);
8169 m_evt_handler->begin_map_val_block();
8170 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8171 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
8172 m_evt_handler->set_key_scalar_plain(maybe_filtered);
8173 _set_indentation(startindent);
8175 _maybe_skip_whitespace_tokens();
// Event-emitting parse driver, templated on the EventHandler type.
// NOTE(review): the signature line is missing from this listing; presumably
// this is parse_json_in_place_ev(csubstr filename, substr src) -- confirm.
// Visible flow: assert the handler stack is non-empty, call start_parse() and
// begin_stream(), process the input line by line while dispatching on the
// parser-state flags, then call finish_parse().
8189 template<
class EventHandler>
// the handler's state stack must hold at least one entry before parsing starts
8192 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
// pass filename.str, the address of _s_relocate_arena, and this engine to the handler
8196 m_evt_handler->start_parse(filename.str, &_s_relocate_arena,
this);
8197 m_evt_handler->begin_stream();
// outer loop: runs until _finished_file() reports true
8198 while( ! _finished_file())
// inner loop: runs until _finished_line() reports true
8201 while( ! _finished_line())
// the unconsumed remainder of the current line must not be empty here
8204 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! m_evt_handler->m_curr->line_contents.rem.empty());
// flag dispatch; only the RMAP and RUNK tests are visible in this listing
8209 else if(has_any(
RMAP))
8213 else if(has_any(
RUNK))
// presumably the fallback taken when no flag test matched -- confirm
8219 _c4err(
"internal error");
8222 if(_finished_file())
8227 m_evt_handler->finish_parse();
// Event-emitting parse driver, templated on the EventHandler type.
// NOTE(review): the signature line is missing from this listing; presumably
// this is parse_in_place_ev(csubstr filename, substr src) -- confirm.
// Visible flow: assert the handler stack is non-empty, call start_parse() and
// begin_stream(), process the input line by line while dispatching on the
// parser-state flags (BLCK, RUNK and USTY tests are visible), then call
// finish_parse().
8233 template<
class EventHandler>
// the handler's state stack must hold at least one entry before parsing starts
8236 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
// pass filename.str, the address of _s_relocate_arena, and this engine to the handler
8240 m_evt_handler->start_parse(filename.str, &_s_relocate_arena,
this);
8241 m_evt_handler->begin_stream();
// outer loop: runs until _finished_file() reports true
8242 while( ! _finished_file())
// inner loop: runs until _finished_line() reports true
8245 while( ! _finished_line())
// the unconsumed remainder of the current line must not be empty here
8248 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, ! m_evt_handler->m_curr->line_contents.rem.empty());
// NOTE(review): the branch guarding this assert is not visible in this listing
8259 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP));
// BLCK branch: the visible calls are _handle_seq_block() and, after asserting
// has_all(RMAP), _handle_map_block()
8268 else if(has_any(
BLCK))
8272 _handle_seq_block();
8276 _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP));
8277 _handle_map_block();
8280 else if(has_any(
RUNK))
8284 else if(has_any(
USTY))
// presumably the fallback taken when no flag test matched -- confirm
8290 _c4err(
"internal error");
8293 if(_finished_file())
8298 m_evt_handler->finish_parse();
// Remove the _c4dbgnextline() helper macro defined near the top of this header.
8307 #undef _c4dbgnextline
// Pop the compiler diagnostic state pushed near the top of this header,
// using the same MSVC / clang / GCC selection as the push.
8309 #if defined(_MSC_VER)
8310 # pragma warning(pop)
8311 #elif defined(__clang__)
8312 # pragma clang diagnostic pop
8313 #elif defined(__GNUC__)
8314 # pragma GCC diagnostic pop
Lightweight generic type-safe wrappers for converting individual values to/from strings.
This is the main driver of parsing logic: it scans the YAML or JSON source for tokens,...
FilterResult filter_scalar_plain(csubstr scalar, substr dst, size_t indentation)
filter a plain scalar
csubstr location_contents(Location const &loc) const
Get the string starting at a particular location, to the end of the parsed source buffer.
FilterResult filter_scalar_squoted(csubstr scalar, substr dst)
filter a single-quoted scalar
ParseEngine(EventHandler *evt_handler, ParserOptions opts={})
FilterResult filter_scalar_dquoted(csubstr scalar, substr dst)
filter a double-quoted scalar
void parse_json_in_place_ev(csubstr filename, substr src)
parse JSON in place, emitting events to the current handler
Location val_location(const char *val) const
Given a pointer to a buffer position, get the location.
FilterResult filter_scalar_plain_in_place(substr scalar, size_t cap, size_t indentation)
filter a plain scalar in place
FilterResult filter_scalar_squoted_in_place(substr scalar, size_t cap)
filter a single-quoted scalar in place
FilterResultExtending filter_scalar_dquoted_in_place(substr scalar, size_t cap)
filter a double-quoted scalar in place
void parse_in_place_ev(csubstr filename, substr src)
parse YAML in place, emitting events to the current handler
FilterResult filter_scalar_block_literal_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
filter a block-literal scalar in place
FilterResult filter_scalar_block_literal(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
filter a block-literal scalar
FilterResult filter_scalar_block_folded_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
filter a block-folded scalar in place
ParseEngine & operator=(ParseEngine &&) noexcept
FilterResult filter_scalar_block_folded(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
filter a block-folded scalar
#define RYML_ERRMSG_SIZE
size for the error message buffer
#define RYML_LOCATIONS_SMALL_THRESHOLD
threshold at which a location search will revert from linear to binary search.
bool atou(csubstr str, T *v) noexcept
Convert a trimmed string to an unsigned integral value.
bool read_hex(csubstr s, I *v) noexcept
read a hexadecimal integer from a string.
size_t to_chars(substr buf, uint8_t v) noexcept
substr decode_code_point(substr out, csubstr code_point)
decode the given code_point, writing into the output string in out.
@ npos
a null string position
@ RTOP
reading at top level
@ BLCK
reading in block mode
@ RSET
the (implicit) map being read is a !!set.
@ RNXT
read next val or keyval
@ FLOW
reading is inside explicit flow chars: [] or {}
@ RUNK
reading unknown state (when starting): must determine whether scalar, map or seq
@ RKEY
reading a scalar as key
@ RKCL
reading the key colon (ie the : after the key in the map)
@ NDOC
no document mode. a document has ended and another has not started yet.
@ QSCL
stored scalar was quoted
@ USTY
reading in unknown style mode - must determine FLOW or BLCK reading an implicit map nested in an expl...
@ QMRK
reading an explicit key (? key)
@ SSCL
there's a stored scalar
@ RVAL
reading a scalar as val
int ParserFlag_t
data type for ParserState_e
#define _RYML_WITHOUT_TAB_TOKENS(...)
#define _ryml_relocate(s)
#define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without)
#define _RYML_WITH_TAB_TOKENS(...)
Options to give to the parser to control its behavior.
utilities for UTF and Byte Order Mark