1#ifndef _C4_YML_PARSE_ENGINE_DEF_HPP_
2#define _C4_YML_PARSE_ENGINE_DEF_HPP_
4#ifndef _C4_YML_PARSE_ENGINE_HPP_
7#ifndef _C4_CHARCONV_HPP_
13#ifndef _C4_YML_FILTER_PROCESSOR_HPP_
16#ifndef _C4_YML_TAG_HPP_
19#ifndef _C4_YML_NODE_TYPE_HPP_
23#ifndef _C4_YML_DETAIL_DBGPRINT_HPP_
24#include "c4/yml/detail/dbgprint.hpp"
32 do { RYML_DEBUG_BREAK(); this->_err(RYML_LOC_HERE(), __VA_ARGS__); } while(0)
35 this->_err(RYML_LOC_HERE(), __VA_ARGS__)
37#define _c4assert(...) \
38 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, __VA_ARGS__, m_evt_handler->m_curr->pos)
41#if defined(RYML_WITH_TAB_TOKENS)
42#define _RYML_WITH_TAB_TOKENS(...) __VA_ARGS__
43#define _RYML_WITHOUT_TAB_TOKENS(...)
44#define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) with
46#define _RYML_WITH_TAB_TOKENS(...)
47#define _RYML_WITHOUT_TAB_TOKENS(...) __VA_ARGS__
48#define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) without
52#ifndef RYML_SAVE_TEST_YAML
53#define _RYML_SAVE_TEST_YAML(filename, src)
54#define _RYML_SAVE_TEST_JSON(filename, src)
56#define _RYML_SAVE_TEST_YAML(filename, src) c4::yml::ryml_save_test_yaml(filename, src)
57#define _RYML_SAVE_TEST_JSON(filename, src) c4::yml::ryml_save_test_json(filename, src)
68#define _c4dbgnextline() \
70 _c4dbgq("\n-----------"); \
71 _c4dbgt("handling line={}, offset={}B", \
72 m_evt_handler->m_curr->pos.line, \
73 m_evt_handler->m_curr->pos.offset); \
77C4_SUPPRESS_WARNING_MSVC_PUSH
78C4_SUPPRESS_WARNING_MSVC(4296)
79C4_SUPPRESS_WARNING_MSVC(4702)
80C4_SUPPRESS_WARNING_GCC_CLANG_PUSH
81C4_SUPPRESS_WARNING_GCC_CLANG(
"-Wtype-limits")
82C4_SUPPRESS_WARNING_GCC_CLANG(
"-Wformat-nonliteral")
83C4_SUPPRESS_WARNING_GCC_CLANG(
"-Wold-style-cast")
84#if defined(__GNUC__) && (__GNUC__ >= 6)
85C4_SUPPRESS_WARNING_GCC(
"-Wnull-dereference")
87#if defined(__GNUC__) && (__GNUC__ >= 7)
88C4_SUPPRESS_WARNING_GCC(
"-Wduplicated-branches")
98C4_HOT C4_ALWAYS_INLINE
void _set_first(
substr &C4_RESTRICT subject,
size_t pos)
noexcept
101 subject.len = pos !=
npos ? pos : subject.len;
103C4_HOT C4_ALWAYS_INLINE
void _set_first(
csubstr &C4_RESTRICT subject,
size_t pos)
noexcept
106 subject.len = pos !=
npos ? pos : subject.len;
108C4_HOT C4_ALWAYS_INLINE
void _set_first_strict(
substr &C4_RESTRICT subject,
size_t pos)
RYML_NOEXCEPT
111 _RYML_ASSERT_BASIC(pos !=
npos);
114C4_HOT C4_ALWAYS_INLINE
void _set_first_strict(
csubstr &C4_RESTRICT subject,
size_t pos)
RYML_NOEXCEPT
117 _RYML_ASSERT_BASIC(pos !=
npos);
123 _RYML_ASSERT_BASIC(s.len > 0);
124 _RYML_ASSERT_BASIC(s.str[0] ==
'-' || s.str[0] ==
':' || s.str[0] ==
'?');
128C4_HOT C4_ALWAYS_INLINE
bool _is_blck_seq_token_maybe(
csubstr const& C4_RESTRICT s)
noexcept
130 return ((s.len >= 1) && (s.str[0] ==
'-') && ((s.len == 1) || ((s.str[1] ==
' ')
_RYML_WITH_TAB_TOKENS( || (s.str[1] ==
'\t')))));
135 _RYML_ASSERT_BASIC(s.begins_with(
'-'));
136 _RYML_ASSERT_BASIC(!s.ends_with(
"\n"));
137 _RYML_ASSERT_BASIC(!s.ends_with(
"\r"));
138 return (s.len >= 3 && s.str[1] ==
'-' && s.str[2] ==
'-')
144 _RYML_ASSERT_BASIC(s.begins_with(
'.'));
145 _RYML_ASSERT_BASIC(!s.ends_with(
"\n"));
146 _RYML_ASSERT_BASIC(!s.ends_with(
"\r"));
147 return (s.len >= 3 && s.str[1] ==
'.' && s.str[2] ==
'.')
151inline bool _is_doc_token(
csubstr s)
noexcept
159 return (s.str[1] ==
'-' && s.str[2] ==
'-')
163 return (s.str[1] ==
'.' && s.str[2] ==
'.')
172 _RYML_ASSERT_BASIC(s.len);
176 return s.begins_with(
"false") ? 5u : 0u;
178 return s.begins_with(
"true") ? 4u : 0u;
180 return s.begins_with(
"null") ? 4u : 0u;
188C4_ALWAYS_INLINE
size_t _extend_from_combined_newline(
char nl,
char following)
190 return (nl ==
'\n' && following ==
'\r') || (nl ==
'\r' && following ==
'\n');
196 size_t nlpos = rem.
first_of(
"\r\n");
199 const char nl = rem[nlpos];
200 rem = rem.right_of(nlpos);
203 if(_extend_from_combined_newline(nl, rem.front()))
211inline size_t _count_following_newlines(
csubstr r,
size_t *C4_RESTRICT i)
213 _RYML_ASSERT_BASIC(r[*i] ==
'\n');
214 size_t numnl_following = 0;
216 for( ; *i < r.len; ++(*i))
218 if(r.str[*i] ==
'\n')
221 else if(r.str[*i] ==
' ' || r.str[*i] ==
'\t' || r.str[*i] ==
'\r')
226 return numnl_following;
231inline size_t _count_following_newlines(
csubstr r,
size_t *C4_RESTRICT i,
size_t indentation)
233 _RYML_ASSERT_BASIC(r[*i] ==
'\n');
234 size_t numnl_following = 0;
238 for( ; *i < r.len; ++(*i))
240 const char c = r.str[*i];
244 else if(c !=
' ' && c !=
'\t' && c !=
'\r')
250 for( ; *i < r.len; ++(*i))
257 size_t stop = *i + indentation;
258 for( ; *i < r.len; ++(*i))
261 if(c !=
' ' && c !=
'\r')
263 _RYML_ASSERT_BASIC(*i < stop);
268 else if(c !=
' ' && c !=
'\t' && c !=
'\r')
274 return numnl_following;
284template<
class EventHandler>
291template<
class EventHandler>
294 , m_evt_handler(evt_handler)
295 , m_pending_anchors()
297 , m_has_directives_yaml(false)
298 , m_has_directives(false)
301 , m_prev_val_end(
npos)
303 , m_newline_offsets()
304 , m_newline_offsets_size(0)
305 , m_newline_offsets_capacity(0)
307 _RYML_CHECK_BASIC(evt_handler);
310template<
class EventHandler>
312 : m_options(that.m_options)
313 , m_evt_handler(that.m_evt_handler)
314 , m_pending_anchors(that.m_pending_anchors)
315 , m_pending_tags(that.m_pending_tags)
316 , m_has_directives_yaml(that.m_has_directives_yaml)
317 , m_has_directives(that.m_has_directives)
318 , m_doc_empty(that.m_doc_empty)
320 , m_prev_val_end(
npos)
322 , m_newline_offsets(that.m_newline_offsets)
323 , m_newline_offsets_size(that.m_newline_offsets_size)
324 , m_newline_offsets_capacity(that.m_newline_offsets_capacity)
329template<
class EventHandler>
331 : m_options(that.m_options)
332 , m_evt_handler(that.m_evt_handler)
333 , m_pending_anchors(that.m_pending_anchors)
334 , m_pending_tags(that.m_pending_tags)
335 , m_has_directives_yaml(that.m_has_directives_yaml)
336 , m_has_directives(that.m_has_directives)
337 , m_doc_empty(that.m_doc_empty)
339 , m_prev_val_end(
npos)
341 , m_newline_offsets()
342 , m_newline_offsets_size()
343 , m_newline_offsets_capacity()
345 if(that.m_newline_offsets_capacity)
347 _resize_locations(that.m_newline_offsets_capacity);
348 _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity == that.m_newline_offsets_capacity);
349 memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size * sizeof(size_t));
350 m_newline_offsets_size = that.m_newline_offsets_size;
354template<
class EventHandler>
358 m_options = (that.m_options);
359 m_evt_handler = that.m_evt_handler;
360 m_pending_anchors = that.m_pending_anchors;
361 m_pending_tags = that.m_pending_tags;
362 m_has_directives_yaml = that.m_has_directives_yaml;
363 m_has_directives = that.m_has_directives;
364 m_doc_empty = that.m_doc_empty;
365 m_prev_colon = that.m_prev_colon;
366 m_prev_val_end = that.m_prev_val_end;
367 m_encoding = that.m_encoding;
368 m_newline_offsets = (that.m_newline_offsets);
369 m_newline_offsets_size = (that.m_newline_offsets_size);
370 m_newline_offsets_capacity = (that.m_newline_offsets_capacity);
375template<
class EventHandler>
381 m_options = (that.m_options);
382 m_evt_handler = that.m_evt_handler;
383 m_pending_anchors = that.m_pending_anchors;
384 m_pending_tags = that.m_pending_tags;
385 m_has_directives_yaml = that.m_has_directives_yaml;
386 m_has_directives = that.m_has_directives;
387 m_doc_empty = that.m_doc_empty;
388 m_prev_colon = that.m_prev_colon;
389 m_prev_val_end = that.m_prev_val_end;
390 m_encoding = that.m_encoding;
391 if(that.m_newline_offsets_capacity > m_newline_offsets_capacity)
392 _resize_locations(that.m_newline_offsets_capacity);
393 _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_capacity);
394 _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_size);
395 memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size *
sizeof(
size_t));
396 m_newline_offsets_size = that.m_newline_offsets_size;
401template<
class EventHandler>
402void ParseEngine<EventHandler>::_clr()
406 m_pending_anchors = {};
408 m_has_directives_yaml =
false;
409 m_has_directives =
false;
412 m_prev_val_end =
npos;
414 m_newline_offsets = {};
415 m_newline_offsets_size = {};
416 m_newline_offsets_capacity = {};
419template<
class EventHandler>
420void ParseEngine<EventHandler>::_free()
422 if(m_newline_offsets)
424 _RYML_CB_FREE(m_evt_handler->m_stack.m_callbacks, m_newline_offsets,
size_t, m_newline_offsets_capacity);
425 m_newline_offsets =
nullptr;
426 m_newline_offsets_size = 0u;
427 m_newline_offsets_capacity = 0u;
434template<
class EventHandler>
435void ParseEngine<EventHandler>::_reset()
437 m_pending_anchors = {};
439 m_has_directives_yaml =
false;
440 m_has_directives =
false;
443 m_prev_val_end =
npos;
447 if(m_options.locations())
449 _prepare_locations();
456template<
class EventHandler>
457void ParseEngine<EventHandler>::_relocate_arena(
csubstr prev_arena,
substr next_arena,
substr *other)
459 _c4dbgp(
"relocate to new arena");
460 const char *pb = prev_arena.str;
461 const char *pe = prev_arena.str + prev_arena.len;
462 #define _ryml_relocate(s) \
463 if((s).str >= pb && (s).str <= pe) \
465 (s).str = next_arena.str + ((s).str - pb); \
474 for(
size_t i = 0; i < m_pending_tags.num_entries; ++i)
479 for(
size_t i = 0; i < m_pending_anchors.num_entries; ++i)
486 for(
size_t i = 0, sz = tds.size(); i < sz; ++i)
493 TagCache &tch = m_evt_handler->tag_cache();
494 for(
id_type i = 0, sz = tch.m_entries.size(); i < sz; ++i)
504 #undef _ryml_relocate
508template<
class EventHandler>
511 csubstr prev = m_evt_handler->arena();
512 substr out = m_evt_handler->alloc_arena(len);
513 substr curr = m_evt_handler->arena();
514 if(curr.str != prev.str)
515 _relocate_arena(prev, curr, other);
524template<
class EventHandler>
525template<
class DumpFn>
528 ParserState const *
const C4_RESTRICT st = m_evt_handler->m_curr;
535 csubstr m_file = m_evt_handler->m_curr->pos.name;
538 _dbg_dump(std::forward<DumpFn>(dumpfn),
"{}:", m_file);
539 offs += m_file.len + 1;
541 _dbg_dump(std::forward<DumpFn>(dumpfn),
"{}:{}: ", st->pos.line, st->pos.col);
542 csubstr maybe_full_content = (contents.len < 80u ? contents : contents.first(80u));
544 _dbg_dump(std::forward<DumpFn>(dumpfn),
"{}{} (size={})\n",
escaped_scalar(maybe_full_content,
true), maybe_ellipsis, contents.len);
546 size_t firstcol = (size_t)(lc.rem.str - lc.full.str);
547 size_t lastcol = firstcol + lc.rem.len;
550 for(
size_t i = 0; i < offs + firstcol_adj; ++i)
551 std::forward<DumpFn>(dumpfn)(
" ");
552 std::forward<DumpFn>(dumpfn)(
"^");
553 for(
size_t i = 1, e = (len < 80u ? len : 80u); i < e; ++i)
554 std::forward<DumpFn>(dumpfn)(
"~");
555 _dbg_dump(std::forward<DumpFn>(dumpfn),
"{} (cols {}-{})\n", maybe_ellipsis, firstcol+1, lastcol+1);
559 std::forward<DumpFn>(dumpfn)(
"\n");
564 _dbg_dump(std::forward<DumpFn>(dumpfn),
"top state: {}\n", detail::_parser_flags_to_str(flagbuf_, m_evt_handler->m_curr->flags));
568template<
class EventHandler>
574 _dbg_printf(
"state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(buf, s.flags));
578template<
class EventHandler>
582 _print_state_stack(buf);
589template<
class EventHandler>
590template<
class ...Args>
591C4_NORETURN C4_NO_INLINE
void ParseEngine<EventHandler>::_err(
Location const& cpploc,
Location const& ymlloc,
const char* fmt, Args
const& ...args)
const
593 m_evt_handler->cancel_parse();
597template<
class EventHandler>
598template<
class ...Args>
599C4_NORETURN C4_NO_INLINE
void ParseEngine<EventHandler>::_err(
Location const& cpploc,
const char *fmt, Args
const& ...args)
const
601 m_evt_handler->cancel_parse();
602 err_parse(m_evt_handler->m_stack.m_callbacks,
ErrorDataParse{cpploc, m_evt_handler->m_curr->pos}, fmt, args...);
608template<
class EventHandler>
609template<
class ...Args>
614 _dbg_printf(fmt, args...);
616 _fmt_msg(_dbg_dumper);
623template<
class EventHandler>
624bool ParseEngine<EventHandler>::_finished_file()
const
626 bool ret = m_evt_handler->m_curr->pos.offset >= _buf().len;
629 _c4dbgp(
"finished file!!!");
634template<
class EventHandler>
635C4_HOT C4_ALWAYS_INLINE
bool ParseEngine<EventHandler>::_finished_line() const
637 return m_evt_handler->m_curr->line_contents.rem.empty();
643template<
class EventHandler>
644void ParseEngine<EventHandler>::_maybe_skip_whitespace_tokens()
646 if(m_evt_handler->m_curr->line_contents.rem.len && (m_evt_handler->m_curr->line_contents.rem.str[0] ==
' ' _RYML_WITH_TAB_TOKENS(|| m_evt_handler->m_curr->line_contents.rem.str[0] ==
'\t')))
650 pos = m_evt_handler->m_curr->line_contents.rem.len;
651 _c4dbgpf(
"skip {} whitespace characters", pos);
652 _line_progressed(pos);
656template<
class EventHandler>
657void ParseEngine<EventHandler>::_maybe_skipchars(
char c)
659 if(m_evt_handler->m_curr->line_contents.rem.len && m_evt_handler->m_curr->line_contents.rem.str[0] == c)
661 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(c);
663 pos = m_evt_handler->m_curr->line_contents.rem.len;
664 _c4dbgpf(
"skip {}x'{}'", pos, _c4prc(c));
665 _line_progressed(pos);
669template<
class EventHandler>
671void ParseEngine<EventHandler>::_skipchars(
const char (&chars)[N])
673 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.begins_with_any(chars), m_evt_handler->m_curr->pos);
674 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(chars);
676 pos = m_evt_handler->m_curr->line_contents.rem.len;
677 _c4dbgpf(
"skip {} characters", pos);
678 _line_progressed(pos);
681template<
class EventHandler>
682void ParseEngine<EventHandler>::_skip_comment()
684 LineContents const& C4_RESTRICT lc = m_evt_handler->m_curr->line_contents;
685 const size_t col = m_evt_handler->m_curr->pos.col - 1u;
686 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, lc.rem.begins_with(
'#'), m_evt_handler->m_curr->pos);
687 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, lc.rem.is_sub(lc.full), m_evt_handler->m_curr->pos);
688 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col >= 1, m_evt_handler->m_curr->pos);
689 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, col == ((
size_t)(lc.rem.str - lc.full.str)), m_evt_handler->m_curr->pos);
691 if(lc.rem.str != lc.full.str)
693 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, col > 0, m_evt_handler->m_curr->pos);
694 const char prev = lc.full.str[col - 1u];
695 if(C4_UNLIKELY(prev !=
' ' && prev !=
'\t'))
696 _c4err(
"comment not preceded by whitespace");
698 _c4dbgpf(
"comment was '{}'", m_evt_handler->m_curr->line_contents.rem);
699 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
702template<
class EventHandler>
703void ParseEngine<EventHandler>::_maybe_skip_comment_strict()
705 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(
" \t");
708 if(
'#' == m_evt_handler->m_curr->line_contents.rem[pos])
710 _line_progressed(pos);
716template<
class EventHandler>
717void ParseEngine<EventHandler>::_maybe_skip_comment()
719 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(
" \t");
722 if(
'#' == m_evt_handler->m_curr->line_contents.rem[pos])
724 _line_progressed(pos);
730 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
734template<
class EventHandler>
735bool ParseEngine<EventHandler>::_maybe_scan_following_colon() noexcept
737 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(
" \t");
740 if(
':' == m_evt_handler->m_curr->line_contents.rem[pos])
744 if(++pos < m_evt_handler->m_curr->line_contents.rem.len)
746 const char next = m_evt_handler->m_curr->line_contents.rem.str[pos];
752 _line_progressed(pos);
758 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
766template<
class EventHandler>
767csubstr ParseEngine<EventHandler>::_scan_anchor()
769 csubstr s = m_evt_handler->m_curr->line_contents.rem;
770 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'&'), m_evt_handler->m_curr->pos);
772 _line_progressed(1u + anchor.len);
773 _maybe_skipchars(
' ');
777template<
class EventHandler>
778csubstr ParseEngine<EventHandler>::_scan_ref_seq()
780 csubstr s = m_evt_handler->m_curr->line_contents.rem;
781 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'*'), m_evt_handler->m_curr->pos);
782 _set_first(s, s.first_of(
" ,]\t"));
783 _line_progressed(s.len);
787template<
class EventHandler>
788csubstr ParseEngine<EventHandler>::_scan_ref_map()
790 csubstr s = m_evt_handler->m_curr->line_contents.rem;
791 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'*'), m_evt_handler->m_curr->pos);
792 _set_first(s, s.first_of(
" ,}\t"));
793 _line_progressed(s.len);
797template<
class EventHandler>
798csubstr ParseEngine<EventHandler>::_scan_tag()
800 csubstr t = m_evt_handler->m_curr->line_contents.rem;
801 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, t.begins_with(
'!'), m_evt_handler->m_curr->pos);
802 if(!t.begins_with(
"!<"))
804 _c4dbgp(
"begins with '!'");
805 _set_first(t, t.first_of(
" ,]}\t"));
806 if(C4_UNLIKELY(t.first_of(
"[{") !=
npos))
808 _line_progressed(t.len);
809 if(m_options.resolve_tags_all() || (m_options.resolve_tags() &&
is_custom_tag(t)))
814 _c4dbgp(
"begins with '!<'");
815 size_t pos = t.find(
'>');
816 if(C4_UNLIKELY(pos ==
npos))
818 _set_first_strict(t, pos+1);
819 _line_progressed(t.len);
822 _maybe_skip_whitespace_tokens();
826template<
class EventHandler>
829 csubstr t = m_evt_handler->m_curr->line_contents.rem;
830 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, t.begins_with(
'!'), m_evt_handler->m_curr->pos);
831 if(!t.begins_with(
"!<"))
833 _c4dbgp(
"begins with '!'");
834 _set_first(t, t.first_of(
" ,\t"));
835 if(C4_UNLIKELY(t.first_of(
"[{") !=
npos))
837 _line_progressed(t.len);
839 if(m_options.resolve_tags_all() || (m_options.resolve_tags() &&
is_custom_tag(t)))
844 _c4dbgp(
"begins with '!<'");
845 size_t pos = t.find(
'>');
846 if(C4_UNLIKELY(pos ==
npos))
848 _set_first_strict(t, pos+1);
849 _line_progressed(t.len);
853 _maybe_skip_whitespace_tokens();
860template<
class EventHandler>
861bool ParseEngine<EventHandler>::_is_valid_start_scalar_plain_flow_check_block_token(
csubstr s)
863 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.len > 0, m_evt_handler->m_curr->pos);
864 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with_any(
":-"), m_evt_handler->m_curr->pos);
865 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.count(
'\n') == 0, m_evt_handler->m_curr->pos);
866 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.count(
'\r') == 0, m_evt_handler->m_curr->pos);
878 _c4dbgpf(
"not a scalar: found non-scalar token '{}{}'", s.str[0], s.str[1]);
888 _c4err(
"invalid token \":{}\"", _c4prc(s.str[1]));
903template<
class EventHandler>
904bool ParseEngine<EventHandler>::_is_valid_start_scalar_plain_flow_check_qmrk(
csubstr s)
906 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.len > 0, m_evt_handler->m_curr->pos);
907 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s[0] ==
'?', m_evt_handler->m_curr->pos);
908 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.count(
'\n') == 0, m_evt_handler->m_curr->pos);
909 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.count(
'\r') == 0, m_evt_handler->m_curr->pos);
916 _c4dbgpf(
"not a scalar: found non-scalar token '?{}'", _c4prc(s.str[1]));
922 _c4err(
"invalid token \"?{}\"", _c4prc(s.str[1]));
936template<
class EventHandler>
937bool ParseEngine<EventHandler>::_is_valid_start_scalar_plain_flow(
csubstr s)
939 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !s.empty(), m_evt_handler->m_curr->pos);
955 _c4dbgpf(
"not a scalar: found non-scalar token '{}'", _c4prc(s.str[0]));
960 _c4dbgpf(
"suspicious token='{}' len={}", _c4prc(s.str[0]), s.len);
961 return _is_valid_start_scalar_plain_flow_check_block_token(s);
963 _c4dbgpf(
"qmrk='{}' len={}", _c4prc(s.str[0]), s.len);
964 return _is_valid_start_scalar_plain_flow_check_qmrk(s);
972template<
class EventHandler>
973bool ParseEngine<EventHandler>::_scan_scalar_plain_handle_newline(
csubstr s,
size_t offs)
975 _c4dbgpf(
"newl[PLAIN]: found '\\n'. offs={} line={} sofar={}", offs, m_evt_handler->m_curr->pos.line, _prs(s.first(offs),
true));
978 _c4dbgp(
"newl[PLAIN]: buffer continues");
980 size_t next_line_indentation = next_line.
first_not_of(
' ');
981 if(next_line_indentation !=
npos)
983 _c4dbgpf(
"newl[PLAIN]: line={} indentation={} indref={}", m_evt_handler->m_curr->pos.line + 1, next_line_indentation, m_evt_handler->m_curr->indref);
984 next_line = next_line.first(next_line.first_of(
"\n\r"));
985 _c4dbgpf(
"newl[PLAIN]: has indentation. next_line={}", _prs(next_line));
986 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, next_line_indentation <= next_line.len, m_evt_handler->m_curr->pos);
987 if(C4_LIKELY(next_line_indentation >= m_evt_handler->m_curr->indref))
989 _c4dbgp(
"newl[PLAIN]: larger indentation");
990 next_line = next_line.sub(next_line_indentation);
992 else if(C4_UNLIKELY(next_line.len && next_line.triml(
' ').len))
994 _c4dbgp(
"newl[PLAIN]: err, smaller indentation");
995 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
998 if(m_evt_handler->m_curr->line_contents.indentation !=
npos)
999 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
1002 _c4dbgpf(
"newl[PLAIN]: next_line.len={}", next_line.len);
1005 size_t fno = next_line.first_not_of(
" \t");
1009 switch(next_line.str[fno])
1011 case ',':
case ']':
case '#':
1012 _c4dbgpf(
"newl[PLAIN]: found terminating character beginning next line: '{}'", next_line.str[fno]);
1015 _c4dbgp(
"newl[PLAIN]: found :");
1016 if(fno + 1 == next_line.len || _is_blck_token(next_line.sub(fno)))
1018 _c4dbgpf(
"newl[PLAIN]: found terminating character beginning next line: '{}'", next_line.str[fno]);
1027 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
1033template<
class EventHandler>
1034bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_flow(ScannedScalar *C4_RESTRICT sc)
1036 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RMAP), m_evt_handler->m_curr->pos);
1037 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RBLCK), m_evt_handler->m_curr->pos);
1038 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RSEQ|
RSEQIMAP), m_evt_handler->m_curr->pos);
1039 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RFLOW), m_evt_handler->m_curr->pos);
1040 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL), m_evt_handler->m_curr->pos);
1042 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->line_contents.rem.begins_with(
' '), m_evt_handler->m_curr->pos);
1043 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->line_contents.rem.begins_with(
'\n'), m_evt_handler->m_curr->pos);
1045 if(!m_evt_handler->m_curr->line_contents.rem.len || !_is_valid_start_scalar_plain_flow(m_evt_handler->m_curr->line_contents.rem))
1048 substr s = _buf().
sub(m_evt_handler->m_curr->pos.offset);
1049 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with(m_evt_handler->m_curr->line_contents.rem), m_evt_handler->m_curr->pos);
1051 _c4dbgp(
"scanning seqflow scalar...");
1053 bool needs_filter =
false;
1056 for( ; offs < s.len; ++offs, ++col)
1058 const char c = s.str[offs];
1063 _c4dbgpf(
"found terminating character at {}: '{}'", offs, c);
1064 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, offs > 0, m_evt_handler->m_curr->pos);
1067 _c4dbgpf(
"found '\\n' at col={}", col);
1068 if(!_scan_scalar_plain_handle_newline(s, offs))
1071 needs_filter =
true;
1075 needs_filter =
true;
1078 _c4dbgp(
"found suspicious ':'");
1079 if(s.len > offs + 1)
1081 char next = s.str[offs + 1];
1082 _c4dbgpf(
"next char is '{}'", _c4prc(next));
1088 next = after.str[0];
1089 _c4dbgpf(
"skip \\r to '{}'", _c4prc(next));
1093 if(next ==
' ' _RYML_WITH_TAB_TOKENS(|| next ==
'\t') || next ==
',' || next ==
'\n' || next ==
']')
1095 _c4dbgp(
"map starting!");
1100 _c4dbgp(
"':' nothing to see here");
1105 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.len == offs + 1, m_evt_handler->m_curr->pos);
1106 _line_progressed(col);
1107 _c4err(
"missing termination: '{}'", c);
1112 _c4dbgp(
"found suspicious '#'");
1113 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, offs > 0, m_evt_handler->m_curr->pos);
1114 char prev = s.str[offs - 1];
1117 _c4dbgpf(
"found terminating character at {}: '{}'", offs, c);
1125 _line_progressed(col);
1126 _c4err(
"invalid character: '{}'", c);
1129 _c4dbgpf(
"doc token character: '{}', offs={}", c, offs);
1130 if(offs == 0 && m_evt_handler->m_curr->at_line_beginning())
1132 _c4dbgp(
"at line beginning");
1133 if(s.len >= 3 && s.str[1] == c && s.str[2] == c)
1146 _line_progressed(col);
1147 _set_first(s, offs);
1149 sc->needs_filter = needs_filter;
1151 _c4prscalar(
"scanned plain scalar", sc->scalar,
true);
1156template<
class EventHandler>
1157bool ParseEngine<EventHandler>::_scan_scalar_plain_map_flow(ScannedScalar *C4_RESTRICT sc)
1159 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQ) || has_any(
RSEQIMAP), m_evt_handler->m_curr->pos);
1160 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RBLCK), m_evt_handler->m_curr->pos);
1161 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RMAP|
RSEQIMAP), m_evt_handler->m_curr->pos);
1162 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RFLOW), m_evt_handler->m_curr->pos);
1163 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RVAL|
QMRK), m_evt_handler->m_curr->pos);
1165 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->line_contents.rem.begins_with(
' '), m_evt_handler->m_curr->pos);
1166 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->line_contents.rem.begins_with(
'\n'), m_evt_handler->m_curr->pos);
1168 if(!m_evt_handler->m_curr->line_contents.rem.len || !_is_valid_start_scalar_plain_flow(m_evt_handler->m_curr->line_contents.rem))
1171 substr s = _buf().
sub(m_evt_handler->m_curr->pos.offset);
1172 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with(m_evt_handler->m_curr->line_contents.rem), m_evt_handler->m_curr->pos);
1174 _c4dbgp(
"scanning mapflow scalar...");
1176 bool needs_filter =
false;
1179 for( ; offs < s.len; ++offs, ++col)
1181 const char c = s.str[offs];
1186 _c4dbgpf(
"found terminating character at {}: '{}'", offs, c);
1187 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, offs > 0, m_evt_handler->m_curr->pos);
1190 _c4dbgpf(
"found '\\n' at col={}", col);
1191 if(!_scan_scalar_plain_handle_newline(s, offs))
1194 needs_filter =
true;
1198 needs_filter =
true;
1201 _c4dbgpf(
"found ':'", c);
1205 const char next = s.str[offs+1];
1206 _c4dbgpf(
"next='{}'", c);
1207 if(next ==
' ' || next ==
',' || next ==
'}' || next ==
'\n' || next ==
'\r' _RYML_WITH_TAB_TOKENS(|| next ==
'\t'))
1209 _c4dbgpf(
"found terminating character: '{}'", c);
1216 _line_progressed(col);
1217 _c4err(
"invalid character: '{}'", c);
1220 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RSEQIMAP), m_evt_handler->m_curr->pos);
1229 _line_progressed(col);
1232 sc->needs_filter = needs_filter;
1234 _c4prscalar(
"scanned plain scalar", sc->scalar,
true);
1236 return sc->scalar.len > 0u;
1239template<
class EventHandler>
1240bool ParseEngine<EventHandler>::_scan_scalar_seq_json(ScannedScalar *C4_RESTRICT sc)
1242 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RMAP), m_evt_handler->m_curr->pos);
1243 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RBLCK), m_evt_handler->m_curr->pos);
1244 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RSEQ), m_evt_handler->m_curr->pos);
1245 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RFLOW), m_evt_handler->m_curr->pos);
1247 substr s = m_evt_handler->m_curr->line_contents.rem;
1248 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !s.begins_with(
' '), m_evt_handler->m_curr->pos);
1249 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.len > 0, m_evt_handler->m_curr->pos);
1251 _c4dbgp(
"seq_json: scanning scalar...");
1258 _c4dbgp(
"seq_json: not a scalar.");
1263 const size_t len = _begins_with_special_json_scalar(s);
1266 char c = s.len > len ? s.str[len] :
',';
1267 if(c ==
',' || c ==
']' || c ==
' ' || c ==
'\n' || c ==
'\t' || c ==
'\r')
1269 sc->scalar = s.first(len);
1270 sc->needs_filter =
false;
1271 _c4dbgpf(
"seq_json: special scalar: '{}'", sc->scalar);
1272 _line_progressed(len);
1284 for( ; i < s.len; ++i)
1286 const char c = s.str[i];
1293 _c4dbgpf(
"seq_json: found terminating character: '{}'", c);
1302 if(C4_LIKELY(i > 0))
1304 _line_progressed(i);
1305 sc->scalar = s.first(i);
1306 sc->needs_filter =
false;
1307 _c4dbgpf(
"seq_json: scalar was {}", _prs(sc->scalar,
true));
1313template<
class EventHandler>
1314bool ParseEngine<EventHandler>::_scan_scalar_map_json(ScannedScalar *C4_RESTRICT sc)
1316 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQ), m_evt_handler->m_curr->pos);
1317 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RBLCK), m_evt_handler->m_curr->pos);
1318 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RMAP), m_evt_handler->m_curr->pos);
1319 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RFLOW), m_evt_handler->m_curr->pos);
1320 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RVAL), m_evt_handler->m_curr->pos);
1322 substr s = m_evt_handler->m_curr->line_contents.rem;
1323 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !s.begins_with(
' '), m_evt_handler->m_curr->pos);
1324 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.len > 0, m_evt_handler->m_curr->pos);
1326 _c4dbgp(
"scanning scalar...");
1329 const size_t len = _begins_with_special_json_scalar(s);
1332 char c = s.len > len ? s.str[len] :
',';
1333 _c4dbgpf(
"begins with special scalar: {} next='{}'", s.first(len), _c4prc(c));
1334 if(c ==
',' || c ==
'}' || c ==
' ' || c ==
'\n' || c ==
'\t' || c ==
'\r')
1336 sc->scalar = s.first(len);
1337 sc->needs_filter =
false;
1338 _c4dbgpf(
"special json scalar: '{}'", _prs(sc->scalar));
1339 _line_progressed(len);
1351 for( ; i < s.len; ++i)
1353 const char c = s.str[i];
1360 _c4dbgpf(
"found terminating character: '{}'", c);
1369 if(C4_LIKELY(i > 0))
1371 _line_progressed(i);
1372 sc->scalar = s.first(i);
1373 sc->needs_filter =
false;
1374 _c4dbgpf(
"scalar was {}", _prs(sc->scalar));
1381template<
class EventHandler>
1382bool ParseEngine<EventHandler>::_is_doc_begin(
csubstr s)
1384 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s[0] ==
'-', m_evt_handler->m_curr->pos);
1385 return (m_evt_handler->m_curr->line_contents.indentation == 0u && m_evt_handler->m_curr->at_line_beginning() && _is_doc_begin_token(s));
1388template<
class EventHandler>
1389bool ParseEngine<EventHandler>::_is_doc_end(
csubstr s)
1391 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s[0] ==
'.', m_evt_handler->m_curr->pos);
1392 return (m_evt_handler->m_curr->line_contents.indentation == 0u && m_evt_handler->m_curr->at_line_beginning() && _is_doc_end_token(s));
// Scans a plain (unquoted) scalar in block context, possibly spanning several lines.
// On the visible path the scanned range is written to sc->scalar, and sc->needs_filter
// is set once the scan has moved on to a following line.
// @param sc           receives the scanned scalar and its needs_filter flag
// @param indentation  reference indentation: a following line indented less than this
//                     ends the scalar
// NOTE(review): the return statements are not visible in this listing; confirm what
// the bool result signals before relying on it.
1395template<
class EventHandler>
1396bool ParseEngine<EventHandler>::_scan_scalar_plain_blck(ScannedScalar *C4_RESTRICT sc,
size_t indentation)
// Only valid outside flow containers and outside implicit maps in sequences.
1398 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RFLOW), m_evt_handler->m_curr->pos);
1399 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQIMAP), m_evt_handler->m_curr->pos);
1400 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RBLCK|
RUNK|
USTY), m_evt_handler->m_curr->pos);
// The remainder of the current line must be non-empty and must not start with a space.
1402 substr s = m_evt_handler->m_curr->line_contents.rem;
1403 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !s.begins_with(
' '), m_evt_handler->m_curr->pos);
1404 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.len > 0, m_evt_handler->m_curr->pos);
// The line may begin with a block token or a document marker instead of a scalar;
// these cases are checked before the scan starts.
1409 if(_is_blck_token(s))
1413 else if(_is_doc_begin(s))
1415 _c4dbgp(
"token is doc start");
1421 if(_is_blck_token(s))
1436 _c4dbgp(
"token is doc end");
1442 _c4dbgpf(
"plain scalar! indentation={}", indentation);
// Remember where the scalar starts: the offset delimits the final buffer range, and
// the line number is used to reject key colons found on later lines.
1444 const size_t start_offset = m_evt_handler->m_curr->pos.offset;
1445 const size_t start_line = m_evt_handler->m_curr->pos.line;
1447 bool needs_filter =
false;
1450 _c4dbgpf(
"plain scalar line: {}", _prs(s));
// Walk the line looking for characters that may terminate the scalar.
1451 for(
size_t i = 0; i < s.len; ++i)
1453 const char curr = s.str[i];
1458 _c4dbgpf(
"[{}]: got suspicious ':'", i);
1462 _c4dbgpf(
"followed by '{}'", i+1 == s.len ?
csubstr(
"\\n") : _c4prc(s.str[i+1]));
1463 _line_progressed(i);
// A terminating colon is only accepted on the line where the scalar started;
// otherwise the scalar would be a multiline implicit key, which is an error.
1465 if(C4_LIKELY(m_evt_handler->m_curr->pos.line == start_line))
1467 _c4dbgp(
"start line. scalar ends here");
1472 _c4err(
"multiline scalars cannot be used as implicit keys");
// Skip over a run of consecutive colons.
1478 while(j + 1 < s.len && s.str[j+1] ==
':')
1480 _c4dbgp(
"skip colon");
1483 i = j > i ? j-1 : i;
1484 _c4dbgp(
"nothing to see here");
1488 _c4dbgp(
"got suspicious '#'");
// A hash starts a comment only at the start of the line or after a space or tab.
1489 if(!i || (s.str[i-1] ==
' ' || s.str[i-1] ==
'\t'))
1491 _c4dbgp(
"comment! scalar ends here");
1492 _line_progressed(i);
1497 _c4dbgp(
"nothing to see here");
// Nothing ended the scalar on this line: consume the line and inspect the next one.
1502 _line_progressed(s.len);
1503 csubstr next_peeked = _peek_next_line(m_evt_handler->m_curr->pos.offset);
1504 next_peeked = next_peeked.trimr(
"\n\r");
// Indentation of the peeked line, measured as the index of its first non-space char.
1505 const size_t next_indentation = next_peeked.first_not_of(
' ');
1506 _c4dbgpf(
"indentation curr={} next={}", indentation, next_indentation);
1507 if(next_indentation < indentation)
1509 _c4dbgp(
"smaller indentation! scalar ended");
// An unindented, non-empty next line may be a document begin/end marker.
1512 else if(next_indentation == 0 && next_peeked.len > 0)
1514 const char first = next_peeked.str[0];
1518 _c4dbgpf(
"doc begin? peeked={}", _prs(next_peeked,
size_t(3)));
1519 if(_is_doc_begin_token(next_peeked))
1521 _c4dbgp(
"doc begin! scalar ended");
1526 _c4dbgpf(
"doc end? peeked={}", _prs(next_peeked,
size_t(3)));
1527 if(_is_doc_end_token(next_peeked))
1529 _c4dbgp(
"doc end! scalar ended");
1536 _c4dbgp(
"next line!");
1537 if(!_finished_file())
1539 _c4dbgp(
"next line!");
1545 _c4dbgp(
"file finished!");
// Continue scanning on the following line; a scalar spanning lines needs filtering.
1548 s = m_evt_handler->m_curr->line_contents.rem;
1549 needs_filter =
true;
// The scalar is the buffer range scanned so far, minus trailing whitespace and newlines.
1554 sc->scalar = _buf().range(start_offset, m_evt_handler->m_curr->pos.offset).trimr(
" \n\r\t");
1555 sc->needs_filter = needs_filter;
1557 _c4dbgpf(
"scalar was {}", _prs(sc->scalar));
// Scans a plain scalar that is a value of a block sequence.
// Asserts the expected parser flags (RSEQ, RBLCK and RVAL set; RMAP, RFLOW and
// RSEQIMAP clear) and delegates to _scan_scalar_plain_blck(), using the current
// indref plus one as the reference indentation.
// @param sc  receives the scanned scalar
// @return    the result of _scan_scalar_plain_blck()
1562template<
class EventHandler>
1563C4_ALWAYS_INLINE
bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_blck(ScannedScalar *C4_RESTRICT sc)
1565 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RMAP), m_evt_handler->m_curr->pos);
1566 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RFLOW), m_evt_handler->m_curr->pos);
1567 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQIMAP), m_evt_handler->m_curr->pos);
1568 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RSEQ), m_evt_handler->m_curr->pos);
1569 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RBLCK), m_evt_handler->m_curr->pos);
1570 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL), m_evt_handler->m_curr->pos);
1571 return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref + 1u);
// Scans a plain scalar that is a key or value of a block map.
// Asserts the expected parser flags (RMAP and RBLCK set; RSEQ and RFLOW clear; at
// least one of RKEY, RVAL or QMRK set) and delegates to _scan_scalar_plain_blck(),
// using the current indref plus one as the reference indentation.
// @param sc  receives the scanned scalar
// @return    the result of _scan_scalar_plain_blck()
1574template<
class EventHandler>
1575C4_ALWAYS_INLINE
bool ParseEngine<EventHandler>::_scan_scalar_plain_map_blck(ScannedScalar *C4_RESTRICT sc)
1577 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQ), m_evt_handler->m_curr->pos);
1578 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RFLOW), m_evt_handler->m_curr->pos);
1579 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RMAP), m_evt_handler->m_curr->pos);
1580 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RBLCK), m_evt_handler->m_curr->pos);
1581 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RVAL|
QMRK), m_evt_handler->m_curr->pos);
1582 return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref + 1u);
// Scans a plain scalar while the container type is still unknown (RUNK or USTY set,
// asserted). Unlike the seq/map variants, the current indref is passed unchanged as
// the reference indentation to _scan_scalar_plain_blck().
// @param sc  receives the scanned scalar
// @return    the result of _scan_scalar_plain_blck()
1585template<
class EventHandler>
1586C4_ALWAYS_INLINE
bool ParseEngine<EventHandler>::_scan_scalar_plain_unk(ScannedScalar *C4_RESTRICT sc)
1588 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RUNK|
USTY), m_evt_handler->m_curr->pos);
1589 return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref);
// Peeks at the line that follows buffer offset `pos`. The method is const, so the
// parser position is not advanced.
// @param pos  buffer offset to start from; npos selects the current parse offset
// NOTE(review): the declarations of rem/nlpos, the branch guards and the return
// statements are not visible in this listing; the debug messages suggest the peeked
// line is returned, or an empty result when there is none - confirm.
1595template<
class EventHandler>
1596substr ParseEngine<EventHandler>::_peek_next_line(
size_t pos)
const
1600 pos = pos ==
npos ? m_evt_handler->m_curr->pos.offset : pos;
// An offset at or past the end of the buffer has no following line.
1601 if(pos >= _buf().len)
// Move from `pos` to the start of the next line.
1605 rem = _from_next_line(_buf().sub(pos));
// Find where the peeked line ends, covering a two-character newline sequence via
// _extend_from_combined_newline(), and cut rem at that position.
// NOTE(review): rem[nlpos+1] needs nlpos+1 < rem.len; the guard is presumably on a
// line not visible here - confirm.
1610 nlpos = rem.first_of(
"\r\n");
1612 nlpos += _extend_from_combined_newline(rem[nlpos], rem[nlpos+1]);
1613 rem = rem.left_of(nlpos,
true);
1615 _c4dbgpf(
"peek next line @ {}: (len={})'{}'", pos, rem.len, rem.trimr(
"\r\n"));
1619 _c4dbgpf(
"peek next line @ {}: (len=0)''", pos);
// Loads the line at the current parse offset into the current state line_contents.
// When the offset is still inside the buffer the line is read from the buffer at that
// offset; the second call (presumably the else branch) resets line_contents with an
// empty substring taken from the end of the buffer.
1625template<
class EventHandler>
1626void ParseEngine<EventHandler>::_scan_line()
1628 if(C4_LIKELY(m_evt_handler->m_curr->pos.offset < _buf().len))
1629 m_evt_handler->m_curr->line_contents.reset_with_next_line(_buf(), m_evt_handler->m_curr->pos.offset);
1631 m_evt_handler->m_curr->line_contents.reset_with_next_line(_buf().last(0), 0);
// Advances the parse position within the current line by `ahead` characters:
// both the buffer offset and the column move forward, and the same number of
// characters is dropped from the front of line_contents.rem.
// @param ahead  number of characters consumed on the current line
1634template<
class EventHandler>
1635void ParseEngine<EventHandler>::_line_progressed(
size_t ahead)
1637 _c4dbgpf(
"line[{}] ({} cols) progressed by {}: col {}-->{} offset {}-->{}",
1638 m_evt_handler->m_curr->pos.line,
1639 m_evt_handler->m_curr->line_contents.full.len,
1640 ahead, m_evt_handler->m_curr->pos.col,
1641 m_evt_handler->m_curr->pos.col+ahead,
1642 m_evt_handler->m_curr->pos.offset,
1643 m_evt_handler->m_curr->pos.offset+ahead);
1644 m_evt_handler->m_curr->pos.offset += ahead;
1645 m_evt_handler->m_curr->pos.col += ahead;
// Columns are 1-based, so the furthest valid column is one past the line column count.
1646 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col <= m_evt_handler->m_curr->line_contents.num_cols+1, m_evt_handler->m_curr->pos);
1647 m_evt_handler->m_curr->line_contents.rem = m_evt_handler->m_curr->line_contents.rem.sub(ahead);
// Moves the parse position from the end of the current line to the start of the next:
// the offset skips the part of the full line that is not counted in num_cols
// (presumably the newline characters), the line number is incremented and the column
// is reset to 1.
// Precondition (asserted): the whole line was consumed, i.e. col == num_cols + 1.
1650template<
class EventHandler>
1651void ParseEngine<EventHandler>::_line_ended()
1653 _c4dbgpf(
"line[{}] ({} cols) ended! offset {}-->{} / col {}-->{}",
1654 m_evt_handler->m_curr->pos.line,
1655 m_evt_handler->m_curr->line_contents.full.len,
1656 m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset + m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols,
1657 m_evt_handler->m_curr->pos.col, 1);
1658 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col == m_evt_handler->m_curr->line_contents.num_cols + 1, m_evt_handler->m_curr->pos);
1659 m_evt_handler->m_curr->pos.offset += m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols;
1660 ++m_evt_handler->m_curr->pos.line;
1661 m_evt_handler->m_curr->pos.col = 1;
// Reverts the position change made by _line_ended(): the offset moves back by
// full.len - num_cols, the line number is decremented and the column is placed one
// past the last column of the line. line_contents.rem is then set to an empty
// substring located at the restored offset.
// Preconditions (asserted): col == 1, line > 0, and the offset is large enough to
// subtract the delta without wrapping.
1664template<
class EventHandler>
1665void ParseEngine<EventHandler>::_line_ended_undo()
1667 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col == 1u, m_evt_handler->m_curr->pos);
1668 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.line > 0u, m_evt_handler->m_curr->pos);
1669 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.offset >= m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols, m_evt_handler->m_curr->pos);
1670 const size_t delta = m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols;
1671 _c4dbgpf(
"line[{}] undo ended! line {}-->{}, offset {}-->{}", m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->pos.line - 1, m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset - delta);
1672 m_evt_handler->m_curr->pos.offset -= delta;
1673 --m_evt_handler->m_curr->pos.line;
1674 m_evt_handler->m_curr->pos.col = m_evt_handler->m_curr->line_contents.num_cols + 1u;
1677 m_evt_handler->m_curr->line_contents.rem = _buf().sub(m_evt_handler->m_curr->pos.offset, 0);
// Stores `indentation` as the reference indentation (indref) of the current state.
// @param indentation  the new reference indentation
1682template<
class EventHandler>
1683void ParseEngine<EventHandler>::_set_indentation(
size_t indentation)
noexcept
1685 m_evt_handler->m_curr->indref = indentation;
1686 _c4dbgpf(
"state[{}]: saving indentation: {}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
// Stores the current column of the line being parsed as the reference indentation
// (indref) of the current state.
// Precondition (asserted): line_contents.rem is a sub-range of line_contents.full.
1689template<
class EventHandler>
1690void ParseEngine<EventHandler>::_save_indentation()
1692 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.is_sub(m_evt_handler->m_curr->line_contents.full), m_evt_handler->m_curr->pos);
1693 m_evt_handler->m_curr->indref = m_evt_handler->m_curr->line_contents.current_col();
1694 _c4dbgpf(
"state[{}]: saving indentation: {}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
// Records in m_prev_val_end the line number at which the latest value of a flow
// sequence ended.
1697template<
class EventHandler>
1698void ParseEngine<EventHandler>::_mark_seqflow_val_end() noexcept
1700 _c4dbgpf(
"SEQFLOW. mark val end at line={}", m_evt_handler->m_curr->pos.line);
1701 m_prev_val_end = m_evt_handler->m_curr->pos.line;
// Handles a flow container that turned out to be followed by a colon, i.e. it is the
// first key of a new block map. The event handler is told to reinterpret the value as
// that key, the reference indentation is set to `orig_indent`, and following
// whitespace tokens are skipped.
// @param orig_indent  indentation to use as the reference for the new block map
1707template<
class EventHandler>
1708void ParseEngine<EventHandler>::_flow_container_was_a_key(
size_t orig_indent)
1710 _c4dbgpf(
"flow container is followed by colon! orig_indent={}", orig_indent);
1711 m_evt_handler->actually_val_is_first_key_of_new_map_block();
1713 _set_indentation(orig_indent);
1714 _maybe_skip_whitespace_tokens();
// Shared epilogue run after a flow map or flow sequence has been closed (it is called
// by _end_map_flow() and _end_seq_flow()). Depending on the parser flags, the closed
// container is treated as a block-map key, or the end of a flow-sequence value is
// recorded.
// @param orig_indent  indentation captured by the caller before closing the container;
//                     forwarded to _flow_container_was_a_key()
// @param multiline    whether the container spanned more than one line; a multiline
//                     container used as a key is rejected with an error
// NOTE(review): the condition guarding the first branch is not visible in this listing.
1717template<
class EventHandler>
1718void ParseEngine<EventHandler>::_end_flow_container(
size_t orig_indent,
bool multiline)
// First case: the container is a plain block-map key, so a colon must follow it.
1724 _c4dbgp(
"flow container: end as vanilla block map key!");
1725 if(C4_UNLIKELY(multiline))
1726 _c4err(
"multiline key is invalid");
1727 if(C4_UNLIKELY(!_maybe_scan_following_colon()))
1728 _c4err(
"could not find ':' colon after key");
1729 _maybe_skip_whitespace_tokens();
// Second case: the parser has left flow context entirely.
1732 else if(has_none(
RFLOW))
1734 _c4dbgp(
"end_flow_container: now not in flow!");
// A colon following the container means it was actually the key of a new map.
1735 if(has_any(
RUNK|
RSEQ|
RKCL) && _maybe_scan_following_colon())
1737 if(C4_UNLIKELY(multiline))
1738 _c4err(
"multiline key is invalid");
1739 _flow_container_was_a_key(orig_indent);
1743 _c4dbgp(
"end_flow_container: end map as key!");
// Third case: still inside an enclosing flow sequence; remember where this value ended.
1746 else if(has_any(
RSEQ))
1748 _c4dbgp(
"end_flow_container: now in a flow seq");
1749 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RFLOW), m_evt_handler->m_curr->pos);
1750 _mark_seqflow_val_end();
// Closes the current flow map. The map counts as multiline when the parent state
// started on an earlier line than the current position. The current indref is
// captured before the handler is notified, then passed on to _end_flow_container().
// The handler is told the map is multiline only when the detect_flow_ml option is on.
1754template<
class EventHandler>
1755void ParseEngine<EventHandler>::_end_map_flow()
1757 bool multiline = m_evt_handler->m_parent->pos.line < m_evt_handler->m_curr->pos.line;
1758 size_t orig_indent = m_evt_handler->m_curr->indref;
1759 _c4dbgpf(
"mapflow: end, multiline={}", multiline);
1760 m_evt_handler->end_map_flow(multiline && m_options.detect_flow_ml());
1761 _end_flow_container(orig_indent, multiline);
// Closes the current flow sequence. The sequence counts as multiline when the parent
// state started on an earlier line than the current position. The current indref is
// captured before the handler is notified, then passed on to _end_flow_container().
// The handler is told the sequence is multiline only when the detect_flow_ml option is on.
1764template<
class EventHandler>
1765void ParseEngine<EventHandler>::_end_seq_flow()
1767 bool multiline = m_evt_handler->m_parent->pos.line < m_evt_handler->m_curr->pos.line;
1768 size_t orig_indent = m_evt_handler->m_curr->indref;
1769 _c4dbgpf(
"seqflow: end, multiline={}", multiline);
1770 m_evt_handler->end_seq_flow(multiline && m_options.detect_flow_ml());
1771 _end_flow_container(orig_indent, multiline);
// Closes the current block map. Before notifying the handler, a still-missing value
// (or, when QMRK is set, a missing key and value) is filled in with an empty plain
// scalar, after applying any pending annotations.
// NOTE(review): the condition guarding the first branch is not visible in this listing.
1774template<
class EventHandler>
1775void ParseEngine<EventHandler>::_end_map_blck()
1777 _c4dbgp(
"mapblck: end");
// Case: a key was read but its value is missing.
1780 _c4dbgp(
"mapblck: set missing val");
1781 _handle_annotations_before_blck_val_scalar();
1782 m_evt_handler->set_val_scalar_plain_empty();
// Case: an explicit-key marker was seen, with neither key nor value following it.
1784 else if(has_any(
QMRK))
1786 _c4dbgp(
"mapblck: set missing keyval");
1787 _handle_annotations_before_blck_key_scalar();
1788 m_evt_handler->set_key_scalar_plain_empty();
1789 _handle_annotations_before_blck_val_scalar();
1790 m_evt_handler->set_val_scalar_plain_empty();
1792 m_evt_handler->end_map_block();
// Closes the current block sequence. A still-missing value is filled in with an empty
// plain scalar (after applying pending annotations) before the handler is notified.
// NOTE(review): the condition guarding the missing-value branch is not visible in this
// listing.
1795template<
class EventHandler>
1796void ParseEngine<EventHandler>::_end_seq_blck()
1800 _c4dbgp(
"seqblck: set missing val");
1801 _handle_annotations_before_blck_val_scalar();
1802 m_evt_handler->set_val_scalar_plain_empty();
1804 m_evt_handler->end_seq_block();
// Ends the current map (RMAP asserted). The visible path asserts that the state is not
// in flow context and has USTY set, and then pops the handler state.
// NOTE(review): the branches between the first assertion and these checks are not
// visible in this listing; presumably they dispatch to the flow/block end functions.
1807template<
class EventHandler>
1808void ParseEngine<EventHandler>::_end2_map()
1810 _c4dbgp(
"map: end");
1811 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RMAP), m_evt_handler->m_curr->pos);
1818 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RFLOW), m_evt_handler->m_curr->pos);
1819 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
USTY), m_evt_handler->m_curr->pos);
1820 m_evt_handler->_pop();
// Ends the current sequence (RSEQ asserted). The visible path asserts that the state is
// not in flow context and has USTY set, and then pops the handler state.
// NOTE(review): the branches between the first assertion and these checks are not
// visible in this listing; presumably they dispatch to the flow/block end functions.
1824template<
class EventHandler>
1825void ParseEngine<EventHandler>::_end2_seq()
1827 _c4dbgp(
"seq: end");
1828 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RSEQ), m_evt_handler->m_curr->pos);
1835 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RFLOW), m_evt_handler->m_curr->pos);
1836 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
USTY), m_evt_handler->m_curr->pos);
1837 m_evt_handler->_pop();
// Begins an implicit document: clears the directive-tracking flags, notifies the event
// handler via begin_doc(), and resets the reference indentation of the new current
// state to 0.
1841template<
class EventHandler>
1842void ParseEngine<EventHandler>::_begin2_doc()
1844 _c4dbgp(
"begin_doc");
1845 m_has_directives_yaml =
false;
1846 m_has_directives =
false;
1849 m_evt_handler->begin_doc();
1850 m_evt_handler->m_curr->indref = 0;
// Begins an explicit document: clears the directive-tracking flags, notifies the event
// handler via begin_doc_expl(), and resets the reference indentation of the new
// current state to 0.
1853template<
class EventHandler>
1854void ParseEngine<EventHandler>::_begin2_doc_expl()
1856 _c4dbgp(
"begin_doc_expl");
1857 m_has_directives_yaml =
false;
1858 m_has_directives =
false;
1861 m_evt_handler->begin_doc_expl();
1862 m_evt_handler->m_curr->indref = 0;
// Ends the current implicit document (RDOC asserted). When the document produced no
// content, or tags/anchors are still pending, an empty plain value scalar is emitted
// first so the annotations have a node to attach to; then the handler end_doc() is called.
1865template<
class EventHandler>
1866void ParseEngine<EventHandler>::_end2_doc()
1868 _c4dbgp(
"doc: end");
1869 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RDOC), m_evt_handler->m_curr->pos);
1870 if(m_doc_empty || (m_pending_tags.num_entries || m_pending_anchors.num_entries))
1872 _c4dbgp(
"doc was empty; add empty val");
1873 _handle_annotations_before_blck_val_scalar();
1874 m_evt_handler->set_val_scalar_plain_empty();
1876 m_evt_handler->end_doc();
// Ends the current document explicitly. When the document produced no content, or
// tags/anchors are still pending, an empty plain value scalar is emitted first; then
// the handler end_doc_expl() is called.
1880template<
class EventHandler>
1881void ParseEngine<EventHandler>::_end2_doc_expl()
1883 _c4dbgp(
"doc: end");
1884 if(m_doc_empty || (m_pending_tags.num_entries || m_pending_anchors.num_entries))
1886 _c4dbgp(
"doc: no children; add empty val");
1887 _handle_annotations_before_blck_val_scalar();
1888 m_evt_handler->set_val_scalar_plain_empty();
1890 m_evt_handler->end_doc_expl();
// Starts a document when one is required.
// NOTE(review): only the debug trace is visible in this listing; the condition and the
// call that actually begins the document are not shown - confirm against the full body.
1894template<
class EventHandler>
1895void ParseEngine<EventHandler>::_maybe_begin_doc()
1899 _c4dbgp(
"doc must be started");
// Finishes the current document when one is open. When no document has content but
// tags or anchors are still pending, a document holding a single empty plain scalar is
// emitted so the annotations are not lost.
// NOTE(review): the first condition and its action are not visible in this listing.
1903template<
class EventHandler>
1904void ParseEngine<EventHandler>::_maybe_end_doc()
1908 _c4dbgp(
"doc must be finished");
1911 else if(m_doc_empty && (m_pending_tags.num_entries || m_pending_anchors.num_entries))
1913 _c4dbgp(
"no doc to finish, but pending annotations");
1914 m_evt_handler->begin_doc();
1915 _handle_annotations_before_blck_val_scalar();
1916 m_evt_handler->set_val_scalar_plain_empty();
1917 m_evt_handler->end_doc();
// Unwinds the state stack down to its root entry when a document ends abruptly.
// Preconditions (asserted): the stack is not empty and its root entry has RDOC set.
// When the current level is not 0, all states above the root are popped via
// _handle_indentation_pop(); afterwards the current state must have RDOC set.
1921template<
class EventHandler>
1922void ParseEngine<EventHandler>::_end_doc_suddenly__pop()
1924 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1, m_evt_handler->m_curr->pos);
1925 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack[0].flags &
RDOC, m_evt_handler->m_curr->pos);
1926 _c4dbgp(
"root is RDOC");
1927 if(m_evt_handler->m_curr->level != 0)
1928 _handle_indentation_pop(&m_evt_handler->m_stack[0]);
1929 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RDOC), m_evt_handler->m_curr->pos);
// Checks whether a document token appears where content already exists. The check
// fires when the state is the root (stack size 1) or the handler reports DOC, the
// handler reports any of MAP, SEQ or VAL, and the NDOC flag is not set on the current
// state.
// NOTE(review): the action taken when the condition holds is not visible in this
// listing; presumably an error is raised - confirm.
1934template<
class EventHandler>
1935void ParseEngine<EventHandler>::_check_trailing_doc_token()
1937 const bool is_root = (m_evt_handler->m_stack.size() == 1u);
1938 const bool isndoc = (m_evt_handler->m_curr->flags &
NDOC) != 0;
1939 const bool suspicious = m_evt_handler->template _has_any__<MAP|SEQ|VAL>();
1940 _c4dbgpf(
"target={} isroot={} suspicious={} ndoc={}", m_evt_handler->m_curr->node_id, is_root, suspicious, isndoc);
1941 if((is_root || m_evt_handler->template _has_any__<DOC>()) && suspicious && !isndoc)
// Ends the current document abruptly. The state stack is first unwound to the root
// via _end_doc_suddenly__pop().
// NOTE(review): the statements following the pop are not visible in this listing.
1945template<
class EventHandler>
1946void ParseEngine<EventHandler>::_end_doc_suddenly()
1948 _c4dbgp(
"end doc suddenly");
1949 _end_doc_suddenly__pop();
// Validates what remains on the line after a document-end marker. The remainder is
// asserted not to start with a dot, space or tab; anything left over other than a
// comment triggers the guarded branch.
// NOTE(review): the body of that branch is not visible in this listing; presumably it
// reports a parse error - confirm.
1954template<
class EventHandler>
1955void ParseEngine<EventHandler>::_check_doc_end_tokens()
const
1957 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
1958 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !rem.begins_with_any(
". \t"), m_evt_handler->m_curr->pos);
1959 if(C4_UNLIKELY(rem.len && !rem.begins_with(
'#')))
// Starts a new document abruptly. The state stack is first unwound to the root via
// _end_doc_suddenly__pop().
// NOTE(review): the statements following the pop are not visible in this listing.
1965template<
class EventHandler>
1966void ParseEngine<EventHandler>::_start_doc_suddenly()
1968 _c4dbgp(
"start doc suddenly");
1969 _end_doc_suddenly__pop();
// Finishes parsing of the whole stream: reports unterminated flow containers, unwinds
// any nested states, flushes pending annotations into a document of their own, and
// notifies the handler via end_stream(). Directives left without a document are
// reported as an error afterwards.
// NOTE(review): the condition for the first error and the statements between the
// unwinding and the pending-annotation check are not visible in this listing.
1974template<
class EventHandler>
1975void ParseEngine<EventHandler>::_end_stream()
1977 _c4dbgpf(
"end_stream, level={} node_id={}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->node_id);
// Reaching the end of the stream while still inside a flow container is an error.
1979 _c4err(
"missing terminating ]");
1980 else if(C4_UNLIKELY(has_all(
RMAP|
RFLOW)))
1981 _c4err(
"missing terminating }");
// Pop every state above the bottom of the stack.
1982 if(m_evt_handler->m_stack.size() > 1)
1983 _handle_indentation_pop(m_evt_handler->m_stack.begin());
// Anchors or tags without a node get a document containing an empty plain scalar.
1990 if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
1994 m_evt_handler->begin_doc();
1995 _handle_annotations_before_blck_val_scalar();
1996 m_evt_handler->set_val_scalar_plain_empty();
1997 m_evt_handler->end_doc();
2001 m_evt_handler->end_stream();
2002 if(C4_UNLIKELY(m_has_directives))
2003 _c4err(
"directives cannot be used without a document");
// Pops states off the handler stack until the current state is `popto`, ending each
// sequence or map state encountered on the way.
// @param popto  the state that must be current when the function returns
// NOTE(review): the calls that end each seq/map and the fallback branch are not
// visible in this listing.
2007template<
class EventHandler>
2008void ParseEngine<EventHandler>::_handle_indentation_pop(
ParserState const* popto)
2010 _c4dbgpf(
"popping {} level{}: from level {}(@ind={}) to level {}(@ind={})", m_evt_handler->m_curr->level - popto->level, (((m_evt_handler->m_curr->level - popto->level) > 1) ?
"s" :
""), m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, popto->level, popto->indref);
2011 while(m_evt_handler->m_curr != popto)
2015 _c4dbgpf(
"popping seq at level {} (indentation={},addr={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, m_evt_handler->m_curr);
2018 else if(has_any(
RMAP))
2020 _c4dbgpf(
"popping map at level {} (indentation={},addr={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, m_evt_handler->m_curr);
2028 _c4dbgpf(
"current level is {} (indentation={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
// Handles a decrease of indentation while inside a block sequence: searches the state
// stack, from the state below the current one down to the bottom, for a state whose
// indref equals the indentation of the current line, then pops down to it. Failing to
// find a valid target is reported as an indentation parse error.
// Preconditions (asserted): the stack storage is contiguous and the current state
// points into it, which is what makes the pointer walk below valid.
2031template<
class EventHandler>
2032void ParseEngine<EventHandler>::_handle_indentation_pop_from_block_seq()
2035 using state_type =
typename EventHandler::state;
2036 state_type
const* popto =
nullptr;
2037 auto &stack = m_evt_handler->m_stack;
2038 _RYML_ASSERT_PARSE_(stack.m_callbacks, stack.is_contiguous(), m_evt_handler->m_curr->pos);
2039 _RYML_ASSERT_PARSE_(stack.m_callbacks, m_evt_handler->m_curr >= stack.begin() && m_evt_handler->m_curr < stack.end(), m_evt_handler->m_curr->pos);
2040 const size_t ind = m_evt_handler->m_curr->line_contents.indentation;
2042 _print_state_stack();
// Unlike the block-map variant, this walk includes the bottom-most stack entry.
2044 for(state_type
const* s = m_evt_handler->m_curr-1; s >= stack.begin(); --s)
2046 _c4dbgpf(
"searching for state with indentation {}. curr={} (level={},node={})", ind, s->indref, s->level, s->node_id);
2047 if(s->indref == ind)
2049 _c4dbgpf(
"gotit!!! level={} node={}", s->level, s->node_id);
// The target must exist and lie strictly below the current state.
2054 if(!popto || popto >= m_evt_handler->m_curr || popto->level >= m_evt_handler->m_curr->level)
2056 _c4err(
"parse error: incorrect indentation?");
2058 _handle_indentation_pop(popto);
// Handles a decrease of indentation while inside a block map: searches the state stack
// below the current state for the state to return to, based on the indentation of the
// current line, then pops down to it. Failing to find a valid target is reported as an
// indentation parse error.
// Preconditions (asserted): the stack storage is contiguous and the current state
// points into it.
// NOTE(review): several branches of the search loop, and the declarations of `first`
// and `flagbuf_`, are not visible in this listing.
2061template<
class EventHandler>
2062void ParseEngine<EventHandler>::_handle_indentation_pop_from_block_map()
2065 using state_type =
typename EventHandler::state;
2066 auto &stack = m_evt_handler->m_stack;
2067 _RYML_ASSERT_PARSE_(stack.m_callbacks, stack.is_contiguous(), m_evt_handler->m_curr->pos);
2068 _RYML_ASSERT_PARSE_(stack.m_callbacks, m_evt_handler->m_curr >= stack.begin() && m_evt_handler->m_curr < stack.end(), m_evt_handler->m_curr->pos);
2069 const size_t ind = m_evt_handler->m_curr->line_contents.indentation;
2070 state_type
const* popto =
nullptr;
2073 _print_state_stack(flagbuf_);
// The walk stops before the bottom-most stack entry (strict comparison with begin()).
2075 for(state_type
const* s = m_evt_handler->m_curr-1; s > stack.begin(); --s)
2077 _c4dbgpf(
"searching for state with indentation {}. current: ind={},level={},node={},flags={}", ind, s->indref, s->level, s->node_id, detail::_parser_flags_to_str(flagbuf_, s->flags));
2082 else if(s->indref == ind)
2084 _c4dbgpf(
"same indentation!!! level={} node={}", s->level, s->node_id);
2085 if(popto && has_any(
RTOP, s) && has_none(
RMAP|
RSEQ, s))
// A dash block token at the same indentation indicates the parent was a sequence
// written without extra indentation.
2092 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
2094 _RYML_ASSERT_PARSE_(stack.m_callbacks, first == ind || first ==
npos, m_evt_handler->m_curr->pos);
2095 rem = rem.right_of(first,
true);
2096 _c4dbgpf(
"indentless? rem='{}' first={}", rem, first);
2097 if(rem.begins_with(
'-') && _is_blck_token(rem))
2099 _c4dbgp(
"parent was indentless seq");
// The target must exist and lie strictly below the current state.
2105 if(!popto || popto >= m_evt_handler->m_curr || popto->level >= m_evt_handler->m_curr->level)
2107 _c4err(
"parse error: incorrect indentation?");
2109 _handle_indentation_pop(popto);
// Validates a line break found inside a quoted scalar: multiline quoted keys are
// rejected, and a continuation line that has content must be indented at least as far
// as the minimum computed below.
// NOTE(review): the condition guarding the first error is not visible in this listing.
2114template<
class EventHandler>
2115void ParseEngine<EventHandler>::_check_valid_newline_in_quoted_scalar()
2119 _c4err(
"multiline quoted keys are invalid");
// The bool expression converts to 0 or 1: inside a block map or block sequence the
// continuation must be indented by at least one more column than indref.
2123 const size_t minindent = m_evt_handler->m_curr->indref + ((has_any(
RMAP|
RSEQ) && has_any(
RBLCK)));
2124 _c4dbgpf(
"indent={} vs minindent={} indref={}", m_evt_handler->m_curr->line_contents.indentation, minindent, m_evt_handler->m_curr->indref);
2125 if(m_evt_handler->m_curr->line_contents.indentation < minindent)
2127 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks,
2128 m_evt_handler->m_curr->line_contents.indentation == m_evt_handler->m_curr->line_contents.rem.first_not_of(
' '),
2129 m_evt_handler->m_curr->pos);
// Under-indentation is only an error when the line has content after its indentation.
2130 csubstr trimmed = m_evt_handler->m_curr->line_contents.rem.
sub(m_evt_handler->m_curr->line_contents.indentation);
2131 _c4dbgpf(
"trimmed.len={} line={}", trimmed.len, _prs(m_evt_handler->m_curr->line_contents.rem,
true));
2132 if(C4_UNLIKELY(!!trimmed.len))
2134 _c4err(
"bad indentation");
// Scans a single-quoted scalar starting at the current offset, which is asserted to
// hold the opening quote. Lines are consumed until the closing quote is found; hitting
// the end of the file first is an error.
// @return a ScannedScalar holding the text between the quotes (unfiltered) and a flag
//         telling whether the text still needs filtering
// NOTE(review): the declaration of `pos` and several branch conditions inside the loop
// are not visible in this listing.
2142template<
class EventHandler>
2143typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_scan_scalar_squot()
2148 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, _buf().sub(m_evt_handler->m_curr->pos.offset).begins_with(
'\''), m_evt_handler->m_curr->pos);
// s starts right after the opening quote and initially runs to the end of the buffer;
// it is cut at the closing quote once that is found.
2151 substr s = _buf().
sub(m_evt_handler->m_curr->pos.offset + 1);
2152 _line_progressed(1);
2153 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->at_line_beginning(), m_evt_handler->m_curr->pos);
2155 bool needs_filter =
false;
2157 while( ! _finished_file())
2159 const csubstr line = m_evt_handler->m_curr->line_contents.rem;
2160 _c4dbgpf(
"scanning single quoted scalar @ line[{}]: {}", m_evt_handler->m_curr->pos.line, _prs(line));
// A document token at the start of a line is not allowed inside the scalar.
2161 if(C4_UNLIKELY(m_evt_handler->m_curr->at_line_beginning() && _is_doc_token(line)))
2162 _c4err(
"token can not appear at line begin");
2163 for(
size_t i = 0; i < line.len; ++i)
2165 const char curr = line.str[i];
// One-character lookahead; a tilde is used as a stand-in at the end of the line.
2168 const char next = i+1 < line.len ? line.str[i+1] :
'~';
2171 _line_progressed(i + 1);
// Index of the closing quote expressed relative to the start of s.
2172 pos = i + (size_t)(line.str - s.str);
2177 needs_filter =
true;
2183 needs_filter =
true;
2184 _line_progressed(line.len);
2187 _check_valid_newline_in_quoted_scalar();
2190 _c4err(
"reached end of file while looking for closing quote");
2194 _c4dbgpf(
"found closing quote at: {}", pos);
2195 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, pos !=
npos, m_evt_handler->m_curr->pos);
2196 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, pos >= 0, m_evt_handler->m_curr->pos);
2197 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.end() >= _buf().begin() && s.end() <= _buf().end(), m_evt_handler->m_curr->pos);
2198 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.end() == _buf().end() || *s.end() ==
'\'', m_evt_handler->m_curr->pos);
// Cut s so that it ends just before the closing quote.
2199 _set_first_strict(s, pos);
2201 _c4prscalar(
"scanned squoted scalar", s,
true);
2203 return ScannedScalar { s, needs_filter };
// Scans a double-quoted scalar starting at the current offset, which is asserted to
// hold the opening quote. Lines are consumed until the closing quote is found; hitting
// the end of the file first is an error.
// @return a ScannedScalar holding the text between the quotes (unfiltered) and a flag
//         telling whether the text still needs filtering
// NOTE(review): the declaration of `pos` and several branch conditions inside the loop
// are not visible in this listing.
2208template<
class EventHandler>
2209typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_scan_scalar_dquot()
2214 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, _buf().sub(m_evt_handler->m_curr->pos.offset).begins_with(
'"'), m_evt_handler->m_curr->pos);
// s starts right after the opening quote and initially runs to the end of the buffer;
// it is cut at the closing quote once that is found.
2217 substr s = _buf().
sub(m_evt_handler->m_curr->pos.offset + 1);
2218 _line_progressed(1);
2219 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->at_line_beginning(), m_evt_handler->m_curr->pos);
2221 bool needs_filter =
false;
2223 while( ! _finished_file())
// Applied only when compiling with GCC 13.
// NOTE(review): presumably a workaround for an optimizer problem - confirm.
2225 #if defined(__GNUC__) && (__GNUC__ == 13)
2226 C4_DONT_OPTIMIZE(m_evt_handler->m_curr->line_contents.rem);
2228 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
2229 _c4dbgpf(
"scanning double quoted scalar @ line[{}]: line='{}'", m_evt_handler->m_curr->pos.line, rem);
// A document token at the start of a line is not allowed inside the scalar.
2230 if(C4_UNLIKELY(m_evt_handler->m_curr->at_line_beginning() && _is_doc_token(rem)))
2231 _c4err(
"token can not appear at line begin");
2232 for(
size_t i = 0; i < rem.len; ++i)
2234 const char curr = rem.str[i];
// One-character lookahead; a tilde is used as a stand-in at the end of the line.
2238 const char next = i+1 < rem.len ? rem.str[i+1] :
'~';
2239 needs_filter =
true;
// NOTE(review): presumably reached after a backslash, so that an escaped quote or
// escaped backslash is not mistaken for the end of the scalar - confirm.
2240 if(next ==
'"' || next ==
'\\')
2243 else if(curr ==
'"')
2245 _line_progressed(i + 1);
// Index of the closing quote expressed relative to the start of s.
2246 pos = i + (size_t)(rem.str - s.str);
2252 needs_filter =
true;
2253 _line_progressed(rem.len);
2256 _check_valid_newline_in_quoted_scalar();
2259 _c4err(
"reached end of file while looking for closing quote");
2263 _c4dbgpf(
"found closing quote at: {}", pos);
2264 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, pos !=
npos, m_evt_handler->m_curr->pos);
2265 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, pos >= 0, m_evt_handler->m_curr->pos);
2266 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.end() >= _buf().begin() && s.end() <= _buf().end(), m_evt_handler->m_curr->pos);
2267 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.end() == _buf().end() || *s.end() ==
'"', m_evt_handler->m_curr->pos);
// Cut s so that it ends just before the closing quote.
2268 _set_first_strict(s, pos);
2270 _c4prscalar(
"scanned dquoted scalar", s,
true);
2272 return ScannedScalar{s, needs_filter};
// Scans a block scalar (literal or folded). The header on the current line is parsed
// for an optional chomping indicator and an optional single-digit indentation
// indicator; the following lines are then accumulated into one raw range of the buffer
// until the indentation rules say the block has ended.
// @param sb      receives the raw (unfiltered) block in sb->scalar and the block
//                indentation in sb->indentation
// @param indref  reference indentation of the enclosing node; must not be npos (asserted)
// NOTE(review): many branch conditions, the declarations of `t`, `rest` and `lc`, and
// the statements that terminate the line loop are not visible in this listing.
2277template<
class EventHandler>
2278void ParseEngine<EventHandler>::_scan_block(ScannedBlock *C4_RESTRICT sb,
size_t indref)
2280 _c4dbgpf(
"blck: indref={}", indref);
2281 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, indref !=
npos, m_evt_handler->m_curr->pos);
// The rest of the current line is the block header; it must start with the style char.
2284 csubstr s = m_evt_handler->m_curr->line_contents.rem;
2285 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'|') || s.begins_with(
'>'), m_evt_handler->m_curr->pos);
2287 _c4dbgpf(
"blck: specs={}", _prs(s));
// Defaults: clip chomping, and indentation to be detected from the block contents.
2290 BlockChomp_e chomp = CHOMP_CLIP;
2291 size_t indentation =
npos;
2294 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with_any(
"|>"), m_evt_handler->m_curr->pos);
2296 _c4dbgpf(
"blck: spec is multichar: {}", _prs(t));
2297 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, t.len >= 1, m_evt_handler->m_curr->pos);
// Chomping indicator: a minus selects strip, a plus selects keep.
2298 size_t pos = t.first_of(
"-+");
2299 _c4dbgpf(
"blck: spec chomp char: pos={}", pos);
2302 _c4dbgpf(
"blck: spec chomp char: {}", _c4prc(t[pos]));
2305 _c4dbgp(
"blck: chomp=STRIP");
2306 chomp = CHOMP_STRIP;
2308 else if(t[pos] ==
'+')
2310 _c4dbgp(
"blck: chomp=KEEP");
2317 _c4dbgpf(
"blck: spec is now: {}", _prs(t));
// Indentation indicator: at most one decimal digit, and it must not be zero.
2320 pos = t.first_not_of(
"0123456789");
2324 _c4dbgpf(
"blck: parse indentation digits: {}", _prs(rest));
2325 if(C4_UNLIKELY(rest.len > 1))
2326 _c4err(
"parse error: invalid indentation");
2327 if(C4_UNLIKELY( !
c4::atou(rest, &indentation)))
2328 _c4err(
"parse error: could not read indentation as decimal");
2329 if(C4_UNLIKELY( ! indentation))
2330 _c4err(
"parse error: null indentation");
// NOTE(review): the debug message reports indentation+indref (the parameter) while the
// update below adds the current state indref; confirm the two are always equal here.
2331 _c4dbgpf(
"blck: indentation specified: {}. add {} from curr state -> {}", indentation, m_evt_handler->m_curr->indref, indentation+indref);
2332 indentation += m_evt_handler->m_curr->indref;
// Without digits, the only thing allowed after the indicators is a comment that is
// separated from them by whitespace.
2336 rest = t.triml(
" \t");
2337 _c4dbgpf(
"blck: digits empty. t={} trimmed={} iscomm={} t.iscomm={}", _prs(t), _prs(rest), rest.begins_with(
'#'), t.begins_with(
'#'));
2338 if(C4_UNLIKELY(rest.len && (rest.str[0] !=
'#' || t.str[0] ==
'#')))
2339 _c4err(
"parse error: invalid token");
2343 _c4dbgpf(
"blck: style={} chomp={} indentation={}", s.begins_with(
'>') ?
"fold" :
"literal", chomp==CHOMP_CLIP ?
"clip" : (chomp==CHOMP_STRIP ?
"strip" :
"keep"), indentation);
// The header line is fully consumed.
2346 _line_progressed(s.len);
// The raw block starts empty at the current offset and grows line by line.
2351 substr raw_block(_buf().data() + m_evt_handler->m_curr->pos.offset,
size_t(0));
2352 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, raw_block.begin() == m_evt_handler->m_curr->line_contents.full.str, m_evt_handler->m_curr->pos);
2360 size_t num_lines = 0;
2361 size_t first = m_evt_handler->m_curr->pos.line;
// Indentation suggested by leading empty lines, used until a non-empty line fixes it.
2362 size_t provisional_indentation =
npos;
2364 while(( ! _finished_file()))
// Peek at the next line through a local copy, so the parser state is only updated
// once the line is known to belong to the block.
2367 lc.reset_with_next_line(_buf(), m_evt_handler->m_curr->pos.offset);
// Applied only when compiling with GCC 12 or 13.
// NOTE(review): presumably a workaround for an optimizer problem - confirm.
2368 #if defined(__GNUC__) && (__GNUC__ == 12 || __GNUC__ == 13)
2369 C4_DONT_OPTIMIZE(lc.rem);
2371 _c4dbgpf(
"blck: peeking at {}", _prs(lc.rem.trimr(
"\r\n"),
true));
// Case: the block indentation is already known.
2373 if(indentation !=
npos)
2375 _c4dbgpf(
"blck: indentation={}", indentation);
// A non-blank line indented less than the block indentation ends the block.
2377 if(lc.indentation < indentation && ( ! lc.rem.trim(
" \t").empty()))
2381 _c4dbgpf(
"blck: indentation decreased ref={} thisline={}", indentation, lc.indentation);
2385 _c4err(
"indentation decreased without any scalar");
// With zero block indentation, only a document token can end the block.
2389 else if(indentation == 0)
2391 _c4dbgpf(
"blck: noindent. lc.rem={}", _prs(lc.rem));
2392 if(_is_doc_token(lc.rem))
2394 _c4dbgp(
"blck: stop. indentation=0 and doc ended");
// Case: the block indentation still has to be detected from the contents.
2401 const size_t fns = lc.rem.first_not_of(
' ');
2402 _c4dbgpf(
"blck: indentation ref not set. firstnonws={}", fns);
2405 _c4dbgpf(
"blck: line not empty. indref={} indprov={} indentation={}", indref, provisional_indentation, lc.indentation);
2406 if(C4_UNLIKELY(lc.full.begins_with(
'\t')))
2408 if(provisional_indentation ==
npos)
2410 if(lc.indentation < indref)
2412 _c4dbgpf(
"blck: block terminated indentation={} < indref={}", lc.indentation, indref);
2413 if(raw_block.len == 0)
2415 _c4dbgp(
"blck: was empty, undo next line");
2420 else if(lc.indentation == m_evt_handler->m_curr->indref)
2424 _c4dbgpf(
"blck: block terminated. reading container and indentation={}==indref={}", lc.indentation, m_evt_handler->m_curr->indref);
// The first non-empty line fixes the block indentation.
2428 _c4dbgpf(
"blck: set indentation ref from this line: ref={}", lc.indentation);
2429 indentation = lc.indentation;
2433 if(lc.indentation >= provisional_indentation)
2435 _c4dbgpf(
"blck: set indentation ref from provisional indentation: provisional_ref={}, thisline={}", provisional_indentation, lc.indentation);
2437 indentation = lc.indentation;
2441 if(lc.indentation >= indref)
2442 _c4err(
"parse error: first non-empty block line should have at least the original indentation");
2443 _c4dbgp(
"blck: finished");
// Empty or all-space line: it can only raise or initialize the provisional indentation.
2450 _c4dbgpf(
"blck: line empty or {} spaces. line_indentation={} prov_indentation={}", lc.rem.len, lc.indentation, provisional_indentation);
2451 if(provisional_indentation !=
npos)
2453 if(lc.rem.len >= provisional_indentation)
2455 _c4dbgpf(
"blck: increase provisional_ref {} -> {}", provisional_indentation, lc.rem.len);
2456 provisional_indentation = lc.rem.len;
// When the line has no indentation, the bool result of has_any() is used as the value:
// 1 inside a sequence or when reading a value, 0 otherwise.
2461 provisional_indentation = lc.indentation ? lc.indentation : has_any(
RSEQ|
RVAL);
2462 _c4dbgpf(
"blck: initialize provisional_ref={}", provisional_indentation);
2463 if(provisional_indentation ==
npos)
2465 provisional_indentation = lc.rem.len ? lc.rem.len : has_any(
RSEQ|
RVAL);
2466 _c4dbgpf(
"blck: initialize provisional_ref={}", provisional_indentation);
// The provisional indentation never goes below the reference indentation.
2468 if(provisional_indentation < indref)
2470 provisional_indentation = indref;
2471 _c4dbgpf(
"blck: initialize provisional_ref={}", provisional_indentation);
// The peeked line belongs to the block: commit it to the state and extend the raw
// block by the full line length.
2477 m_evt_handler->m_curr->line_contents = lc;
2478 _c4dbgpf(
"blck: append '{}'", m_evt_handler->m_curr->line_contents.rem);
2479 raw_block.len += m_evt_handler->m_curr->line_contents.full.len;
2480 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
2484 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.line == (first + num_lines) || (raw_block.len == 0), m_evt_handler->m_curr->pos);
// num_lines is only read by the assertion above.
2485 C4_UNUSED(num_lines);
// No non-empty line fixed the indentation: fall back to the provisional value.
2488 if(indentation ==
npos)
2490 _c4dbgpf(
"blck: set indentation from provisional: {}", provisional_indentation);
2491 indentation = provisional_indentation;
2497 _c4prscalar(
"scanned block", raw_block,
true);
2499 sb->scalar = raw_block;
2500 sb->indentation = indentation;
// Debug trace helper for the whitespace filter functions: the first definition prefixes
// the message with the read and write positions of `proc`, the second expands to nothing.
// NOTE(review): the preprocessor conditional selecting between the two definitions is
// not visible in this listing.
2512#define _c4dbgfws(fmt, ...) _c4dbgpf("filt_ws[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2514#define _c4dbgfws(...)
// Whitespace filter helper: starting at a space or tab (asserted), looks for the first
// following character that is not whitespace and classifies the run as trailing
// whitespace on the line or as whitespace that is part of the scalar.
// NOTE(review): the signature line, the return statements and the handling of the
// non-trailing case are not visible in this listing.
2517template<
class EventHandler>
2518template<
class FilterProcessor>
2521 _c4dbgfws(
"found whitespace '{}'", _c4prc(proc.curr()));
2522 _RYML_ASSERT_PARSE_(this->callbacks(), proc.curr() ==
' ' || proc.curr() ==
'\t', m_evt_handler->m_curr->pos);
// At read position 0 only spaces are skipped; elsewhere tabs are skipped as well.
2524 const size_t first_pos = proc.rpos > 0 ? proc.src.first_not_of(
" \t", proc.rpos) : proc.src.first_not_of(
' ', proc.rpos);
2525 if(first_pos !=
npos)
2527 const char first_char = proc.src[first_pos];
2528 _c4dbgfws(
"firstnonws='{}'@{}", _c4prc(first_char), first_pos);
// Whitespace directly followed by a line break is trailing: skip it in the input.
2529 if(first_char ==
'\n' || first_char ==
'\r')
2531 _c4dbgfws(
"whitespace is trailing on line",
"");
2532 proc.skip(first_pos - proc.rpos);
2537 _c4dbgfws(
"legit whitespace. sofar={}", _prs(proc.sofar()));
2541 _c4dbgfws(
"whitespace is trailing on line",
"");
// Whitespace handler that keeps trailing whitespace: when the helper reports
// false, the remainder of the source (src.len - rpos characters) is copied.
// NOTE(review): signature line absent; the name is inferred from the
// proc.copy() call and from the callers in the quoted-scalar loops.
2545template<
class EventHandler>
2546template<
class FilterProcessor>
2549 if(!_filter_ws_handle_to_first_non_space(proc))
2551 _c4dbgfws(
"... everything else is trailing whitespace - copy {} chars", proc.src.len - proc.rpos);
2552 proc.copy(proc.src.len - proc.rpos);
// Whitespace handler that drops trailing whitespace: when the helper reports
// false, the remainder of the source (src.len - rpos characters) is skipped.
// NOTE(review): signature line absent; the name is inferred from the
// proc.skip() call and from the caller in the plain-scalar loop.
2556template<
class EventHandler>
2557template<
class FilterProcessor>
2560 if(!_filter_ws_handle_to_first_non_space(proc))
2562 _c4dbgfws(
"... everything else is trailing whitespace - skip {} chars", proc.src.len - proc.rpos);
2563 proc.skip(proc.src.len - proc.rpos);
2577#define _c4dbgfps(fmt, ...) _c4dbgpf("filt_plain[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2579#define _c4dbgfps(fmt, ...)
// Newline handling for plain scalars.
// NOTE(review): the signature and the branch headers are absent here, so the
// conditions that select between the statements below cannot be stated; the
// name is taken from the call in the plain-scalar filter loop.
2582template<
class EventHandler>
2583template<
class FilterProcessor>
// precondition: the read cursor sits on a newline
2586 _RYML_ASSERT_PARSE_(this->callbacks(), proc.curr() ==
'\n', m_evt_handler->m_curr->pos);
2588 _c4dbgfps(
"found newline. sofar={}", _prs(proc.sofar()));
// ii starts at the read cursor and is handed by address to the counter,
// which presumably advances it past the counted newlines -- TODO confirm
2589 size_t ii = proc.rpos;
2590 const size_t numnl_following = _count_following_newlines(proc.src, &ii, indentation);
// multi-newline case (per the debug text): '\n' is set numnl_following times
2593 proc.set(
'\n', numnl_following);
2594 _c4dbgfps(
"{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ?
"in the middle" :
"at the end", proc.rpos-ii);
// position of the first character after the newline that is not space/tab
2598 const size_t ret = proc.src.first_not_of(
" \t", proc.rpos+1);
// NOTE(review): the text below labels the value "ret" but prints ii
2602 _c4dbgfps(
"single newline. convert to space. ret={}/{}. sofar={}", ii, proc.src.len, _prs(proc.sofar()));
2606 _c4dbgfps(
"last newline, everything else is whitespace. ii={}/{}", ii, proc.src.len);
// Main filter loop for plain scalars: walks the processor one character at a
// time, dispatches on whitespace / newline / carriage return, and returns
// proc.result().
// NOTE(review): the signature and the dispatch conditions are absent here; the
// name is taken from the two forwarding entry points that call it.
2613template<
class EventHandler>
2614template<
class FilterProcessor>
// a concrete indentation is required
2617 _RYML_ASSERT_PARSE_(this->callbacks(), indentation !=
npos, m_evt_handler->m_curr->pos);
2618 _c4dbgfps(
"before={}", _prs(proc.src));
2620 while(proc.has_more_chars())
2622 const char curr = proc.curr();
2623 _c4dbgfps(
"'{}', sofar={}", _c4prc(curr), _prs(proc.sofar()));
// whitespace: trailing whitespace is dropped for plain scalars
2628 _c4dbgfps(
"whitespace", curr);
2629 _filter_ws_skip_trailing(proc);
// newline: handled with the scalar's indentation
2632 _c4dbgfps(
"newline", curr);
2633 _filter_nl_plain(proc, indentation);
2636 _c4dbgfps(
"carriage return, ignore", curr);
2645 _c4dbgfps(
"after={}", _prs(proc.sofar()));
2647 return proc.result();
// Forwarding entry point: hands a filter processor and the indentation to
// _filter_plain() and returns its result.
// NOTE(review): the signature and the construction of `proc` are absent here;
// presumably the source/destination variant of the plain-scalar filter -- confirm.
2653template<
class EventHandler>
2657 return _filter_plain(proc, indentation);
// Forwarding entry point: hands a filter processor and the indentation to
// _filter_plain() and returns its result.
// NOTE(review): the signature and the construction of `proc` are absent here;
// presumably the in-place variant of the plain-scalar filter -- confirm.
2660template<
class EventHandler>
2664 return _filter_plain(proc, indentation);
2675#define _c4dbgfsq(fmt, ...) _c4dbgpf("filt_squo[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2677#define _c4dbgfsq(fmt, ...)
// Newline handling for single-quoted scalars.
// NOTE(review): the signature and the branch headers are absent here, so the
// conditions that select between the statements below cannot be stated; the
// name is taken from the call in the single-quoted filter loop.
2680template<
class EventHandler>
2681template<
class FilterProcessor>
// precondition: the read cursor sits on a newline
2684 _RYML_ASSERT_PARSE_(this->callbacks(), proc.curr() ==
'\n', m_evt_handler->m_curr->pos);
2686 _c4dbgfsq(
"found newline. sofar={}", _prs(proc.sofar()));
// ii starts at the read cursor and is handed by address to the counter
// (no indentation argument is passed in this variant)
2687 size_t ii = proc.rpos;
2688 const size_t numnl_following = _count_following_newlines(proc.src, &ii);
// multi-newline case (per the debug text): '\n' is set numnl_following times
2691 proc.set(
'\n', numnl_following);
2692 _c4dbgfsq(
"{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ?
"in the middle" :
"at the end", proc.rpos-ii);
// position of the first character after the newline that is not space/tab
2696 const size_t ret = proc.src.first_not_of(
" \t", proc.rpos+1);
// NOTE(review): the text below labels the value "ret" but prints ii
2700 _c4dbgfsq(
"single newline. convert to space. ret={}/{}. sofar={}", ii, proc.src.len, _prs(proc.sofar()));
2705 _c4dbgfsq(
"single newline. convert to space. ii={}/{}. sofar={}", ii, proc.src.len, _prs(proc.sofar()));
// Main filter loop for single-quoted scalars: walks the processor one
// character at a time, dispatches on whitespace / newline / carriage return /
// single quote, and returns proc.result().
// NOTE(review): the signature and the dispatch conditions are absent here; the
// name is taken from the two forwarding entry points that call it.
2711template<
class EventHandler>
2712template<
class FilterProcessor>
2715 _c4dbgfsq(
"before={}", _prs(proc.src));
2719 while(proc.has_more_chars())
2721 const char curr = proc.curr();
2722 _c4dbgfsq(
"'{}', sofar={}", _c4prc(curr), _prs(proc.sofar()));
// whitespace: trailing whitespace is kept for quoted scalars
2727 _c4dbgfsq(
"whitespace", curr);
2728 _filter_ws_copy_trailing(proc);
2731 _c4dbgfsq(
"newline", curr);
2732 _filter_nl_squoted(proc);
2735 _c4dbgfsq(
"skip cr", curr);
// a quote followed by another quote is detected here; how the pair is
// written out is on lines absent from this excerpt
2739 _c4dbgfsq(
"squote", curr);
2740 if(proc.next() ==
'\'')
2742 _c4dbgfsq(
"two consecutive squotes", curr);
2757 _c4dbgfsq(
": #filteredchars={} after={}", proc.src.len-proc.sofar().len, _prs(proc.sofar()));
2759 return proc.result();
// Forwarding entry point: hands a filter processor to _filter_squoted() and
// returns its result.
// NOTE(review): the signature and the construction of `proc` are absent here;
// presumably the source/destination variant -- confirm.
2764template<
class EventHandler>
2768 return _filter_squoted(proc);
// Forwarding entry point: hands a filter processor to _filter_squoted() and
// returns its result.
// NOTE(review): the signature and the construction of `proc` are absent here;
// presumably the in-place variant -- confirm.
2771template<
class EventHandler>
2775 return _filter_squoted(proc);
2786#define _c4dbgfdq(fmt, ...) _c4dbgpf("filt_dquo[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2788#define _c4dbgfdq(...)
// Newline handling for double-quoted scalars. Same visible shape as the
// single-quoted variant, plus a look at a backslash that follows the folded
// whitespace.
// NOTE(review): the signature and the branch headers are absent here; the name
// is taken from the call in the double-quoted filter loop.
2791template<
class EventHandler>
2792template<
class FilterProcessor>
// precondition: the read cursor sits on a newline
2795 _RYML_ASSERT_PARSE_(this->callbacks(), proc.curr() ==
'\n', m_evt_handler->m_curr->pos);
2797 _c4dbgfdq(
"found newline. sofar={}", _prs(proc.sofar()));
// ii starts at the read cursor and is handed by address to the counter
2798 size_t ii = proc.rpos;
2799 const size_t numnl_following = _count_following_newlines(proc.src, &ii);
// multi-newline case (per the debug text): '\n' is set numnl_following times
2802 proc.set(
'\n', numnl_following);
2803 _c4dbgfdq(
"{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ?
"in the middle" :
"at the end", proc.rpos-ii);
// position of the first character after the newline that is not space/tab
2807 const size_t ret = proc.src.first_not_of(
" \t", proc.rpos+1);
// NOTE(review): the text below labels the value "ret" but prints ii
2811 _c4dbgfdq(
"single newline. convert to space. ret={}/{}. sofar={}", ii, proc.src.len, _prs(proc.sofar()));
2816 _c4dbgfdq(
"single newline. convert to space. ii={}/{}. sofar={}", ii, proc.src.len, _prs(proc.sofar()));
// a backslash at ii that is itself followed by a space or tab gets special
// treatment; `next` is '\0' when the backslash is the last source character
2818 if(ii < proc.src.len && proc.src.str[ii] ==
'\\')
2820 _c4dbgfdq(
"backslash at [{}]", ii);
2821 const char next = ii+1 < proc.src.len ? proc.src.str[ii+1] :
'\0';
2822 if(next ==
' ' || next ==
'\t')
2824 _c4dbgfdq(
"extend skip to backslash",
"");
// Decodes a hexadecimal escape of `sz` digits located at the read cursor and
// replaces it with the encoded bytes of that code point.
// The callers pass sz = 2, 4 or 8. The cursor is on the backslash, so the
// digits start at rpos + 2 and the whole escape spans sz + 2 characters.
// Errors (via _c4err) when the digits run past the end of the source, when
// they are not valid hex, or when the code point cannot be encoded.
// NOTE(review): the signature line and the declaration of `readbuf` are absent
// here; the name is taken from the call sites in the backslash handler.
2832template<
class EventHandler>
2833template<
class FilterProcessor>
// number of source characters that follow the backslash: escape letter + digits
2836 const size_t szp1 = sz + 1u;
// rpos + szp1 is the index of the last hex digit; it must lie inside the source
2837 if(C4_UNLIKELY(proc.rpos + szp1 >= proc.src.len))
2838 _c4err(
"codepoint requires {} hex digits. scalar pos={}", sz, proc.rpos);
2840 csubstr codepoint = proc.src.
sub(proc.rpos + 2u, sz);
2841 _c4dbgfdq(
"utf8 ~~~{}~~~ rpos={} rem=~~~{}~~~", codepoint, proc.rpos, proc.src.sub(proc.rpos));
2842 uint32_t codepoint_val = {};
2843 if(C4_UNLIKELY(!
read_hex(codepoint, &codepoint_val)))
2844 _c4err(
"failed to parse codepoint. scalar pos={}", proc.rpos);
2845 const size_t numbytes =
decode_code_point((uint8_t*)readbuf,
sizeof(readbuf), codepoint_val);
// zero bytes means the code point could not be encoded; report the code point
// itself, which is what the message text names
2846 if(C4_UNLIKELY(numbytes == 0))
2847 _c4err(
"failed to decode code point={}", codepoint_val);
2848 _RYML_ASSERT_PARSE_(callbacks(), numbytes <= 4, m_evt_handler->m_curr->pos);
// write the encoded bytes in place of the escape
2849 proc.translate_esc_bulk(readbuf, numbytes, szp1);
2850 _c4dbgfdq(
"utf8 after rpos={} rem=~~~{}~~~", proc.rpos, proc.src.sub(proc.rpos));
// Handles one backslash escape inside a double-quoted scalar. The character
// after the backslash (`next`) selects the replacement through the else-if
// chain below; an unrecognised character is reported with _c4err.
// NOTE(review): the signature, the first branch header and the braces are
// absent here; the name is taken from the call in the double-quoted loop.
2853template<
class EventHandler>
2854template<
class FilterProcessor>
2857 char next = proc.next();
2858 _c4dbgfdq(
"backslash, next='{}'", _c4prc(next));
// look one character further for a newline; per the debug text this is the
// backslash + CR + LF case, which is then treated as backslash + LF
2861 if(proc.rpos+2 < proc.src.len && proc.src.str[proc.rpos+2] ==
'\n')
2865 _c4dbgfdq(
"[{}]: was \\r\\n, now next='\\n'", proc.rpos);
// scan forward from two past the cursor over spaces and tabs, then skip up to
// ii (presumably the escaped line break plus the leading whitespace of the
// continuation line -- TODO confirm against the absent branch header)
2871 size_t ii = proc.rpos + 2;
2872 for( ; ii < proc.src.len; ++ii)
2875 if(proc.src.str[ii] ==
' ' || proc.src.str[ii] ==
'\t')
2880 proc.skip(ii - proc.rpos);
// escapes that stand for the escaped character itself
2882 else if(next ==
'"' || next ==
'/' || next ==
' ' || next ==
'\t')
2885 proc.translate_esc(next);
2886 _c4dbgfdq(
"here, used '{}'", _c4prc(next));
2888 else if(next ==
'\r')
// single-letter escapes mapped to one control character
2892 else if(next ==
'n')
2894 proc.translate_esc(
'\n');
2896 else if(next ==
'r')
2898 proc.translate_esc(
'\r');
2900 else if(next ==
't')
2902 proc.translate_esc(
'\t');
2904 else if(next ==
'\\')
2906 proc.translate_esc(
'\\');
// hexadecimal escapes: x, u and U take 2, 4 and 8 hex digits respectively
2908 else if(next ==
'x')
2910 _filter_dquoted_backslash_decode(proc, 2u);
2912 else if(next ==
'u')
2914 _filter_dquoted_backslash_decode(proc, 4u);
2916 else if(next ==
'U')
2918 _filter_dquoted_backslash_decode(proc, 8u);
2921 else if(next ==
'0')
2923 proc.translate_esc(
'\0');
2925 else if(next ==
'b')
2927 proc.translate_esc(
'\b');
2929 else if(next ==
'f')
2931 proc.translate_esc(
'\f');
2933 else if(next ==
'a')
2935 proc.translate_esc(
'\a');
2937 else if(next ==
'v')
2939 proc.translate_esc(
'\v');
2941 else if(next ==
'e')
2943 proc.translate_esc(
'\x1b');
// multi-byte replacements. Each _RYML_CHCONST pair names the same byte twice
// (a negative and a 0x.. spelling); presumably the macro picks the spelling
// that fits the signedness of char -- TODO confirm.
// underscore escape: bytes C2 A0, the UTF-8 encoding of U+00A0
2945 else if(next ==
'_')
2948 const char payload[] = {
2949 _RYML_CHCONST(-0x3e, 0xc2),
2950 _RYML_CHCONST(-0x60, 0xa0),
2952 proc.translate_esc_bulk(payload, 2, 1);
// N escape: bytes C2 85, the UTF-8 encoding of U+0085
2954 else if(next ==
'N')
2957 const char payload[] = {
2958 _RYML_CHCONST(-0x3e, 0xc2),
2959 _RYML_CHCONST(-0x7b, 0x85),
2961 proc.translate_esc_bulk(payload, 2, 1);
// L escape: bytes E2 80 A8, the UTF-8 encoding of U+2028. Three bytes replace
// a two-character escape, hence the "extending" translate call.
2963 else if(next ==
'L')
2966 const char payload[] = {
2967 _RYML_CHCONST(-0x1e, 0xe2),
2968 _RYML_CHCONST(-0x80, 0x80),
2969 _RYML_CHCONST(-0x58, 0xa8),
2971 proc.translate_esc_extending(payload, 3, 1);
// P escape: bytes E2 80 A9, the UTF-8 encoding of U+2029
2973 else if(next ==
'P')
2976 const char payload[] = {
2977 _RYML_CHCONST(-0x1e, 0xe2),
2978 _RYML_CHCONST(-0x80, 0x80),
2979 _RYML_CHCONST(-0x57, 0xa9),
2981 proc.translate_esc_extending(payload, 3, 1);
// a NUL after the backslash has its own branch (its body is absent here)
2983 else if(next ==
'\0')
2989 _c4err(
"unknown character '{}' after '\\' pos={}", _c4prc(next), proc.rpos);
2991 _c4dbgfdq(
"backslash...sofar={}", _prs(proc.sofar()));
// Main filter loop for double-quoted scalars: walks the processor one
// character at a time, dispatches on whitespace / newline / carriage return /
// backslash, and returns proc.result().
// NOTE(review): the signature and the dispatch conditions are absent here; the
// name is taken from the two forwarding entry points that call it.
2995template<
class EventHandler>
2996template<
class FilterProcessor>
2999 _c4dbgfdq(
"before={}", _prs(proc.src));
3002 while(proc.has_more_chars())
3004 const char curr = proc.curr();
3005 _c4dbgfdq(
"'{}' sofar={}", _c4prc(curr), _prs(proc.sofar()));
// whitespace: trailing whitespace is kept for quoted scalars
3011 _c4dbgfdq(
"whitespace", curr);
3012 _filter_ws_copy_trailing(proc);
3017 _c4dbgfdq(
"newline", curr);
3018 _filter_nl_dquoted(proc);
3023 _c4dbgfdq(
"carriage return, ignore", curr);
// backslash escapes are resolved by the dedicated handler
3029 _filter_dquoted_backslash(proc);
3039 _c4dbgfdq(
"after={}", _prs(proc.sofar()));
3040 return proc.result();
// Forwarding entry point: hands a filter processor to _filter_dquoted() and
// returns its result.
// NOTE(review): the signature and the construction of `proc` are absent here;
// presumably the source/destination variant -- confirm.
3046template<
class EventHandler>
3050 return _filter_dquoted(proc);
// Forwarding entry point: hands a filter processor to _filter_dquoted() and
// returns its result.
// NOTE(review): the signature and the construction of `proc` are absent here;
// presumably the in-place variant -- confirm.
3053template<
class EventHandler>
3057 return _filter_dquoted(proc);
// Free helper: scans `s` backwards for a newline whose following line has more
// leading characters than `indentation` before its first non-matching one.
// NOTE(review): the declarations of `rem` and `first` and every return
// statement are absent here, so the returned value cannot be documented.
3066C4_NO_INLINE
inline size_t _find_last_newline_and_larger_indentation(
csubstr s,
size_t indentation)
noexcept
// a string too short to hold a newline plus more than `indentation` characters
// is handled up front
3068 if(indentation + 1 > s.len)
// backward scan; i wraps to size_t(-1) after index 0, which ends the loop
3070 for(
size_t i = s.len-indentation-1; i !=
size_t(-1); --i)
3072 if(s.str[i] ==
'\n')
// when nothing was found, treat the whole remainder as matching
3076 first = (first !=
npos) ? first : rem.len;
3077 if(first > indentation)
// Applies the chomping rule (clip / keep / strip) to what remains of a block
// scalar once its contents have been filtered.
// Visible phases: (1) lines up to the last newline that is followed by more
// than `indentation` spaces are walked, dropping the indentation and copying
// the excess spaces; (2) the remainder is handled per chomp mode.
// NOTE(review): the signature, the branch headers and the copy/skip statements
// of phase (2) are absent here; the name is taken from the block-scalar
// filters that call it.
3084template<
class EventHandler>
3085template<
class FilterProcessor>
// only the three known chomp modes are accepted
3088 _RYML_ASSERT_PARSE_(this->callbacks(), chomp == CHOMP_CLIP || chomp == CHOMP_KEEP || chomp == CHOMP_STRIP, m_evt_handler->m_curr->pos);
// what remains must consist solely of spaces and line-break characters
3089 _RYML_ASSERT_PARSE_(this->callbacks(), proc.rem().first_not_of(
" \n\r") ==
npos, m_evt_handler->m_curr->pos);
// function-local debug macro; the second definition compiles the traces out
3093 #define _c4dbgchomp(fmt, ...) _c4dbgpf("chomp[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3095 #define _c4dbgchomp(...)
3100 size_t last = _find_last_newline_and_larger_indentation(proc.rem(), indentation);
3103 _c4dbgchomp(
"found newline and larger indentation. last={}", last);
// turn the offset into an absolute source position just past the indentation
// of the line that follows that newline
3104 last = proc.rpos + last + size_t(1) + indentation;
3105 _RYML_ASSERT_PARSE_(this->callbacks(), last <= proc.src.len, m_evt_handler->m_curr->pos);
3107 while((proc.rpos < last) && proc.has_more_chars())
3109 const char curr = proc.curr();
3110 _c4dbgchomp(
"curr='{}'", _c4prc(curr));
3115 _c4dbgchomp(
"newline! remlen={}", proc.rem().len);
3118 csubstr at_next_line = proc.rem();
3119 if(at_next_line.begins_with(
' '))
3121 _c4dbgchomp(
"next line begins with spaces. indentation={}", indentation);
3123 size_t first_non_space = at_next_line.first_not_of(
' ');
3124 _c4dbgchomp(
"first_non_space={}", first_non_space);
// a line made only of spaces counts as that many spaces
3125 if(first_non_space ==
npos)
3127 _c4dbgchomp(
"{} spaces, to the end", at_next_line.len);
3128 first_non_space = at_next_line.len;
// no more spaces than the indentation: all of them are dropped
3130 if(first_non_space <= indentation)
3132 _c4dbgchomp(
"skip spaces={}<=indentation={}", first_non_space, indentation);
3133 proc.skip(first_non_space);
// more spaces than the indentation: drop the indentation, keep the excess
3137 _c4dbgchomp(
"skip indentation={}<spaces={}", indentation, first_non_space);
3138 proc.skip(indentation);
3140 _c4dbgchomp(
"copy {}={}-{} spaces", first_non_space - indentation, first_non_space, indentation);
3141 proc.copy(first_non_space - indentation);
// clip mode; per the debug texts one newline is copied, the rest is skipped,
// and a newline is added when none was present
3159 bool had_one =
false;
3160 while(proc.has_more_chars())
3162 const char curr = proc.curr();
3163 _c4dbgchomp(
"CLIP: '{}'", _c4prc(curr));
3168 _c4dbgchomp(
"copy newline!", curr);
3176 _c4dbgchomp(
"skip!", curr);
3183 _c4dbgchomp(
"chomp=CLIP: add missing newline @{}", proc.wpos);
// keep mode; per the debug texts every remaining newline is copied
3190 _c4dbgchomp(
"chomp=KEEP: copy all remaining new lines of {} characters", proc.rem().len);
3191 while(proc.has_more_chars())
3193 const char curr = proc.curr();
3194 _c4dbgchomp(
"KEEP: '{}'", _c4prc(curr));
3198 _c4dbgchomp(
"copy newline!", curr);
3203 _c4dbgchomp(
"skip!", curr);
// strip mode; per the debug text everything that remains is dropped
3212 _c4dbgchomp(
"chomp=STRIP: strip {} characters", proc.rem().len);
3224#define _c4dbgfb(fmt, ...) _c4dbgpf("filt_block[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3226#define _c4dbgfb(...)
// Drops the leading indentation of the line at the read cursor: at most
// `indentation` spaces are skipped.
// NOTE(review): the signature, the declaration of `rem`, the branch headers
// and the matching #endif of the coverage guard are absent here; the name is
// taken from the block-scalar filters that call it.
3229template<
class EventHandler>
3230template<
class FilterProcessor>
// number of leading spaces before the first non-space character
3236 size_t first = rem.first_not_of(
' ');
3239 _c4dbgfb(
"{} spaces follow before next nonws character", first);
// fewer spaces than the indentation
3240 if(first < indentation)
3242 _c4dbgfb(
"skip {}<{} spaces from indentation", first, indentation);
// otherwise exactly `indentation` spaces are dropped
3247 _c4dbgfb(
"skip {} spaces from indentation", indentation);
3248 proc.skip(indentation);
// the all-spaces-to-the-end path below is only compiled under this guard
3251 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
3254 _c4dbgfb(
"all spaces to the end: {} spaces", first);
3258 if(first < indentation)
3260 _c4dbgfb(
"skip everything", first);
3261 proc.skip(proc.src.len - proc.rpos);
3265 _c4dbgfb(
"skip {} spaces from indentation", indentation);
3266 proc.skip(indentation);
// Pre-pass for block scalars: deals with a source that holds only whitespace
// and returns the length of the non-whitespace contents (contents.len).
// NOTE(review): the signature, the computation of `contents` and the loop body
// are absent here; the name is taken from the block-scalar filters, which
// call it first.
3274template<
class EventHandler>
3275template<
class FilterProcessor>
3279 _c4dbgfb(
"ws: contents_len={} wslen={}", contents.len, proc.src.len-contents.len);
3282 _c4dbgfb(
"ws: all whitespace: len={}", proc.src.len);
// with keep-chomping a non-empty all-whitespace source is still walked
// (per the debug text, to keep its newlines)
3283 if(chomp == CHOMP_KEEP && proc.src.len)
3285 _c4dbgfb(
"ws: chomp=KEEP all {} newlines", proc.src.count(
'\n'));
3286 while(proc.has_more_chars())
3288 const char curr = proc.curr();
3300 return contents.len;
// Widens contents_len so that it ends at the first newline found at or after
// the current contents_len, or at the end of the source when there is none,
// and returns the new value.
// NOTE(review): the signature and the else header are absent here; the name is
// taken from the block-scalar filters that call it.
3303template<
class EventHandler>
3304template<
class FilterProcessor>
3307 _c4dbgfb(
"contents_len={}", contents_len);
// callers must already have ruled out empty contents
3309 _RYML_ASSERT_PARSE_(this->callbacks(), contents_len > 0u, m_evt_handler->m_curr->pos);
3313 size_t firstnewl = proc.src.first_of(
'\n', contents_len);
3314 if(firstnewl !=
npos)
3316 contents_len = firstnewl;
3317 _c4dbgfb(
"contents_len={} <--- firstnewl={}", contents_len, firstnewl);
// no newline after the contents: they reach to the end of the source
3321 contents_len = proc.src.len;
3322 _c4dbgfb(
"contents_len={} <--- src.len={}", contents_len, proc.src.len);
3325 return contents_len;
3337#define _c4dbgfbl(fmt, ...) _c4dbgpf("filt_block_lit[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3339#define _c4dbgfbl(...)
// Filter for literal block scalars. Visible steps: all-whitespace pre-pass,
// extension of the contents to the chomp boundary, removal of the indentation
// on the first line and after every newline, then chomping.
// NOTE(review): the signature, the condition guarding the early return and the
// per-character copy statements are absent here; the name is taken from the
// two forwarding entry points that call it.
3342template<
class EventHandler>
3343template<
class FilterProcessor>
3346 _c4dbgfbl(
"indentation={} before={}", indentation, _prs(proc.src));
3348 size_t contents_len = _handle_all_whitespace(proc, chomp);
// early exit (its condition is on a line absent from this excerpt)
3350 return proc.result();
3352 contents_len = _extend_to_chomp(proc, contents_len);
3354 _c4dbgfbl(
"to filter={}", _prs(proc.src.first(contents_len)));
// indentation of the first line
3356 _filter_block_indentation(proc, indentation);
3359 while(proc.has_more_chars(contents_len))
3361 const char curr = proc.curr();
3362 _c4dbgfbl(
"'{}' sofar={}", _c4prc(curr), _prs(proc.sofar()));
3367 _c4dbgfbl(
"found newline. skip indentation on the next line", curr);
3369 _filter_block_indentation(proc, indentation);
3381 _c4dbgfbl(
"before chomp: #tochomp={} sofar={}", proc.rem().len, _prs(proc.sofar()));
// what is left after the contents is handled by the chomping rule
3383 _filter_chomp(proc, chomp, indentation);
3385 _c4dbgfbl(
"final={}", _prs(proc.sofar()));
3387 return proc.result();
// Forwarding entry point: hands a filter processor, the indentation and the
// chomp mode to _filter_block_literal() and returns its result.
// NOTE(review): the signature and the construction of `proc` are absent here;
// presumably the source/destination variant -- confirm.
3392template<
class EventHandler>
3396 return _filter_block_literal(proc, indentation, chomp);
// Forwarding entry point: hands a filter processor, the indentation and the
// chomp mode to _filter_block_literal() and returns its result.
// NOTE(review): the signature and the construction of `proc` are absent here;
// presumably the in-place variant -- confirm.
3399template<
class EventHandler>
3403 return _filter_block_literal(proc, indentation, chomp);
3413#define _c4dbgfbf(fmt, ...) _c4dbgpf("filt_block_folded[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3415#define _c4dbgfbf(...)
// Handles the newlines at the very start of a folded block scalar: drops the
// indentation, then walks characters until something that is neither space
// nor newline is met (per the last debug text).
// NOTE(review): the signature and the branch headers are absent here; the name
// is taken from the call in the folded-block filter.
3419template<
class EventHandler>
3420template<
class FilterProcessor>
3423 _filter_block_indentation(proc, indentation);
3424 while(proc.has_more_chars(len))
3426 const char curr = proc.curr();
3427 _c4dbgfbf(
"'{}' sofar={}", _c4prc(curr), _prs(proc.sofar()));
// after a newline the next line's indentation is dropped
3431 _c4dbgfbf(
"newline.", curr);
3433 _filter_block_indentation(proc, indentation);
// a line that still begins with whitespace is more indented than the scalar
// and is delegated to the indented-block handler
3441 size_t first = proc.rem().first_not_of(
" \t");
3442 _c4dbgfbf(
"space. first={}", first);
3444 first = proc.rem().len;
3445 _c4dbgfbf(
"... indentation increased to {}", first);
3446 _filter_block_folded_indented_block(proc, indentation, len, first);
3450 _c4dbgfbf(
"newl leading: not space, not newline. stop.", 0);
// Folding bookkeeping for one newline in a run. Per the debug texts: the first
// newline becomes a space and its write position is remembered; on the second
// newline that space is rewritten as '\n'; later newlines are copied.
// Returns the remembered write position.
// NOTE(review): the signature, the branch headers and the write statements for
// the first and subsequent newline are absent here; the name is taken from the
// call in the folded-newlines handler.
3456template<
class EventHandler>
3457template<
class FilterProcessor>
3463 _c4dbgfbf(
"... this is the first newline. turn into space. wpos={}", proc.wpos);
3464 wpos_at_first_newl = proc.wpos;
3469 _c4dbgfbf(
"... this is the second newline. prev space (at wpos={}) must be newline", wpos_at_first_newl);
// the remembered position must exist, hold a space, and be the last written one
3470 _RYML_ASSERT_PARSE_(this->callbacks(), wpos_at_first_newl !=
npos, m_evt_handler->m_curr->pos);
3471 _RYML_ASSERT_PARSE_(this->callbacks(), proc.sofar()[wpos_at_first_newl] ==
' ', m_evt_handler->m_curr->pos);
3472 _RYML_ASSERT_PARSE_(this->callbacks(), wpos_at_first_newl + 1u == proc.wpos, m_evt_handler->m_curr->pos);
// undo the fold: the space written for the first newline becomes a newline
3474 proc.set_at(wpos_at_first_newl,
'\n');
3475 _RYML_ASSERT_PARSE_(this->callbacks(), proc.sofar()[wpos_at_first_newl] ==
'\n', m_evt_handler->m_curr->pos);
3478 _c4dbgfbf(
"... subsequent newline (num_newl={}). copy", num_newl);
3482 return wpos_at_first_newl;
// Handles a run of newlines inside a folded block scalar: each newline goes
// through the compress helper and is followed by indentation removal; a line
// that is more indented is delegated to the indented-block handler.
// NOTE(review): the signature and the branch headers are absent here (original
// lines 3501-3535 are not present); the name is taken from the call in the
// folded-block filter.
3485template<
class EventHandler>
3486template<
class FilterProcessor>
// precondition: the read cursor sits on a newline
3489 _RYML_ASSERT_PARSE_(this->callbacks(), proc.curr() ==
'\n', m_evt_handler->m_curr->pos);
// newline counter and write position of the space produced by the first one
3490 size_t num_newl = 0;
3491 size_t wpos_at_first_newl =
npos;
3492 while(proc.has_more_chars(len))
3494 const char curr = proc.curr();
3495 _c4dbgfbf(
"'{}' sofar={}", _c4prc(curr), _prs(proc.sofar()));
3500 _c4dbgfbf(
"newline. sofar={}", num_newl);
// num_newl is incremented before being passed on
3536 wpos_at_first_newl = _filter_block_folded_newlines_compress(proc, ++num_newl, wpos_at_first_newl);
3537 _filter_block_indentation(proc, indentation);
3543 size_t first = proc.rem().first_not_of(
" \t");
3544 _c4dbgfbf(
"space. first={}", first);
3546 first = proc.rem().len;
3547 _c4dbgfbf(
"... indentation increased to {}", first);
// in front of a more-indented line the folded space is turned back into a
// newline
3550 _c4dbgfbf(
"... prev space (at wpos={}) must be newline", wpos_at_first_newl);
3551 proc.set_at(wpos_at_first_newl,
'\n');
3555 _c4dbgfbf(
"... add missing newline", wpos_at_first_newl);
3558 _filter_block_folded_indented_block(proc, indentation, len, first);
// the remembered position is cleared after the indented block
3560 wpos_at_first_newl =
npos;
3567 _c4dbgfbf(
"not space, not newline. stop.", 0);
// Handles lines of a folded block scalar that are indented deeper than the
// scalar itself: the extra indentation is copied, and the walk continues until
// a line break is followed by something other than another line break (per the
// "done with indented block" debug text).
// NOTE(review): the signature, the declarations of `rem` and `first` and the
// branch headers are absent here; the name is taken from its call sites.
3574template<
class EventHandler>
3575template<
class FilterProcessor>
// the remainder must start with exactly curr_indentation whitespace characters,
// or be all whitespace
3578 _RYML_ASSERT_PARSE_(this->callbacks(), (proc.rem().first_not_of(
" \t") == curr_indentation) || (proc.rem().first_not_of(
" \t") ==
npos), m_evt_handler->m_curr->pos);
// the extra indentation is part of the content and is copied
3579 if(curr_indentation)
3580 proc.copy(curr_indentation);
3581 while(proc.has_more_chars(len))
3583 const char curr = proc.curr();
3584 _c4dbgfbf(
"'{}' sofar={}", _c4prc(curr), _prs(proc.sofar()));
3590 _filter_block_indentation(proc, indentation);
3593 _c4dbgfbf(
"newline. firstns={}", first);
3596 const char c = rem[first];
3597 _c4dbgfbf(
"firstns={}='{}'", first, _c4prc(c));
// a first non-space character that is not a line break ends the indented block
3598 if(c !=
'\n' && c !=
'\r')
3600 _c4dbgfbf(
"done with indented block", first);
3604 else if(first !=
npos)
3607 _c4dbgfbf(
"copy all {} spaces", first);
// Filter for folded block scalars. Visible steps: all-whitespace pre-pass,
// extension of the contents to the chomp boundary, handling of leading
// newlines, a character loop that delegates newline runs to the folding
// helper, then chomping. Returns proc.result().
// NOTE(review): the signature, the condition guarding the early return and the
// per-character copy statements are absent here; the name is taken from the
// two forwarding entry points that call it.
3625template<
class EventHandler>
3626template<
class FilterProcessor>
3629 _c4dbgfbf(
"indentation={} before={}", indentation, _prs(proc.src));
3631 size_t contents_len = _handle_all_whitespace(proc, chomp);
// early exit (its condition is on a line absent from this excerpt)
3633 return proc.result();
3635 contents_len = _extend_to_chomp(proc, contents_len);
3637 _c4dbgfbf(
"to filter={}", _prs(proc.src.first(contents_len)));
3639 _filter_block_folded_newlines_leading(proc, indentation, contents_len);
3642 while(proc.has_more_chars(contents_len))
3644 const char curr = proc.curr();
3645 _c4dbgfbf(
"'{}' sofar={}", _c4prc(curr), _prs(proc.sofar()));
3650 _c4dbgfbf(
"found newline", curr);
3651 _filter_block_folded_newlines(proc, indentation, contents_len);
3663 _c4dbgfbf(
"before chomp: #tochomp={} sofar={}", proc.rem().len, _prs(proc.sofar()));
// what is left after the contents is handled by the chomping rule
3665 _filter_chomp(proc, chomp, indentation);
// trace the filtered scalar itself: the format has a single placeholder, so
// the scalar must be the only argument after it
3667 _c4dbgfbf(
"final={}", _prs(proc.sofar()));
3669 return proc.result();
// Forwarding entry point: hands a filter processor, the indentation and the
// chomp mode to _filter_block_folded() and returns its result.
// NOTE(review): the signature and the construction of `proc` are absent here;
// presumably the source/destination variant -- confirm.
3674template<
class EventHandler>
3678 return _filter_block_folded(proc, indentation, chomp);
// Forwarding entry point: hands a filter processor, the indentation and the
// chomp mode to _filter_block_folded() and returns its result.
// NOTE(review): the signature and the construction of `proc` are absent here;
// presumably the in-place variant -- confirm.
3681template<
class EventHandler>
3685 return _filter_block_folded(proc, indentation, chomp);
// Filters a plain scalar in place, using the scalar's own length as capacity,
// and asserts that the result is valid.
// NOTE(review): the signature and the return statement are absent here; the
// name is taken from the callers that pass (scalar, indentation).
3693template<
class EventHandler>
3696 _c4dbgpf(
"filtering plain scalar: s={}", _prs(s));
3697 FilterResult r = this->filter_scalar_plain_in_place(s, s.len, indentation);
3698 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, r.valid(), m_evt_handler->m_curr->pos);
3699 _c4dbgpf(
"filtering plain scalar: success! s={}", _prs(r.get()));
// Filters a single-quoted scalar in place, using the scalar's own length as
// capacity, and asserts that the result is valid.
// NOTE(review): the signature and the return statement are absent here; the
// name is taken from the callers that pass the scalar.
3705template<
class EventHandler>
3708 _c4dbgpf(
"filtering squo scalar: s={}", _prs(s));
3709 FilterResult r = this->filter_scalar_squoted_in_place(s, s.len);
3710 _RYML_ASSERT_PARSE_(this->callbacks(), r.valid(), m_evt_handler->m_curr->pos);
3711 _c4dbgpf(
"filtering squo scalar: success! s={}", _prs(r.get()));
// Filters a double-quoted scalar. When the first attempt yields an invalid
// result, the required length is read from it, a destination of that length is
// obtained from the arena, and the scalar is filtered again into it.
// NOTE(review): the signature, the first filter call that produces `r` and the
// return statements are absent here; the name is taken from the callers that
// pass the scalar.
3718template<
class EventHandler>
3721 _c4dbgpf(
"filtering dquo scalar: s={}", _prs(s));
// common case: the first attempt is valid
3723 if(C4_LIKELY(r.valid()))
3725 _c4dbgpf(
"filtering dquo scalar: success! s={}", _prs(r.get()));
// otherwise the result carries the length that is needed
3730 const size_t len = r.required_len();
3731 _c4dbgpf(
"filtering dquo scalar: not enough space: needs {}, have {}", len, s.len);
// `s` is passed by address to the arena allocation (presumably so it can be
// re-pointed if the arena moves -- TODO confirm)
3732 substr dst = _alloc_arena(len, &s);
3733 _c4dbgpf(
"filtering dquo scalar: dst.len={}", dst.len);
3736 _RYML_ASSERT_PARSE_(this->callbacks(), dst.len == len, m_evt_handler->m_curr->pos);
3737 FilterResult rsd = this->filter_scalar_dquoted(s, dst);
3738 _c4dbgpf(
"filtering dquo scalar: ... result now needs {} was {}", rsd.required_len(), len);
3739 _RYML_ASSERT_PARSE_(this->callbacks(), rsd.required_len() <= len, m_evt_handler->m_curr->pos);
// this validity check uses the CHECK macro, unlike the ASSERT macro above
3740 _RYML_CHECK_PARSE_(m_evt_handler->m_stack.m_callbacks, rsd.valid(), m_evt_handler->m_curr->pos);
3741 _c4dbgpf(
"filtering dquo scalar: success! s={}", _prs(rsd.get()));
// Makes room for one extra trailing newline on a scalar. When the scalar lies
// inside the source buffer its bytes are moved one position to the left;
// otherwise a destination of s.len + 1 is obtained from the arena and the
// bytes are copied there.
// NOTE(review): the signature, the statements between the visible ones
// (including any adjustment of s.str after the move) and the returns are
// absent here; the name is taken from the block-scalar callers.
3751template<
class EventHandler>
3754 if(s.is_sub(_buf()))
// there must be at least one byte in front of the scalar to move into
3756 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.str > _buf().str, m_evt_handler->m_curr->pos);
3757 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.str-1 >= _buf().str, m_evt_handler->m_curr->pos);
// the ranges overlap, hence memmove
3759 memmove(s.str - 1, s.str, s.len);
3761 s.str[s.len] =
'\n';
// the scalar is not inside the source buffer: take s.len + 1 from the arena
3767 substr dst = _alloc_arena(s.len + 1, &s);
3769 memcpy(dst.str, s.str, s.len);
// Filters a literal block scalar in place. When the in-place result is not
// valid it must need exactly one more byte than the scalar has; the scalar is
// then moved left and a newline appended.
// NOTE(review): the signature, the declaration of `result`, the valid-branch
// body and the return are absent here; the name is taken from the callers
// that pass (scalar, indentation, chomp).
3775template<
class EventHandler>
3778 _c4dbgpf(
"filtering block literal scalar: s={}", _prs(s));
3779 FilterResult r = this->filter_scalar_block_literal_in_place(s, s.len, indentation, chomp);
3781 if(C4_LIKELY(r.valid()))
3787 _c4dbgpf(
"filtering block literal scalar: not enough space: needs {}, have {}", r.required_len(), s.len);
// the only shortfall tolerated here is a single byte
3788 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, r.required_len() == s.len + 1, m_evt_handler->m_curr->pos);
3791 result = _move_scalar_left_and_add_newline(s);
3793 _c4dbgpf(
"filtering block literal scalar: success! s={}", _prs(result));
// Filters a folded block scalar in place. When the in-place result is not
// valid it must need exactly one more byte than the scalar has; the scalar is
// then moved left and a newline appended.
// NOTE(review): the signature, the declaration of `result`, the valid-branch
// body and the return are absent here; the name is taken from the callers
// that pass (scalar, indentation, chomp).
3799template<
class EventHandler>
3802 _c4dbgpf(
"filtering block folded scalar: s={}", _prs(s));
3803 FilterResult r = this->filter_scalar_block_folded_in_place(s, s.len, indentation, chomp);
3805 if(C4_LIKELY(r.valid()))
3811 _c4dbgpf(
"filtering block folded scalar: not enough space: needs {}, have {}", r.required_len(), s.len);
// the only shortfall tolerated here is a single byte
3812 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, r.required_len() == s.len + 1, m_evt_handler->m_curr->pos);
3815 result = _move_scalar_left_and_add_newline(s);
3817 _c4dbgpf(
"filtering block folded scalar: success! s={}", _prs(result));
// Key-side gate for plain scalars: filters the scalar when the
// scalar_filtering option is on, otherwise marks the key scalar as unfiltered
// on the event handler.
// NOTE(review): the signature, the outer "needs filtering" condition and the
// remaining returns are absent here; the block name is a guess.
3824template<
class EventHandler>
3829 if(m_options.scalar_filtering())
3831 return _filter_scalar_plain(sc.scalar, indentation);
3835 _c4dbgp(
"plain scalar left unfiltered");
3836 m_evt_handler->mark_key_scalar_unfiltered();
3841 _c4dbgp(
"plain scalar doesn't need filtering");
// Value-side gate for plain scalars: filters the scalar when the
// scalar_filtering option is on, otherwise marks the value scalar as
// unfiltered on the event handler.
// NOTE(review): the signature, the outer "needs filtering" condition and the
// remaining returns are absent here; the block name is a guess.
3846template<
class EventHandler>
3851 if(m_options.scalar_filtering())
3853 return _filter_scalar_plain(sc.scalar, indentation);
3857 _c4dbgp(
"plain scalar left unfiltered");
3858 m_evt_handler->mark_val_scalar_unfiltered();
3863 _c4dbgp(
"plain scalar doesn't need filtering");
// Key-side gate for single-quoted scalars: filters the scalar when the
// scalar_filtering option is on, otherwise marks the key scalar as unfiltered.
// NOTE(review): the signature, the outer "needs filtering" condition and the
// remaining returns are absent here; the block name is a guess.
3871template<
class EventHandler>
3876 if(m_options.scalar_filtering())
3878 return _filter_scalar_squot(sc.scalar);
3882 _c4dbgp(
"squo key scalar left unfiltered");
3883 m_evt_handler->mark_key_scalar_unfiltered();
3888 _c4dbgp(
"squo key scalar doesn't need filtering");
// Value-side gate for single-quoted scalars: filters the scalar when the
// scalar_filtering option is on, otherwise marks the value scalar as
// unfiltered.
// NOTE(review): the signature, the outer "needs filtering" condition and the
// remaining returns are absent here; the block name is a guess.
3893template<
class EventHandler>
3898 if(m_options.scalar_filtering())
3900 return _filter_scalar_squot(sc.scalar);
3904 _c4dbgp(
"squo val scalar left unfiltered");
3905 m_evt_handler->mark_val_scalar_unfiltered();
3910 _c4dbgp(
"squo val scalar doesn't need filtering");
// Key-side gate for double-quoted scalars: filters the scalar when the
// scalar_filtering option is on, otherwise marks the key scalar as unfiltered.
// NOTE(review): the signature, the outer "needs filtering" condition and the
// remaining returns are absent here; the block name is a guess.
3918template<
class EventHandler>
3923 if(m_options.scalar_filtering())
3925 return _filter_scalar_dquot(sc.scalar);
3929 _c4dbgp(
"dquo scalar left unfiltered");
3930 m_evt_handler->mark_key_scalar_unfiltered();
3935 _c4dbgp(
"dquo scalar doesn't need filtering");
// Value-side gate for double-quoted scalars: filters the scalar when the
// scalar_filtering option is on, otherwise marks the value scalar as
// unfiltered.
// NOTE(review): the signature, the outer "needs filtering" condition and the
// remaining returns are absent here; the block name is a guess.
3940template<
class EventHandler>
3945 if(m_options.scalar_filtering())
3947 return _filter_scalar_dquot(sc.scalar);
3951 _c4dbgp(
"dquo scalar left unfiltered");
3952 m_evt_handler->mark_val_scalar_unfiltered();
3957 _c4dbgp(
"dquo scalar doesn't need filtering");
// Key-side gate for literal block scalars: filters with the block's own
// indentation and chomp mode when the scalar_filtering option is on, otherwise
// marks the key scalar as unfiltered.
// NOTE(review): the signature and the remaining return are absent here; the
// block name is a guess.
3965template<
class EventHandler>
3968 if(m_options.scalar_filtering())
3970 return _filter_scalar_literal(sb.scalar, sb.indentation, sb.chomp);
3974 _c4dbgp(
"literal scalar left unfiltered");
3975 m_evt_handler->mark_key_scalar_unfiltered();
// Value-side gate for literal block scalars: filters with the block's own
// indentation and chomp mode when the scalar_filtering option is on, otherwise
// marks the value scalar as unfiltered.
// NOTE(review): the signature and the remaining return are absent here; the
// block name is a guess.
3980template<
class EventHandler>
3983 if(m_options.scalar_filtering())
3985 return _filter_scalar_literal(sb.scalar, sb.indentation, sb.chomp);
3989 _c4dbgp(
"literal scalar left unfiltered");
3990 m_evt_handler->mark_val_scalar_unfiltered();
// Key-side gate for folded block scalars: filters with the block's own
// indentation and chomp mode when the scalar_filtering option is on, otherwise
// marks the key scalar as unfiltered.
// NOTE(review): the signature and the remaining return are absent here; the
// block name is a guess.
3998template<
class EventHandler>
4001 if(m_options.scalar_filtering())
4003 return _filter_scalar_folded(sb.scalar, sb.indentation, sb.chomp);
4007 _c4dbgp(
"folded scalar left unfiltered");
4008 m_evt_handler->mark_key_scalar_unfiltered();
// Value-side gate for folded block scalars: filters with the block's own
// indentation and chomp mode when the scalar_filtering option is on, otherwise
// marks the value scalar as unfiltered.
// NOTE(review): the signature and the remaining return are absent here; the
// block name is a guess.
4013template<
class EventHandler>
4016 if(m_options.scalar_filtering())
4018 return _filter_scalar_folded(sb.scalar, sb.indentation, sb.chomp);
4022 _c4dbgp(
"folded scalar left unfiltered");
4023 m_evt_handler->mark_val_scalar_unfiltered();
// Adds parser flags. The visible lines only build the debug trace: the flags
// being added, the state's flags before, and the combination (flags | on) are
// each rendered to text and printed with the state's level.
// NOTE(review): the statement that actually updates s->flags, and any guard
// around the trace, are absent from this excerpt.
4035template<
class EventHandler>
4036void ParseEngine<EventHandler>::add_flags(
ParserFlag_t on)
// one 64-byte scratch buffer per rendered flag set
4039 char buf1_[64], buf2_[64], buf3_[64];
4040 csubstr buf1 = detail::_parser_flags_to_str(buf1_, on);
4041 csubstr buf2 = detail::_parser_flags_to_str(buf2_, s->flags);
4042 csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags|on);
4043 _c4dbgpf(
"state[{}]: add {}: before={} after={}", s->level, buf1, buf2, buf3);
// Adds one set of parser flags and removes another. The visible lines only
// build the debug trace; the "after" value is rendered as
// (~off) & (flags | on), i.e. removal wins over addition.
// NOTE(review): the signature and the statement that updates s->flags are
// absent from this excerpt; the block name is a guess based on the sibling
// add_flags / rem_flags definitions.
4047template<
class EventHandler>
4051 char buf1_[64], buf2_[64], buf3_[64], buf4_[64];
4052 csubstr buf1 = detail::_parser_flags_to_str(buf1_, on);
4053 csubstr buf2 = detail::_parser_flags_to_str(buf2_, off);
4054 csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags);
4055 csubstr buf4 = detail::_parser_flags_to_str(buf4_, (~off)&((s->flags|on)));
4056 _c4dbgpf(
"state[{}]: add {} / rem {}: before={} after={}", s->level, buf1, buf2, buf3, buf4);
// Removes parser flags. The visible lines only build the debug trace: the
// flags being removed, the state's flags before, and the result
// (flags & ~off) are each rendered to text and printed with the state's level.
// NOTE(review): the statement that actually updates s->flags, and any guard
// around the trace, are absent from this excerpt.
4062template<
class EventHandler>
4063void ParseEngine<EventHandler>::rem_flags(
ParserFlag_t off)
// one 64-byte scratch buffer per rendered flag set
4066 char buf1_[64], buf2_[64], buf3_[64];
4067 csubstr buf1 = detail::_parser_flags_to_str(buf1_, off);
4068 csubstr buf2 = detail::_parser_flags_to_str(buf2_, s->flags);
4069 csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags&(~off));
4070 _c4dbgpf(
"state[{}]: rem {}: before={} after={}", s->level, buf1, buf2, buf3);
// Renders a flag set as text into `buf` and returns the written prefix
// buf.first(pos). The _prflag macro handles one flag: when all of its bits are
// set in `flags`, the flag's identifier text (stringized with #fl) is copied
// into the buffer, provided it fits.
// NOTE(review): the signature, the declaration of `pos`, several macro lines
// and the list of _prflag invocations are absent here; the name is taken from
// the detail::_parser_flags_to_str calls in the flag setters.
4077 bool gotone =
false;
4079 #define _prflag(fl) \
4080 if((flags & fl) == (fl)) \
4084 if(pos + 1 < buf.len) \
4088 csubstr fltxt = #fl; \
4089 if(pos + fltxt.len <= buf.len) \
4090 memcpy(buf.str + pos, fltxt.str, fltxt.len); \
// the write position must not have run past the buffer
4120 _RYML_CHECK_BASIC(pos <= buf.len);
4122 return buf.first(pos);
// Returns the part of the source buffer that starts at loc.offset; the offset
// must lie inside the buffer.
// NOTE(review): the signature line is absent here; the block name is a guess.
4132template<
class EventHandler>
4135 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, loc.offset < _buf().len);
4136 return _buf().sub(loc.offset);
// Maps a pointer into the source buffer to a location (name, offset, line,
// column) using the table of newline offsets.
// A null pointer yields a location with the current file name and zeros.
// NOTE(review): the signature, the declarations of `src` and `loc`, the bodies
// of both search loops, the condition that chooses between them, the column
// computation for the first line and the return are absent here; the block
// name is a guess.
4139template<
class EventHandler>
4142 if(C4_UNLIKELY(val ==
nullptr))
4143 return {m_evt_handler->m_curr->pos.name, 0, 0, 0};
// location tracking must have been enabled in the parser options
4144 _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_options.locations());
// the newline table must be built and non-empty
4147 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_options.locations());
4148 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, !_locations_dirty());
4149 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets !=
nullptr);
4150 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_size > 0);
// the pointer must lie inside [src.begin(), src.end()], or both must be null
4153 _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, val !=
nullptr || src.str ==
nullptr);
4154 _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, (val >= src.begin() && val <= src.end()) || (src.str ==
nullptr && val ==
nullptr));
4156 using lineptr_type =
size_t const* C4_RESTRICT;
4157 lineptr_type lineptr =
nullptr;
4158 size_t offset = (size_t)(val - src.begin());
// one search walks the table front to back
4162 for(lineptr_type curr = m_newline_offsets, last = m_newline_offsets + m_newline_offsets_size; curr < last; ++curr)
// the other halves a [lineptr, lineptr + count) range each step
4177 size_t count = m_newline_offsets_size;
4178 lineptr = m_newline_offsets;
4181 size_t step = count >> 1;
4182 lineptr_type it = lineptr + step;
// the entry found must lie in the table and be greater than the offset
4194 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, lineptr >= m_newline_offsets);
4195 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, lineptr <= m_newline_offsets + m_newline_offsets_size);
4196 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, *lineptr > offset);
4198 loc.
name = m_evt_handler->m_curr->pos.name;
4199 loc.offset = offset;
// the line number is the index of the entry in the table
4200 loc.line = (size_t)(lineptr - m_newline_offsets);
// past the first line, the column counts from just after the previous newline
4201 if(lineptr > m_newline_offsets)
4202 loc.col = (offset - *(lineptr-1) - 1u);
// Builds the table of newline offsets for the source: one entry per '\n' (its
// index in src), followed by a final entry equal to src.len.
// NOTE(review): the declaration of `src` is absent from this excerpt.
4208template<
class EventHandler>
4209void ParseEngine<EventHandler>::_prepare_locations()
// one entry per newline plus the terminating entry
4212 size_t numnewlines = 1u + src.count(
'\n');
4213 _resize_locations(numnewlines);
// the table is refilled from scratch
4214 m_newline_offsets_size = 0;
4215 for(
size_t i = 0; i < src.len; i++)
4216 if(src.str[i] ==
'\n')
4217 m_newline_offsets[m_newline_offsets_size++] = i;
// terminating entry: the end of the source
4218 m_newline_offsets[m_newline_offsets_size++] = src.len;
4219 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_size == numnewlines);
// Ensures the newline-offset table can hold `numnewlines` entries. The request
// is raised to at least 16; when it exceeds the current capacity the old array
// (if any) is freed and a new one is allocated through the callbacks. The old
// contents are not copied over.
4222template<
class EventHandler>
4223void ParseEngine<EventHandler>::_resize_locations(
size_t numnewlines)
// minimum capacity of 16 entries
4225 numnewlines = numnewlines >= 16 ? numnewlines : 16;
4226 if(numnewlines > m_newline_offsets_capacity)
4228 if(m_newline_offsets)
4229 _RYML_CB_FREE(m_evt_handler->m_stack.m_callbacks, m_newline_offsets,
size_t, m_newline_offsets_capacity);
// the previous pointer is passed along as the allocation hint
4230 m_newline_offsets = _RYML_CB_ALLOC_HINT(m_evt_handler->m_stack.m_callbacks,
size_t, numnewlines, m_newline_offsets);
4231 m_newline_offsets_capacity = numnewlines;
// Reports whether the newline-offset table is empty (size zero), i.e. has not
// been filled yet.
4235template<
class EventHandler>
4236bool ParseEngine<EventHandler>::_locations_dirty()
const
4238 return !m_newline_offsets_size;
// Flow-context helper: looks at the remainder of the current line, detects
// leading space/tab, and consumes the rest of the line when it is a comment
// (starts with '#').
// NOTE(review): the statement that advances over the leading whitespace is
// absent from this excerpt.
4246template<
class EventHandler>
4247void ParseEngine<EventHandler>::_handle_flow_skip_whitespace()
4250 if(m_evt_handler->m_curr->line_contents.rem.len > 0)
4252 if(m_evt_handler->m_curr->line_contents.rem.str[0] ==
' ' || m_evt_handler->m_curr->line_contents.rem.str[0] ==
'\t')
4254 _c4dbgpf(
"starts with whitespace: '{}'", _c4prc(m_evt_handler->m_curr->line_contents.rem.str[0]));
// a comment runs to the end of the line: consume all of it
4258 if(m_evt_handler->m_curr->line_contents.rem.begins_with(
'#'))
4260 _c4dbgpf(
"it's a comment: {}", m_evt_handler->m_curr->line_contents.rem);
4261 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
// Flow-context check at the start of a line: when the state reports
// indentation_lt() and the line still has content other than spaces/tabs after
// its indentation, the position is advanced past the indentation and a
// "bad indentation" error is raised.
4267template<
class EventHandler>
4268void ParseEngine<EventHandler>::_handle_flow_line_beginning()
4270 _c4dbgpf(
"flow: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
// must be called at the beginning of a line
4271 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->at_line_beginning(), m_evt_handler->m_curr->pos);
4272 if(C4_UNLIKELY(m_evt_handler->m_curr->indentation_lt()))
// what follows the line's indentation
4274 csubstr trimmed = m_evt_handler->m_curr->line_contents.rem.
sub(m_evt_handler->m_curr->line_contents.indentation);
4275 _c4dbgpf(
"flow: after indentation={}", _prs(trimmed));
// only a line with real content is an error
4276 if(trimmed.len && trimmed.triml(
" \t").len)
// advance first so the error is reported at the offending content
4278 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
4279 _c4err(
"bad indentation");
/** Advance over the leading spaces/tabs of the current line remainder (block
 * context). When the remainder contains only blanks, all of it is consumed.
 * NOTE(review): the value returned (size_t) is produced on lines not covered
 * by this comment - confirm against the callers which offset is expected. */
4284template<
class EventHandler>
4285size_t ParseEngine<EventHandler>::_handle_block_skip_leading_whitespace()
// offset before anything is skipped
4287 const size_t mark = m_evt_handler->m_curr->pos.offset;
// index of the first character that is neither space nor tab (npos if none)
4288 const size_t firstpos = m_evt_handler->m_curr->line_contents.rem.first_not_of(
" \t");
4289 _c4dbgpf(
"block: mark={} firstpos={}", mark, firstpos);
4290 if(firstpos !=
npos)
4292 _c4dbgp(
"block: non empty line");
// consume only the leading blanks
4293 _line_progressed(firstpos);
4298 _c4dbgp(
"block: rest of line is whitespace");
// nothing but blanks: consume the whole remainder
4299 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
/** Validate the leading span delimited by @p start_mark and @p end_mark
 * (end_mark must not be smaller than start_mark): raises
 * "invalid tab character to the left" when a tab in `leading` comes before
 * its first non-blank character, or when `leading` has no non-blank character.
 * NOTE(review): `leading` is presumably the source text between the two
 * marks - confirm. */
4304template<
class EventHandler>
4305void ParseEngine<EventHandler>::_handle_block_check_leading_tabs(
size_t start_mark,
size_t end_mark)
4307 _c4dbgpf(
"block: start_mark={} end_mark={}", start_mark, end_mark);
4308 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, end_mark >= start_mark, m_evt_handler->m_curr->pos);
// an empty span has nothing to check
4309 if(end_mark != start_mark)
4312 _c4dbgpf(
"block: leading[{}-{}]={}", start_mark, end_mark, _prs(leading,
true));
// position of the first tab in the span
4313 size_t pos = leading.find(
'\t');
// position of the first character that is neither space nor tab
4316 size_t fno = leading.first_not_of(
" \t");
4317 if(fno ==
npos || pos < fno)
4318 _c4err(
"invalid tab character to the left");
4328template<
class EventHandler>
4329void ParseEngine<EventHandler>::_handle_colon()
4331 size_t curr = m_evt_handler->m_curr->pos.line;
4332 if(C4_UNLIKELY(m_prev_colon !=
npos && curr == m_prev_colon))
4334 _c4dbgpf(
"colon: prevline={} currline={}", m_prev_colon, curr);
4335 _c4err(
"two colons on same line");
4337 _c4dbgpf(
"colon: set prevline={}->{}", m_prev_colon, curr);
4338 m_prev_colon = curr;
/** Write @p str into the slot dst->annotations[dst->num_entries]; the slot's
 * indentation, line and orig fields are value-initialized. Asserts that the
 * slot index lies within the annotations array. */
4341template<
class EventHandler>
4342void ParseEngine<EventHandler>::_add_annotation(Annotation *C4_RESTRICT dst,
csubstr str)
4344 _c4dbgpf(
"store annotation[{}]: {}", dst->num_entries, _prs(str));
// the array has a fixed number of slots
4345 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, dst->num_entries < C4_COUNTOF(dst->annotations), m_evt_handler->m_curr->pos);
4346 dst->annotations[dst->num_entries].str = str;
// no position information is given in this overload
4347 dst->annotations[dst->num_entries].indentation = {};
4348 dst->annotations[dst->num_entries].line = {};
4349 dst->annotations[dst->num_entries].orig = {};
/** Write @p str together with its @p indentation and @p line into the slot
 * dst->annotations[dst->num_entries]; the slot's orig field is
 * value-initialized. Asserts that the slot index lies within the annotations
 * array. */
4353template<
class EventHandler>
4354void ParseEngine<EventHandler>::_add_annotation(Annotation *C4_RESTRICT dst,
csubstr str,
size_t indentation,
size_t line)
4356 _c4dbgpf(
"store annotation[{}]: '{}' indentation={} line={}", dst->num_entries, _maybe_null_str(str), indentation, line);
// the array has a fixed number of slots
4357 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, dst->num_entries < C4_COUNTOF(dst->annotations), m_evt_handler->m_curr->pos);
// special case: an entry is already stored and entry 0 sits on this same line
4358 if(C4_UNLIKELY(dst->num_entries && dst->annotations[0].line == line))
4362 dst->annotations[dst->num_entries].str = str;
4363 dst->annotations[dst->num_entries].indentation = indentation;
4364 dst->annotations[dst->num_entries].line = line;
4365 dst->annotations[dst->num_entries].orig = {};
/** Write @p str together with its @p indentation, @p line and @p orig into
 * the slot dst->annotations[dst->num_entries]. Asserts that the slot index
 * lies within the annotations array. */
4369template<
class EventHandler>
4370void ParseEngine<EventHandler>::_add_annotation(Annotation *C4_RESTRICT dst,
csubstr str,
size_t indentation,
size_t line,
csubstr orig)
4372 _c4dbgpf(
"store annotation[{}]: '{}'->'{}' indentation={} line={}", dst->num_entries, orig, _maybe_null_str(str), indentation, line);
// the array has a fixed number of slots
4373 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, dst->num_entries < C4_COUNTOF(dst->annotations), m_evt_handler->m_curr->pos);
// special case: an entry is already stored and entry 0 sits on this same line
4374 if(C4_UNLIKELY(dst->num_entries && dst->annotations[0].line == line))
4378 dst->annotations[dst->num_entries].str = str;
4379 dst->annotations[dst->num_entries].indentation = indentation;
4380 dst->annotations[dst->num_entries].line = line;
4381 dst->annotations[dst->num_entries].orig = orig;
4385template<
class EventHandler>
4386bool ParseEngine<EventHandler>::_annotations_require_key_container()
const
4388 return m_pending_tags.num_entries > 1 || m_pending_anchors.num_entries > 1;
/** Apply the pending tag/anchor annotations to the key. With exactly one
 * pending tag (anchor), it is set as the key tag (anchor) and the pending
 * list is cleared; with several pending anchors "too many anchors" is
 * raised. The key and the val are set to empty plain scalars.
 * NOTE(review): going by the name, this is for an unexpected flow token met
 * while a key is being read - confirm against the callers. */
4391template<
class EventHandler>
4392bool ParseEngine<EventHandler>::_handle_annotations_before_unexpected_flow_token_rkey()
// neither tags nor anchors are pending
4394 if(!(m_pending_tags.num_entries | m_pending_anchors.num_entries))
4396 _c4dbgpf(
"handle_annotations_before_unexpected_flow_comma_rkey, node={}", m_evt_handler->m_curr->node_id);
4397 if(m_pending_tags.num_entries)
4399 _c4dbgpf(
"handle_annotations_before_unexpected_flow_comma_rkey, #tags={}", m_pending_tags.num_entries);
// a single tag belongs to the key
4400 if(C4_LIKELY(m_pending_tags.num_entries == 1))
4402 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4403 _clear_annotations(&m_pending_tags);
4410 if(m_pending_anchors.num_entries)
// NOTE(review): the message reports #anchors, but the value passed is
// m_pending_tags.num_entries - looks like a copy/paste slip, confirm
4412 _c4dbgpf(
"handle_annotations_before_unexpected_flow_comma, #anchors={}", m_pending_tags.num_entries);
// a single anchor belongs to the key
4413 if(C4_LIKELY(m_pending_anchors.num_entries == 1))
4415 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4416 _clear_annotations(&m_pending_anchors);
4420 _c4err(
"too many anchors");
// the key/val pair carries no scalars: emit empty ones
4423 m_evt_handler->set_key_scalar_plain_empty();
4424 m_evt_handler->set_val_scalar_plain_empty();
/** Apply the pending tag/anchor annotations to a key scalar. With exactly
 * one pending tag (anchor), it is set as the key tag (anchor) and the pending
 * list is cleared; with several pending anchors "too many anchors" is
 * raised. */
4428template<
class EventHandler>
4429void ParseEngine<EventHandler>::_handle_annotations_before_blck_key_scalar()
4431 _c4dbgpf(
"annotations_before_blck_key_scalar, node={}", m_evt_handler->m_curr->node_id);
4432 if(m_pending_tags.num_entries)
4434 _c4dbgpf(
"annotations_before_blck_key_scalar, #tags={}", m_pending_tags.num_entries);
// a single tag belongs to the key
4435 if(C4_LIKELY(m_pending_tags.num_entries == 1))
4437 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4438 _clear_annotations(&m_pending_tags);
4445 if(m_pending_anchors.num_entries)
4447 _c4dbgpf(
"annotations_before_blck_key_scalar, #anchors={}", m_pending_anchors.num_entries);
// a single anchor belongs to the key
4448 if(C4_LIKELY(m_pending_anchors.num_entries == 1))
4450 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4451 _clear_annotations(&m_pending_anchors);
4455 _c4err(
"too many anchors");
/** Apply the pending tag/anchor annotations to a val scalar. With exactly
 * one pending tag (anchor), it is set as the val tag (anchor) and the pending
 * list is cleared; with several pending anchors "too many anchors" is
 * raised. */
4460template<
class EventHandler>
4461void ParseEngine<EventHandler>::_handle_annotations_before_blck_val_scalar()
4463 _c4dbgpf(
"annotations_before_blck_val_scalar, node={}", m_evt_handler->m_curr->node_id);
4464 if(m_pending_tags.num_entries)
4466 _c4dbgpf(
"annotations_before_blck_val_scalar, #tags={}", m_pending_tags.num_entries);
// a single tag belongs to the val
4467 if(C4_LIKELY(m_pending_tags.num_entries == 1))
4469 m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4470 _clear_annotations(&m_pending_tags);
4477 if(m_pending_anchors.num_entries)
4479 _c4dbgpf(
"annotations_before_blck_val_scalar, #anchors={}", m_pending_anchors.num_entries);
// a single anchor belongs to the val
4480 if(C4_LIKELY(m_pending_anchors.num_entries == 1))
4482 m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4483 _clear_annotations(&m_pending_anchors);
4487 _c4err(
"too many anchors");
4492template<
class EventHandler>
4493void ParseEngine<EventHandler>::_handle_annotations_before_start_mapblck(
size_t current_line)
4495 _c4dbgpf(
"annotations_before_start_mapblck, current_line={}", current_line);
4496 if(m_pending_tags.num_entries == 2)
4498 _c4dbgp(
"2 tags, setting entry 0");
4499 m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4501 else if(m_pending_tags.num_entries == 1)
4503 _c4dbgpf(
"1 tag. line={}, curr={}", m_pending_tags.annotations[0].line, current_line);
4504 if(m_pending_tags.annotations[0].line < current_line)
4506 _c4dbgp(
"...tag is for the map. setting it.");
4507 m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4508 _clear_annotations(&m_pending_tags);
4512 if(m_pending_anchors.num_entries == 2)
4514 _c4dbgp(
"2 anchors, setting entry 0");
4515 m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4517 else if(m_pending_anchors.num_entries == 1)
4519 _c4dbgpf(
"1 anchor. line={}, curr={}", m_pending_anchors.annotations[0].line, current_line);
4520 if(m_pending_anchors.annotations[0].line < current_line)
4522 _c4dbgp(
"...anchor is for the map. setting it.");
4523 m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4524 _clear_annotations(&m_pending_anchors);
/** Assign pending annotations to a block map that starts as a key. Tags and
 * anchors are handled alike, by number of pending entries. With one entry, it
 * is set on the key and the pending list cleared, but only when the
 * annotation is not on the current line. With two entries, entry 0 is set on
 * the key. */
4529template<
class EventHandler>
4530void ParseEngine<EventHandler>::_handle_annotations_before_start_mapblck_as_key()
4532 _c4dbgp(
"annotations_before_start_mapblck_as_key");
4533 switch(m_pending_tags.num_entries)
// NOTE(review): the format string ends in "currline=" with no placeholder
// for the third argument - confirm
4536 _c4dbgpf(
"annotations_after_start_mapblck_as_key: 1 tag={} line={} currline=", _prs(m_pending_tags.annotations[0].str), m_pending_tags.annotations[0].line, m_evt_handler->m_curr->pos.line);
// a tag from another line is for the map itself
4537 if(m_pending_tags.annotations[0].line != m_evt_handler->m_curr->pos.line)
4539 _c4dbgp(
"annotations_after_start_mapblck_as_key: is map tag");
4540 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4541 _clear_annotations(&m_pending_tags);
4545 _c4dbgpf(
"annotations_after_start_mapblck_as_key: 2 tags: {} -> {}", _prs(m_pending_tags.annotations[0].str), _prs(m_pending_tags.annotations[1].str));
4546 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4549 switch(m_pending_anchors.num_entries)
// NOTE(review): same missing placeholder after "currline=" - confirm
4552 _c4dbgpf(
"annotations_after_start_mapblck_as_key: 1 anchor={} line={} currline=", m_pending_anchors.annotations[0].str, m_pending_anchors.annotations[0].line, m_evt_handler->m_curr->pos.line);
// an anchor from another line is for the map itself
4553 if(m_pending_anchors.annotations[0].line != m_evt_handler->m_curr->pos.line)
4555 _c4dbgp(
"annotations_after_start_mapblck_as_key: is map anchor");
4556 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4557 _clear_annotations(&m_pending_anchors);
4561 _c4dbgpf(
"annotations_after_start_mapblck_as_key: 2 anchors: {} -> {}", m_pending_anchors.annotations[0].str, m_pending_anchors.annotations[1].str);
4562 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
/** After a block map has started: assign the remaining pending annotations
 * to the key and set the indentation. At most two tags and two anchors may
 * be pending. With one entry, entry 0 is set on the key; with two entries,
 * entry 1 is set on the key. The pending list is cleared in both cases. When
 * anything is pending, @p key_indentation is first replaced by the result of
 * _select_indentation_from_annotations(). */
4567template<
class EventHandler>
4568void ParseEngine<EventHandler>::_handle_annotations_and_indentation_after_start_mapblck(
size_t key_indentation,
size_t key_line)
4570 _c4dbgp(
"annotations_after_start_mapblck");
4571 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries <= 2, m_evt_handler->m_curr->pos);
4572 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.num_entries <= 2, m_evt_handler->m_curr->pos);
4573 if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
// the annotations may change which indentation applies
4575 key_indentation = _select_indentation_from_annotations(key_indentation, key_line);
4576 switch(m_pending_tags.num_entries)
4579 _c4dbgpf(
"annotations_after_start_mapblck: 1 tag: {}", _prs(m_pending_tags.annotations[0].str));
4580 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4581 _clear_annotations(&m_pending_tags);
4584 _c4dbgpf(
"annotations_after_start_mapblck: 2 tags: {} -> {}", _prs(m_pending_tags.annotations[0].str), _prs(m_pending_tags.annotations[1].str));
// with two tags, the second one is for the key
4585 m_evt_handler->set_key_tag(m_pending_tags.annotations[1].str);
4586 _clear_annotations(&m_pending_tags);
4589 switch(m_pending_anchors.num_entries)
// NOTE(review): the format string has two placeholders but only one
// argument is passed - confirm
4592 _c4dbgpf(
"annotations_after_start_mapblck: 1 anchors: {} -> {}", m_pending_anchors.annotations[0].str);
4593 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4594 _clear_annotations(&m_pending_anchors);
4597 _c4dbgpf(
"annotations_after_start_mapblck: 2 anchors: {} -> {}", m_pending_anchors.annotations[0].str, m_pending_anchors.annotations[1].str);
// with two anchors, the second one is for the key
4598 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[1].str);
4599 _clear_annotations(&m_pending_anchors);
4603 _set_indentation(key_indentation);
/** Choose the indentation to use when annotations are pending (at least one
 * tag or anchor must be pending). One pending annotation is selected in
 * `curr`, starting from the first anchor if there is one, else the first
 * tag. Every pending anchor and tag is then compared against `curr` by line
 * and by indentation.
 * @return @p val_indentation when the selected annotation is on a line
 * before @p val_line, otherwise the indentation of the selected annotation.
 * NOTE(review): the comparisons presumably re-point `curr` at the annotation
 * being compared - confirm. */
4606template<
class EventHandler>
4607size_t ParseEngine<EventHandler>::_select_indentation_from_annotations(
size_t val_indentation,
size_t val_line)
4609 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries | m_pending_anchors.num_entries, m_evt_handler->m_curr->pos);
// initial candidate: first anchor if any, else first tag
4611 auto const *C4_RESTRICT curr = m_pending_anchors.num_entries ? &m_pending_anchors.annotations[0] : &m_pending_tags.annotations[0];
4612 for(
size_t i = 0; i < m_pending_anchors.num_entries; ++i)
4614 auto const& C4_RESTRICT ann = m_pending_anchors.annotations[i];
// on a later line than the candidate
4615 if(ann.line > curr->line)
// not on a later line, but less indented than the candidate
4617 else if(ann.indentation < curr->indentation)
4620 for(
size_t j = 0; j < m_pending_tags.num_entries; ++j)
4622 auto const& C4_RESTRICT ann = m_pending_tags.annotations[j];
// on a later line than the candidate
4623 if(ann.line > curr->line)
// not on a later line, but less indented than the candidate
4625 else if(ann.indentation < curr->indentation)
4628 return curr->line < val_line ? val_indentation : curr->indentation;
4631template<
class EventHandler>
4632void ParseEngine<EventHandler>::_handle_keyref(
csubstr alias)
4634 if(C4_LIKELY(!(m_pending_anchors.num_entries | m_pending_tags.num_entries)))
4635 m_evt_handler->set_key_ref(alias);
4637 _c4err(
"aliases cannot have anchors or tags");
4640template<
class EventHandler>
4641void ParseEngine<EventHandler>::_handle_valref(
csubstr alias)
4643 if(C4_LIKELY(!(m_pending_anchors.num_entries | m_pending_tags.num_entries)))
4644 m_evt_handler->set_val_ref(alias);
4646 _c4err(
"aliases cannot have anchors or tags");
4649template<
class EventHandler>
4652 _c4dbgpf(
"resolving tag: {} curr_doc={}", _prs(tag), m_evt_handler->m_curr_doc);
4657 _c4dbgpf(
"resolving tag: found in cache[{}]: {}", ret.pos, _prs(ret.resolved));
4658 return ret.resolved;
4660 _c4dbgpf(
"resolving tag: not in cache: {} curr_doc={}", _prs(tag), m_evt_handler->m_curr_doc);
4662 substr buf = m_evt_handler->arena_rem();
4663 TagDirectives const& C4_RESTRICT tds = m_evt_handler->tag_directives();
4664 csubstr ttag = tds.resolve(buf, &bufsz, tag, m_evt_handler->m_curr_doc,
4665 m_evt_handler->m_curr->pos,
4666 m_evt_handler->m_stack.m_callbacks);
4667 _c4dbgpf(
"resolving tag: bufsz={} ttag.len={} !!ttag.str={}", bufsz, ttag.len, !!ttag.str);
4668 _c4assert((bufsz > buf.len) == (!ttag.str));
4669 _c4assert(!!bufsz == (ttag.len == bufsz));
4673 _c4dbgpf(
"tag requires arena, but it was small. arena.len={} arena.slack={} tag.required={}", m_evt_handler->arena_rem().len, m_evt_handler->arena().len, ttag.len);
4675 buf = _alloc_arena(bufsz, &tag);
4678 ttag = tds.resolve(buf, &bufsz, tag, m_evt_handler->m_curr_doc,
4679 m_evt_handler->m_curr->pos,
4680 m_evt_handler->m_stack.m_callbacks);
4683 _c4assert(!ttag.str || ttag.is_sub(m_evt_handler->arena()));
4687 _c4dbgp(
"tag required arena. update size");
4690 (void)_alloc_arena(bufsz);
4692 C4_SUPPRESS_WARNING_MSVC_WITH_PUSH(4127)
4693 if C4_IF_CONSTEXPR (EventHandler::requires_strings_on_buffers)
4695 _c4dbgpf(
"handler requires tags in buffers. !!ttag.str={} in_arena={} in_src={}", !!ttag.str, ttag.is_sub(m_evt_handler->arena()), ttag.is_sub(_buf()));
4697 if(ttag.str && !ttag.is_sub(m_evt_handler->arena()) && !ttag.is_sub(_buf()))
4699 _c4dbgpf(
"copying resolved tag to arena: slack={} required={}", m_evt_handler->arena_rem().len, ttag.len);
4700 buf = _alloc_arena(ttag.len, &tag);
4702 memcpy(buf.str, ttag.str, ttag.len);
4704 _c4assert(!ttag.str || ttag.is_sub(m_evt_handler->arena()));
4707 C4_SUPPRESS_WARNING_MSVC_POP
4708 _c4dbgpf(
"resolved tag: {} --> [{}]~~~{}~~~", _prs(tag), ttag.len, _maybe_null_str(ttag));
4711 m_evt_handler->tag_cache().add(tag, ttag, m_evt_handler->m_curr_doc, ret.pos);
/** Validate a directive of the form `%YAML <digits>.<digits>`.
 * @param directive in/out: must begin with "%YAML"; on a well-formed
 *        directive it is passed to _set_first_strict() with the position
 *        where the minor number ends
 * @param version out: on a well-formed directive, receives the range from
 *        the first major digit to the end of the minor number
 * NOTE(review): the bool result is produced on lines not covered by this
 * comment - presumably true only for a well-formed directive; confirm. */
4715template<
class EventHandler>
4716bool ParseEngine<EventHandler>::_validate_directive_yaml(
csubstr *C4_RESTRICT directive,
csubstr *C4_RESTRICT
version)
const
4718 _c4assert(directive->begins_with(
"%YAML"));
// skip the blanks after "%YAML" (5 characters)
4719 size_t version_start = directive->first_not_of(
" \t", 5);
4720 if(version_start !=
npos)
4722 csubstr digits =
"0123456789";
// the major number must be followed by a '.'
4723 size_t major_end = directive->
first_not_of(digits, version_start);
4724 if(major_end !=
npos && directive->str[major_end] ==
'.')
// the minor number may run up to the end of the directive
4726 size_t minor_end = directive->first_not_of(digits, major_end + 1);
4727 if(minor_end ==
npos)
4728 minor_end = directive->len;
4729 _set_first_strict(*directive, minor_end);
4730 *
version = directive->range(version_start, minor_end);
4731 _c4dbgpf(
"%YAML: version={} full={}", *
version, _prs(*directive,
true));
/** Validate a directive of the form `%TAG <handle> <prefix>`, where the
 * handle starts with '!' and handle and prefix are delimited by characters
 * from `whitespace`.
 * @param directive in/out: must begin with "%TAG"; on a well-formed
 *        directive it is passed to _set_first_strict() with the position
 *        where the prefix ends
 * @param handle out: on a well-formed directive, receives the handle range
 * @param prefix out: on a well-formed directive, receives the prefix range
 * NOTE(review): the bool result is produced on lines not covered by this
 * comment - presumably true only for a well-formed directive; confirm. */
4738template<
class EventHandler>
4739bool ParseEngine<EventHandler>::_validate_directive_tag(
csubstr *C4_RESTRICT directive,
csubstr *C4_RESTRICT handle,
csubstr *C4_RESTRICT prefix)
const
4741 _c4assert(directive->begins_with(
"%TAG"));
// skip the whitespace after "%TAG" (4 characters)
4743 size_t handle_start = directive->
first_not_of(whitespace, 4);
// the handle must start with '!'
4744 if(handle_start !=
npos && directive->str[handle_start] ==
'!')
// the handle must be followed by whitespace
4746 size_t handle_end = directive->first_of(whitespace, handle_start);
4747 if(handle_end !=
npos)
// the prefix must exist
4749 size_t prefix_start = directive->first_not_of(whitespace, handle_end);
4750 if(prefix_start !=
npos)
// the prefix may run up to the end of the directive
4752 size_t prefix_end = directive->first_of(whitespace, prefix_start);
4753 if(prefix_end ==
npos)
4754 prefix_end = directive->len;
4755 _set_first_strict(*directive, prefix_end);
4756 *handle = directive->range(handle_start, handle_end);
4757 *prefix = directive->range(prefix_start, prefix_end);
4758 _c4dbgpf(
"%TAG: handle={} prefix={} full={}", *handle, *prefix, _prs(*directive,
true));
/** Handle a directive line. @p directive must start at the current line
 * remainder, which must begin with '%'. A "%TAG" directive is validated and
 * passed to the handler's add_directive_tag(). A "%YAML" directive is
 * validated, must not repeat an earlier %YAML directive, and is passed to
 * add_directive_yaml(). Anything left on the line after the directive must
 * be empty or begin with '#'. A failure sets `err` to one of the messages
 * below and jumps to the directive_error label. */
4767template<
class EventHandler>
4768void ParseEngine<EventHandler>::_handle_directive(
csubstr directive)
4770 _c4dbgpf(
"handle_directive: rem={}", _prs(directive,
true));
4771 _c4assert(m_evt_handler->m_curr->line_contents.rem.begins_with(
'%'));
4772 _c4assert(directive.str == m_evt_handler->m_curr->line_contents.rem.str);
// set to a message before any jump to directive_error
4773 const char *err =
nullptr;
// directive-name test: `str` begins with `dir`, and `rest` is either empty
// or begins with a space or tab
// NOTE(review): `rest` is presumably what follows `dir` in `str` - confirm
4777 if(str.begins_with(dir))
4780 return (!rest.len || rest.str[0] ==
' ' || rest.str[0] ==
'\t');
4784 if(isdirective(directive,
"%TAG"))
4788 if(C4_UNLIKELY(!_validate_directive_tag(&directive, &handle, &prefix)))
4790 err =
"invalid %TAG directive";
4791 goto directive_error;
4793 m_evt_handler->add_directive_tag(handle, prefix);
4795 else if(isdirective(directive,
"%YAML"))
4798 if(C4_UNLIKELY(!_validate_directive_yaml(&directive, &
version)))
4800 err =
"invalid %YAML directive";
4801 goto directive_error;
// a second %YAML directive is an error
4803 if(C4_UNLIKELY(m_has_directives_yaml))
4805 err =
"multiple %YAML directives";
4806 goto directive_error;
4808 m_has_directives_yaml =
true;
4809 m_evt_handler->add_directive_yaml(
version);
4811 m_has_directives =
true;
4812 rem = m_evt_handler->m_curr->line_contents.rem;
// no position found: advance to the end of the remainder
4814 pos = pos !=
npos ? pos : rem.len;
4815 _line_progressed(pos);
4817 _c4dbgpf(
"handle_directive: rest={}", _prs(rem));
// only a comment may follow the directive
4818 if(C4_UNLIKELY(rem.len && !rem.begins_with(
'#')))
4820 err =
"invalid tokens after directive";
4821 goto directive_error;
4824 if(C4_UNLIKELY(err !=
nullptr))
/** Detect the character encoding at the start of the current line remainder.
 * Each encoding is recognized either by its byte order mark or by the
 * pattern of zero bytes around a first ASCII character. The branches are
 * tried in order: UTF32BE, UTF32LE, UTF16BE, UTF16LE, UTF8 (mark only). The
 * UTF32 branches advance the line by 4 bytes, the UTF16 branches by 2, and
 * the UTF8 branch by 3.
 * NOTE(review): `rest` is presumably the remainder without its first byte -
 * confirm. The meaning of the bool result is decided on lines not covered by
 * this comment. */
4828template<
class EventHandler>
4829bool ParseEngine<EventHandler>::_handle_bom()
4831 const csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// true for characters in the range '\x01'..'\x7f'
4836 #define _rymlisascii(c) ((c) > '\0' && (c) <= '\x7f')
// 00 00 FE FF, or three zero bytes followed by an ASCII character
4837 if(rem.begins_with(
csubstr{
"\x00\x00\xfe\xff", 4}) || (rem.begins_with(
csubstr{
"\x00\x00\x00", 3}) && rem.len >= 4u && _rymlisascii(rem.str[3])))
4839 _c4dbgp(
"byte order mark: UTF32BE");
4841 _line_progressed(4);
// FF FE 00 00, or an ASCII character followed by three zero bytes
4845 else if(rem.begins_with(
csubstr{
"\xff\xfe\x00\x00", 4}) || (rest.begins_with(
csubstr{
"\x00\x00\x00", 3}) && rem.len >= 4u && _rymlisascii(rem.str[0])))
4847 _c4dbgp(
"byte order mark: UTF32LE");
4849 _line_progressed(4);
// FE FF, or a zero byte followed by an ASCII character
4853 else if(rem.begins_with(
"\xfe\xff") || (rem.begins_with(
'\x00') && rem.len >= 2u && _rymlisascii(rem.str[1])))
4855 _c4dbgp(
"byte order mark: UTF16BE");
4857 _line_progressed(2);
// FF FE, or an ASCII character followed by a zero byte
4861 else if(rem.begins_with(
"\xff\xfe") || (rest.begins_with(
'\x00') && rem.len >= 2u && _rymlisascii(rem.str[0])))
4863 _c4dbgp(
"byte order mark: UTF16LE");
4865 _line_progressed(2);
// EF BB BF
4869 else if(rem.begins_with(
"\xef\xbb\xbf"))
4871 _c4dbgp(
"byte order mark: UTF8");
4873 _line_progressed(3);
/** Register the encoding @p enc found by the byte order mark detection.
 * While no encoding is recorded yet (m_encoding == NOBOM), @p enc passes the
 * check when it is UTF8 or when the current line remainder starts at the
 * very beginning of the buffer; the error for a non-UTF8 mark elsewhere
 * follows. Once an encoding is recorded, a different @p enc is an error. */
4882template<
class EventHandler>
4883void ParseEngine<EventHandler>::_handle_bom(
Encoding_e enc)
// first byte order mark seen
4885 if(m_encoding ==
NOBOM)
4887 if(enc ==
UTF8 || (m_evt_handler->m_curr->line_contents.rem.str == _buf().str))
4890 _c4err(
"non-UTF8 byte order mark can appear only at the beginning of the file");
// a mark was already seen: it must not change
4892 else if(enc != m_encoding)
4894 _c4err(
"byte order mark can only be set once");
/** Handle the contents of a flow sequence in JSON mode. On entry the parser
 * must be in RSEQ and RFLOW, not in RKEY, and in exactly one of the RVAL
 * ("seqjson[RVAL]") or RNXT ("seqjson[RNXT]") states. Values can be
 * double-quoted scalars, plain scalars, child sequences or child maps. End
 * of file before the sequence is closed raises "missing terminating ]". */
4901template<
class EventHandler>
4902void ParseEngine<EventHandler>::_handle_seq_json()
4905 _c4dbgpf(
"handle2_seq_json: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
// entry conditions
4907 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
4908 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RSEQ), m_evt_handler->m_curr->pos);
4909 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RFLOW), m_evt_handler->m_curr->pos);
4910 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL|
RNXT), m_evt_handler->m_curr->pos);
4911 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RVAL) != has_all(
RNXT), m_evt_handler->m_curr->pos);
4913 _handle_flow_skip_whitespace();
4914 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// --- RVAL state ---
4920 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
4921 const char first = rem.str[0];
// NOTE(review): this message is tagged "mapjson" although this is the
// sequence handler - confirm
4922 _c4dbgpf(
"mapjson[RVAL]: '{}'", first);
4927 _c4dbgp(
"seqjson[RVAL]: scanning double-quoted scalar");
4928 ScannedScalar sc = _scan_scalar_dquot();
4929 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
4930 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
4936 _c4dbgp(
"seqjson[RVAL]: start child seqjson");
4938 m_evt_handler->begin_seq_val_flow();
4940 _line_progressed(1);
4945 _c4dbgp(
"seqjson[RVAL]: start child mapjson");
4947 m_evt_handler->begin_map_val_flow();
4949 _line_progressed(1);
4950 goto seqjson_finish;
4954 _c4dbgp(
"seqjson[RVAL]: end!");
4957 _line_progressed(1);
4959 goto seqjson_finish;
4965 if(_scan_scalar_seq_json(&sc))
4967 _c4dbgp(
"seqjson[RVAL]: it's a plain scalar.");
4968 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
4969 m_evt_handler->set_val_scalar_plain(maybe_filtered);
// --- RNXT state ---
4981 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT), m_evt_handler->m_curr->pos);
4982 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
4983 const char first = rem.str[0];
// NOTE(review): this message is tagged "mapjson" although this is the
// sequence handler - confirm
4984 _c4dbgpf(
"mapjson[RNXT]: '{}'", first);
4989 _c4dbgp(
"seqjson[RNXT]: expect next val");
4991 m_evt_handler->add_sibling();
4992 _line_progressed(1);
4997 _c4dbgp(
"seqjson[RNXT]: end!");
4999 _line_progressed(1);
5000 goto seqjson_finish;
// --- continue on the same line, or move on when the line is finished ---
5008 _c4dbgt(
"seqjson: go again", 0);
5009 if(_finished_line())
5011 if(C4_LIKELY(!_finished_file()))
5019 _c4err(
"missing terminating ]");
5025 _c4dbgp(
"seqjson: finish");
/** Handle the contents of a flow map in JSON mode. On entry the parser must
 * be in RMAP and RFLOW, not in QMRK, and in exactly one of the RKEY, RKCL,
 * RVAL or RNXT states. Keys are double-quoted scalars; values can be
 * double-quoted scalars, plain scalars, child sequences or child maps. End
 * of file before the map is closed raises "missing terminating }". */
5031template<
class EventHandler>
5032void ParseEngine<EventHandler>::_handle_map_json()
5035 _c4dbgpf(
"handle2_map_json: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
// entry conditions
5037 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP), m_evt_handler->m_curr->pos);
5038 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RFLOW), m_evt_handler->m_curr->pos);
5039 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5040 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RKCL|
RVAL|
RNXT), m_evt_handler->m_curr->pos);
// exactly one of the four states is active
5041 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(
RKEY) + has_any(
RKCL) + has_any(
RVAL) + has_any(
RNXT)), m_evt_handler->m_curr->pos);
5043 _handle_flow_skip_whitespace();
5044 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// --- RKEY state ---
5050 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
5051 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5052 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5053 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5054 const char first = rem.str[0];
5055 _c4dbgpf(
"mapjson[RKEY]: '{}'", first);
5060 _c4dbgp(
"mapjson[RKEY]: scanning double-quoted scalar");
5061 ScannedScalar sc = _scan_scalar_dquot();
5062 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5063 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5069 _c4dbgp(
"mapjson[RKEY]: end!");
5071 _line_progressed(1);
5072 goto mapjson_finish;
// --- RVAL state ---
5078 else if(has_any(
RVAL))
5080 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
5081 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
5082 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5083 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5084 const char first = rem.str[0];
5085 _c4dbgpf(
"mapjson[RVAL]: '{}'", first);
5090 _c4dbgp(
"mapjson[RVAL]: scanning double-quoted scalar");
5091 ScannedScalar sc = _scan_scalar_dquot();
5092 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5093 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5099 _c4dbgp(
"mapjson[RVAL]: start val seqjson");
5101 m_evt_handler->begin_seq_val_flow();
// the child container takes the indentation reference of its parent
5102 _set_indentation(m_evt_handler->m_parent->indref);
5104 _line_progressed(1);
5105 goto mapjson_finish;
5109 _c4dbgp(
"mapjson[RVAL]: start val mapjson");
5111 m_evt_handler->begin_map_val_flow();
// the child container takes the indentation reference of its parent
5112 _set_indentation(m_evt_handler->m_parent->indref);
5114 _line_progressed(1);
5121 if(_scan_scalar_map_json(&sc))
5123 _c4dbgp(
"mapjson[RVAL]: plain scalar.");
5124 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5125 m_evt_handler->set_val_scalar_plain(maybe_filtered);
// --- RKCL state: the colon after a key ---
5136 else if(has_any(
RKCL))
5138 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
5139 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5140 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5141 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5142 const char first = rem.str[0];
5143 _c4dbgpf(
"mapjson[RKCL]: '{}'", first);
5146 _c4dbgp(
"mapjson[RKCL]: found the colon");
5148 _line_progressed(1);
// --- RNXT state: ',' continues the map, '}' ends it ---
5155 else if(has_any(
RNXT))
5157 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
5158 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
5159 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5160 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5161 _c4dbgpf(
"mapjson[RNXT]: '{}'", rem.str[0]);
5162 if(rem.begins_with(
','))
5164 _c4dbgp(
"mapjson[RNXT]: expect next keyval");
5165 m_evt_handler->add_sibling();
5167 _line_progressed(1);
5169 else if(rem.begins_with(
'}'))
5171 _c4dbgp(
"mapjson[RNXT]: end!");
5173 _line_progressed(1);
5174 goto mapjson_finish;
// --- continue on the same line, or move on when the line is finished ---
5183 _c4dbgt(
"mapjson: go again", 0);
5184 if(_finished_line())
5186 if(C4_LIKELY(!_finished_file()))
5194 _c4err(
"missing terminating }");
5200 _c4dbgp(
"mapjson: finish");
/** Handle a map that appears inline as an element of a flow sequence. On
 * entry the parser must be in RSEQIMAP, not in RKEY, in exactly one of the
 * RVAL, RNXT, QMRK or RKCL states, and the handler stack must hold at least
 * 3 entries. In the RVAL state the pending annotations are applied (through
 * _handle_annotations_before_blck_val_scalar()) before each value is set. */
5206template<
class EventHandler>
5207void ParseEngine<EventHandler>::_handle_seq_imap()
5210 _c4dbgpf(
"handle2_seq_imap: node_id={} level={} indref={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
// entry conditions
5212 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RSEQIMAP), m_evt_handler->m_curr->pos);
5213 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
5214 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL|
RNXT|
QMRK|
RKCL), m_evt_handler->m_curr->pos);
// exactly one of the four states is active
5215 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, 1 == has_all(
RVAL) + has_all(
RNXT) + has_all(
QMRK) + has_all(
RKCL), m_evt_handler->m_curr->pos);
5216 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 3, m_evt_handler->m_curr->pos);
5218 _handle_flow_skip_whitespace();
5219 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// --- RVAL state: the value of the inline map ---
5225 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL), m_evt_handler->m_curr->pos);
5226 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5227 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5228 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
5229 const char first = rem.str[0];
5230 _c4dbgpf(
"seqimap[RVAL]: '{}'", _c4prc(first));
5234 _c4dbgp(
"seqimap[RVAL]: scanning single-quoted scalar");
5235 sc = _scan_scalar_squot();
5236 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5237 _handle_annotations_before_blck_val_scalar();
5238 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
5240 goto seqimap_finish;
5242 else if(first ==
'"')
5244 _c4dbgp(
"seqimap[RVAL]: scanning double-quoted scalar");
5245 sc = _scan_scalar_dquot();
5246 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5247 _handle_annotations_before_blck_val_scalar();
5248 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5250 goto seqimap_finish;
5253 else if(_scan_scalar_plain_map_flow(&sc))
5255 _c4dbgp(
"seqimap[RVAL]: it's a scalar.");
5256 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5257 _handle_annotations_before_blck_val_scalar();
5258 m_evt_handler->set_val_scalar_plain(maybe_filtered);
5260 goto seqimap_finish;
5262 else if(first ==
'[')
5264 _c4dbgp(
"seqimap[RVAL]: start child seqflow");
5266 _handle_annotations_before_blck_val_scalar();
5267 m_evt_handler->begin_seq_val_flow();
// the child container takes the indentation reference of its parent
5269 _set_indentation(m_evt_handler->m_parent->indref);
5270 _line_progressed(1);
5271 goto seqimap_finish;
5273 else if(first ==
'{')
5275 _c4dbgp(
"seqimap[RVAL]: start child mapflow");
5277 _handle_annotations_before_blck_val_scalar();
5278 m_evt_handler->begin_map_val_flow();
// the child container takes the indentation reference of its parent
5280 _set_indentation(m_evt_handler->m_parent->indref);
5281 _line_progressed(1);
5282 goto seqimap_finish;
// the sequence continues or ends before any value was given
5284 else if(first ==
',' || first ==
']')
5286 _c4dbgp(
"seqimap[RVAL]: finish without val.");
5287 _handle_annotations_before_blck_val_scalar();
5288 m_evt_handler->set_val_scalar_plain_empty();
5290 goto seqimap_finish;
5292 else if(first ==
'*')
5294 csubstr ref = _scan_ref_seq();
5295 _c4dbgpf(
"seqimap[RVAL]: ref! {}", _prs(ref));
5296 _handle_valref(ref);
// anchors and tags are kept pending until the value itself is seen
5299 else if(first ==
'&')
5301 csubstr anchor = _scan_anchor();
5302 _c4dbgpf(
"seqimap[RVAL]: anchor! {}", _prs(anchor));
5303 _add_annotation(&m_pending_anchors, anchor);
5305 else if(first ==
'!')
5308 _c4dbgpf(
"seqimap[RVAL]: tag! {}", _prs(tag));
5309 _add_annotation(&m_pending_tags, tag);
// --- RNXT state: ',' or ']' completes the inline map ---
5316 else if(has_any(
RNXT))
5318 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT), m_evt_handler->m_curr->pos);
5319 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5320 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5321 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
5322 const char first = rem.str[0];
5323 _c4dbgpf(
"seqimap[RNXT]: '{}'", _c4prc(first));
5324 if(first ==
',' || first ==
']')
5328 _c4dbgp(
"seqimap: done");
5330 goto seqimap_finish;
// --- QMRK state: the key of the inline map ---
5337 else if(has_any(
QMRK))
5339 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
QMRK), m_evt_handler->m_curr->pos);
5340 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5341 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5342 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
5343 const char first = rem.str[0];
5344 _c4dbgpf(
"seqimap[QMRK]: '{}'", _c4prc(first));
5348 _c4dbgp(
"seqimap[QMRK]: scanning single-quoted scalar");
5349 sc = _scan_scalar_squot();
5350 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5351 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
5355 else if(first ==
'"')
5357 _c4dbgp(
"seqimap[QMRK]: scanning double-quoted scalar");
5358 sc = _scan_scalar_dquot();
5359 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5360 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5365 else if(_scan_scalar_plain_map_flow(&sc))
5367 _c4dbgp(
"seqimap[QMRK]: it's a scalar.");
5368 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5369 m_evt_handler->set_key_scalar_plain(maybe_filtered);
5373 else if(first ==
'[')
5375 _c4dbgp(
"seqimap[QMRK]: start child seqflow");
5377 m_evt_handler->begin_seq_key_flow();
// the child container takes the indentation reference of its parent
5379 _set_indentation(m_evt_handler->m_parent->indref);
5380 _line_progressed(1);
5381 goto seqimap_finish;
5383 else if(first ==
'{')
5385 _c4dbgp(
"seqimap[QMRK]: start child mapflow");
5387 m_evt_handler->begin_map_key_flow();
// the child container takes the indentation reference of its parent
5389 _set_indentation(m_evt_handler->m_parent->indref);
5390 _line_progressed(1);
5391 goto seqimap_finish;
// neither key nor val was given: both become empty plain scalars
5393 else if(first ==
',' || first ==
']')
5395 _c4dbgp(
"seqimap[QMRK]: finish without key.");
5396 m_evt_handler->set_key_scalar_plain_empty();
5397 m_evt_handler->set_val_scalar_plain_empty();
5399 goto seqimap_finish;
// in this state the anchor is set on the key right away
5401 else if(first ==
'&')
5403 csubstr anchor = _scan_anchor();
5404 _c4dbgp(
"seqimap[QMRK]: anchor!");
5405 m_evt_handler->set_key_anchor(anchor);
5407 else if(first ==
'*')
5409 csubstr ref = _scan_ref_seq();
5410 _c4dbgp(
"seqimap[QMRK]: ref!");
5411 _handle_keyref(ref);
// --- RKCL state: the colon after the key ---
5419 else if(has_any(
RKCL))
5421 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5422 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5423 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5424 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RKCL), m_evt_handler->m_curr->pos);
5425 const char first = rem.str[0];
5426 _c4dbgpf(
"seqimap[RKCL]: '{}'", _c4prc(first));
5429 _c4dbgp(
"seqimap[RKCL]: found ':'");
5431 _line_progressed(1);
// the sequence continues or ends before a val was given
5434 else if(first ==
',' || first ==
']')
5436 _c4dbgp(
"seqimap[RKCL]: found ','. finish without val");
5437 m_evt_handler->set_val_scalar_plain_empty();
5439 goto seqimap_finish;
// --- continue on the same line, or move on when the line is finished ---
5448 _c4dbgt(
"seqimap: go again", 0);
5449 if(_finished_line())
5451 if(C4_LIKELY(!_finished_file()))
5465 _c4dbgp(
"seqimap: finish");
// Parse step for a flow-style sequence ("[ ... ]").
//
// Entry invariants (asserted below): RSEQ and RFLOW are set, RKEY is clear,
// exactly one of RVAL / RNXT is set, and indref is not npos.
//
// While a value is expected (RVAL), the first remaining character picks the
// action: quoted or plain scalars are scanned, filtered and reported as
// values; '[' and '{' open nested flow containers; ']' is treated as the end
// of the sequence; '*', '&' and '!' handle references, anchors and tags;
// ':' and '?' open a flow map inside the sequence; ',' reports an empty
// value only when anchors/tags are pending.
// While the next element is expected (RNXT), a sibling is added after the
// separator, ']' is treated as the end, and a ':' on the line where the
// previous value ended turns that value into the first key of a new flow map.
5471template<
class EventHandler>
5472void ParseEngine<EventHandler>::_handle_seq_flow()
5475 _c4dbgpf(
"handle_seq_flow: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
// Sanity checks on the parser state for a flow sequence.
5477 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
5478 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RSEQ), m_evt_handler->m_curr->pos);
5479 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RFLOW), m_evt_handler->m_curr->pos);
5480 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL|
RNXT), m_evt_handler->m_curr->pos);
// RVAL and RNXT are mutually exclusive.
5481 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RVAL) != has_all(
RNXT), m_evt_handler->m_curr->pos);
5482 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref !=
npos, m_evt_handler->m_curr->pos);
// At the start of a line, run the flow line-beginning handling first.
5484 if(m_evt_handler->m_curr->at_line_beginning())
5486 _handle_flow_line_beginning();
// Skip whitespace, then check whether anything is left on this line.
5489 _handle_flow_skip_whitespace();
5490 if(!m_evt_handler->m_curr->line_contents.rem.len)
// ---- RVAL: a value is expected (RNXT must be clear) ----
5495 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5496 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
// Single-quoted scalar: scan, filter, then report it as the value.
5500 _c4dbgp(
"seqflow[RVAL]: scanning single-quoted scalar");
5501 sc = _scan_scalar_squot();
5502 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5503 _handle_annotations_before_blck_val_scalar();
5504 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
// presumably records where this value ended (see m_prev_val_end in the RNXT ':' case) - TODO confirm
5506 _mark_seqflow_val_end();
// Double-quoted scalar: same sequence with the dquot scanner/filter.
5508 else if(first ==
'"')
5510 _c4dbgp(
"seqflow[RVAL]: scanning double-quoted scalar");
5511 sc = _scan_scalar_dquot();
5512 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5513 _handle_annotations_before_blck_val_scalar();
5514 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5516 _mark_seqflow_val_end();
// Plain scalar: the scanner itself decides whether one starts here.
5519 else if(_scan_scalar_plain_seq_flow(&sc))
5521 _c4dbgp(
"seqflow[RVAL]: it's a scalar.");
5522 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5523 _handle_annotations_before_blck_val_scalar();
5524 m_evt_handler->set_val_scalar_plain(maybe_filtered);
5526 _mark_seqflow_val_end();
// Nested flow sequence: the child takes the parent's indref as its indentation.
5528 else if(first ==
'[')
5530 _c4dbgp(
"seqflow[RVAL]: start child seqflow");
5532 _handle_annotations_before_blck_val_scalar();
5533 m_evt_handler->begin_seq_val_flow();
5534 _set_indentation(m_evt_handler->m_parent->indref);
5536 _line_progressed(1);
// Nested flow map: leaves this handler through seqflow_finish.
5538 else if(first ==
'{')
5540 _c4dbgp(
"seqflow[RVAL]: start child mapflow");
5542 _handle_annotations_before_blck_val_scalar();
5543 m_evt_handler->begin_map_val_flow();
5544 _set_indentation(m_evt_handler->m_parent->indref);
5546 _line_progressed(1);
5547 goto seqflow_finish;
// ']' while a value is still expected.
5549 else if(first ==
']')
5551 _c4dbgp(
"seqflow[RVAL]: end!");
// Bitwise OR of the two counters: nonzero when either list has entries
// (same truth value as the '||' test used in the ',' branch below).
5552 if(m_pending_anchors.num_entries | m_pending_tags.num_entries)
5554 _c4dbgp(
"seqflow[RVAL]: add pending annotations");
5555 _handle_annotations_before_blck_val_scalar();
5556 m_evt_handler->set_val_scalar_plain_empty();
5558 _line_progressed(1);
5560 goto seqflow_finish;
// Alias used as the value.
5562 else if(first ==
'*')
5564 csubstr ref = _scan_ref_seq();
5565 _c4dbgpf(
"seqflow[RVAL]: ref! {}", _prs(ref));
5566 _handle_valref(ref);
// Anchor: queued in m_pending_anchors rather than applied immediately.
5569 else if(first ==
'&')
5571 csubstr anchor = _scan_anchor();
5572 _c4dbgpf(
"seqflow[RVAL]: anchor! {}", _prs(anchor));
5573 _add_annotation(&m_pending_anchors, anchor);
// Tag: queued in m_pending_tags.
5575 else if(first ==
'!')
5578 _c4dbgpf(
"seqflow[RVAL]: tag! {}", _prs(tag));
5579 _add_annotation(&m_pending_tags, tag);
// ':' with no preceding key: open a flow map whose first key is empty.
5581 else if(first ==
':')
5583 _c4dbgpf(
"seqflow[RVAL]: actually seqimap at node[{}], with empty key", m_evt_handler->m_curr->node_id);
5585 m_evt_handler->begin_map_val_flow();
5586 _set_indentation(m_evt_handler->m_parent->indref);
5587 _handle_annotations_before_blck_key_scalar();
5588 m_evt_handler->set_key_scalar_plain_empty();
5590 _line_progressed(1);
5591 goto seqflow_finish;
// '?' explicit key: open a flow map, then skip whitespace after the marker.
5593 else if(first ==
'?')
5595 _c4dbgp(
"seqflow[RVAL]: start child mapflow, explicit key");
5597 m_evt_handler->begin_map_val_flow();
5598 _set_indentation(m_evt_handler->m_parent->indref);
5600 _line_progressed(1);
5601 _maybe_skip_whitespace_tokens();
5602 goto seqflow_finish;
// ',' with no value: an empty value is reported only if anchors/tags are pending.
5604 else if(first ==
',')
5606 if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
5608 _c4dbgp(
"seqflow[RVAL]: add pending annotations");
5609 _handle_annotations_before_blck_val_scalar();
5610 m_evt_handler->set_val_scalar_plain_empty();
5612 _mark_seqflow_val_end();
// ---- RNXT: the next element is expected (RVAL must be clear) ----
5626 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT), m_evt_handler->m_curr->pos);
5627 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5628 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5631 _c4dbgp(
"seqflow[RNXT]: expect next val");
5633 m_evt_handler->add_sibling();
5634 _line_progressed(1);
// A '#' right after the consumed separator is rejected as an invalid comment.
5635 if(m_evt_handler->m_curr->line_contents.rem.begins_with(
'#'))
5637 _c4err(
"parse error: invalid comment after comma");
5639 _mark_seqflow_val_end();
5641 else if(first ==
']')
5643 _c4dbgp(
"seqflow[RNXT]: end!");
5644 _line_progressed(1);
5646 goto seqflow_finish;
5648 else if(first ==
':')
5650 _c4dbgpf(
"seqflow[RNXT]: line@valend={} line@now={}", m_prev_val_end, m_evt_handler->m_curr->pos.line);
// Only a ':' on the same line where the previous value ended promotes that
// value to the first key of a new flow map.
5651 if(m_prev_val_end !=
NONE && m_evt_handler->m_curr->pos.line == m_prev_val_end)
5653 _c4dbgpf(
"seqflow[RNXT]: actually seqimap at node[{}]", m_evt_handler->m_curr->node_id);
5654 m_evt_handler->actually_val_is_first_key_of_new_map_flow();
5655 _set_indentation(m_evt_handler->m_parent->indref);
5656 _line_progressed(1);
5658 goto seqflow_finish;
// End-of-line handling; per the error text, running out of input here means
// the closing ']' was never seen.
5672 _c4dbgt(
"seqflow: go again", 0);
5673 if(_finished_line())
5675 if(C4_LIKELY(!_finished_file()))
5683 _c4err(
"missing terminating ]");
5689 _c4dbgp(
"seqflow: finish");
// Parse step for a flow-style map ("{ ... }").
//
// Entry invariants (asserted below): RMAP and RFLOW are set and exactly one
// of RKEY, RKCL, RVAL, RNXT, QMRK is set. Each of those states has its own
// section below, and each section re-asserts that the other four are clear:
//   RKEY - a key is expected: scalars, '?', ':', ',', '}', '&', '!', '*',
//          and '[' / '{' (container used as key).
//   RKCL - the ':' after a key is expected; '}' or ',' here means the value
//          is missing and an empty value is reported.
//   RVAL - a value is expected: scalars, '[' / '{', '}', ',', '*', '&', '!'.
//   RNXT - ',' adds a sibling, '}' ends the map.
//   QMRK - the key following an explicit '?' marker; '}' or ',' here reports
//          both an empty key and an empty value.
5695template<
class EventHandler>
5696void ParseEngine<EventHandler>::_handle_map_flow()
5699 _c4dbgpf(
"handle_map_flow: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5701 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP), m_evt_handler->m_curr->pos);
5702 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RFLOW), m_evt_handler->m_curr->pos);
5703 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RKCL|
RVAL|
RNXT|
QMRK), m_evt_handler->m_curr->pos);
// Exactly one of the five sub-states may be active.
5704 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(
RKEY) + has_any(
RKCL) + has_any(
RVAL) + has_any(
RNXT) + has_any(
QMRK)), m_evt_handler->m_curr->pos);
// At the start of a line, run the flow line-beginning handling first.
5706 if(m_evt_handler->m_curr->at_line_beginning())
5708 _handle_flow_line_beginning();
// Skip whitespace, then check whether anything is left on this line.
5711 _handle_flow_skip_whitespace();
5712 if(!m_evt_handler->m_curr->line_contents.rem.len)
// ---- RKEY: a key is expected ----
5717 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
5718 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5719 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5720 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5721 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5722 _c4dbgpf(
"mapflow[RKEY]: '{}'", first);
// Single-quoted key: scan, filter, then report it as the key.
5726 _c4dbgp(
"mapflow[RKEY]: scanning single-quoted scalar");
5727 sc = _scan_scalar_squot();
5728 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5729 _handle_annotations_before_blck_key_scalar();
5730 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
// Double-quoted key.
5733 else if(first ==
'"')
5735 _c4dbgp(
"mapflow[RKEY]: scanning double-quoted scalar");
5736 sc = _scan_scalar_dquot();
5737 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5738 _handle_annotations_before_blck_key_scalar();
5739 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
// Plain key: the scanner itself decides whether one starts here.
5743 else if(_scan_scalar_plain_map_flow(&sc))
5745 _c4dbgp(
"mapflow[RKEY]: plain scalar");
5746 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5747 _handle_annotations_before_blck_key_scalar();
5748 m_evt_handler->set_key_scalar_plain(maybe_filtered);
// '?' explicit-key marker: consume it and any whitespace after it.
5751 else if(first ==
'?')
5753 _c4dbgp(
"mapflow[RKEY]: explicit key");
5754 _handle_annotations_before_blck_key_scalar();
5756 _line_progressed(1);
5757 _maybe_skip_whitespace_tokens();
// ':' with no key text: the key is reported as an empty plain scalar.
5759 else if(first ==
':')
5761 _c4dbgp(
"mapflow[RKEY]: setting empty key");
5762 _handle_annotations_before_blck_key_scalar();
5763 m_evt_handler->set_key_scalar_plain_empty();
5765 _line_progressed(1);
5766 _maybe_skip_whitespace_tokens();
// ',' where a key was expected is an error unless the annotation helper
// returns true.
5768 else if(first ==
',')
5770 _c4dbgp(
"mapflow[RKEY]: comma!");
5771 if(!_handle_annotations_before_unexpected_flow_token_rkey())
5772 _c4err(
"unexpected comma");
// '}' where a key was expected: the helper's result is deliberately ignored.
5776 else if(first ==
'}')
5778 _c4dbgp(
"mapflow[RKEY]: end!");
5779 (void)_handle_annotations_before_unexpected_flow_token_rkey();
5780 _line_progressed(1);
5782 goto mapflow_finish;
// Anchor / tag: queued in the pending lists rather than applied immediately.
5784 else if(first ==
'&')
5786 csubstr anchor = _scan_anchor();
5787 _c4dbgpf(
"mapflow[RKEY]: key anchor! {}", _prs(anchor));
5788 _add_annotation(&m_pending_anchors, anchor);
5790 else if(first ==
'!')
5793 _c4dbgpf(
"mapflow[RKEY]: tag! {}", _prs(tag));
5794 _add_annotation(&m_pending_tags, tag);
// Alias used as the key.
5796 else if(first ==
'*')
5798 csubstr ref = _scan_ref_map();
5799 _c4dbgpf(
"mapflow[RKEY]: key ref! {}", _prs(ref));
5800 _handle_keyref(ref);
// Flow sequence used as a key; the child takes the parent's indref.
5803 else if(first ==
'[')
5808 _c4dbgp(
"mapflow[RKEY]: start child seqflow (!)");
5809 _handle_annotations_before_blck_key_scalar();
5811 m_evt_handler->begin_seq_key_flow();
5813 _set_indentation(m_evt_handler->m_parent->indref);
5814 _line_progressed(1);
5815 goto mapflow_finish;
// Flow map used as a key.
5817 else if(first ==
'{')
5822 _c4dbgp(
"mapflow[RKEY]: start child mapflow (!)");
5823 _handle_annotations_before_blck_key_scalar();
5825 m_evt_handler->begin_map_key_flow();
5827 _set_indentation(m_evt_handler->m_parent->indref);
5828 _line_progressed(1);
// ---- RKCL: the ':' after a key is expected ----
5836 else if(has_any(
RKCL))
5838 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
5839 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5840 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5841 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5842 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5843 _c4dbgpf(
"mapflow[RKCL]: '{}'", first);
5846 _c4dbgp(
"mapflow[RKCL]: found the colon");
5848 _line_progressed(1);
// '}' right after a key: the value is missing, so an empty one is reported.
5850 else if(first ==
'}')
5852 _c4dbgp(
"mapflow[RKCL]: end with missing val!");
5854 m_evt_handler->set_val_scalar_plain_empty();
5855 _line_progressed(1);
5857 goto mapflow_finish;
// ',' right after a key: empty value, then move on to a new sibling.
5859 else if(first ==
',')
5861 _c4dbgp(
"mapflow[RKCL]: got comma. val is missing");
5862 m_evt_handler->set_val_scalar_plain_empty();
5863 m_evt_handler->add_sibling();
5865 _line_progressed(1);
// A '#' right after the comma is rejected as an invalid comment.
5866 if(m_evt_handler->m_curr->line_contents.rem.begins_with(
'#'))
5868 _c4err(
"parse error: invalid comment after comma");
// ---- RVAL: a value is expected ----
5876 else if(has_any(
RVAL))
5878 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
5879 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
5880 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5881 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5882 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5883 _c4dbgpf(
"mapflow[RVAL]: '{}'", first);
// Single-quoted value.
5887 _c4dbgp(
"mapflow[RVAL]: scanning single-quoted scalar");
5888 sc = _scan_scalar_squot();
5889 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5890 _handle_annotations_before_blck_val_scalar();
5891 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
// Double-quoted value.
5894 else if(first ==
'"')
5896 _c4dbgp(
"mapflow[RVAL]: scanning double-quoted scalar");
5897 sc = _scan_scalar_dquot();
5898 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5899 _handle_annotations_before_blck_val_scalar();
5900 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
// Plain value.
5904 else if(_scan_scalar_plain_map_flow(&sc))
5906 _c4dbgp(
"mapflow[RVAL]: plain scalar.");
5907 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5908 _handle_annotations_before_blck_val_scalar();
5909 m_evt_handler->set_val_scalar_plain(maybe_filtered);
// Nested flow sequence as the value.
5912 else if(first ==
'[')
5914 _c4dbgp(
"mapflow[RVAL]: start val seqflow");
5916 _handle_annotations_before_blck_val_scalar();
5917 m_evt_handler->begin_seq_val_flow();
5918 _set_indentation(m_evt_handler->m_parent->indref);
5920 _line_progressed(1);
5921 goto mapflow_finish;
// Nested flow map as the value.
5923 else if(first ==
'{')
5925 _c4dbgp(
"mapflow[RVAL]: start val mapflow");
5927 _handle_annotations_before_blck_val_scalar();
5928 m_evt_handler->begin_map_val_flow();
5929 _set_indentation(m_evt_handler->m_parent->indref);
5931 _line_progressed(1);
// '}' while a value is expected: report an empty value, then finish.
5934 else if(first ==
'}')
5936 _c4dbgp(
"mapflow[RVAL]: end!");
5937 _handle_annotations_before_blck_val_scalar();
5938 m_evt_handler->set_val_scalar_plain_empty();
5939 _line_progressed(1);
5941 goto mapflow_finish;
// ',' while a value is expected: report an empty value.
5943 else if(first ==
',')
5945 _c4dbgp(
"mapflow[RVAL]: empty val!");
5946 _handle_annotations_before_blck_val_scalar();
5947 m_evt_handler->set_val_scalar_plain_empty();
// NOTE(review): the debug text below says "key ref", but this is the value
// path (the alias is passed to _handle_valref).
5951 else if(first ==
'*')
5953 csubstr ref = _scan_ref_map();
5954 _c4dbgpf(
"mapflow[RVAL]: key ref! {}", _prs(ref));
5955 _handle_valref(ref);
// NOTE(review): the debug text says "key anchor" although RVAL is active.
5958 else if(first ==
'&')
5960 csubstr anchor = _scan_anchor();
5961 _c4dbgpf(
"mapflow[RVAL]: key anchor! {}", _prs(anchor));
5962 _add_annotation(&m_pending_anchors, anchor);
5964 else if(first ==
'!')
5967 _c4dbgpf(
"mapflow[RVAL]: tag! {}", _prs(tag));
5968 _add_annotation(&m_pending_tags, tag);
// ---- RNXT: a separator or the closing brace is expected ----
5975 else if(has_any(
RNXT))
5977 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
5978 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
5979 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5980 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5981 _c4dbgpf(
"mapflow[RNXT]: '{}'", m_evt_handler->m_curr->line_contents.rem.str[0]);
// ',' : add a sibling for the next key/value pair.
5982 if(m_evt_handler->m_curr->line_contents.rem.begins_with(
','))
5984 _c4dbgp(
"mapflow[RNXT]: expect next keyval");
5985 m_evt_handler->add_sibling();
5987 _line_progressed(1);
// A '#' right after the comma is rejected as an invalid comment.
5988 if(m_evt_handler->m_curr->line_contents.rem.begins_with(
'#'))
5990 _c4err(
"parse error: invalid comment after comma");
5993 else if(m_evt_handler->m_curr->line_contents.rem.begins_with(
'}'))
5995 _c4dbgp(
"mapflow[RNXT]: end!");
5996 _line_progressed(1);
5998 goto mapflow_finish;
// ---- QMRK: the key following an explicit '?' marker ----
6005 else if(has_any(
QMRK))
6007 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
6008 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
6009 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
6010 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
6011 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
6012 _c4dbgpf(
"mapflow[QMRK]: '{}'", first);
// Single-quoted key.
6016 _c4dbgp(
"mapflow[QMRK]: scanning single-quoted scalar");
6017 sc = _scan_scalar_squot();
6018 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
6019 _handle_annotations_before_blck_key_scalar();
6020 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
// Double-quoted key.
6023 else if(first ==
'"')
6025 _c4dbgp(
"mapflow[QMRK]: scanning double-quoted scalar");
6026 sc = _scan_scalar_dquot();
6027 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
6028 _handle_annotations_before_blck_key_scalar();
6029 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
// Plain key.
6033 else if(_scan_scalar_plain_map_flow(&sc))
6035 _c4dbgp(
"mapflow[QMRK]: plain scalar");
6036 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
6037 _handle_annotations_before_blck_key_scalar();
6038 m_evt_handler->set_key_scalar_plain(maybe_filtered);
// ':' directly after '?': the key is empty.
6041 else if(first ==
':')
6043 _c4dbgp(
"mapflow[QMRK]: setting empty key");
6044 _handle_annotations_before_blck_key_scalar();
6045 m_evt_handler->set_key_scalar_plain_empty();
6047 _line_progressed(1);
6048 _maybe_skip_whitespace_tokens();
// '}' directly after '?': both key and value are reported empty, then finish.
6050 else if(first ==
'}')
6052 _c4dbgp(
"mapflow[QMRK]: end!");
6053 _handle_annotations_before_blck_key_scalar();
6054 m_evt_handler->set_key_scalar_plain_empty();
6055 m_evt_handler->set_val_scalar_plain_empty();
6057 _line_progressed(1);
6058 goto mapflow_finish;
// ',' directly after '?': both key and value are reported empty.
6060 else if(first ==
',')
6062 _c4dbgp(
"mapflow[QMRK]: empty key+val!");
6063 _handle_annotations_before_blck_key_scalar();
6064 m_evt_handler->set_key_scalar_plain_empty();
6065 m_evt_handler->set_val_scalar_plain_empty();
6068 else if(first ==
'&')
6070 csubstr anchor = _scan_anchor();
6071 _c4dbgpf(
"mapflow[QMRK]: key anchor! {}", _prs(anchor));
6072 _add_annotation(&m_pending_anchors, anchor);
6074 else if(first ==
'*')
6076 csubstr ref = _scan_ref_map();
6077 _c4dbgpf(
"mapflow[QMRK]: key ref! {}", _prs(ref));
6078 _handle_keyref(ref);
// Flow sequence used as the explicit key.
6081 else if(first ==
'[')
6086 _c4dbgp(
"mapflow[QMRK]: start child seqflow (!)");
6088 _handle_annotations_before_blck_key_scalar();
6089 m_evt_handler->begin_seq_key_flow();
6091 _set_indentation(m_evt_handler->m_parent->indref);
6092 _line_progressed(1);
6093 goto mapflow_finish;
// Flow map used as the explicit key.
6095 else if(first ==
'{')
6100 _c4dbgp(
"mapflow[QMRK]: start child mapflow (!)");
6102 _handle_annotations_before_blck_key_scalar();
6103 m_evt_handler->begin_map_key_flow();
6104 _set_indentation(m_evt_handler->m_parent->indref);
6106 _line_progressed(1);
6109 else if(first ==
'!')
6112 _c4dbgpf(
"mapflow[QMRK]: tag! {}", _prs(tag));
6113 _add_annotation(&m_pending_tags, tag);
// End-of-line handling; per the error text, running out of input here means
// the closing '}' was never seen.
6122 _c4dbgt(
"mapflow: go again", 0);
6123 if(_finished_line())
6125 if(C4_LIKELY(!_finished_file()))
6133 _c4err(
"missing terminating }");
6139 _c4dbgp(
"mapflow: finish");
// Parse step for a block-style sequence ("- item" lines).
//
// Entry invariants (asserted below): RSEQ and RBLCK are set and exactly one
// of RVAL / RNXT is set.
//
// RVAL (a value is expected): indentation is checked at the start of a line,
// then the first character picks the action. Quoted, plain and alias scalars
// are checked for a following ':'; without one they become the value, with
// one a block map is started and the scalar becomes its first key. '|' and
// '>' scan block scalars; '[' and '{' open flow containers; '-' opens a
// nested block sequence; ':' opens a block map with an empty key; '&' and
// '!' queue annotations; '?' opens a block map with an explicit key.
// RNXT (the next entry is expected): indentation decides whether the
// sequence continues or is popped; a sequence token adds a sibling; document
// begin/end tokens, a ':' under a map parent, an indentless sequence ending,
// and leading tabs are handled as special cases.
6145template<
class EventHandler>
6146void ParseEngine<EventHandler>::_handle_seq_block()
6149 _c4dbgpf(
"handle_seq_block: seq_id={} node_id={} level={} indent={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
6151 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RSEQ), m_evt_handler->m_curr->pos);
6152 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RBLCK), m_evt_handler->m_curr->pos);
6153 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL|
RNXT), m_evt_handler->m_curr->pos);
// RVAL and RNXT are mutually exclusive.
6154 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(
RVAL) + has_any(
RNXT)), m_evt_handler->m_curr->pos);
// Skip a comment if present, then check whether anything is left on this line.
6156 _maybe_skip_comment_strict();
6157 if(!m_evt_handler->m_curr->line_contents.rem.len)
// ---- RVAL: a value is expected (RNXT must be clear) ----
6162 _c4dbgpf(
"seqblck[RVAL]: col={}", m_evt_handler->m_curr->pos.col);
6163 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
// Indentation checks only apply when positioned at the start of a line.
6164 if(m_evt_handler->m_curr->at_line_beginning())
6166 _c4dbgpf(
"seqblck[RVAL]: indref={} indentation={}", m_evt_handler->m_curr->indref+1, m_evt_handler->m_curr->line_contents.indentation);
// Sufficiently indented: consume the indentation and keep reading the value.
6167 if(m_evt_handler->m_curr->indentation_ge_extra())
6169 _c4dbgpf(
"seqblck[RVAL]: skip {} from indentation", m_evt_handler->m_curr->line_contents.indentation);
6170 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6171 if(!m_evt_handler->m_curr->line_contents.rem.len)
6174 else if(m_evt_handler->m_curr->indentation_lt_extra())
6176 _c4dbgp(
"seqblck[RVAL]: smaller indentation than RVAL!");
// Same indentation as the sequence: the pending value is reported as empty.
// NOTE(review): the debug text below repeats the message printed just above.
6177 if(m_evt_handler->m_curr->indentation_eq())
6179 _c4dbgp(
"seqblck[RVAL]: smaller indentation than RVAL!");
6180 _handle_annotations_before_blck_val_scalar();
6181 m_evt_handler->set_val_scalar_plain_empty();
// Otherwise the indentation must be strictly smaller: pop out of this sequence.
6187 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_lt(), m_evt_handler->m_curr->pos);
6188 _c4dbgp(
"seqblck[RVAL]: smaller indentation!");
6189 _handle_indentation_pop_from_block_seq();
6190 goto seqblck_finish;
// indentation == npos is treated as an empty line; consume all of it.
6193 else if(m_evt_handler->m_curr->line_contents.indentation ==
npos)
6195 _c4dbgp(
"seqblck[RVAL]: empty line!");
6196 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
6200 _RYML_ASSERT_PARSE_(callbacks(), m_evt_handler->m_curr->line_contents.rem.len, m_evt_handler->m_curr->pos);
// Skip leading whitespace; npos means the rest of the line was whitespace only.
6201 const size_t startmark = _handle_block_skip_leading_whitespace();
6202 _c4dbgpf(
"seqblck[RVAL]: startmark={}", startmark);
6203 if(startmark ==
npos)
6205 _c4dbgp(
"seqblck[RVAL]: whitespace only");
6208 const size_t tabmark = _handle_block_get_whitespace_mark();
6209 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
6210 _c4dbgpf(
"seqblck[RVAL]: first='{}' currcol={}", first, m_evt_handler->m_curr->pos.col - 1);
// Remember where the value starts: needed if it turns out to be a map key.
6211 const size_t startline = m_evt_handler->m_curr->pos.line;
// The column is taken relative to the BOM length (m_bom_len is subtracted).
6212 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col() - m_bom_len;
// Single-quoted scalar: a value, unless a ':' follows (then it is a map key).
6216 _c4dbgp(
"seqblck[RVAL]: single-quoted scalar");
6217 sc = _scan_scalar_squot();
6218 if(!_maybe_scan_following_colon())
6220 _c4dbgp(
"seqblck[RVAL]: set as val");
6221 _handle_annotations_before_blck_val_scalar();
6222 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
6223 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
6228 _c4dbgp(
"seqblck[RVAL]: start mapblck, set scalar as key");
// NOTE(review): called with startmark only, whereas the plain-scalar branch
// also passes tabmark - confirm the one-argument form is intended.
6229 _handle_block_check_leading_tabs(startmark);
6231 _handle_annotations_before_start_mapblck(startline);
6233 m_evt_handler->begin_map_val_block();
6234 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6235 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
6236 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6238 _maybe_skip_whitespace_tokens();
6239 goto seqblck_finish;
// Double-quoted scalar: same value-or-key decision.
6242 else if(first ==
'"')
6244 _c4dbgp(
"seqblck[RVAL]: double-quoted scalar");
6245 sc = _scan_scalar_dquot();
6246 if(!_maybe_scan_following_colon())
6248 _c4dbgp(
"seqblck[RVAL]: set as val");
6249 _handle_annotations_before_blck_val_scalar();
6250 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
6251 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
6256 _c4dbgp(
"seqblck[RVAL]: start mapblck, set scalar as key");
6258 _handle_block_check_leading_tabs(startmark);
6259 _handle_annotations_before_start_mapblck(startline);
6261 m_evt_handler->begin_map_val_block();
6262 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6263 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
6264 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6266 _maybe_skip_whitespace_tokens();
6267 goto seqblck_finish;
// '|' literal block scalar, scanned with indref + 1 as the indentation argument.
6273 else if(first ==
'|')
6275 _c4dbgp(
"seqblck[RVAL]: block-literal scalar");
6277 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6278 _handle_annotations_before_blck_val_scalar();
6279 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
6280 m_evt_handler->set_val_scalar_literal(maybe_filtered);
// '>' folded block scalar, scanned the same way.
6283 else if(first ==
'>')
6285 _c4dbgp(
"seqblck[RVAL]: block-folded scalar");
6287 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6288 _handle_annotations_before_blck_val_scalar();
6289 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
6290 m_evt_handler->set_val_scalar_folded(maybe_filtered);
// Plain scalar: same value-or-key decision as the quoted scalars.
6293 else if(_scan_scalar_plain_seq_blck(&sc))
6295 _c4dbgp(
"seqblck[RVAL]: plain scalar.");
6296 if(!_maybe_scan_following_colon())
6298 _c4dbgp(
"seqblck[RVAL]: set as val");
6299 _handle_annotations_before_blck_val_scalar();
6300 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
6301 m_evt_handler->set_val_scalar_plain(maybe_filtered);
// The key must start to the right of the sequence's own indentation.
6306 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref !=
npos, m_evt_handler->m_curr->pos);
6307 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, startindent > m_evt_handler->m_curr->indref, m_evt_handler->m_curr->pos);
6308 _c4dbgp(
"seqblck[RVAL]: start mapblck, set scalar as key");
6309 _handle_block_check_leading_tabs(startmark, tabmark);
6311 _handle_annotations_before_start_mapblck(startline);
6313 m_evt_handler->begin_map_val_block();
6314 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6315 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
6316 m_evt_handler->set_key_scalar_plain(maybe_filtered);
6318 _maybe_skip_whitespace_tokens();
6319 goto seqblck_finish;
// Flow sequence as the value; its indentation is the parent's indref + 1.
6322 else if(first ==
'[')
6324 _c4dbgp(
"seqblck[RVAL]: start child seqflow");
6326 _handle_annotations_before_blck_val_scalar();
6327 m_evt_handler->begin_seq_val_flow();
6329 _line_progressed(1);
6330 _set_indentation(m_evt_handler->m_parent->indref + 1u);
6331 goto seqblck_finish;
// Flow map as the value; same indentation rule.
6333 else if(first ==
'{')
6335 _c4dbgp(
"seqblck[RVAL]: start child mapflow");
6337 _handle_annotations_before_blck_val_scalar();
6338 m_evt_handler->begin_map_val_flow();
6340 _line_progressed(1);
6341 _set_indentation(m_evt_handler->m_parent->indref + 1u);
6342 goto seqblck_finish;
// '-' : nested block sequence, indented at the column where the dash was found.
6344 else if(first ==
'-')
6346 _c4dbgp(
"seqblck[RVAL]: dash");
6347 _handle_block_check_leading_tabs(startmark);
6348 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref !=
npos, m_evt_handler->m_curr->pos);
6349 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, startindent > m_evt_handler->m_curr->indref, m_evt_handler->m_curr->pos);
6350 _c4dbgp(
"seqblck[RVAL]: start child seqblck");
// NOTE(review): same condition as the assertion two statements above; looks redundant.
6351 _RYML_ASSERT_PARSE_(this->callbacks(), startindent > m_evt_handler->m_curr->indref, m_evt_handler->m_curr->pos);
6353 _handle_annotations_before_blck_val_scalar();
6354 m_evt_handler->begin_seq_val_block();
6356 _set_indentation(startindent);
6358 _line_progressed(1);
// ':' with no key text: start a block map whose first key is empty.
6360 else if(first ==
':')
6362 _c4dbgp(
"seqblck[RVAL]: start child mapblck with empty key");
6364 _handle_annotations_before_start_mapblck(startline);
6366 m_evt_handler->begin_map_val_block();
6367 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6368 m_evt_handler->set_key_scalar_plain_empty();
6370 _line_progressed(1);
6371 _maybe_skip_whitespace_tokens();
6372 goto seqblck_finish;
// Anchor: queued together with the column and line where it was seen.
6374 else if(first ==
'&')
6376 const csubstr anchor = _scan_anchor();
6377 _c4dbgpf(
"seqblck[RVAL]: anchor! {}", _prs(anchor));
6380 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
// Alias: a value, unless a ':' follows (then it is the key of a new block map).
6382 else if(first ==
'*')
6384 csubstr ref = _scan_ref_seq();
6385 _c4dbgpf(
"seqblck[RVAL]: ref! {}", _prs(ref));
6386 if(!_maybe_scan_following_colon())
6388 _c4dbgp(
"seqblck[RVAL]: set ref as val!");
6389 _handle_valref(ref);
6394 _c4dbgp(
"seqblck[RVAL]: ref is key of map");
6396 _handle_annotations_before_start_mapblck(startline);
6397 m_evt_handler->begin_map_val_block();
6398 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6399 _handle_keyref(ref);
6401 _set_indentation(startindent);
6402 _maybe_skip_whitespace_tokens();
6403 goto seqblck_finish;
// Tag: queued together with the column and line where it was seen.
6406 else if(first ==
'!')
6409 _c4dbgpf(
"seqblck[RVAL]: val tag! {}", _prs(tag));
6412 _add_annotation(&m_pending_tags, tag, startindent, startline);
// '?' explicit key: start a block map at this column and skip spaces after the marker.
6414 else if(first ==
'?')
6416 _c4dbgp(
"seqblck[RVAL]: start child mapblck, explicit key");
6418 m_evt_handler->begin_map_val_block();
6420 _set_indentation(startindent);
6421 _line_progressed(1);
6422 _maybe_skipchars(
' ');
// A sequence token right after '?' means the explicit key is itself a block sequence.
6423 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
6425 _c4dbgp(
"seqblck[RVAL]: seqblck starts after ?");
6427 m_evt_handler->begin_seq_key_block();
6429 _save_indentation();
6430 _line_progressed(1);
6431 _maybe_skipchars(
' ');
6433 goto seqblck_finish;
// ---- RNXT: the next entry is expected (RVAL must be clear) ----
6442 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT), m_evt_handler->m_curr->pos);
6443 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
6447 _c4dbgpf(
"seqblck[RNXT]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
6448 if(C4_LIKELY(m_evt_handler->m_curr->at_line_beginning()))
6450 _c4dbgp(
"seqblck[RNXT]: at line begin");
// Indentation at or beyond indref: consume indref characters and continue.
6451 if(m_evt_handler->m_curr->indentation_ge())
6453 _c4dbgpf(
"seqblck[RNXT]: skip {} from indref", m_evt_handler->m_curr->indref);
6454 _line_progressed(m_evt_handler->m_curr->indref);
6455 if(!m_evt_handler->m_curr->line_contents.rem.len)
// Smaller indentation: pop levels, then (per the debug texts) either continue
// in a sequence or leave this handler.
6458 else if(m_evt_handler->m_curr->indentation_lt())
6460 _c4dbgp(
"seqblck[RNXT]: smaller indentation!");
6461 _handle_indentation_pop_from_block_seq();
6464 _c4dbgp(
"seqblck[RNXT]: still seqblck!");
6465 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT), m_evt_handler->m_curr->pos);
6466 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6467 if(!m_evt_handler->m_curr->line_contents.rem.len)
6472 _c4dbgp(
"seqblck[RNXT]: no longer seqblck!");
6473 goto seqblck_finish;
// indentation == npos is treated as a blank line; consume all of it.
6476 else if(m_evt_handler->m_curr->line_contents.indentation ==
npos)
// NOTE(review): the label says len= but the argument passed is rem itself,
// not rem.len - confirm this is intended.
6478 _c4dbgpf(
"seqblck[RNXT]: blank line, len={}", m_evt_handler->m_curr->line_contents.rem);
6479 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
6480 if(!m_evt_handler->m_curr->line_contents.rem.len)
6486 _c4dbgp(
"seqblck[RNXT]: NOT at line begin");
6487 if(!m_evt_handler->m_curr->line_contents.rem.begins_with_any(
" \t"))
6494 if(!m_evt_handler->m_curr->line_contents.rem.len)
6496 _c4dbgp(
"seqblck[RNXT]: again");
6504 _c4assert(m_evt_handler->m_curr->line_contents.rem.len > 0);
6505 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
6506 _c4dbgpf(
"seqblck[RNXT]: '{}' node_id={}", _c4prc(first), m_evt_handler->m_curr->node_id);
// A document-begin token is only considered when both indref and the line
// indentation are zero; otherwise the text is tested as a sequence token.
6509 if(m_evt_handler->m_curr->indref > 0
6510 || m_evt_handler->m_curr->line_contents.indentation > 0
6511 || !_is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem))
6513 if(C4_LIKELY(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem)))
6515 _c4dbgp(
"seqblck[RNXT]: expect next val");
6517 m_evt_handler->add_sibling();
6518 _line_progressed(1);
// Document begin: start a new document and advance 3 characters past the token.
6527 _c4dbgp(
"seqblck[RNXT]: start doc");
6528 _start_doc_suddenly();
6529 _line_progressed(3);
6530 _maybe_skip_whitespace_tokens();
6531 goto seqblck_finish;
// ':' here is accepted only when the parent state is a map: this sequence was
// the explicit ('?') key of that map, so the sequence is ended.
6534 else if(first ==
':')
6540 if(C4_LIKELY(m_evt_handler->m_parent && (m_evt_handler->m_parent->flags &
RMAP)))
6542 _c4dbgp(
"seqblck[RNXT]: actually this seq was '?' key of parent map");
6543 m_evt_handler->end_seq_block();
6544 goto seqblck_finish;
// '.' may begin a document-end token.
6551 else if(first ==
'.')
6553 _c4dbgp(
"seqblck[RNXT]: maybe doc?");
6554 if(_is_doc_end_token(m_evt_handler->m_curr->line_contents.rem))
6556 _c4dbgp(
"seqblck[RNXT]: end doc");
6557 _end_doc_suddenly();
6558 _line_progressed(3);
6559 _maybe_skip_whitespace_tokens();
6560 _check_doc_end_tokens();
6561 goto seqblck_finish;
6572 _print_state_stack();
// Indentless sequence: the parent is a block map at the same indref, so pop
// back to the parent and add a sibling there.
6574 if(m_evt_handler->m_parent
6575 && has_all(
RMAP|
RBLCK, m_evt_handler->m_parent)
6576 && m_evt_handler->m_curr->indref == m_evt_handler->m_parent->indref)
6578 _c4dbgpf(
"seqblck[RNXT]: end indentless seq, go to parent={}. node={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id);
6579 _RYML_ASSERT_PARSE_(this->callbacks(), m_evt_handler->m_curr != m_evt_handler->m_parent, m_evt_handler->m_curr->pos);
6580 _handle_indentation_pop(m_evt_handler->m_parent);
6581 _RYML_ASSERT_PARSE_(this->callbacks(), has_all(
RMAP|
RBLCK), m_evt_handler->m_curr->pos);
6582 m_evt_handler->add_sibling();
6584 goto seqblck_finish;
// Leading tab: find the first non-tab character; the rest of the line is consumed below.
6586 else if(first ==
'\t')
6588 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(
'\t');
6591 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
// End-of-line handling: leave through seqblck_finish once the file is finished.
6600 _c4dbgt(
"seqblck: go again", 0);
6601 if(_finished_line())
6606 if(_finished_file())
6608 _c4dbgp(
"seqblck: finish!");
6610 goto seqblck_finish;
6617 _c4dbgp(
"seqblck: finish");
// Handles the remaining contents of the current line while the parser state
// has both RMAP and RBLCK set (asserted below). Exactly one of the sub-state
// flags RKEY, RKCL, RVAL, RNXT or QMRK must be set (also asserted), and the
// body dispatches on that flag. The RKEY, RVAL and RNXT handling is inline
// here; QMRK and RKCL are delegated to _handle_map_block_qmrk() and
// _handle_map_block_rkcl(). Parse events are reported through m_evt_handler;
// malformed input is reported through _c4err().
6623template<
class EventHandler>
6624void ParseEngine<EventHandler>::_handle_map_block()
6627 _c4dbgpf(
"handle_map_block: map_id={} node_id={} level={} indref={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
// State preconditions: RMAP and RBLCK set, and exactly one sub-state flag.
6631 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP), m_evt_handler->m_curr->pos);
6632 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RBLCK), m_evt_handler->m_curr->pos);
6633 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RKCL|
RVAL|
RNXT|
QMRK), m_evt_handler->m_curr->pos);
6634 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(
RKEY) + has_any(
RKCL) + has_any(
RVAL) + has_any(
RNXT) + has_any(
QMRK)), m_evt_handler->m_curr->pos);
// Calls _maybe_skip_comment() and then tests whether anything is left on the line.
6636 _maybe_skip_comment();
6637 if(!m_evt_handler->m_curr->line_contents.rem.len)
// ---------------------------------------------------------------------
// RKEY state: the four other sub-state flags are asserted clear.
// ---------------------------------------------------------------------
6642 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
6643 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
6644 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
6645 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
// At the beginning of a line the indentation is compared with indref:
//  - equal:   consume indref characters;
//  - smaller: pop via _handle_indentation_pop_from_block_map(), then consume
//             the line's indentation;
//  - larger:  asserted and reported as "invalid indentation".
6649 if(m_evt_handler->m_curr->at_line_beginning())
6651 if(m_evt_handler->m_curr->indentation_eq())
6653 _c4dbgpf(
"mapblck[RKEY]: skip {} from indref", m_evt_handler->m_curr->indref);
6654 _line_progressed(m_evt_handler->m_curr->indref);
6655 if(!m_evt_handler->m_curr->line_contents.rem.len)
6658 else if(m_evt_handler->m_curr->indentation_lt())
6660 _c4dbgp(
"mapblck[RKEY]: smaller indentation!");
6661 _handle_indentation_pop_from_block_map();
6662 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6665 _c4dbgp(
"mapblck[RKEY]: still mapblck!");
6666 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY), m_evt_handler->m_curr->pos);
6667 if(!m_evt_handler->m_curr->line_contents.rem.len)
6672 _c4dbgp(
"mapblck[RKEY]: no longer mapblck!");
6673 goto mapblck_finish;
6678 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_gt(), m_evt_handler->m_curr->pos);
6679 _c4err(
"invalid indentation");
// Dispatch on the first remaining character. startline/startindent record
// where the token begins and are passed along with pending anchors/tags.
6685 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
6686 const size_t startline = m_evt_handler->m_curr->pos.line;
6687 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
6688 _c4dbgpf(
"mapblck[RKEY]: '{}'", _c4prc(first));
// Single-quoted key: scan, filter, set as key; a ':' must follow.
// NOTE(review): this RKEY path filters with _maybe_filter_val_scalar_* even
// though the result is stored as a key, whereas the RVAL and QMRK paths use
// _maybe_filter_key_scalar_* for keys -- confirm this is intentional.
6692 _c4dbgp(
"mapblck[RKEY]: scanning single-quoted scalar");
6693 sc = _scan_scalar_squot();
6694 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
6695 _handle_annotations_before_blck_key_scalar();
6696 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6698 if(!_maybe_scan_following_colon())
6699 _c4err(
"could not find ':' colon after key");
6701 _maybe_skip_whitespace_tokens();
// Double-quoted key: same sequence as the single-quoted case.
6703 else if(first ==
'"')
6705 _c4dbgp(
"mapblck[RKEY]: scanning double-quoted scalar");
6706 sc = _scan_scalar_dquot();
6707 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
6708 _handle_annotations_before_blck_key_scalar();
6709 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6711 if(!_maybe_scan_following_colon())
6712 _c4err(
"could not find ':' colon after key");
6714 _maybe_skip_whitespace_tokens();
// Block scalars ('|' literal, '>' folded) are rejected as keys in this state.
6718 else if(C4_UNLIKELY(first ==
'|'))
6720 _c4err(
"block map: literal keys must be enclosed in '?'");
6722 else if(C4_UNLIKELY(first ==
'>'))
6724 _c4err(
"block map: folded keys must be enclosed in '?'");
// Plain scalar key: filtered with indref, set as key; a ':' must follow.
6726 else if(_scan_scalar_plain_map_blck(&sc))
6728 _c4dbgp(
"mapblck[RKEY]: plain scalar");
6729 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
6730 _handle_annotations_before_blck_key_scalar();
6731 m_evt_handler->set_key_scalar_plain(maybe_filtered);
6733 if(!_maybe_scan_following_colon())
6734 _c4err(
"could not find ':' colon after key");
6736 _maybe_skip_whitespace_tokens();
// '?' token: consume it and following spaces; if a block-sequence token
// follows, a sequence is begun as the key and the function finishes.
6738 else if(first ==
'?')
6740 _c4dbgp(
"mapblck[RKEY]: key token!");
6742 _line_progressed(1);
6743 _maybe_skipchars(
' ');
6744 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
6746 _c4dbgp(
"mapblck[RKEY]: seqblck starts after ?");
6748 m_evt_handler->begin_seq_key_block();
6750 _save_indentation();
6751 _line_progressed(1);
6752 _maybe_skipchars(
' ');
6753 goto mapblck_finish;
// ':' with no preceding scalar: the key is set to an empty plain scalar.
6757 else if(first ==
':')
6759 _c4dbgp(
"mapblck[RKEY]: setting empty key");
6760 _handle_annotations_before_blck_key_scalar();
6761 m_evt_handler->set_key_scalar_plain_empty();
6763 _line_progressed(1);
6765 _maybe_skip_whitespace_tokens();
// '*': the scanned reference is handled as the key; a ':' must follow.
6767 else if(first ==
'*')
6769 csubstr ref = _scan_ref_map();
6770 _c4dbgpf(
"mapblck[RKEY]: key ref! {}", _prs(ref));
6771 _handle_keyref(ref);
6773 if(!_maybe_scan_following_colon())
6774 _c4err(
"could not find ':' colon after key");
6776 _maybe_skip_whitespace_tokens();
// '&' and '!': the anchor/tag is queued in m_pending_anchors/m_pending_tags
// together with the column and line where it started.
6778 else if(first ==
'&')
6780 csubstr anchor = _scan_anchor();
6781 _c4dbgpf(
"mapblck[RKEY]: key anchor! {}", _prs(anchor));
6782 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
6784 else if(first ==
'!')
6787 _c4dbgpf(
"mapblck[RKEY]: key tag! {}", _prs(tag));
6788 _add_annotation(&m_pending_tags, tag, startindent, startline);
// '[' / '{': a flow sequence / flow map is begun as the key, the opening
// character is consumed and the indentation is set to the token's column.
6790 else if(first ==
'[')
6795 _c4dbgp(
"mapblck[RKEY]: start child seqflow (!)");
6796 _handle_annotations_before_blck_key_scalar();
6797 m_evt_handler->begin_seq_key_flow();
6799 _line_progressed(1);
6800 _set_indentation(startindent);
6801 goto mapblck_finish;
6803 else if(first ==
'{')
6808 _c4dbgp(
"mapblck[RKEY]: start child mapflow (!)");
6809 _handle_annotations_before_blck_key_scalar();
6810 m_evt_handler->begin_map_key_flow();
6812 _line_progressed(1);
6813 _set_indentation(startindent);
6814 goto mapblck_finish;
// '-' / '.': only treated as document begin / end tokens when the line has
// zero indentation and the corresponding token test succeeds.
6816 else if(first ==
'-')
6818 _c4dbgp(
"mapblck[RKEY]: maybe doc?");
6819 if(m_evt_handler->m_curr->line_contents.indentation == 0 && _is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem))
6821 _c4dbgp(
"mapblck[RKEY]: end+start doc");
6822 _start_doc_suddenly();
6823 _line_progressed(3);
6824 _maybe_skip_whitespace_tokens();
6825 goto mapblck_finish;
6832 else if(first ==
'.')
6834 _c4dbgp(
"mapblck[RKEY]: maybe end doc?");
6835 if(m_evt_handler->m_curr->line_contents.indentation == 0 && _is_doc_end_token(m_evt_handler->m_curr->line_contents.rem))
6837 _c4dbgp(
"mapblck[RKEY]: end doc");
6838 _end_doc_suddenly();
6839 _line_progressed(3);
6840 _maybe_skip_whitespace_tokens();
6841 _check_doc_end_tokens();
6842 goto mapblck_finish;
// ---------------------------------------------------------------------
// RVAL state: the four other sub-state flags are asserted clear.
// ---------------------------------------------------------------------
6854 else if(has_any(
RVAL))
6856 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
6857 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
6858 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
6859 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
// At the beginning of a line, more_indented is reset and the indentation is
// classified with the *_extra() predicates:
//  - eq_extra: consume indref + 1 characters;
//  - gt_extra: set more_indented and consume the line's indentation;
//  - lt_extra: see the nested handling below.
6863 if(m_evt_handler->m_curr->at_line_beginning())
6865 _c4dbgpf(
"mapblck[RVAL]: indref={} indentation={}", m_evt_handler->m_curr->indref+1, m_evt_handler->m_curr->line_contents.indentation);
6866 m_evt_handler->m_curr->more_indented =
false;
6867 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref !=
npos, m_evt_handler->m_curr->pos);
6868 if(m_evt_handler->m_curr->indentation_eq_extra())
6870 _c4dbgp(
"mapblck[RVAL]: skip indentation!");
6871 _line_progressed(m_evt_handler->m_curr->indref + 1);
6872 if(!m_evt_handler->m_curr->line_contents.rem.len)
6875 else if(m_evt_handler->m_curr->indentation_gt_extra())
6877 _c4dbgp(
"mapblck[RVAL]: more indented!");
6878 m_evt_handler->m_curr->more_indented =
true;
6879 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6880 if(!m_evt_handler->m_curr->line_contents.rem.len)
// lt_extra with indentation equal to indref: unless the text after the
// indentation may be a block-sequence token, the pending value is set to an
// empty plain scalar.
// NOTE(review): the two debug messages below are identical; the second one
// probably wants to describe the empty-value case -- confirm.
6883 else if(m_evt_handler->m_curr->indentation_lt_extra())
6885 if(m_evt_handler->m_curr->indentation_eq())
6887 _c4dbgp(
"mapblck[RVAL]: smaller indentation than RVAL!");
6889 if(!_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem.sub(m_evt_handler->m_curr->line_contents.indentation)))
6891 _c4dbgp(
"mapblck[RVAL]: smaller indentation than RVAL!");
6892 _handle_annotations_before_blck_val_scalar();
6893 m_evt_handler->set_val_scalar_plain_empty();
// Indentation smaller than indref (asserted): pop from the block map.
6900 _c4dbgp(
"mapblck[RVAL]: smaller indentation than RKEY!");
6901 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_lt(), m_evt_handler->m_curr->pos);
6902 _handle_indentation_pop_from_block_map();
6905 _c4dbgp(
"mapblck[RVAL]: still mapblck!");
6906 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6909 _c4dbgp(
"mapblck[RVAL]: speculatively expect next keyval");
6910 m_evt_handler->add_sibling();
6917 _c4dbgp(
"mapblck[RVAL]: no longer mapblck!");
6918 goto mapblck_finish;
// startcol == npos is logged as a whitespace-only remainder. startcol and
// tabmark are later passed to _handle_block_check_leading_tabs().
6923 const size_t startcol = _handle_block_skip_leading_whitespace();
6924 if(startcol ==
npos)
6926 _c4dbgp(
"mapblck[RVAL]: whitespace only");
6929 const size_t tabmark = _handle_block_get_whitespace_mark();
// Dispatch on the first remaining character (the remainder is asserted
// non-empty).
6933 _c4assert(m_evt_handler->m_curr->line_contents.rem.len);
6934 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
6935 const size_t startline = m_evt_handler->m_curr->pos.line;
6936 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
6937 _c4dbgpf(
"mapblck[RVAL]: '{}'", _c4prc(first));
// Single-quoted scalar. Without a following ':' it is set as the value.
// With a following ':' a nested block map is begun as the value and the
// scalar becomes that map's key (filtered with the *_key_* helper).
6941 _c4dbgp(
"mapblck[RVAL]: scanning single-quoted scalar");
6942 sc = _scan_scalar_squot();
6943 if(!_maybe_scan_following_colon())
6945 _c4dbgp(
"mapblck[RVAL]: set as val");
6946 _handle_annotations_before_blck_val_scalar();
6947 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
6948 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
6954 _c4assert(startindent > m_evt_handler->m_curr->indref);
6955 _c4dbgp(
"mapblck[RVAL]: start new block map, set scalar as key");
6956 _handle_block_check_leading_tabs(startcol);
6957 _handle_annotations_before_start_mapblck(startline);
6960 m_evt_handler->begin_map_val_block();
6961 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6962 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
6963 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6964 _maybe_skip_whitespace_tokens();
// Double-quoted scalar: same value-or-nested-key decision as above.
6969 else if(first ==
'"')
6971 _c4dbgp(
"mapblck[RVAL]: scanning double-quoted scalar");
6972 sc = _scan_scalar_dquot();
6973 if(!_maybe_scan_following_colon())
6975 _c4dbgp(
"mapblck[RVAL]: set as val");
6976 _handle_annotations_before_blck_val_scalar();
6977 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
6978 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
6984 _c4assert(startindent > m_evt_handler->m_curr->indref);
6985 _c4dbgp(
"mapblck[RVAL]: start new block map, set scalar as key");
6986 _handle_block_check_leading_tabs(startcol);
6987 _handle_annotations_before_start_mapblck(startline);
6990 m_evt_handler->begin_map_val_block();
6991 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6992 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
6993 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6994 _maybe_skip_whitespace_tokens();
// '|' / '>': block scalars are scanned with indentation indref + 1 and set
// as a literal / folded value.
7001 else if(first ==
'|')
7003 _c4dbgp(
"mapblck[RVAL]: scanning block-literal scalar");
7005 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
7006 _handle_annotations_before_blck_val_scalar();
7007 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
7008 m_evt_handler->set_val_scalar_literal(maybe_filtered);
7011 else if(first ==
'>')
7013 _c4dbgp(
"mapblck[RVAL]: scanning block-folded scalar");
7015 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
7016 _handle_annotations_before_blck_val_scalar();
7017 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
7018 m_evt_handler->set_val_scalar_folded(maybe_filtered);
// Plain scalar: same value-or-nested-key decision as the quoted cases.
// NOTE(review): only this path passes tabmark to
// _handle_block_check_leading_tabs(); the quoted paths pass startcol alone --
// confirm the difference is intended.
7021 else if(_scan_scalar_plain_map_blck(&sc))
7023 _c4dbgp(
"mapblck[RVAL]: plain scalar.");
7024 if(!_maybe_scan_following_colon())
7026 _c4dbgp(
"mapblck[RVAL]: set as val");
7027 _handle_annotations_before_blck_val_scalar();
7028 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
7029 m_evt_handler->set_val_scalar_plain(maybe_filtered);
7035 _c4assert(startindent > m_evt_handler->m_curr->indref);
7036 _c4dbgpf(
"mapblck[RVAL]: start new block map, set scalar as key {}", m_evt_handler->m_curr->indref);
7037 _handle_block_check_leading_tabs(startcol, tabmark);
7039 _handle_annotations_before_start_mapblck(startline);
7041 m_evt_handler->begin_map_val_block();
7042 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7043 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
7044 m_evt_handler->set_key_scalar_plain(maybe_filtered);
7045 _maybe_skip_whitespace_tokens();
// '-' recognised as a possible block-sequence token: a block sequence is
// begun as the value, with its indentation set to the token's column.
7050 else if(first ==
'-' && _is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7052 if(C4_UNLIKELY(!m_evt_handler->m_curr->at_first_token()))
7054 _c4dbgp(
"mapblck[RVAL]: start val seqblck");
7055 _handle_block_check_leading_tabs(startcol);
7057 _handle_annotations_before_blck_val_scalar();
7058 m_evt_handler->begin_seq_val_block();
7060 _set_indentation(startindent);
7061 _line_progressed(1);
7062 _maybe_skip_whitespace_tokens();
7063 goto mapblck_finish;
// '[' / '{': a flow sequence / flow map is begun as the value; the new
// state's indentation is set to the parent's indref + 1.
7065 else if(first ==
'[')
7067 _c4dbgp(
"mapblck[RVAL]: start val seqflow");
7069 _handle_annotations_before_blck_val_scalar();
7070 m_evt_handler->begin_seq_val_flow();
7072 _set_indentation(m_evt_handler->m_parent->indref + 1u);
7073 _line_progressed(1);
7074 goto mapblck_finish;
7076 else if(first ==
'{')
7078 _c4dbgp(
"mapblck[RVAL]: start val mapflow");
7080 _handle_annotations_before_blck_val_scalar();
7081 m_evt_handler->begin_map_val_flow();
7083 m_evt_handler->m_curr->scalar_col = m_evt_handler->m_curr->line_contents.indentation;
7084 _set_indentation(m_evt_handler->m_parent->indref + 1u);
7085 _line_progressed(1);
7086 goto mapblck_finish;
// '*': a reference followed by ':' becomes the key of a new nested block
// map; otherwise it is handled as the value reference.
7088 else if(first ==
'*')
7090 csubstr ref = _scan_ref_map();
7091 _c4dbgpf(
"mapblck[RVAL]: ref! {}", _prs(ref));
7092 if(_maybe_scan_following_colon())
7094 _c4dbgp(
"mapblck[RVAL]: start child map, block");
7096 _handle_annotations_before_blck_val_scalar();
7097 m_evt_handler->begin_map_val_block();
7098 _handle_keyref(ref);
7099 _set_indentation(startindent);
7105 _c4dbgp(
"mapblck[RVAL]: was val ref");
7106 _handle_valref(ref);
7109 _maybe_skip_whitespace_tokens();
// '&' and '!': queue the anchor/tag with its starting column and line.
7111 else if(first ==
'&')
7113 csubstr anchor = _scan_anchor();
7114 _c4dbgpf(
"mapblck[RVAL]: anchor! {}", _prs(anchor));
7117 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
7119 else if(first ==
'!')
7122 _c4dbgpf(
"mapblck[RVAL]: tag! {}", _prs(tag));
7125 _add_annotation(&m_pending_tags, tag, startindent, startline);
// '?': a nested block map is begun as the value; if a block-sequence token
// follows the '?', a sequence is additionally begun as that map's key.
7127 else if(first ==
'?')
7129 if(C4_UNLIKELY(!m_evt_handler->m_curr->at_first_token()))
7131 _c4dbgp(
"mapblck[RVAL]: start val mapblck");
7133 _handle_annotations_before_blck_val_scalar();
7134 m_evt_handler->begin_map_val_block();
7136 _set_indentation(startindent);
7137 _line_progressed(1);
7138 _maybe_skipchars(
' ');
7139 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7141 _c4dbgp(
"mapblck[RVAL]: seqblck starts after ?");
7143 m_evt_handler->begin_seq_key_block();
7145 _save_indentation();
7146 _line_progressed(1);
7147 _maybe_skipchars(
' ');
7148 goto mapblck_finish;
// ':': a nested block map is begun as the value, with an empty plain key.
7152 else if(first ==
':')
7154 _c4dbgp(
"mapblck[RVAL]: start val mapblck");
7156 _handle_annotations_before_start_mapblck(startline);
7158 m_evt_handler->begin_map_val_block();
7159 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7160 m_evt_handler->set_key_scalar_plain_empty();
7163 _line_progressed(1);
7164 _maybe_skip_whitespace_tokens();
// ---------------------------------------------------------------------
// RNXT state: the four other sub-state flags are asserted clear.
// ---------------------------------------------------------------------
7172 else if(has_any(
RNXT))
7174 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
7175 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
7176 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
7177 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
// At the beginning of a line: equal indentation consumes indref characters
// and adds a sibling; smaller indentation pops from the block map first.
7181 if(m_evt_handler->m_curr->at_line_beginning())
7183 _c4dbgpf(
"mapblck[RNXT]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
7184 if(m_evt_handler->m_curr->indentation_eq())
7186 _c4dbgpf(
"mapblck[RNXT]: skip {} from indref", m_evt_handler->m_curr->indref);
7187 _line_progressed(m_evt_handler->m_curr->indref);
7188 _c4dbgp(
"mapblck[RNXT]: speculatively expect next keyval");
7189 m_evt_handler->add_sibling();
7193 else if(m_evt_handler->m_curr->indentation_lt())
7195 _c4dbgp(
"mapblck[RNXT]: smaller indentation!");
7196 _handle_indentation_pop_from_block_map();
7199 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
7202 _c4dbgp(
"mapblck[RNXT]: speculatively expect next keyval");
7203 m_evt_handler->add_sibling();
7210 goto mapblck_finish;
// Not at the beginning of a line: checks whether the remainder begins with
// a space or tab, and whether anything remains.
// NOTE(review): the debug tag below reads "seqblck[RNXT]" although this is
// block-map code -- likely a copy/paste leftover; confirm.
7216 _c4dbgp(
"mapblck[RNXT]: NOT at line begin");
7217 if(!m_evt_handler->m_curr->line_contents.rem.begins_with_any(
" \t"))
7224 if(!m_evt_handler->m_curr->line_contents.rem.len)
7226 _c4dbgp(
"seqblck[RNXT]: again");
7234 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.len > 0, m_evt_handler->m_curr->pos);
7235 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
7236 _c4dbgpf(
"mapblck[RNXT]: '{}'", _c4prc(first));
7239 _c4dbgp(
"mapblck[RNXT]: skip spaces");
7240 _maybe_skip_whitespace_tokens();
// ---------------------------------------------------------------------
// QMRK state: delegated to _handle_map_block_qmrk().
// ---------------------------------------------------------------------
7247 else if(has_any(
QMRK))
7249 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
7250 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
7251 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
7252 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
7253 if(_handle_map_block_qmrk())
7256 goto mapblck_finish;
// ---------------------------------------------------------------------
// RKCL state: delegated to _handle_map_block_rkcl().
// ---------------------------------------------------------------------
7258 else if(has_any(
RKCL))
7260 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
7261 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
7262 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
7263 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
7264 if(_handle_map_block_rkcl())
7267 goto mapblck_finish;
// Common tail: checks for the end of the line and of the file; when the
// file is finished the function jumps to mapblck_finish.
7271 _c4dbgt(
"mapblck: again", 0);
7272 if(_finished_line())
7276 if(_finished_file())
7278 _c4dbgp(
"mapblck: file finished!");
7280 goto mapblck_finish;
7287 _c4dbgp(
"mapblck: finish");
// Handles the QMRK sub-state of a block map (debug tag "mapblck[QMRK]").
// Invoked from _handle_map_block() when the QMRK flag is set. It first
// resolves indentation when at the beginning of a line, then dispatches on
// the first remaining character to set or begin the key.
// Returns bool; the caller branches on the result before jumping to its
// finish label. NOTE(review): the precise meaning of true/false is not
// derivable from the statements shown here -- confirm against the return
// sites.
7294template<
class EventHandler>
7295bool ParseEngine<EventHandler>::_handle_map_block_qmrk()
// Indentation handling at line start, using the *_extra() predicates:
//  - eq_extra: consume indref + 1 characters;
//  - gt_extra: consume the line's indentation;
//  - otherwise (lt_extra asserted): see below.
7300 if(m_evt_handler->m_curr->at_line_beginning())
7302 _c4dbgpf(
"mapblck[QMRK]: at line beginning. ind={} indref={}", m_evt_handler->m_curr->line_contents.indentation, m_evt_handler->m_curr->indref);
7303 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.indentation !=
npos, m_evt_handler->m_curr->pos);
7304 if(m_evt_handler->m_curr->indentation_eq_extra())
7306 _c4dbgpf(
"mapblck[QMRK]: skip {} from indref", m_evt_handler->m_curr->indref + 1);
7307 _line_progressed(m_evt_handler->m_curr->indref + 1);
7308 if(!m_evt_handler->m_curr->line_contents.rem.len)
7312 else if(m_evt_handler->m_curr->indentation_gt_extra())
7314 _c4dbgp(
"mapblck[QMRK]: larger indentation !");
7315 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
7316 if(!m_evt_handler->m_curr->line_contents.rem.len)
// Smaller indentation. When it equals indref and the line does not start
// with '-', the key is set to an empty plain scalar. When it is below
// indref, the block map is popped.
7321 _c4dbgp(
"mapblck[QMRK]: smaller indentation!");
7322 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_lt_extra(), m_evt_handler->m_curr->pos);
7323 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.len > 0, m_evt_handler->m_curr->pos);
7324 if(m_evt_handler->m_curr->indentation_eq()
7326 && m_evt_handler->m_curr->line_contents.rem.str[0] !=
'-')
7328 _c4dbgp(
"mapblck[QMRK]: QMRK finished!");
7329 _handle_annotations_before_blck_key_scalar();
7330 m_evt_handler->set_key_scalar_plain_empty();
7334 else if(m_evt_handler->m_curr->indentation_lt())
7336 _c4dbgp(
"mapblck[QMRK]: indentation pop!");
7337 _handle_indentation_pop_from_block_map();
7338 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
7341 _c4dbgp(
"mapblck[QMRK]: still mapblck!");
7346 _c4dbgp(
"mapblck[QMRK]: no longer mapblck!");
// Dispatch on the first remaining character (remainder asserted non-empty).
// NOTE(review): `first` is printed directly here, whereas the RKEY/RVAL/RNXT
// debug prints in _handle_map_block() wrap it in _c4prc() -- confirm.
7355 _c4assert(m_evt_handler->m_curr->line_contents.rem.len);
7356 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
7357 const size_t startline = m_evt_handler->m_curr->pos.line;
7358 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
7359 _c4dbgpf(
"mapblck[QMRK]: '{}'", first);
// Single-quoted scalar. Without a following ':' it is the key itself. With
// a following ':' a block map is begun *as the key*, the scalar becomes that
// nested map's key, and the indentation is set to the token's column.
7363 _c4dbgp(
"mapblck[QMRK]: scanning single-quoted scalar");
7364 sc = _scan_scalar_squot();
7365 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
7367 if(!_maybe_scan_following_colon())
7369 _c4dbgp(
"mapblck[QMRK]: set as key");
7370 _handle_annotations_before_blck_key_scalar();
7371 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7375 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), set scalar as key");
7376 _handle_annotations_before_start_mapblck_as_key();
7377 m_evt_handler->begin_map_key_block();
7378 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7379 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7380 _maybe_skip_whitespace_tokens();
7381 _set_indentation(startindent);
// Double-quoted scalar: same key-or-nested-map decision.
7386 else if(first ==
'"')
7388 _c4dbgp(
"mapblck[QMRK]: scanning double-quoted scalar");
7389 sc = _scan_scalar_dquot();
7390 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
7392 if(!_maybe_scan_following_colon())
7394 _c4dbgp(
"mapblck[QMRK]: set as key");
7395 _handle_annotations_before_blck_key_scalar();
7396 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7400 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), set scalar as key");
7401 _handle_annotations_before_start_mapblck_as_key();
7402 m_evt_handler->begin_map_key_block();
7403 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7404 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7405 _maybe_skip_whitespace_tokens();
7406 _set_indentation(startindent);
// '|' / '>': block scalars are accepted as keys in this state; they are
// scanned with indentation indref + 1 and set as literal / folded keys.
7411 else if(first ==
'|')
7413 _c4dbgp(
"mapblck[QMRK]: scanning block-literal scalar");
7415 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
7416 csubstr maybe_filtered = _maybe_filter_key_scalar_literal(sb);
7417 _handle_annotations_before_blck_key_scalar();
7418 m_evt_handler->set_key_scalar_literal(maybe_filtered);
// NOTE(review): the debug message below says "block-literal" but this is the
// '>' branch, which sets a folded key -- likely a copy/paste slip; confirm.
7421 else if(first ==
'>')
7423 _c4dbgp(
"mapblck[QMRK]: scanning block-literal scalar");
7425 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
7426 csubstr maybe_filtered = _maybe_filter_key_scalar_folded(sb);
7427 _handle_annotations_before_blck_key_scalar();
7428 m_evt_handler->set_key_scalar_folded(maybe_filtered);
// Plain scalar: same key-or-nested-map decision as the quoted cases.
7431 else if(_scan_scalar_plain_map_blck(&sc))
7433 _c4dbgp(
"mapblck[QMRK]: plain scalar");
7434 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
7436 if(!_maybe_scan_following_colon())
7438 _c4dbgp(
"mapblck[QMRK]: set as key");
7439 _handle_annotations_before_blck_key_scalar();
7440 m_evt_handler->set_key_scalar_plain(maybe_filtered);
7444 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), set scalar as key");
7445 _handle_annotations_before_start_mapblck_as_key();
7446 m_evt_handler->begin_map_key_block();
7447 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7448 m_evt_handler->set_key_scalar_plain(maybe_filtered);
7449 _maybe_skip_whitespace_tokens();
7450 _set_indentation(startindent);
// ':': a block map is begun as the key, with an empty plain key inside it.
7455 else if(first ==
':')
7457 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), empty key");
7459 _handle_annotations_before_start_mapblck_as_key();
7460 m_evt_handler->begin_map_key_block();
7461 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7462 m_evt_handler->set_key_scalar_plain_empty();
7463 _line_progressed(1);
7464 _maybe_skip_whitespace_tokens();
7465 _set_indentation(startindent);
// '*': a reference is either the key itself or, when followed by ':', the
// key of a new block map that is itself the key.
7469 else if(first ==
'*')
7471 csubstr ref = _scan_ref_map();
7472 _c4dbgpf(
"mapblck[QMRK]: key ref! {}", _prs(ref));
7474 if(!_maybe_scan_following_colon())
7476 _c4dbgp(
"mapblck[QMRK]: set ref as key");
7477 _handle_keyref(ref);
7481 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), set ref as key");
7482 _handle_annotations_before_start_mapblck_as_key();
7483 m_evt_handler->begin_map_key_block();
7484 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7485 _handle_keyref(ref);
7486 _set_indentation(startindent);
7490 _maybe_skip_whitespace_tokens();
// '&' and '!': queue the anchor/tag with its starting column and line.
7492 else if(first ==
'&')
7494 csubstr anchor = _scan_anchor();
7495 _c4dbgpf(
"mapblck[QMRK]: key anchor! {}", _prs(anchor));
7496 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
7498 else if(first ==
'!')
7501 _c4dbgpf(
"mapblck[QMRK]: key tag! {}", _prs(tag));
7502 _add_annotation(&m_pending_tags, tag, startindent, startline);
// '-': either a block sequence begun as the key, or (asserted) a document
// begin token, in which case a new document is started.
7504 else if(first ==
'-')
7506 _c4dbgp(
"mapblck[QMRK]: maybe seq or doc?");
7507 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7509 _c4dbgp(
"mapblck[QMRK]: start child seqblck (!)");
7511 _handle_annotations_before_blck_key_scalar();
7512 m_evt_handler->begin_seq_key_block();
7514 _set_indentation(startindent);
7515 _line_progressed(1);
7519 _c4dbgp(
"mapblck[QMRK]: end+start doc");
7520 _c4assert(_is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem));
7521 _start_doc_suddenly();
7522 _line_progressed(3);
7524 _maybe_skip_whitespace_tokens();
// '[' / '{': a flow sequence / flow map is begun as the key; the new state's
// indentation is set to the parent's indref + 1.
7527 else if(first ==
'[')
7529 _c4dbgp(
"mapblck[QMRK]: start child seqflow (!)");
7531 _handle_annotations_before_blck_key_scalar();
7532 m_evt_handler->begin_seq_key_flow();
7534 _set_indentation(m_evt_handler->m_parent->indref + 1);
7535 _line_progressed(1);
7538 else if(first ==
'{')
7540 _c4dbgp(
"mapblck[QMRK]: start child mapflow (!)");
7542 _handle_annotations_before_blck_key_scalar();
7543 m_evt_handler->begin_map_key_flow();
7545 _set_indentation(m_evt_handler->m_parent->indref + 1);
7546 _line_progressed(1);
// A further '?' (asserted to be more indented than indref): a block map is
// begun as the key; if a block-sequence token follows, a sequence is begun
// as that nested map's key.
// NOTE(review): the debug tag "mapblck[RVAL]" further below sits inside the
// QMRK handler -- likely a copy/paste leftover; confirm.
7549 else if(first ==
'?')
7551 _c4dbgpf(
"mapblck[QMRK]: another QMRK '?'. ind={} indref={}", startindent, m_evt_handler->m_curr->indref);
7552 _RYML_ASSERT_PARSE_(callbacks(), startindent > m_evt_handler->m_curr->indref, m_evt_handler->m_curr->pos);
7553 _c4dbgp(
"mapblck[QMRK]: ? indent gt - start child mapblck (!)");
7555 _handle_annotations_before_blck_key_scalar();
7556 m_evt_handler->begin_map_key_block();
7558 _set_indentation(startindent);
7560 _line_progressed(1);
7561 _maybe_skipchars(
' ');
7562 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7564 _c4dbgp(
"mapblck[RVAL]: seqblck starts after ?");
7566 m_evt_handler->begin_seq_key_block();
7568 _save_indentation();
7569 _line_progressed(1);
7570 _maybe_skipchars(
' ');
// Handles the RKCL sub-state of a block map (debug tag "mapblck[RKCL]").
// Invoked from _handle_map_block() when the RKCL flag is set. It deals with
// what follows a key: the ':' token, a new '?' token, document markers, or a
// missing ':'.
// Returns bool; the caller branches on the result before jumping to its
// finish label. NOTE(review): the precise meaning of true/false is not
// derivable from the statements shown here -- confirm against the return
// sites.
7585template<
class EventHandler>
7586bool ParseEngine<EventHandler>::_handle_map_block_rkcl()
// At line start: equal indentation consumes indref characters; smaller
// indentation is an error.
7591 if(m_evt_handler->m_curr->at_line_beginning())
7593 if(m_evt_handler->m_curr->indentation_eq())
7595 _c4dbgpf(
"mapblck[RKCL]: skip {} from indref", m_evt_handler->m_curr->indref);
7596 _line_progressed(m_evt_handler->m_curr->indref);
7597 if(!m_evt_handler->m_curr->line_contents.rem.len)
7600 else if(C4_UNLIKELY(m_evt_handler->m_curr->indentation_lt()))
7602 _c4err(
"invalid indentation");
// NOTE(review): `first` is printed directly here, whereas the RKEY/RVAL/RNXT
// debug prints in _handle_map_block() wrap it in _c4prc() -- confirm.
7605 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
7606 _c4dbgpf(
"mapblck[RKCL]: '{}'", first);
// Colon found: consume it and any following spaces.
7609 _c4dbgp(
"mapblck[RKCL]: found the colon");
7610 _line_progressed(1);
7611 _maybe_skipchars(
' ');
// On the GCC versions/targets selected below, C4_DONT_OPTIMIZE is applied to
// the remaining line before the sequence-token test.
// NOTE(review): presumably a workaround for a compiler issue -- confirm.
7612 #if defined(__GNUC__) && ( \
7613 ((__GNUC__ >= 12) && ((C4_WORDSIZE == 4) || defined(C4_CPU_S390_X) || defined(C4_CPU_PPC64))) \
7615 (__GNUC__ == 16 && defined(C4_CPU_X86_64)))
7616 C4_DONT_OPTIMIZE(m_evt_handler->m_curr->line_contents.rem);
// If a block-sequence token follows the colon, a block sequence is begun as
// the value and the current indentation is saved.
7619 if(!_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7626 _c4dbgp(
"mapblck[RKCL]: start val seqblck");
7628 m_evt_handler->begin_seq_val_block();
7630 _save_indentation();
7631 _line_progressed(1);
7632 _maybe_skipchars(
' ');
// '?': the pending value is set to an empty plain scalar and a sibling is
// added; if a block-sequence token follows the '?', a sequence is begun as
// the next key.
7636 else if(first ==
'?')
7638 _c4dbgp(
"mapblck[RKCL]: got '?'. val was empty");
7639 m_evt_handler->set_val_scalar_plain_empty();
7640 m_evt_handler->add_sibling();
7642 _line_progressed(1);
7643 _maybe_skipchars(
' ');
7644 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7646 _c4dbgp(
"mapblck[RKCL]: seqblck starts after ?");
7648 m_evt_handler->begin_seq_key_block();
7650 _save_indentation();
7651 _line_progressed(1);
7652 _maybe_skipchars(
' ');
// '-': when indref or the line indentation is zero, or the remainder is a
// document begin token, the token is required (checked, not just asserted)
// to be a document begin, and a new document is started.
7656 else if(first ==
'-')
7658 if(m_evt_handler->m_curr->indref == 0 || m_evt_handler->m_curr->line_contents.indentation == 0 || _is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem))
7660 _c4dbgp(
"mapblck[RKCL]: end+start doc");
7661 _RYML_CHECK_PARSE_(m_evt_handler->m_stack.m_callbacks, _is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem), m_evt_handler->m_curr->pos);
7662 _start_doc_suddenly();
7663 _line_progressed(3);
7664 _maybe_skip_whitespace_tokens();
// '.': the two characters after it are compared against ".." (exactly, or
// followed by a space) to recognise "..."; the document is then ended.
// NOTE(review): this is a hand-rolled test, whereas the RKEY branch of
// _handle_map_block() uses _is_doc_end_token(); also the second debug
// message says "end+start doc" although only _end_doc_suddenly() is called
// -- confirm both.
7672 else if(first ==
'.')
7674 _c4dbgp(
"mapblck[RKCL]: maybe end doc?");
7675 csubstr rs = m_evt_handler->m_curr->line_contents.rem.
sub(1);
7676 if(rs ==
".." || rs.begins_with(
".. "))
7678 _c4dbgp(
"mapblck[RKCL]: end+start doc");
7679 _end_doc_suddenly();
7680 _line_progressed(3);
7681 _maybe_skip_whitespace_tokens();
7682 _check_doc_end_tokens();
// No colon: the indentation is tested against indref, the value is set to
// an empty plain scalar and a sibling is added.
7692 _c4dbgp(
"mapblck[RKCL]: missing :");
7693 if(C4_UNLIKELY(!m_evt_handler->m_curr->indentation_eq()))
7695 m_evt_handler->set_val_scalar_plain_empty();
7696 m_evt_handler->add_sibling();
// Handles the current line while the parser state has RTOP set and none of
// RNXT, RSEQ or RMAP (both asserted), i.e. before any container has been
// opened. Depending on how the remainder begins, it opens a flow sequence
// ('['), opens a flow map ('{'), handles a byte order mark, or scans a
// double-quoted or plain scalar and sets it as the value.
// NOTE(review): judging by the name and the "runk_json" debug tag this is
// the variant used for JSON input -- confirm against the caller.
7705template<
class EventHandler>
7706void ParseEngine<EventHandler>::_handle_unk_json()
7708 _c4dbgpf(
"handle_unk_json indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
7710 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT|
RSEQ|
RMAP), m_evt_handler->m_curr->pos);
7711 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RTOP), m_evt_handler->m_curr->pos);
7713 _maybe_skip_comment();
7714 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// Skip leading indentation: when pos is npos the whole remainder is
// consumed; rem is then refreshed from the parser state.
7721 pos = pos !=
npos ? pos : rem.len;
7722 _c4dbgpf(
"skipping indentation of {}", pos);
7723 _line_progressed(pos);
7724 rem = m_evt_handler->m_curr->line_contents.rem;
7727 _c4dbgpf(
"rem is now {}", _prs(rem));
// '[': begin a flow sequence as the value, set the indentation to the
// current column, mark the document as non-empty and consume the bracket.
7730 if(rem.begins_with(
'['))
7732 _c4dbgp(
"it's a seq");
7733 _check_trailing_doc_token();
7735 m_evt_handler->begin_seq_val_flow();
7737 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7738 m_doc_empty =
false;
7739 _line_progressed(1);
// '{': same as above, for a flow map.
7741 else if(rem.begins_with(
'{'))
7743 _c4dbgp(
"it's a map");
7744 _check_trailing_doc_token();
7746 m_evt_handler->begin_map_val_flow();
7748 m_doc_empty =
false;
7749 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7750 _line_progressed(1);
// Byte order mark, as reported by _handle_bom().
7752 else if(_handle_bom())
7754 _c4dbgp(
"byte order mark");
// Otherwise a scalar is expected: SSCL is asserted clear, whitespace tokens
// are skipped, and the first character of what remains selects the scalar
// kind.
7758 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(
SSCL), m_evt_handler->m_curr->pos);
7759 _maybe_skip_whitespace_tokens();
7760 csubstr s = m_evt_handler->m_curr->line_contents.rem;
7763 const size_t startindent = m_evt_handler->m_curr->line_contents.indentation;
7764 const char first = s.
str[0];
// Double-quoted scalar: set as the value when no ':' follows.
7768 _c4dbgp(
"runk_json: scanning double-quoted scalar");
7769 _check_trailing_doc_token();
7772 m_doc_empty =
false;
7773 sc = _scan_scalar_dquot();
7774 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
7775 if(!_maybe_scan_following_colon())
7777 _c4dbgp(
"runk_json: set as val");
7778 _handle_annotations_before_blck_val_scalar();
7779 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
// Plain scalar: filtered with the line's indentation and set as the value
// when no ':' follows.
7786 else if(_scan_scalar_plain_unk(&sc))
7788 _c4dbgp(
"runk_json: got a plain scalar");
7789 _check_trailing_doc_token();
7792 m_doc_empty =
false;
7793 if(!_maybe_scan_following_colon())
7795 _c4dbgp(
"runk_json: set as val");
7796 _handle_annotations_before_blck_val_scalar();
7797 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
7798 m_evt_handler->set_val_scalar_plain(maybe_filtered);
// Handle the current line while the parser is at the top level and has not yet
// determined what kind of node it is reading: the asserts below require RTOP
// to be set and RNXT/RSEQ/RMAP to be clear.
// NOTE(review): this listing omits some source lines (braces and a few
// conditions); the comments only describe the statements that are visible.
7815template<
class EventHandler>
7816void ParseEngine<EventHandler>::_handle_unk()
7818 _c4dbgpf(
"handle_unk indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
7820 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT|
RSEQ|
RMAP), m_evt_handler->m_curr->pos);
7821 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RTOP), m_evt_handler->m_curr->pos);
// Skip leading spaces and any comment before looking at the rest of the line.
7823 _maybe_skipchars(
' ');
7824 _maybe_skip_comment();
// Check whether anything is left on the line after skipping.
7826 if(!m_evt_handler->m_curr->line_contents.rem.len)
7829 _c4dbgpf(
"runk: rem is now {}", _prs(m_evt_handler->m_curr->line_contents.rem));
// Document markers and directives are only considered at zero indentation,
// either at the line beginning or on the line where a BOM was recorded.
7831 if(m_evt_handler->m_curr->line_contents.indentation == 0u && (m_evt_handler->m_curr->at_line_beginning() || (m_bom_len && (m_evt_handler->m_curr->pos.line == m_bom_line))))
7833 _c4dbgpf(
"runk: rtop: zero indent + at line begin. offset={}", m_evt_handler->m_curr->pos.offset);
7834 _c4dbgp(
"runk: check BOM");
// Remember the line on which the byte order mark was found.
7837 m_bom_line = m_evt_handler->m_curr->pos.line;
7838 _c4dbgpf(
"runk: byte order mark! line={} offset={}", m_bom_line, m_evt_handler->m_curr->pos.offset);
7841 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
7842 _c4dbgpf(
"runk: rtop: first={}", _c4prc(first));
7845 _c4dbgp(
"runk: rtop: suspecting doc");
// Doc-begin token: reset the indentation to 0, consume the 3 token
// characters and skip the whitespace that follows.
7846 if(_is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem))
7848 _c4dbgp(
"runk: rtop: begin doc");
7851 _set_indentation(0);
7853 _line_progressed(3u);
7854 _maybe_skip_whitespace_tokens();
// A leading '.' may start a doc-end token.
7858 else if(first ==
'.')
7860 _c4dbgp(
"runk: rtop: suspecting doc end");
7861 if(_is_doc_end_token(m_evt_handler->m_curr->line_contents.rem))
7863 _c4dbgp(
"runk: rtop: end doc");
7870 _c4dbgp(
"runk: rtop: ignore end doc");
// Consume the 3 token characters, skip whitespace, then check what follows.
7873 _line_progressed(3u);
7874 _maybe_skip_whitespace_tokens();
7875 _check_doc_end_tokens();
// A leading '%' starts a directive.
7879 else if(first ==
'%')
7881 _c4dbgpf(
"directive: {}", m_evt_handler->m_curr->line_contents.rem);
// Directives are an error while RDOC is set, or after a non-empty
// document unless NDOC is set.
7882 if(C4_UNLIKELY(has_any(
RDOC) || (!m_doc_empty && has_none(
NDOC))))
7883 _c4err(
"need document footer before directives");
7884 _handle_directive(m_evt_handler->m_curr->line_contents.rem);
// startindent: indentation of the current line.
// remindent: column at which the remaining text starts.
7891 size_t startindent = m_evt_handler->m_curr->line_contents.indentation;
7892 size_t remindent = m_evt_handler->m_curr->line_contents.current_col(m_evt_handler->m_curr->line_contents.rem);
7895 _c4dbgpf(
"runk: prev BOMlen={}", m_bom_len);
// On the BOM line, discount the BOM length from the column of the remainder.
7896 if(m_evt_handler->m_curr->pos.line == m_bom_line)
7898 _c4dbgpf(
"runk: BOM remindent={} offset={}", remindent, m_evt_handler->m_curr->pos.offset);
7899 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, remindent >= m_bom_len, m_evt_handler->m_curr->pos);
7900 remindent -= m_bom_len;
// Skip leading whitespace and dispatch on the first remaining character.
7908 size_t startcol = _handle_block_skip_leading_whitespace();
7909 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
// Flow sequence. Normally it is opened as a value; when
// _annotations_require_key_container() is true, a block map is opened first
// and the flow sequence becomes its key.
7913 _c4dbgp(
"runk: flow seq?");
7914 _handle_unk_begin_doc();
7915 if(C4_LIKELY( ! _annotations_require_key_container()))
7917 _c4dbgp(
"runk: it's a seq, flow");
7918 _handle_annotations_before_blck_val_scalar();
7919 m_evt_handler->begin_seq_val_flow();
7921 _set_indentation(0);
7925 _c4dbgp(
"runk: start new block map, set flow seq as key (!)");
7926 _handle_annotations_before_start_mapblck(m_evt_handler->m_curr->pos.line);
7927 m_evt_handler->begin_map_val_block();
7929 _handle_annotations_and_indentation_after_start_mapblck(remindent, m_evt_handler->m_curr->pos.line);
7930 m_evt_handler->begin_seq_key_flow();
7932 _set_indentation(0);
// Consume the opening character.
7934 _line_progressed(1);
// Flow map: same two cases as the flow sequence above.
7936 else if(first ==
'{')
7938 _c4dbgp(
"runk: flow map?");
7939 _handle_unk_begin_doc();
7940 if(C4_LIKELY( ! _annotations_require_key_container()))
7942 _c4dbgp(
"runk: it's a map, flow");
7943 _handle_annotations_before_blck_val_scalar();
7944 m_evt_handler->begin_map_val_flow();
7946 _set_indentation(0);
7950 _c4dbgp(
"runk: start new block map, set flow map as key (!)");
7951 _handle_annotations_before_start_mapblck(m_evt_handler->m_curr->pos.line);
7952 m_evt_handler->begin_map_val_block();
7954 _handle_annotations_and_indentation_after_start_mapblck(remindent, m_evt_handler->m_curr->pos.line);
7955 m_evt_handler->begin_map_key_flow();
7957 _set_indentation(0);
// Consume the opening character.
7959 _line_progressed(1);
// '-' as a block token: open a block sequence as the value.
7961 else if(first ==
'-' && _is_blck_token(m_evt_handler->m_curr->line_contents.rem))
7963 _c4dbgp(
"runk: it's a seq, block");
// When this is not the first token on the line, recompute the indentation
// from the tokens to the left (annotations are not skipped here).
7964 if(C4_UNLIKELY(!m_evt_handler->m_curr->at_first_token()))
7965 startindent = _handle_unk_check_left_tokens(startindent, m_evt_handler->m_curr->pos.col,
false);
7966 _handle_unk_begin_doc();
7967 _handle_annotations_before_blck_val_scalar();
7968 m_evt_handler->begin_seq_val_block();
7970 _set_indentation(startindent);
7971 _line_progressed(1);
7972 _maybe_skipchars(
' ');
// '?' as a block token: open a block map whose key is a complex key.
7974 else if(first ==
'?' && _is_blck_token(m_evt_handler->m_curr->line_contents.rem))
7976 _c4dbgp(
"runk: it's a map + this key is complex");
7977 if(C4_UNLIKELY(!m_evt_handler->m_curr->at_first_token()))
7978 startindent = _handle_unk_check_left_tokens(startindent, m_evt_handler->m_curr->pos.col,
false);
7979 _handle_block_check_leading_tabs(startcol);
7980 _handle_unk_begin_doc();
7981 _handle_annotations_before_blck_val_scalar();
7982 m_evt_handler->begin_map_val_block();
7984 _set_indentation(startindent);
7985 _line_progressed(1);
7986 _maybe_skipchars(
' ');
// The complex key may itself be a block sequence starting right after the '?'.
7987 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7989 _c4dbgp(
"runk: seqblck key starts after ?");
7991 m_evt_handler->begin_seq_key_block();
7993 _save_indentation();
7994 _line_progressed(1);
7995 _maybe_skipchars(
' ');
// ':' as a block token: a map with an empty key. The visible lines take this
// path when the document is empty or annotations are pending, and raise the
// error further below.
7998 else if(first ==
':' && _is_blck_token(m_evt_handler->m_curr->line_contents.rem))
8000 if(m_doc_empty || (m_pending_anchors.num_entries | m_pending_tags.num_entries))
8002 _c4dbgp(
"runk: it's a map with an empty key");
8003 if(C4_UNLIKELY(!m_evt_handler->m_curr->at_first_token()))
8004 startindent = _handle_unk_check_left_tokens(startindent, m_evt_handler->m_curr->pos.col);
8005 _handle_block_check_leading_tabs(startcol);
// Capture the line before beginning the document and opening the map.
8006 const size_t startline = m_evt_handler->m_curr->pos.line;
8007 _handle_unk_begin_doc();
8008 _handle_annotations_before_start_mapblck(startline);
8010 m_evt_handler->begin_map_val_block();
8011 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8012 m_evt_handler->set_key_scalar_plain_empty();
8013 _set_indentation(startindent);
8017 _c4err(
"block colon cannot occur on a new line unless ? is used");
// Consume the ':' and skip the whitespace after it.
8020 _line_progressed(1);
8021 _maybe_skip_whitespace_tokens();
// '&': scan an anchor and keep it as a pending annotation, recorded with the
// column of the remaining text (remindent) and the current line.
8023 else if(first ==
'&')
8025 csubstr anchor = _scan_anchor();
8026 _c4dbgpf(
"anchor! {}", _prs(anchor));
8027 const size_t line = m_evt_handler->m_curr->pos.line;
8028 _handle_unk_begin_doc();
8029 _add_annotation(&m_pending_anchors, anchor, remindent, line);
8030 _set_indentation(0);
// '*': scan a reference. Without a following colon it is a value reference;
// with one, a new block map is opened and the reference becomes its key.
8032 else if(first ==
'*')
8034 csubstr ref = _scan_ref_map();
8035 _c4dbgpf(
"runk: ref! {}", _prs(ref));
8036 _handle_unk_begin_doc();
8037 if(!_maybe_scan_following_colon())
8039 _c4dbgp(
"runk: set val ref");
8040 _handle_valref(ref);
8044 _c4dbgp(
"runk: start new block map, set ref as key");
8045 _handle_block_check_leading_tabs(startcol);
8046 const size_t startline = m_evt_handler->m_curr->pos.line;
8047 _handle_annotations_before_start_mapblck(startline);
8048 m_evt_handler->begin_map_val_block();
8049 _handle_keyref(ref);
8050 _maybe_skip_whitespace_tokens();
8051 _set_indentation(0);
// '!': scan a tag and keep it as a pending annotation, together with the
// text written to tag_orig by _scan_tag().
8055 else if(first ==
'!')
8058 csubstr tag = _scan_tag(&tag_orig);
8059 _c4dbgpf(
"runk: val tag! {}", _prs(tag));
// The column and line are read after the tag has been scanned.
8062 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(m_evt_handler->m_curr->line_contents.rem);
8063 const size_t line = m_evt_handler->m_curr->pos.line;
8064 _add_annotation(&m_pending_tags, tag, indentation, line, tag_orig);
// Scalar handling: no scalar may be stored at this point (SSCL must be clear).
8068 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(
SSCL), m_evt_handler->m_curr->pos);
// Whitespace mark and line captured before the scalar is scanned; both are
// used by the beginmap helper below.
8069 const size_t startscalar = _handle_block_get_whitespace_mark();
8070 const size_t startline = m_evt_handler->m_curr->pos.line;
// beginmap: open a block map for a scalar that turned out to be a key.
// Errors out if the parser has moved past the line where the scalar started.
8071 auto beginmap = [&](
size_t startindent_){
8072 if(C4_UNLIKELY(m_evt_handler->m_curr->pos.line > startline))
8073 _c4err(
"multiline scalars cannot be used as implicit keys");
8074 _handle_block_check_leading_tabs(startcol, startscalar);
8075 _handle_annotations_before_start_mapblck(startline);
8077 m_evt_handler->begin_map_val_block();
8078 _handle_annotations_and_indentation_after_start_mapblck(startindent_, startline);
// after_beginmap: after the key is set, skip whitespace and set the indentation.
8080 auto after_beginmap = [&](
size_t startindent_){
8081 _maybe_skip_whitespace_tokens();
8082 _set_indentation(startindent_);
// Block-literal scalar: scanned with the line indentation and set as a value.
8087 _c4dbgp(
"runk: block-literal scalar");
8088 _handle_unk_begin_doc();
8090 _scan_block(&sb, startindent);
8091 _handle_annotations_before_blck_val_scalar();
8092 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
8093 m_evt_handler->set_val_scalar_literal(maybe_filtered);
// '>': block-folded scalar, handled like the literal case with the folded
// filter and setter.
8095 else if(first ==
'>')
8097 _c4dbgp(
"runk: block-folded scalar");
8098 _handle_unk_begin_doc();
8100 _scan_block(&sb, startindent);
8101 _handle_annotations_before_blck_val_scalar();
8102 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
8103 m_evt_handler->set_val_scalar_folded(maybe_filtered);
// Single-quoted scalar. Without a following colon it is set as a value;
// with one, a block map is opened and the scalar becomes its key.
8105 else if(first ==
'\'')
8107 _c4dbgp(
"runk: single-quoted scalar");
8108 _handle_unk_begin_doc();
// Captured before scanning, since scanning advances the position.
8109 bool firsttoken = m_evt_handler->m_curr->at_first_token();
8110 size_t col = m_evt_handler->m_curr->pos.col;
8111 ScannedScalar sc = _scan_scalar_squot();
8112 if(!_maybe_scan_following_colon())
8114 _c4dbgp(
"runk: set as val");
8115 _handle_annotations_before_blck_val_scalar();
8116 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
8117 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
8121 _c4dbgp(
"runk: start new block map, set single-quoted scalar as key");
8123 startindent = _handle_unk_check_left_tokens(startindent, col);
8124 beginmap(startindent);
// The scalar is a key here, so use the key filter helper, as
// _handle_usty() does for its key scalars.
8125 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
8126 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
8127 after_beginmap(startindent);
// Double-quoted scalar: same value/key split as the single-quoted case.
8130 else if(first ==
'"')
8132 _c4dbgp(
"runk: double-quoted scalar");
8133 _handle_unk_begin_doc();
8134 bool firsttoken = m_evt_handler->m_curr->at_first_token();
8135 size_t col = m_evt_handler->m_curr->pos.col;
8136 ScannedScalar sc = _scan_scalar_dquot();
8137 if(!_maybe_scan_following_colon())
8139 _c4dbgp(
"runk: set as val");
8140 _handle_annotations_before_blck_val_scalar();
8141 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
8142 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
8146 _c4dbgp(
"runk: start new block map, set double-quoted scalar as key");
8148 startindent = _handle_unk_check_left_tokens(startindent, col);
8149 beginmap(startindent);
// Key scalar: use the key filter helper.
8150 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
8151 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
8152 after_beginmap(startindent);
// Anything else: try to scan a plain scalar.
8157 bool firsttoken = m_evt_handler->m_curr->at_first_token();
8158 size_t col = m_evt_handler->m_curr->pos.col;
8160 if(_scan_scalar_plain_unk(&sc))
8162 _c4dbgp(
"runk: plain scalar");
8163 _handle_unk_begin_doc();
8164 if(!_maybe_scan_following_colon())
8166 _c4dbgp(
"runk: set as val");
8167 _handle_annotations_before_blck_val_scalar();
8168 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
8169 m_evt_handler->set_val_scalar_plain(maybe_filtered);
8173 _c4dbgp(
"runk: start new block map, set plain scalar as key");
8175 startindent = _handle_unk_check_left_tokens(startindent, col);
8176 beginmap(startindent);
// Key scalar: use the key filter helper, with the indentation computed above.
8177 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
8178 m_evt_handler->set_key_scalar_plain(maybe_filtered);
8179 after_beginmap(startindent);
// Called by _handle_unk() when it finds content at the top level: checks for
// a trailing document token and marks the document as no longer empty.
// NOTE(review): this listing omits the lines between the two visible
// statements; confirm against the full source what else runs here.
8190template<
class EventHandler>
8191void ParseEngine<EventHandler>::_handle_unk_begin_doc()
8193 _c4dbgp(
"runk: begin doc");
8194 _check_trailing_doc_token();
8197 m_doc_empty =
false;
// Inspect the part of the current line to the left of a token and work out
// the indentation to use for it.
// realindent: the indentation assumed so far by the caller.
// col: the column of the token; the text examined is the full line from
//     m_bom_len up to col.
// skip_annotations: when set, pending annotations found on the line are
//     skipped first via _handle_unk_get_first_non_pending_token_pos().
// NOTE(review): callers use the result as the new start indentation; the
// return statement itself is not visible in this listing.
8200template<
class EventHandler>
8201size_t ParseEngine<EventHandler>::_handle_unk_check_left_tokens(
size_t realindent,
size_t col,
bool skip_annotations)
8206 csubstr s = m_evt_handler->m_curr->line_contents.full.
range(m_bom_len, col);
8208 _c4dbgpf(
"runk: check left tokens: s={}", _prs(s,
true));
8209 if(skip_annotations)
// Updates realindent and pos to account for the annotations on this line.
8211 _handle_unk_get_first_non_pending_token_pos(s, &realindent, &pos);
8212 _c4dbgpf(
"runk: skip annotations: realindent={} pos={}", realindent, pos);
// First character at or after pos that is not a space.
8214 size_t firstns = s.first_not_of(
' ', pos);
8217 _c4dbgpf(
"runk: check left tokens:\n"
8218 " tokens={} skipped={}\n"
8219 " bomlen={} first={} col={}\n"
8220 " (bomlen+first)={} vs {}=col\n"
8221 " startindent={} lineindent={}"
8222 , _prs(s,
true), _prs(s.sub(firstns),
true)
8223 , m_bom_len, firstns, col
8224 , m_bom_len+firstns, col,
8225 realindent, m_evt_handler->m_curr->line_contents.indentation);
// The first non-space character is not at col, so something else sits
// to the left of the token.
8226 if(m_bom_len + firstns != col)
// Without annotation skipping, the indentation is the position of that
// first non-space character.
8228 if(!skip_annotations)
8229 realindent = firstns;
8230 _c4dbgpf(
"runk: pos={} firstns={} -> realindent={}", pos, firstns, realindent);
// Walk over the leading whitespace and up to two same-line annotations at
// the start of s, reporting positions through the output pointers.
// s: the text to the left of the token being handled.
// indent: receives the position of the first non-whitespace character.
// first_non_token_pos: receives that same position at first, and is later
//     overwritten with the position reached after skipping the annotations.
// NOTE(review): the statements that advance pos past each annotation are not
// visible in this listing; only the asserts and debug output around them are.
8236template<
class EventHandler>
8237void ParseEngine<EventHandler>::_handle_unk_get_first_non_pending_token_pos(
csubstr s,
size_t *indent,
size_t *first_non_token_pos)
// Fetch the annotation strings found on the current line.
8240 uint32_t total = _get_annotations_same_line(s, &first, &second);
8241 _c4dbgpf(
"runk: before skip: {}", _prs(s,
true));
// Skip leading spaces and tabs.
8242 size_t pos = s.first_not_of(
" \t");
8247 *indent = *first_non_token_pos = pos;
8250 _c4assert(!s.sub(pos).begins_with_any(
" \t"));
8251 _c4dbgpf(
"runk: after skip leading {} whitespace: {}", pos, _prs(s.sub(pos),
true));
8252 _c4dbgpf(
"runk: first annotation: {}", first);
// The text at pos is expected to start with the first annotation.
8256 _c4assert(s.sub(pos).begins_with(first));
8259 _c4dbgpf(
"runk: after skip first annotation: pos={} {}", pos, _prs(s.sub(pos),
true));
8262 _c4dbgpf(
"runk: second annotation: {}", second);
8270 _c4dbgpf(
"runk: next nonspace: {}", pos + more);
8272 _c4dbgpf(
"runk: after skip annotation whitespace: pos={} {}", pos, _prs(s.sub(pos),
true));
// Likewise for the second annotation.
8273 _c4assert(s.sub(pos).begins_with(second));
8275 _c4dbgpf(
"runk: after skip annotation 2: pos={} {}", pos, _prs(s.sub(pos),
true));
8277 *first_non_token_pos = pos;
// Find up to two pending annotations (anchors and/or tags) that were recorded
// on the current line and report their strings through the output pointers.
// token_soup: the text of the line where the annotations are looked up.
// first_, second_: receive the strings of the annotations found; set from
//     nullptr when the corresponding annotation is absent.
// Must not be called while at the first token of the line (asserted below).
// NOTE(review): the return statements are not visible in this listing.
8281template<
class EventHandler>
8282uint32_t ParseEngine<EventHandler>::_get_annotations_same_line(
csubstr token_soup,
csubstr *first_,
csubstr *second_)
const
8284 _c4assert(!m_evt_handler->m_curr->at_first_token());
8286 using EntryPtr =
typename Annotation::Entry
const* C4_RESTRICT;
8287 EntryPtr first =
nullptr;
8288 EntryPtr second =
nullptr;
// Number of pending annotations, regardless of line.
// NOTE(review): in the visible lines `total` starts at this count and is then
// incremented once per same-line entry; confirm that the lines omitted from
// this listing reset it before the counting loops.
8289 uint32_t total = (uint32_t)(m_pending_anchors.num_entries + m_pending_tags.num_entries);
8292 _c4dbgpf(
"there are {} pending annotations: {} anchors + {} tags", total, m_pending_anchors.num_entries, m_pending_tags.num_entries);
// Returns the entry when it was recorded on the current line, else nullptr.
8293 auto valid_if_same_line = [
this](EntryPtr entry){
8294 _c4dbgpf(
"pending: {} indent={} line={} vs currline={}", _maybe_null_str(entry->str), entry->indentation, entry->line, m_evt_handler->m_curr->pos.line);
8295 return (entry->line == m_evt_handler->m_curr->pos.line) ? entry :
nullptr;
// Count the anchors, then the tags, recorded on the current line.
8299 for(
size_t i = 0; i < m_pending_anchors.num_entries; ++i)
8300 total += !!valid_if_same_line(&m_pending_anchors.annotations[i]);
8301 for(
size_t i = 0; i < m_pending_tags.num_entries; ++i)
8302 total += !!valid_if_same_line(&m_pending_tags.annotations[i]);
8303 _c4dbgpf(
"{} annotations on same line", total);
// Returns the first entry on the current line other than not_this_one,
// searching the anchors before the tags; nullptr when there is none.
8308 auto get_first_on_same_line = [
this](EntryPtr not_this_one){
8309 for(
size_t i = 0; i < m_pending_anchors.num_entries; ++i)
8310 if(&m_pending_anchors.annotations[i] != not_this_one
8311 && m_pending_anchors.annotations[i].line == m_evt_handler->m_curr->pos.line)
8312 return &m_pending_anchors.annotations[i];
8313 for(
size_t i = 0; i < m_pending_tags.num_entries; ++i)
8314 if(&m_pending_tags.annotations[i] != not_this_one
8315 && m_pending_tags.annotations[i].line == m_evt_handler->m_curr->pos.line)
8316 return &m_pending_tags.annotations[i];
8318 return (EntryPtr)
nullptr;
8322 first = get_first_on_same_line(
nullptr);
8324 _c4dbgpf(
"first annotation: {} indent={} line={}", _maybe_null_str(first->str), first->indentation, first->line);
// The second annotation is the first same-line entry that is not `first`.
8329 second = get_first_on_same_line(first);
8331 _c4dbgpf(
"second annotation: {} indent={} line={}", _maybe_null_str(second->str), second->indentation, second->line);
// Produces the string reported for an entry; tags and anchors are logged
// separately below.
8333 auto extract_string = [&](EntryPtr e){
8335 if(!e->str.str || e->str.begins_with_any(
"!<"))
8341 _c4dbgpf(
"tag: {} -> {}", _maybe_null_str(e->str), tag);
// The anchor string is expected to start after the beginning of token_soup.
8349 _c4assert(anchor.str - token_soup.str > 0);
8354 _c4dbgpf(
"anchor: {} -> {}", e->str, anchor);
8357 *first_ = first ? extract_string(first) : nullptr;
8358 *second_ = second ? extract_string(second) : nullptr;
// With two annotations, order them by their position in the buffer.
8359 if(total > 1 && (first_->str > second_->str))
8364 _c4dbgpf(
"swap first and second: {} -> {}", *first_, *second_);
// Handle the current line in the USTY state. The assert below requires that
// neither RBLCK nor RFLOW is set. The visible branches either merge into an
// existing sequence or map (RSEQ / RMAP) or start a new container or scalar.
// NOTE(review): this listing omits some source lines (braces and a few
// conditions); the comments only describe the statements that are visible.
8373template<
class EventHandler>
8374C4_COLD
void ParseEngine<EventHandler>::_handle_usty()
// The format string previously had a single placeholder ("target={}") for
// two arguments; it now labels both, matching the message in _handle_unk().
8376 _c4dbgpf(
"handle_usty indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
8378 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RBLCK|
RFLOW), m_evt_handler->m_curr->pos);
8380 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
8383 _c4dbgp(
"usty[RNXT]: finishing!");
8388 _maybe_skip_comment();
8389 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// Skip the indentation: when no position was found, skip the whole remainder.
8396 pos = pos !=
npos ? pos : rem.len;
8397 _c4dbgpf(
"skipping indentation of {}", pos);
8398 _line_progressed(pos);
8399 rem = m_evt_handler->m_curr->line_contents.rem;
8402 _c4dbgpf(
"rem is now {}", _prs(rem));
// There must be something left to dispatch on.
8405 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, rem.len > 0, m_evt_handler->m_curr->pos);
8406 size_t startindent = m_evt_handler->m_curr->line_contents.indentation;
8407 char first = rem.str[0];
// RSEQ: the target is an existing sequence, so only a sequence can be merged.
8410 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(
RMAP), m_evt_handler->m_curr->pos);
8411 _c4dbgpf(
"usty[RSEQ]: first='{}'", _c4prc(first));
8414 _c4dbgp(
"usty[RSEQ]: it's a flow seq. merging it");
8416 m_evt_handler->_push();
8418 _set_indentation(startindent);
8419 _line_progressed(1);
8420 _maybe_skip_whitespace_tokens();
// '-' as a block token: a block sequence, merged the same way.
8422 else if(first ==
'-' && _is_blck_token(rem))
8424 _c4dbgp(
"usty[RSEQ]: it's a block seq. merging it");
8426 m_evt_handler->_push();
8428 _set_indentation(startindent);
8429 _line_progressed(1);
8430 _maybe_skip_whitespace_tokens();
8434 _c4err(
"can only parse a seq into an existing seq");
// RMAP: the target is an existing map, so only map content can be merged.
8437 else if(has_any(
RMAP))
8439 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(
RSEQ), m_evt_handler->m_curr->pos);
8440 _c4dbgpf(
"usty[RMAP]: first='{}'", _c4prc(first));
// Flow map: merged into the existing map.
8443 _c4dbgp(
"usty[RMAP]: it's a flow map. merging it");
8445 _handle_annotations_before_blck_val_scalar();
8446 m_evt_handler->_push();
8448 _set_indentation(startindent);
8449 _line_progressed(1);
8450 _maybe_skip_whitespace_tokens();
// '?' as a block token: block map starting with a complex key.
8452 else if(first ==
'?' && _is_blck_token(rem))
8454 _c4dbgp(
"usty[RMAP]: it's a block map + this key is complex");
8456 _handle_annotations_before_blck_val_scalar();
8457 m_evt_handler->_push();
8459 _save_indentation();
8460 _line_progressed(1);
8461 _maybe_skip_whitespace_tokens();
// ':' as a block token: block map whose first key is empty.
8463 else if(first ==
':' && _is_blck_token(rem))
8465 _c4dbgp(
"usty[RMAP]: it's a map with an empty key");
8467 _handle_annotations_before_blck_val_scalar();
8468 m_evt_handler->_push();
8469 m_evt_handler->set_key_scalar_plain_empty();
8471 _save_indentation();
8472 _line_progressed(1);
8473 _maybe_skip_whitespace_tokens();
// '&': scan an anchor and keep it as a pending annotation.
8475 else if(rem.begins_with(
'&'))
8477 csubstr anchor = _scan_anchor();
8478 _c4dbgpf(
"usty[RMAP]: anchor! {}", _prs(anchor));
8479 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8480 const size_t line = m_evt_handler->m_curr->pos.line;
8481 _add_annotation(&m_pending_anchors, anchor, indentation, line);
8482 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
// '*': a reference is only accepted as a key here; without a following
// colon it would be a value, which is an error for a map target.
8484 else if(first ==
'*')
8486 csubstr ref = _scan_ref_map();
8487 _c4dbgpf(
"usty[RMAP]: ref! {}", _prs(ref));
8488 if(!_maybe_scan_following_colon())
8490 _c4err(
"cannot read a VAL to a map");
8494 _c4dbgp(
"usty[RMAP]: start new block map, set ref as key");
8495 const size_t startline = m_evt_handler->m_curr->pos.line;
8497 _handle_annotations_before_start_mapblck(startline);
8498 m_evt_handler->_push();
8499 _handle_keyref(ref);
8500 _maybe_skip_whitespace_tokens();
8501 _set_indentation(startindent);
// '!': keep the tag as a pending annotation.
8505 else if(first ==
'!')
8508 _c4dbgpf(
"usty[RMAP]: val tag! {}", _prs(tag));
8511 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8512 const size_t line = m_evt_handler->m_curr->pos.line;
8513 _add_annotation(&m_pending_tags, tag, indentation, line);
// A flow or block sequence cannot be merged into a map.
8515 else if(first ==
'[' || (first ==
'-' && _is_blck_token(rem)))
8517 _c4err(
"cannot parse a seq into an existing map");
// RMAP scalars: every scalar must be followed by a colon and become a key;
// a scalar without one would be a value, which is an error for a map target.
8521 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(
SSCL), m_evt_handler->m_curr->pos);
8522 startindent = m_evt_handler->m_curr->line_contents.indentation;
8523 const size_t startline = m_evt_handler->m_curr->pos.line;
8525 _c4dbgpf(
"usty[RMAP]: maybe scalar. first='{}'", _c4prc(first));
// Single-quoted scalar.
8528 _c4dbgp(
"usty[RMAP]: scanning single-quoted scalar");
8529 sc = _scan_scalar_squot();
8530 if(!_maybe_scan_following_colon())
8532 _c4err(
"cannot read a VAL to a map");
8536 _c4dbgp(
"usty[RMAP]: start new block map, set scalar as key");
8538 _handle_annotations_before_start_mapblck(startline);
8539 m_evt_handler->_push();
8540 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8541 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
8542 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
8543 _set_indentation(startindent);
8545 _maybe_skip_whitespace_tokens();
// Double-quoted scalar.
8548 else if(first ==
'"')
8550 _c4dbgp(
"usty[RMAP]: scanning double-quoted scalar");
8551 sc = _scan_scalar_dquot();
8552 if(!_maybe_scan_following_colon())
8554 _c4err(
"cannot read a VAL to a map");
8558 _c4dbgp(
"usty[RMAP]: start new block map, set double-quoted scalar as key");
8560 _handle_annotations_before_start_mapblck(startline);
8561 m_evt_handler->_push();
8562 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8563 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
8564 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
8565 _set_indentation(startindent);
8567 _maybe_skip_whitespace_tokens();
// Block scalars cannot be used directly as keys.
8570 else if(first ==
'|')
8572 _c4err(
"block literal keys must be enclosed in '?'");
// NOTE(review): the '>' branch reuses the "block literal" wording, although
// '>' is reported as block-folded in the usty[UNK] branch of this function;
// confirm the message is intended.
8574 else if(first ==
'>')
8576 _c4err(
"block literal keys must be enclosed in '?'");
// Plain scalar.
8578 else if(_scan_scalar_plain_unk(&sc))
8580 _c4dbgp(
"usty[RMAP]: got a plain scalar");
8581 if(!_maybe_scan_following_colon())
8583 _c4err(
"cannot read a VAL to a map");
8587 _c4dbgp(
"usty[RMAP]: start new block map, set scalar as key");
8589 _handle_annotations_before_start_mapblck(startline);
8590 m_evt_handler->_push();
8591 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8592 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
8593 m_evt_handler->set_key_scalar_plain(maybe_filtered);
8594 _set_indentation(startindent);
8596 _maybe_skip_whitespace_tokens();
// UNK: the node type is not known yet, so any container or scalar may start
// here. Containers are begun through the event handler rather than merged.
8607 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(
RSEQ), m_evt_handler->m_curr->pos);
8608 _c4dbgpf(
"usty[UNK]: first='{}'", _c4prc(first));
// Flow sequence.
8611 _c4dbgp(
"usty[UNK]: it's a flow seq");
8613 _handle_annotations_before_blck_val_scalar();
8614 m_evt_handler->begin_seq_val_flow();
8616 _set_indentation(startindent);
8617 _line_progressed(1);
8618 _maybe_skip_whitespace_tokens();
// '-' as a block token: block sequence.
8620 else if(first ==
'-' && _is_blck_token(rem))
8622 _c4dbgp(
"usty[UNK]: it's a block seq");
8624 _handle_annotations_before_blck_val_scalar();
8625 m_evt_handler->begin_seq_val_block();
8627 _set_indentation(startindent);
8628 _line_progressed(1);
8629 _maybe_skip_whitespace_tokens();
// Flow map.
8631 else if(first ==
'{')
8633 _c4dbgp(
"usty[UNK]: it's a flow map");
8635 _handle_annotations_before_blck_val_scalar();
8636 m_evt_handler->begin_map_val_flow();
8638 _set_indentation(startindent);
8639 _line_progressed(1);
8640 _maybe_skip_whitespace_tokens();
// '?' as a block token: block map starting with a complex key.
8642 else if(first ==
'?' && _is_blck_token(rem))
8644 _c4dbgp(
"usty[UNK]: it's a map + this key is complex");
8646 _handle_annotations_before_blck_val_scalar();
8647 m_evt_handler->begin_map_val_block();
8649 _save_indentation();
8650 _line_progressed(1);
8651 _maybe_skip_whitespace_tokens();
// ':' as a block token: block map whose first key is empty.
8653 else if(first ==
':' && _is_blck_token(rem))
8655 _c4dbgp(
"usty[UNK]: it's a map with an empty key");
8657 _handle_annotations_before_blck_val_scalar();
8658 m_evt_handler->begin_map_val_block();
8659 m_evt_handler->set_key_scalar_plain_empty();
8661 _save_indentation();
8662 _line_progressed(1);
8663 _maybe_skip_whitespace_tokens();
// '&': scan an anchor and keep it as a pending annotation.
8665 else if(first ==
'&')
8667 csubstr anchor = _scan_anchor();
8668 _c4dbgpf(
"usty[UNK]: anchor! {}", _prs(anchor));
8669 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8670 const size_t line = m_evt_handler->m_curr->pos.line;
8671 _add_annotation(&m_pending_anchors, anchor, indentation, line);
8672 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
// '*': a reference is a value unless a colon follows, in which case a new
// block map is opened with the reference as its key.
8674 else if(first ==
'*')
8676 csubstr ref = _scan_ref_map();
8677 _c4dbgpf(
"usty[UNK]: ref! {}", _prs(ref));
8678 if(!_maybe_scan_following_colon())
8680 _c4dbgp(
"usty[UNK]: set val ref");
8681 _handle_valref(ref);
8685 _c4dbgp(
"usty[UNK]: start new block map, set ref as key");
8686 const size_t startline = m_evt_handler->m_curr->pos.line;
8688 _handle_annotations_before_start_mapblck(startline);
8689 m_evt_handler->begin_map_val_block();
8690 _handle_keyref(ref);
8691 _maybe_skip_whitespace_tokens();
8692 _set_indentation(startindent);
// '!': keep the tag as a pending annotation.
8696 else if(first ==
'!')
8699 _c4dbgpf(
"usty[UNK]: val tag! {}", _prs(tag));
8702 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8703 const size_t line = m_evt_handler->m_curr->pos.line;
8704 _add_annotation(&m_pending_tags, tag, indentation, line);
// UNK scalars: a quoted or plain scalar is set as a value unless a colon
// follows, in which case a new block map is opened with the scalar as its
// key. Block scalars are always set as values.
8708 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(
SSCL), m_evt_handler->m_curr->pos);
8709 startindent = m_evt_handler->m_curr->line_contents.indentation;
8710 const size_t startline = m_evt_handler->m_curr->pos.line;
8713 _c4dbgpf(
"usty[UNK]: maybe scalar. first='{}'", _c4prc(first));
// Single-quoted scalar.
8716 _c4dbgp(
"usty[UNK]: scanning single-quoted scalar");
8717 sc = _scan_scalar_squot();
8718 if(!_maybe_scan_following_colon())
8720 _c4dbgp(
"usty[UNK]: set as val");
8721 _handle_annotations_before_blck_val_scalar();
8722 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
8723 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
8728 _c4dbgp(
"usty[UNK]: start new block map, set scalar as key");
8730 _handle_annotations_before_start_mapblck(startline);
8731 m_evt_handler->begin_map_val_block();
8732 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8733 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
8734 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
8735 _set_indentation(startindent);
8737 _maybe_skip_whitespace_tokens();
// Double-quoted scalar.
8740 else if(first ==
'"')
8742 _c4dbgp(
"usty[UNK]: scanning double-quoted scalar");
8743 sc = _scan_scalar_dquot();
8744 if(!_maybe_scan_following_colon())
8746 _c4dbgp(
"usty[UNK]: set as val");
8747 _handle_annotations_before_blck_val_scalar();
8748 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
8749 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
8754 _c4dbgp(
"usty[UNK]: start new block map, set double-quoted scalar as key");
8756 _handle_annotations_before_start_mapblck(startline);
8757 m_evt_handler->begin_map_val_block();
8758 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8759 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
8760 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
8761 _set_indentation(startindent);
8763 _maybe_skip_whitespace_tokens();
// '|': block-literal scalar, set as a value.
8766 else if(first ==
'|')
8768 _c4dbgp(
"usty[UNK]: scanning block-literal scalar");
8770 _scan_block(&sb, startindent);
8771 _c4dbgp(
"usty[UNK]: set as val");
8772 _handle_annotations_before_blck_val_scalar();
8773 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
8774 m_evt_handler->set_val_scalar_literal(maybe_filtered);
// '>': block-folded scalar, set as a value.
8777 else if(first ==
'>')
8779 _c4dbgp(
"usty[UNK]: scanning block-folded scalar");
8781 _scan_block(&sb, startindent);
8782 _c4dbgp(
"usty[UNK]: set as val");
8783 _handle_annotations_before_blck_val_scalar();
8784 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
8785 m_evt_handler->set_val_scalar_folded(maybe_filtered);
// Plain scalar.
8788 else if(_scan_scalar_plain_unk(&sc))
8790 _c4dbgp(
"usty[UNK]: got a plain scalar");
8791 if(!_maybe_scan_following_colon())
8793 _c4dbgp(
"usty[UNK]: set as val");
8794 _handle_annotations_before_blck_val_scalar();
8795 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
8796 m_evt_handler->set_val_scalar_plain(maybe_filtered);
8801 _c4dbgp(
"usty[UNK]: start new block map, set scalar as key");
8803 _handle_annotations_before_start_mapblck(startline);
8804 m_evt_handler->begin_map_val_block();
8805 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8806 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
8807 m_evt_handler->set_key_scalar_plain(maybe_filtered);
8808 _set_indentation(startindent);
8810 _maybe_skip_whitespace_tokens();
// Parse driver: starts the parse on the event handler, then loops over the
// file line by line, dispatching on the parser state until each line is done.
// NOTE(review): the signature line is missing from this listing; since the
// visible dispatch only covers RMAP and RUNK (no block or USTY handling),
// this is presumably parse_json_in_place_ev(csubstr filename, substr src).
8824template<
class EventHandler>
// The event handler stack must have at least one entry.
8827 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
8829 m_evt_handler->start_parse(filename.str, src);
8830 m_evt_handler->begin_stream();
8832 while( ! _finished_file())
8835 while( ! _finished_line())
// Each dispatch must see a non-empty line remainder.
8838 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! m_evt_handler->m_curr->line_contents.rem.empty(), m_evt_handler->m_curr->pos);
8843 else if(has_any(
RMAP))
8847 else if(has_any(
RUNK))
// No known state flag is set.
8853 _c4err(
"internal error");
8856 if(_finished_file())
8861 m_evt_handler->finish_parse();
// Parse driver: starts the parse on the event handler, then loops over the
// file line by line, dispatching on the parser state until each line is done.
// NOTE(review): the signature line is missing from this listing; since the
// visible dispatch covers block containers, RUNK and USTY, this is presumably
// parse_in_place_ev(csubstr filename, substr src).
8867template<
class EventHandler>
// The event handler stack must have at least one entry.
8870 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
8872 m_evt_handler->start_parse(filename.str, src);
8873 m_evt_handler->begin_stream();
8875 while( ! _finished_file())
8878 while( ! _finished_line())
// Each dispatch must see a non-empty line remainder.
8881 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! m_evt_handler->m_curr->line_contents.rem.empty(), m_evt_handler->m_curr->pos);
8892 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP), m_evt_handler->m_curr->pos);
// Block containers: a sequence, or else a map (RMAP is asserted).
8901 else if(has_any(
RBLCK))
8905 _handle_seq_block();
8909 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP), m_evt_handler->m_curr->pos);
8910 _handle_map_block();
8913 else if(has_any(
RUNK))
8917 else if(has_any(
USTY))
// No known state flag is set.
8923 _c4err(
"internal error");
8926 if(_finished_file())
8931 m_evt_handler->finish_parse();
8940#undef _c4dbgnextline
8944C4_SUPPRESS_WARNING_MSVC_POP
8945C4_SUPPRESS_WARNING_GCC_CLANG_POP
Lightweight generic type-safe wrappers for converting individual values to/from strings.
This is the main driver of parsing logic: it scans the YAML or JSON source for tokens,...
FilterResult filter_scalar_plain(csubstr scalar, substr dst, size_t indentation)
filter a plain scalar
csubstr location_contents(Location const &loc) const
Get the string starting at a particular location, to the end of the parsed source buffer.
FilterResult filter_scalar_squoted(csubstr scalar, substr dst)
filter a single-quoted scalar
ParseEngine(EventHandler *evt_handler, ParserOptions opts={})
FilterResult filter_scalar_dquoted(csubstr scalar, substr dst)
filter a double-quoted scalar
void parse_json_in_place_ev(csubstr filename, substr src)
parse JSON in place, emitting events to the current handler
Location val_location(const char *val) const
Given a pointer to a buffer position, get the location.
FilterResult filter_scalar_plain_in_place(substr scalar, size_t cap, size_t indentation)
filter a plain scalar in place
FilterResult filter_scalar_squoted_in_place(substr scalar, size_t cap)
filter a single-quoted scalar in place
FilterResultExtending filter_scalar_dquoted_in_place(substr scalar, size_t cap)
filter a double-quoted scalar in place
void parse_in_place_ev(csubstr filename, substr src)
parse YAML in place, emitting events to the current handler
FilterResult filter_scalar_block_literal_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
filter a block-literal scalar in place
FilterResult filter_scalar_block_literal(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
filter a block-literal scalar
FilterResult filter_scalar_block_folded_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
filter a block-folded scalar in place
ParseEngine & operator=(ParseEngine &&) noexcept
FilterResult filter_scalar_block_folded(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
filter a block-folded scalar
#define RYML_LOCATIONS_SMALL_THRESHOLD
threshold at which a location search will revert from linear to binary search.
#define RYML_NOEXCEPT
Conditionally expands to noexcept when RYML_USE_ASSERT is 0 and is empty otherwise.
bool atou(csubstr str, T *v) noexcept
Convert a trimmed string to an unsigned integral value.
void err_parse(ErrorDataParse const &errdata, const char *msg)
trigger a parse error to its respective handler, with a non-formatted error message.
bool read_hex(csubstr s, I *v) noexcept
read a hexadecimal integer from a string.
basic_substring< char > substr
a mutable string view
basic_substring< const char > csubstr
an immutable string view
bool is_valid_tag_handle(csubstr handle)
bool is_custom_tag(csubstr tag)
is a tag of the form !handle!tag?
substr decode_code_point(substr out, csubstr code_point)
decode the given code_point, writing into the output string in out.
int ParserFlag_t
data type for ParserState_e
@ RTOP
reading at top level
@ RSET
the (implicit) map being read is a !!set.
@ RUNK
reading unknown state (when starting): must determine whether scalar, map or seq
@ RKCL
reading the key colon (i.e., the ':' after the key in the map)
@ NDOC
No-document mode: a document has ended and another has not started yet.
@ QSCL
stored scalar was quoted
@ RBLCK
reading in block mode
@ USTY
reading in unknown style mode - must determine FLOW or BLCK reading an implicit map nested in an expl...
@ QMRK
reading an explicit key (? key)
@ SSCL
there's a stored scalar
@ RFLOW
reading is inside explicit flow chars: [] or {}
size_t adjust_pos_with_escapes(csubstr scalar, size_t pos, bool keep_newlines=false)
Adjust a position in a scalar, increasing it to account for any escaped characters.
size_t to_chars(substr buf, escaped_scalar e)
formatting implementation to escape a scalar with escape_scalar()
RYML_ID_TYPE id_type
The type of a node id in the YAML tree; to override the default type, define the macro RYML_ID_TYPE t...
@ npos
a null string position
@ UTF16BE
UTF16, Big-Endian.
@ UTF16LE
UTF16, Little-Endian.
@ NOBOM
No Byte Order Mark was found.
@ UTF32BE
UTF32, Big-Endian.
@ UTF32LE
UTF32, Little-Endian.
enum c4::yml::Encoding_ Encoding_e
(Undefined by default) Use shorter error message from checks/asserts: do not show the check condition...
#define _ryml_relocate(s)
#define _RYML_SAVE_TEST_YAML(filename, src)
#define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without)
#define _RYML_WITH_TAB_TOKENS(...)
#define _RYML_SAVE_TEST_JSON(filename, src)
basic_substring range(size_t first, size_t last=npos) const noexcept
return [first,last[.
size_t first_not_of(const C c) const
basic_substring triml(const C c) const
trim left
size_t first_of(const C c, size_t start=0) const
basic_substring first(size_t num) const noexcept
return the first num elements: [0,num[
basic_substring sub(size_t first) const noexcept
return [first,len[
basic_substring trimr(const C c) const
trim the character c from the right
C * str
a restricted pointer to the first character of the substring
Filters an input string into a different output string.
Abstracts the fact that a scalar filter result may not fit in the intended memory.
Abstracts the fact that a scalar filter result may not fit in the intended memory.
Helper to control the line contents while parsing a buffer.
holds a source or YAML file position, for example when an error is detected; see also location_format...
csubstr name
name of the file
Options to give to the parser to control its behavior.
Accelerator structure to reduce memory requirements by enabling reuse of resolved tags.
formatting helper to escape a scalar with escape_scalar_fn()
utilities for UTF and Byte Order Mark