1#ifndef _C4_YML_PARSE_ENGINE_DEF_HPP_
2#define _C4_YML_PARSE_ENGINE_DEF_HPP_
4#ifndef _C4_YML_PARSE_ENGINE_HPP_
7#ifndef _C4_CHARCONV_HPP_
13#ifndef _C4_YML_FILTER_PROCESSOR_HPP_
16#ifndef _C4_YML_TAG_HPP_
19#ifndef _C4_YML_NODE_TYPE_HPP_
23#ifndef _C4_YML_DETAIL_DBGPRINT_HPP_
24#include "c4/yml/detail/dbgprint.hpp"
32 do { RYML_DEBUG_BREAK(); this->_err(RYML_LOC_HERE(), __VA_ARGS__); } while(0)
35 this->_err(RYML_LOC_HERE(), __VA_ARGS__)
37#define _c4assert(...) \
38 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, __VA_ARGS__, m_evt_handler->m_curr->pos)
41#if defined(RYML_WITH_TAB_TOKENS)
42#define _RYML_WITH_TAB_TOKENS(...) __VA_ARGS__
43#define _RYML_WITHOUT_TAB_TOKENS(...)
44#define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) with
46#define _RYML_WITH_TAB_TOKENS(...)
47#define _RYML_WITHOUT_TAB_TOKENS(...) __VA_ARGS__
48#define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) without
52#ifndef RYML_SAVE_TEST_YAML
53#define _RYML_SAVE_TEST_YAML(filename, src)
54#define _RYML_SAVE_TEST_JSON(filename, src)
56#define _RYML_SAVE_TEST_YAML(filename, src) c4::yml::ryml_save_test_yaml(filename, src)
57#define _RYML_SAVE_TEST_JSON(filename, src) c4::yml::ryml_save_test_json(filename, src)
68#define _c4dbgnextline() \
70 _c4dbgq("\n-----------"); \
71 _c4dbgt("handling line={}, offset={}B", \
72 m_evt_handler->m_curr->pos.line, \
73 m_evt_handler->m_curr->pos.offset); \
77C4_SUPPRESS_WARNING_MSVC_PUSH
78C4_SUPPRESS_WARNING_MSVC(4296)
79C4_SUPPRESS_WARNING_MSVC(4702)
80C4_SUPPRESS_WARNING_GCC_CLANG_PUSH
81C4_SUPPRESS_WARNING_GCC_CLANG(
"-Wtype-limits")
82C4_SUPPRESS_WARNING_GCC_CLANG(
"-Wformat-nonliteral")
83C4_SUPPRESS_WARNING_GCC_CLANG(
"-Wold-style-cast")
84#if defined(__GNUC__) && (__GNUC__ >= 6)
85C4_SUPPRESS_WARNING_GCC(
"-Wnull-dereference")
87#if defined(__GNUC__) && (__GNUC__ >= 7)
88C4_SUPPRESS_WARNING_GCC(
"-Wduplicated-branches")
// Sets subject.len to `pos`, i.e. keeps only the first `pos` characters of
// the mutable view. When `pos` is npos the length is left untouched, so the
// result of a failed search can be passed in directly and acts as a no-op.
98C4_HOT C4_ALWAYS_INLINE
void _set_first(
substr &C4_RESTRICT subject,
size_t pos)
noexcept
101 subject.len = pos !=
npos ? pos : subject.len;
// Read-only-view overload: sets subject.len to `pos`, or leaves the length
// unchanged when `pos` is npos.
103C4_HOT C4_ALWAYS_INLINE
void _set_first(
csubstr &C4_RESTRICT subject,
size_t pos)
noexcept
106 subject.len = pos !=
npos ? pos : subject.len;
// Strict variant for mutable views: asserts through _RYML_ASSERT_BASIC that
// `pos` is not npos. Declared RYML_NOEXCEPT instead of plain noexcept.
// NOTE(review): the statement that applies `pos` to `subject` is not visible
// in this listing - confirm against the full source.
108C4_HOT C4_ALWAYS_INLINE
void _set_first_strict(
substr &C4_RESTRICT subject,
size_t pos)
RYML_NOEXCEPT
111 _RYML_ASSERT_BASIC(pos !=
npos);
// Strict variant for read-only views: asserts through _RYML_ASSERT_BASIC that
// `pos` is not npos. Declared RYML_NOEXCEPT instead of plain noexcept.
// NOTE(review): the statement that applies `pos` to `subject` is not visible
// in this listing - confirm against the full source.
114C4_HOT C4_ALWAYS_INLINE
void _set_first_strict(
csubstr &C4_RESTRICT subject,
size_t pos)
RYML_NOEXCEPT
117 _RYML_ASSERT_BASIC(pos !=
npos);
123 _RYML_ASSERT_BASIC(s.len > 0);
124 _RYML_ASSERT_BASIC(s.str[0] ==
'-' || s.str[0] ==
':' || s.str[0] ==
'?');
// Returns true when `s` starts with '-' and either consists of that single
// character or has a space as its second character. The tab alternative is
// wrapped in _RYML_WITH_TAB_TOKENS, so it is only compiled in when tab tokens
// are enabled. The length is checked before any character is read.
128C4_HOT C4_ALWAYS_INLINE
bool _is_blck_seq_token_maybe(
csubstr const& C4_RESTRICT s)
noexcept
130 return ((s.len >= 1) && (s.str[0] ==
'-') && ((s.len == 1) || ((s.str[1] ==
' ')
_RYML_WITH_TAB_TOKENS( || (s.str[1] ==
'\t')))));
135 _RYML_ASSERT_BASIC(s.begins_with(
'-'));
136 _RYML_ASSERT_BASIC(!s.ends_with(
"\n"));
137 _RYML_ASSERT_BASIC(!s.ends_with(
"\r"));
138 return (s.len >= 3 && s.str[1] ==
'-' && s.str[2] ==
'-')
144 _RYML_ASSERT_BASIC(s.begins_with(
'.'));
145 _RYML_ASSERT_BASIC(!s.ends_with(
"\n"));
146 _RYML_ASSERT_BASIC(!s.ends_with(
"\r"));
147 return (s.len >= 3 && s.str[1] ==
'.' && s.str[2] ==
'.')
// Document-token test on `s`. The two visible returns compare s.str[1] and
// s.str[2] against "--" and against "..".
// NOTE(review): the conditions that select between the two returns (and any
// length check guarding the s.str[1]/s.str[2] reads) are not visible in this
// listing - confirm against the full source.
151inline bool _is_doc_token(
csubstr s)
noexcept
159 return (s.str[1] ==
'-' && s.str[2] ==
'-')
163 return (s.str[1] ==
'.' && s.str[2] ==
'.')
172 _RYML_ASSERT_BASIC(s.len);
176 return s.begins_with(
"false") ? 5u : 0u;
178 return s.begins_with(
"true") ? 4u : 0u;
180 return s.begins_with(
"null") ? 4u : 0u;
// Returns 1 when `nl` and `following` form a two-character line break
// ("\n\r" or "\r\n") and 0 otherwise; the boolean expression is converted to
// the size_t return type.
188C4_ALWAYS_INLINE
size_t _extend_from_combined_newline(
char nl,
char following)
190 return (nl ==
'\n' && following ==
'\r') || (nl ==
'\r' && following ==
'\n');
196 size_t nlpos = rem.
first_of(
"\r\n");
199 const char nl = rem[nlpos];
200 rem = rem.right_of(nlpos);
203 if(_extend_from_combined_newline(nl, rem.front()))
// Walks `r` from position *i, which must hold a '\n' (asserted), advancing *i
// in place and returning numnl_following. The loop distinguishes '\n' from
// the blank characters ' ', '\t' and '\r'.
// NOTE(review): the branch bodies (presumably the counter increment and the
// loop exit on any other character) are not visible in this listing.
211inline size_t _count_following_newlines(
csubstr r,
size_t *C4_RESTRICT i)
213 _RYML_ASSERT_BASIC(r[*i] ==
'\n');
214 size_t numnl_following = 0;
216 for( ; *i < r.len; ++(*i))
218 if(r.str[*i] ==
'\n')
221 else if(r.str[*i] ==
' ' || r.str[*i] ==
'\t' || r.str[*i] ==
'\r')
226 return numnl_following;
// Overload that additionally takes an indentation. As in the two-argument
// form, r[*i] must be '\n' (asserted), *i is advanced in place and
// numnl_following is returned.
// The visible code contains several scans over `r`: one that reacts to any
// character other than ' ', '\t' or '\r', and one that computes
// stop = *i + indentation and asserts *i < stop when it meets a character
// other than ' ' or '\r'.
// NOTE(review): the conditions joining these scans and the branch bodies are
// not visible in this listing - confirm against the full source.
231inline size_t _count_following_newlines(
csubstr r,
size_t *C4_RESTRICT i,
size_t indentation)
233 _RYML_ASSERT_BASIC(r[*i] ==
'\n');
234 size_t numnl_following = 0;
238 for( ; *i < r.len; ++(*i))
240 const char c = r.str[*i];
244 else if(c !=
' ' && c !=
'\t' && c !=
'\r')
250 for( ; *i < r.len; ++(*i))
257 size_t stop = *i + indentation;
258 for( ; *i < r.len; ++(*i))
261 if(c !=
' ' && c !=
'\r')
263 _RYML_ASSERT_BASIC(*i < stop);
268 else if(c !=
' ' && c !=
'\t' && c !=
'\r')
274 return numnl_following;
284template<
class EventHandler>
// Constructor fragment (the signature line is not shown in this listing).
// Stores `evt_handler`, value-initialises the pending anchors and the
// newline-offset storage (size and capacity 0), clears both directive flags
// and sets m_prev_val_end to npos. The body then checks `evt_handler` with
// _RYML_CHECK_BASIC.
// NOTE(review): initialisers on the omitted lines are not covered here.
291template<
class EventHandler>
294 , m_evt_handler(evt_handler)
295 , m_pending_anchors()
297 , m_has_directives_yaml(false)
298 , m_has_directives(false)
301 , m_prev_val_end(
npos)
303 , m_newline_offsets()
304 , m_newline_offsets_size(0)
305 , m_newline_offsets_capacity(0)
307 _RYML_CHECK_BASIC(evt_handler);
// Constructor from another engine `that` (signature line not shown).
// m_newline_offsets is initialised with that.m_newline_offsets itself, along
// with its size and capacity, so no new buffer is allocated here; presumably
// this is the move constructor - confirm.
// NOTE(review): m_prev_val_end is reset to npos here, while the assignment
// code later in this file copies that.m_prev_val_end - confirm the
// difference is intended.
310template<
class EventHandler>
312 : m_options(that.m_options)
313 , m_evt_handler(that.m_evt_handler)
314 , m_pending_anchors(that.m_pending_anchors)
315 , m_pending_tags(that.m_pending_tags)
316 , m_has_directives_yaml(that.m_has_directives_yaml)
317 , m_has_directives(that.m_has_directives)
318 , m_doc_empty(that.m_doc_empty)
320 , m_prev_val_end(
npos)
322 , m_newline_offsets(that.m_newline_offsets)
323 , m_newline_offsets_size(that.m_newline_offsets_size)
324 , m_newline_offsets_capacity(that.m_newline_offsets_capacity)
// Constructor from another engine `that` (signature line not shown) that
// gives the new object its own newline-offset storage. The storage members
// start value-initialised; when `that` has a non-zero capacity,
// _resize_locations is called with that capacity, the resulting capacity is
// checked to match, and the first that.m_newline_offsets_size entries are
// copied with memcpy before the size is recorded.
// NOTE(review): m_prev_val_end is reset to npos rather than copied.
329template<
class EventHandler>
331 : m_options(that.m_options)
332 , m_evt_handler(that.m_evt_handler)
333 , m_pending_anchors(that.m_pending_anchors)
334 , m_pending_tags(that.m_pending_tags)
335 , m_has_directives_yaml(that.m_has_directives_yaml)
336 , m_has_directives(that.m_has_directives)
337 , m_doc_empty(that.m_doc_empty)
339 , m_prev_val_end(
npos)
341 , m_newline_offsets()
342 , m_newline_offsets_size()
343 , m_newline_offsets_capacity()
345 if(that.m_newline_offsets_capacity)
347 _resize_locations(that.m_newline_offsets_capacity);
348 _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity == that.m_newline_offsets_capacity);
349 memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size * sizeof(size_t));
350 m_newline_offsets_size = that.m_newline_offsets_size;
// Member-by-member assignment from `that` (signature line not shown;
// presumably an assignment operator - confirm). Unlike the constructors
// above, m_prev_colon, m_prev_val_end and m_encoding are copied from `that`.
// The m_newline_offsets pointer, size and capacity are assigned directly, so
// after these statements both objects refer to the same buffer.
// NOTE(review): whatever releases the previous buffer or resets `that` is not
// visible in this listing - confirm.
354template<
class EventHandler>
358 m_options = (that.m_options);
359 m_evt_handler = that.m_evt_handler;
360 m_pending_anchors = that.m_pending_anchors;
361 m_pending_tags = that.m_pending_tags;
362 m_has_directives_yaml = that.m_has_directives_yaml;
363 m_has_directives = that.m_has_directives;
364 m_doc_empty = that.m_doc_empty;
365 m_prev_colon = that.m_prev_colon;
366 m_prev_val_end = that.m_prev_val_end;
367 m_encoding = that.m_encoding;
368 m_newline_offsets = (that.m_newline_offsets);
369 m_newline_offsets_size = (that.m_newline_offsets_size);
370 m_newline_offsets_capacity = (that.m_newline_offsets_capacity);
// Member-by-member assignment from `that` that keeps a separate
// newline-offset buffer (signature line not shown; presumably the copy
// assignment operator - confirm).
// The buffer is only resized when that's capacity exceeds the current one, so
// an already larger buffer is reused. Both checks then confirm the
// destination can hold that's capacity and that's size before memcpy copies
// that.m_newline_offsets_size entries.
375template<
class EventHandler>
381 m_options = (that.m_options);
382 m_evt_handler = that.m_evt_handler;
383 m_pending_anchors = that.m_pending_anchors;
384 m_pending_tags = that.m_pending_tags;
385 m_has_directives_yaml = that.m_has_directives_yaml;
386 m_has_directives = that.m_has_directives;
387 m_doc_empty = that.m_doc_empty;
388 m_prev_colon = that.m_prev_colon;
389 m_prev_val_end = that.m_prev_val_end;
390 m_encoding = that.m_encoding;
391 if(that.m_newline_offsets_capacity > m_newline_offsets_capacity)
392 _resize_locations(that.m_newline_offsets_capacity);
393 _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_capacity);
394 _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_size);
395 memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size *
sizeof(
size_t));
396 m_newline_offsets_size = that.m_newline_offsets_size;
// Puts the members back to their empty values: pending anchors emptied,
// directive flags false, m_prev_val_end = npos, and the newline-offset
// pointer, size and capacity value-initialised.
// None of the visible statements releases the newline-offset buffer; the
// pointer is simply overwritten.
// NOTE(review): members handled on the omitted lines are not covered here.
401template<
class EventHandler>
402void ParseEngine<EventHandler>::_clr()
406 m_pending_anchors = {};
408 m_has_directives_yaml =
false;
409 m_has_directives =
false;
412 m_prev_val_end =
npos;
414 m_newline_offsets = {};
415 m_newline_offsets_size = {};
416 m_newline_offsets_capacity = {};
// Releases the newline-offset buffer when one is held: frees
// m_newline_offsets_capacity elements of size_t through _RYML_CB_FREE using
// the event handler's callbacks, then nulls the pointer and zeroes the size
// and capacity so a second call finds nothing to free.
419template<
class EventHandler>
420void ParseEngine<EventHandler>::_free()
422 if(m_newline_offsets)
424 _RYML_CB_FREE(m_evt_handler->m_stack.m_callbacks, m_newline_offsets,
size_t, m_newline_offsets_capacity);
425 m_newline_offsets =
nullptr;
426 m_newline_offsets_size = 0u;
427 m_newline_offsets_capacity = 0u;
// Resets per-parse state: pending anchors emptied, directive flags cleared,
// m_prev_val_end set to npos. When the options enable locations,
// _prepare_locations() is called afterwards.
// NOTE(review): members reset on the omitted lines are not covered here.
434template<
class EventHandler>
435void ParseEngine<EventHandler>::_reset()
437 m_pending_anchors = {};
439 m_has_directives_yaml =
false;
440 m_has_directives =
false;
443 m_prev_val_end =
npos;
447 if(m_options.locations())
449 _prepare_locations();
// Rebases string views that point into `prev_arena` so that they point at
// the same offset inside `next_arena`.
// The local macro _ryml_relocate(s) does the work: if (s).str lies within
// [pb, pe] (pe is one past the last byte of prev_arena and is included by
// the <= comparison), (s).str becomes next_arena.str + ((s).str - pb).
// The loops iterate over the pending tags, the pending anchors, `tds` and the
// event handler's tag-cache entries. The macro is #undef'd at the end.
// NOTE(review): the loop bodies, the definition of `tds` and the handling of
// `other` are not visible in this listing - confirm against the full source.
456template<
class EventHandler>
457void ParseEngine<EventHandler>::_relocate_arena(
csubstr prev_arena,
substr next_arena,
substr *other)
459 _c4dbgp(
"relocate to new arena");
460 const char *pb = prev_arena.str;
461 const char *pe = prev_arena.str + prev_arena.len;
462 #define _ryml_relocate(s) \
463 if((s).str >= pb && (s).str <= pe) \
465 (s).str = next_arena.str + ((s).str - pb); \
474 for(
size_t i = 0; i < m_pending_tags.num_entries; ++i)
479 for(
size_t i = 0; i < m_pending_anchors.num_entries; ++i)
486 for(
size_t i = 0, sz = tds.size(); i < sz; ++i)
493 TagCache &tch = m_evt_handler->tag_cache();
494 for(
id_type i = 0, sz = tch.m_entries.size(); i < sz; ++i)
504 #undef _ryml_relocate
508template<
class EventHandler>
511 csubstr prev = m_evt_handler->arena();
512 substr out = m_evt_handler->alloc_arena(len);
513 substr curr = m_evt_handler->arena();
514 if(curr.str != prev.str)
515 _relocate_arena(prev, curr, other);
524template<
class EventHandler>
525template<
class DumpFn>
528 ParserState const *
const C4_RESTRICT st = m_evt_handler->m_curr;
535 csubstr m_file = m_evt_handler->m_curr->pos.name;
538 _dbg_dump(std::forward<DumpFn>(dumpfn),
"{}:", m_file);
539 offs += m_file.len + 1;
541 _dbg_dump(std::forward<DumpFn>(dumpfn),
"{}:{}: ", st->pos.line, st->pos.col);
542 csubstr maybe_full_content = (contents.len < 80u ? contents : contents.first(80u));
544 _dbg_dump(std::forward<DumpFn>(dumpfn),
"{}{} (size={})\n",
escaped_scalar(maybe_full_content,
true), maybe_ellipsis, contents.len);
546 size_t firstcol = (size_t)(lc.rem.str - lc.full.str);
547 size_t lastcol = firstcol + lc.rem.len;
550 for(
size_t i = 0; i < offs + firstcol_adj; ++i)
551 std::forward<DumpFn>(dumpfn)(
" ");
552 std::forward<DumpFn>(dumpfn)(
"^");
553 for(
size_t i = 1, e = (len < 80u ? len : 80u); i < e; ++i)
554 std::forward<DumpFn>(dumpfn)(
"~");
555 _dbg_dump(std::forward<DumpFn>(dumpfn),
"{} (cols {}-{})\n", maybe_ellipsis, firstcol+1, lastcol+1);
559 std::forward<DumpFn>(dumpfn)(
"\n");
564 _dbg_dump(std::forward<DumpFn>(dumpfn),
"top state: {}\n", detail::_parser_flags_to_str(flagbuf_, m_evt_handler->m_curr->flags));
568template<
class EventHandler>
574 _dbg_printf(
"state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(buf, s.flags));
578template<
class EventHandler>
582 _print_state_stack(buf);
// Error overload that takes both the C++ source location and an explicit
// YAML location. Declared C4_NORETURN. The visible statement cancels the
// parse on the event handler first.
// NOTE(review): the call that actually reports the error is not visible in
// this listing - confirm against the full source.
589template<
class EventHandler>
590template<
class ...Args>
591C4_NORETURN C4_NO_INLINE
void ParseEngine<EventHandler>::_err(
Location const& cpploc,
Location const& ymlloc,
const char* fmt, Args
const& ...args)
const
593 m_evt_handler->cancel_parse();
// Error overload without an explicit YAML location. Declared C4_NORETURN.
// Cancels the parse on the event handler, then calls err_parse with the
// handler's callbacks and an ErrorDataParse built from `cpploc` and the
// current parser position (m_evt_handler->m_curr->pos), passing `fmt` and
// `args` through.
597template<
class EventHandler>
598template<
class ...Args>
599C4_NORETURN C4_NO_INLINE
void ParseEngine<EventHandler>::_err(
Location const& cpploc,
const char *fmt, Args
const& ...args)
const
601 m_evt_handler->cancel_parse();
602 err_parse(m_evt_handler->m_stack.m_callbacks,
ErrorDataParse{cpploc, m_evt_handler->m_curr->pos}, fmt, args...);
608template<
class EventHandler>
609template<
class ...Args>
614 _dbg_printf(fmt, args...);
616 _fmt_msg(_dbg_dumper);
// Computes whether the current offset has reached or passed the end of the
// buffer (pos.offset >= _buf().len) and emits a debug message for the
// finished case.
// NOTE(review): the condition around the debug print and the return
// statement are not visible in this listing.
623template<
class EventHandler>
624bool ParseEngine<EventHandler>::_finished_file()
const
626 bool ret = m_evt_handler->m_curr->pos.offset >= _buf().len;
630 _c4dbgp(
"finished file!!!");
// True when nothing remains to be consumed on the current line, i.e. the
// `rem` view of the current line contents is empty.
636template<
class EventHandler>
637C4_HOT C4_ALWAYS_INLINE
bool ParseEngine<EventHandler>::_finished_line() const
639 return m_evt_handler->m_curr->line_contents.rem.empty();
// Acts only when the remainder of the line is non-empty and starts with a
// space (or a tab, when _RYML_WITH_TAB_TOKENS is active). It then advances
// the line by `pos` characters via _line_progressed; the visible assignment
// sets `pos` to the full remaining length.
// NOTE(review): the search that first computes `pos` and the condition
// guarding the fallback assignment are not visible in this listing.
645template<
class EventHandler>
646void ParseEngine<EventHandler>::_maybe_skip_whitespace_tokens()
648 if(m_evt_handler->m_curr->line_contents.rem.len && (m_evt_handler->m_curr->line_contents.rem.str[0] ==
' ' _RYML_WITH_TAB_TOKENS(|| m_evt_handler->m_curr->line_contents.rem.str[0] ==
'\t')))
652 pos = m_evt_handler->m_curr->line_contents.rem.len;
653 _c4dbgpf(
"skip {} whitespace characters", pos);
654 _line_progressed(pos);
// Skips a leading run of the character `c`, if the remainder of the line is
// non-empty and starts with it. `pos` is the index of the first character
// that differs from `c`; the line is then advanced by `pos`.
// NOTE(review): the guard on the `pos = ...rem.len` fallback (presumably the
// case where the whole remainder consists of `c`) is not visible here.
658template<
class EventHandler>
659void ParseEngine<EventHandler>::_maybe_skipchars(
char c)
661 if(m_evt_handler->m_curr->line_contents.rem.len && m_evt_handler->m_curr->line_contents.rem.str[0] == c)
663 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(c);
665 pos = m_evt_handler->m_curr->line_contents.rem.len;
666 _c4dbgpf(
"skip {}x'{}'", pos, _c4prc(c));
667 _line_progressed(pos);
// Unconditional counterpart for a set of characters: asserts that the
// remainder of the line begins with one of `chars`, finds the first character
// not in the set and advances the line by that many characters.
// NOTE(review): the declaration of the array size N and the guard on the
// `pos = ...rem.len` fallback are not visible in this listing.
671template<
class EventHandler>
673void ParseEngine<EventHandler>::_skipchars(
const char (&chars)[N])
675 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.begins_with_any(chars), m_evt_handler->m_curr->pos);
676 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(chars);
678 pos = m_evt_handler->m_curr->line_contents.rem.len;
679 _c4dbgpf(
"skip {} characters", pos);
680 _line_progressed(pos);
// Consumes a comment that starts at the current position.
// Asserted preconditions: the remainder begins with '#', `rem` is a sub-view
// of `full`, the 1-based column is at least 1, and the 0-based column `col`
// equals the offset of `rem` inside `full`.
// When the comment does not start the line, the character before it must be
// a space or a tab; otherwise a parse error is raised. Finally the rest of
// the line is marked as consumed.
683template<
class EventHandler>
684void ParseEngine<EventHandler>::_skip_comment()
686 LineContents const& C4_RESTRICT lc = m_evt_handler->m_curr->line_contents;
// 0-based column; computed before the pos.col >= 1 assertion below
687 const size_t col = m_evt_handler->m_curr->pos.col - 1u;
688 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, lc.rem.begins_with(
'#'), m_evt_handler->m_curr->pos);
689 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, lc.rem.is_sub(lc.full), m_evt_handler->m_curr->pos);
690 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col >= 1, m_evt_handler->m_curr->pos);
691 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, col == ((
size_t)(lc.rem.str - lc.full.str)), m_evt_handler->m_curr->pos);
// a comment in the middle of a line needs whitespace in front of the '#'
693 if(lc.rem.str != lc.full.str)
695 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, col > 0, m_evt_handler->m_curr->pos);
696 const char prev = lc.full.str[col - 1u];
697 if(C4_UNLIKELY(prev !=
' ' && prev !=
'\t'))
698 _c4err(
"comment not preceded by whitespace");
700 _c4dbgpf(
"comment was '{}'", m_evt_handler->m_curr->line_contents.rem);
701 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
// Looks past leading spaces and tabs in the remainder of the line; when the
// first other character is '#', the line is advanced by `pos`, i.e. up to
// that '#'.
// NOTE(review): the handling of pos == npos (no non-blank character) and the
// steps after _line_progressed(pos) are not visible in this listing.
704template<
class EventHandler>
705void ParseEngine<EventHandler>::_maybe_skip_comment_strict()
707 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(
" \t");
710 if(
'#' == m_evt_handler->m_curr->line_contents.rem[pos])
712 _line_progressed(pos);
// Looks past leading spaces and tabs in the remainder of the line; when the
// first other character is '#', the line is advanced up to it. A separate
// visible path consumes the whole remainder of the line.
// NOTE(review): the conditions selecting between the two paths (presumably
// pos != npos versus an all-blank remainder) are not visible in this listing.
718template<
class EventHandler>
719void ParseEngine<EventHandler>::_maybe_skip_comment()
721 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(
" \t");
724 if(
'#' == m_evt_handler->m_curr->line_contents.rem[pos])
726 _line_progressed(pos);
732 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
// Looks past leading spaces and tabs for a ':' in the remainder of the line.
// When one is found, `pos` is moved past it and, if characters remain, the
// one after the colon is read into `next`. Two consuming paths are visible:
// one advances the line by `pos`, the other by the whole remainder.
// NOTE(review): the test applied to `next`, the handling of pos == npos and
// the returned values are not visible in this listing.
736template<
class EventHandler>
737bool ParseEngine<EventHandler>::_maybe_scan_following_colon() noexcept
739 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(
" \t");
742 if(
':' == m_evt_handler->m_curr->line_contents.rem[pos])
746 if(++pos < m_evt_handler->m_curr->line_contents.rem.len)
748 const char next = m_evt_handler->m_curr->line_contents.rem.str[pos];
754 _line_progressed(pos);
760 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
// Scans an anchor at the current position. The remainder of the line must
// begin with '&' (asserted). The line is advanced by one character plus the
// length of `anchor`, and any spaces that follow are skipped.
// NOTE(review): the computation of `anchor` and the return statement are not
// visible in this listing.
768template<
class EventHandler>
769csubstr ParseEngine<EventHandler>::_scan_anchor()
771 csubstr s = m_evt_handler->m_curr->line_contents.rem;
772 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'&'), m_evt_handler->m_curr->pos);
774 _line_progressed(1u + anchor.len);
775 _maybe_skipchars(
' ');
// Scans a reference ('*...') inside a sequence. The remainder of the line
// must begin with '*' (asserted). The token is cut at the first space, comma,
// ']' or tab; when none of them occurs, _set_first leaves `s` as the whole
// remainder. The line is advanced by the token length.
// NOTE(review): the return statement is not visible in this listing.
779template<
class EventHandler>
780csubstr ParseEngine<EventHandler>::_scan_ref_seq()
782 csubstr s = m_evt_handler->m_curr->line_contents.rem;
783 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'*'), m_evt_handler->m_curr->pos);
784 _set_first(s, s.first_of(
" ,]\t"));
785 _line_progressed(s.len);
// Scans a reference ('*...') inside a map. The remainder of the line must
// begin with '*' (asserted). The token is cut at the first space, comma, '}'
// or tab; when none of them occurs, _set_first leaves `s` as the whole
// remainder. The line is advanced by the token length.
// NOTE(review): the return statement is not visible in this listing.
789template<
class EventHandler>
790csubstr ParseEngine<EventHandler>::_scan_ref_map()
792 csubstr s = m_evt_handler->m_curr->line_contents.rem;
793 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'*'), m_evt_handler->m_curr->pos);
794 _set_first(s, s.first_of(
" ,}\t"));
795 _line_progressed(s.len);
// Scans a tag at the current position; the remainder of the line must begin
// with '!' (asserted). Two forms are handled:
//  - not starting with "!<": the tag is cut at the first space, comma, ']',
//    '}' or tab, is then checked for '[' or '{', the line is advanced by the
//    tag length, and the tag-resolution options are consulted (all tags, or
//    custom tags only);
//  - starting with "!<": the closing '>' is located and the tag is cut just
//    after it, then the line is advanced by the tag length.
// Trailing whitespace tokens are skipped at the end.
// NOTE(review): the bodies of the C4_UNLIKELY branches (presumably errors),
// the resolution step and the return statement are not visible here.
799template<
class EventHandler>
800csubstr ParseEngine<EventHandler>::_scan_tag()
802 csubstr t = m_evt_handler->m_curr->line_contents.rem;
803 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, t.begins_with(
'!'), m_evt_handler->m_curr->pos);
804 if(!t.begins_with(
"!<"))
806 _c4dbgp(
"begins with '!'");
807 _set_first(t, t.first_of(
" ,]}\t"));
808 if(C4_UNLIKELY(t.first_of(
"[{") !=
npos))
810 _line_progressed(t.len);
811 if(m_options.resolve_tags_all() || (m_options.resolve_tags() &&
is_custom_tag(t)))
816 _c4dbgp(
"begins with '!<'");
817 size_t pos = t.find(
'>');
818 if(C4_UNLIKELY(pos ==
npos))
// pos+1 keeps the closing '>' inside the tag
820 _set_first_strict(t, pos+1);
821 _line_progressed(t.len);
824 _maybe_skip_whitespace_tokens();
828template<
class EventHandler>
831 csubstr t = m_evt_handler->m_curr->line_contents.rem;
832 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, t.begins_with(
'!'), m_evt_handler->m_curr->pos);
833 if(!t.begins_with(
"!<"))
835 _c4dbgp(
"begins with '!'");
836 _set_first(t, t.first_of(
" ,\t"));
837 if(C4_UNLIKELY(t.first_of(
"[{") !=
npos))
839 _line_progressed(t.len);
841 if(m_options.resolve_tags_all() || (m_options.resolve_tags() &&
is_custom_tag(t)))
846 _c4dbgp(
"begins with '!<'");
847 size_t pos = t.find(
'>');
848 if(C4_UNLIKELY(pos ==
npos))
850 _set_first_strict(t, pos+1);
851 _line_progressed(t.len);
855 _maybe_skip_whitespace_tokens();
// Helper for plain-scalar start validation when `s` begins with ':' or '-'.
// Asserted preconditions: `s` is non-empty, begins with one of ":-", and
// contains neither '\n' nor '\r'.
// The visible outcomes are a debug message for a non-scalar token and a
// parse error for an invalid ":<char>" token.
// NOTE(review): the conditions leading to each outcome and the returned
// values are not visible in this listing.
862template<
class EventHandler>
863bool ParseEngine<EventHandler>::_is_valid_start_scalar_plain_flow_check_block_token(
csubstr s)
865 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.len > 0, m_evt_handler->m_curr->pos);
866 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with_any(
":-"), m_evt_handler->m_curr->pos);
867 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.count(
'\n') == 0, m_evt_handler->m_curr->pos);
868 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.count(
'\r') == 0, m_evt_handler->m_curr->pos);
880 _c4dbgpf(
"not a scalar: found non-scalar token '{}{}'", s.str[0], s.str[1]);
890 _c4err(
"invalid token \":{}\"", _c4prc(s.str[1]));
// Helper for plain-scalar start validation when `s` begins with '?'.
// Asserted preconditions: `s` is non-empty, its first character is '?', and
// it contains neither '\n' nor '\r'.
// The visible outcomes are a debug message for a non-scalar token and a
// parse error for an invalid "?<char>" token.
// NOTE(review): the conditions leading to each outcome and the returned
// values are not visible in this listing.
905template<
class EventHandler>
906bool ParseEngine<EventHandler>::_is_valid_start_scalar_plain_flow_check_qmrk(
csubstr s)
908 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.len > 0, m_evt_handler->m_curr->pos);
909 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s[0] ==
'?', m_evt_handler->m_curr->pos);
910 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.count(
'\n') == 0, m_evt_handler->m_curr->pos);
911 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.count(
'\r') == 0, m_evt_handler->m_curr->pos);
918 _c4dbgpf(
"not a scalar: found non-scalar token '?{}'", _c4prc(s.str[1]));
924 _c4err(
"invalid token \"?{}\"", _c4prc(s.str[1]));
// Decides whether `s` (asserted non-empty) can start a plain scalar in flow
// context. Judging by the debug messages, the first character is classified
// into a plain non-scalar token, a "suspicious" token that is delegated to
// _is_valid_start_scalar_plain_flow_check_block_token, or a question mark
// that is delegated to _is_valid_start_scalar_plain_flow_check_qmrk.
// NOTE(review): the switch/case labels and the remaining return statements
// are not visible in this listing.
938template<
class EventHandler>
939bool ParseEngine<EventHandler>::_is_valid_start_scalar_plain_flow(
csubstr s)
941 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !s.empty(), m_evt_handler->m_curr->pos);
957 _c4dbgpf(
"not a scalar: found non-scalar token '{}'", _c4prc(s.str[0]));
962 _c4dbgpf(
"suspicious token='{}' len={}", _c4prc(s.str[0]), s.len);
963 return _is_valid_start_scalar_plain_flow_check_block_token(s);
965 _c4dbgpf(
"qmrk='{}' len={}", _c4prc(s.str[0]), s.len);
966 return _is_valid_start_scalar_plain_flow_check_qmrk(s);
// Newline handling for the plain-scalar flow scanners, which call it with
// the scalar buffer `s` and the offset `offs` of the newline and test the
// boolean result.
// Visible steps:
//  - the next line's indentation is the index of its first non-space
//    character; the line is cut at the first "\n\r" character;
//  - indentation >= the current indref: the indentation is stripped from
//    next_line;
//  - smaller indentation with non-blank content: flagged in the debug output
//    as an error case;
//  - the first non-blank character of the next line is then examined: ',',
//    ']' and '#' are reported as terminating, and ':' is reported as
//    terminating when it ends the line or _is_blck_token accepts the text
//    from it.
// NOTE(review): the definition of next_line, the error call, the return
// values and the guards around the _line_progressed calls are not visible in
// this listing - confirm against the full source.
974template<
class EventHandler>
975bool ParseEngine<EventHandler>::_scan_scalar_plain_handle_newline(
csubstr s,
size_t offs)
977 _c4dbgpf(
"newl[PLAIN]: found '\\n'. offs={} line={} sofar={}", offs, m_evt_handler->m_curr->pos.line, _prs(s.first(offs),
true));
980 _c4dbgp(
"newl[PLAIN]: buffer continues");
982 size_t next_line_indentation = next_line.
first_not_of(
' ');
983 if(next_line_indentation !=
npos)
985 _c4dbgpf(
"newl[PLAIN]: line={} indentation={} indref={}", m_evt_handler->m_curr->pos.line + 1, next_line_indentation, m_evt_handler->m_curr->indref);
986 next_line = next_line.first(next_line.first_of(
"\n\r"));
987 _c4dbgpf(
"newl[PLAIN]: has indentation. next_line={}", _prs(next_line));
988 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, next_line_indentation <= next_line.len, m_evt_handler->m_curr->pos);
989 if(C4_LIKELY(next_line_indentation >= m_evt_handler->m_curr->indref))
991 _c4dbgp(
"newl[PLAIN]: larger indentation");
992 next_line = next_line.sub(next_line_indentation);
994 else if(C4_UNLIKELY(next_line.len && next_line.triml(
' ').len))
996 _c4dbgp(
"newl[PLAIN]: err, smaller indentation");
997 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
1000 if(m_evt_handler->m_curr->line_contents.indentation !=
npos)
1001 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
1004 _c4dbgpf(
"newl[PLAIN]: next_line.len={}", next_line.len);
1007 size_t fno = next_line.first_not_of(
" \t");
1011 switch(next_line.str[fno])
1013 case ',':
case ']':
case '#':
1014 _c4dbgpf(
"newl[PLAIN]: found terminating character beginning next line: '{}'", next_line.str[fno]);
1017 _c4dbgp(
"newl[PLAIN]: found :");
1018 if(fno + 1 == next_line.len || _is_blck_token(next_line.sub(fno)))
1020 _c4dbgpf(
"newl[PLAIN]: found terminating character beginning next line: '{}'", next_line.str[fno]);
1029 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
// Scans a plain (unquoted) scalar inside a flow sequence and stores the
// result in *sc.
// Asserted state: no RMAP, no RBLCK, RSEQ or RSEQIMAP set, RFLOW set, RVAL
// set; the remainder of the line starts with neither a space nor a newline.
// The scan is rejected up front when the remainder is empty or
// _is_valid_start_scalar_plain_flow refuses it.
// Scanning works on the whole buffer from the current offset (`s`), not only
// on the current line, tracking both the buffer offset `offs` and the column
// `col`. Visible cases, going by the code and debug messages:
//  - terminating characters (asserted not to be at offset 0);
//  - '\n', delegated to _scan_scalar_plain_handle_newline;
//  - ':' followed by space, ',', '\n' or ']' (or tab with tab tokens), which
//    starts a map; a '\r' after the colon is looked past first;
//  - '#' with the preceding character inspected;
//  - characters reported as invalid through _c4err;
//  - '-' / '.' style document tokens at the beginning of a line, detected by
//    three identical characters.
// needs_filter is raised on the multi-line paths and handed to *sc at the end.
// NOTE(review): the switch labels, loop exits and return statements are not
// visible in this listing - confirm against the full source.
1035template<
class EventHandler>
1036bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_flow(ScannedScalar *C4_RESTRICT sc)
1038 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RMAP), m_evt_handler->m_curr->pos);
1039 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RBLCK), m_evt_handler->m_curr->pos);
1040 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RSEQ|
RSEQIMAP), m_evt_handler->m_curr->pos);
1041 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RFLOW), m_evt_handler->m_curr->pos);
1042 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL), m_evt_handler->m_curr->pos);
1044 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->line_contents.rem.begins_with(
' '), m_evt_handler->m_curr->pos);
1045 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->line_contents.rem.begins_with(
'\n'), m_evt_handler->m_curr->pos);
1047 if(!m_evt_handler->m_curr->line_contents.rem.len || !_is_valid_start_scalar_plain_flow(m_evt_handler->m_curr->line_contents.rem))
// view over the rest of the buffer, so the scan can cross line boundaries
1050 substr s = _buf().
sub(m_evt_handler->m_curr->pos.offset);
1051 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with(m_evt_handler->m_curr->line_contents.rem), m_evt_handler->m_curr->pos);
1053 _c4dbgp(
"scanning seqflow scalar...");
1055 bool needs_filter =
false;
1058 for( ; offs < s.len; ++offs, ++col)
1060 const char c = s.str[offs];
1065 _c4dbgpf(
"found terminating character at {}: '{}'", offs, c);
1066 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, offs > 0, m_evt_handler->m_curr->pos);
1069 _c4dbgpf(
"found '\\n' at col={}", col);
1070 if(!_scan_scalar_plain_handle_newline(s, offs))
1073 needs_filter =
true;
1077 needs_filter =
true;
1080 _c4dbgp(
"found suspicious ':'");
1081 if(s.len > offs + 1)
1083 char next = s.str[offs + 1];
1084 _c4dbgpf(
"next char is '{}'", _c4prc(next));
1090 next = after.str[0];
1091 _c4dbgpf(
"skip \\r to '{}'", _c4prc(next));
1095 if(next ==
' ' _RYML_WITH_TAB_TOKENS(|| next ==
'\t') || next ==
',' || next ==
'\n' || next ==
']')
1097 _c4dbgp(
"map starting!");
1102 _c4dbgp(
"':' nothing to see here");
1107 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.len == offs + 1, m_evt_handler->m_curr->pos);
1108 _line_progressed(col);
1109 _c4err(
"missing termination: '{}'", c);
1114 _c4dbgp(
"found suspicious '#'");
1115 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, offs > 0, m_evt_handler->m_curr->pos);
1116 char prev = s.str[offs - 1];
1119 _c4dbgpf(
"found terminating character at {}: '{}'", offs, c);
1127 _line_progressed(col);
1128 _c4err(
"invalid character: '{}'", c);
1131 _c4dbgpf(
"doc token character: '{}', offs={}", c, offs);
1132 if(offs == 0 && m_evt_handler->m_curr->at_line_beginning())
1134 _c4dbgp(
"at line beginning");
1135 if(s.len >= 3 && s.str[1] == c && s.str[2] == c)
1148 _line_progressed(col);
1149 _set_first(s, offs);
1151 sc->needs_filter = needs_filter;
1153 _c4prscalar(
"scanned plain scalar", sc->scalar,
true);
// Scans a plain (unquoted) scalar inside a flow map, or inside a sequence's
// implicit map, and stores the result in *sc. Returns whether the scanned
// scalar is non-empty (final visible return).
// Asserted state: RSEQ unset or RSEQIMAP set, no RBLCK, RMAP or RSEQIMAP set,
// RFLOW set, one of RKEY/RVAL/QMRK set; the remainder of the line starts with
// neither a space nor a newline.
// The scan is rejected up front when the remainder is empty or
// _is_valid_start_scalar_plain_flow refuses it.
// Like the sequence variant it scans the whole buffer from the current offset
// and delegates '\n' to _scan_scalar_plain_handle_newline. A ':' terminates
// the scalar when followed by space, ',', '}', '\n' or '\r' (or tab with tab
// tokens).
// NOTE(review): the switch labels, the bounds check before s.str[offs+1],
// the loop exits and the assignment of sc->scalar are not visible in this
// listing - confirm against the full source.
1158template<
class EventHandler>
1159bool ParseEngine<EventHandler>::_scan_scalar_plain_map_flow(ScannedScalar *C4_RESTRICT sc)
1161 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQ) || has_any(
RSEQIMAP), m_evt_handler->m_curr->pos);
1162 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RBLCK), m_evt_handler->m_curr->pos);
1163 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RMAP|
RSEQIMAP), m_evt_handler->m_curr->pos);
1164 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RFLOW), m_evt_handler->m_curr->pos);
1165 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RVAL|
QMRK), m_evt_handler->m_curr->pos);
1167 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->line_contents.rem.begins_with(
' '), m_evt_handler->m_curr->pos);
1168 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->line_contents.rem.begins_with(
'\n'), m_evt_handler->m_curr->pos);
1170 if(!m_evt_handler->m_curr->line_contents.rem.len || !_is_valid_start_scalar_plain_flow(m_evt_handler->m_curr->line_contents.rem))
// view over the rest of the buffer, so the scan can cross line boundaries
1173 substr s = _buf().
sub(m_evt_handler->m_curr->pos.offset);
1174 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with(m_evt_handler->m_curr->line_contents.rem), m_evt_handler->m_curr->pos);
1176 _c4dbgp(
"scanning mapflow scalar...");
1178 bool needs_filter =
false;
1181 for( ; offs < s.len; ++offs, ++col)
1183 const char c = s.str[offs];
1188 _c4dbgpf(
"found terminating character at {}: '{}'", offs, c);
1189 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, offs > 0, m_evt_handler->m_curr->pos);
1192 _c4dbgpf(
"found '\\n' at col={}", col);
1193 if(!_scan_scalar_plain_handle_newline(s, offs))
1196 needs_filter =
true;
1200 needs_filter =
true;
// NOTE(review): the format string has no placeholder, so the `c` argument is unused
1203 _c4dbgpf(
"found ':'", c);
1207 const char next = s.str[offs+1];
// NOTE(review): labelled "next" but prints `c`; the sequence-flow scanner prints `next` at the same spot
1208 _c4dbgpf(
"next='{}'", c);
1209 if(next ==
' ' || next ==
',' || next ==
'}' || next ==
'\n' || next ==
'\r' _RYML_WITH_TAB_TOKENS(|| next ==
'\t'))
1211 _c4dbgpf(
"found terminating character: '{}'", c);
1218 _line_progressed(col);
1219 _c4err(
"invalid character: '{}'", c);
1222 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RSEQIMAP), m_evt_handler->m_curr->pos);
1231 _line_progressed(col);
1234 sc->needs_filter = needs_filter;
1236 _c4prscalar(
"scanned plain scalar", sc->scalar,
true);
1238 return sc->scalar.len > 0u;
// Scans an unquoted JSON scalar inside a flow sequence and stores it in *sc.
// Asserted state: no RMAP, no RBLCK, RSEQ set, RFLOW set; the remainder of
// the line is non-empty and does not start with a space.
// Two paths are visible:
//  - special scalars: _begins_with_special_json_scalar gives a length; the
//    character after it (taken as ',' when the scalar reaches the end of
//    `s`) must be ',', ']', ' ', '\n', '\t' or '\r' for the scalar to be
//    accepted and the line advanced by its length;
//  - general scan: characters are walked until a terminating one, and a
//    non-empty prefix of length i becomes the scalar.
// Both paths set needs_filter to false.
// NOTE(review): the set of terminating characters in the general scan, the
// "not a scalar" condition and the return statements are not visible in this
// listing.
1241template<
class EventHandler>
1242bool ParseEngine<EventHandler>::_scan_scalar_seq_json(ScannedScalar *C4_RESTRICT sc)
1244 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RMAP), m_evt_handler->m_curr->pos);
1245 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RBLCK), m_evt_handler->m_curr->pos);
1246 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RSEQ), m_evt_handler->m_curr->pos);
1247 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RFLOW), m_evt_handler->m_curr->pos);
1249 substr s = m_evt_handler->m_curr->line_contents.rem;
1250 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !s.begins_with(
' '), m_evt_handler->m_curr->pos);
1251 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.len > 0, m_evt_handler->m_curr->pos);
1253 _c4dbgp(
"seq_json: scanning scalar...");
1260 _c4dbgp(
"seq_json: not a scalar.");
1265 const size_t len = _begins_with_special_json_scalar(s);
// end of input after the special scalar is treated like a following ','
1268 char c = s.len > len ? s.str[len] :
',';
1269 if(c ==
',' || c ==
']' || c ==
' ' || c ==
'\n' || c ==
'\t' || c ==
'\r')
1271 sc->scalar = s.first(len);
1272 sc->needs_filter =
false;
1273 _c4dbgpf(
"seq_json: special scalar: '{}'", sc->scalar);
1274 _line_progressed(len);
1286 for( ; i < s.len; ++i)
1288 const char c = s.str[i];
1295 _c4dbgpf(
"seq_json: found terminating character: '{}'", c);
1304 if(C4_LIKELY(i > 0))
1306 _line_progressed(i);
1307 sc->scalar = s.first(i);
1308 sc->needs_filter =
false;
1309 _c4dbgpf(
"seq_json: scalar was {}", _prs(sc->scalar,
true));
// Scans an unquoted JSON scalar inside a flow map and stores it in *sc.
// Asserted state: no RSEQ, no RBLCK, RMAP set, RFLOW set, RKEY or RVAL set;
// the remainder of the line is non-empty and does not start with a space.
// Two paths are visible:
//  - special scalars: _begins_with_special_json_scalar gives a length; the
//    character after it (taken as ',' when the scalar reaches the end of
//    `s`) must be ',', '}', ' ', '\n', '\t' or '\r' for the scalar to be
//    accepted and the line advanced by its length;
//  - general scan: characters are walked until a terminating one, and a
//    non-empty prefix of length i becomes the scalar.
// Both paths set needs_filter to false.
// NOTE(review): the set of terminating characters in the general scan and
// the return statements are not visible in this listing.
1315template<
class EventHandler>
1316bool ParseEngine<EventHandler>::_scan_scalar_map_json(ScannedScalar *C4_RESTRICT sc)
1318 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQ), m_evt_handler->m_curr->pos);
1319 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RBLCK), m_evt_handler->m_curr->pos);
1320 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RMAP), m_evt_handler->m_curr->pos);
1321 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RFLOW), m_evt_handler->m_curr->pos);
1322 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RVAL), m_evt_handler->m_curr->pos);
1324 substr s = m_evt_handler->m_curr->line_contents.rem;
1325 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !s.begins_with(
' '), m_evt_handler->m_curr->pos);
1326 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.len > 0, m_evt_handler->m_curr->pos);
1328 _c4dbgp(
"scanning scalar...");
1331 const size_t len = _begins_with_special_json_scalar(s);
// end of input after the special scalar is treated like a following ','
1334 char c = s.len > len ? s.str[len] :
',';
1335 _c4dbgpf(
"begins with special scalar: {} next='{}'", s.first(len), _c4prc(c));
1336 if(c ==
',' || c ==
'}' || c ==
' ' || c ==
'\n' || c ==
'\t' || c ==
'\r')
1338 sc->scalar = s.first(len);
1339 sc->needs_filter =
false;
1340 _c4dbgpf(
"special json scalar: '{}'", _prs(sc->scalar));
1341 _line_progressed(len);
1353 for( ; i < s.len; ++i)
1355 const char c = s.str[i];
1362 _c4dbgpf(
"found terminating character: '{}'", c);
1371 if(C4_LIKELY(i > 0))
1373 _line_progressed(i);
1374 sc->scalar = s.first(i);
1375 sc->needs_filter =
false;
1376 _c4dbgpf(
"scalar was {}", _prs(sc->scalar));
// True when `s` (whose first character is asserted to be '-') is a
// document-begin token in a position where it counts: the current line has
// zero indentation, the parser is at the beginning of the line, and
// _is_doc_begin_token(s) accepts the text.
1383template<
class EventHandler>
1384bool ParseEngine<EventHandler>::_is_doc_begin(
csubstr s)
1386 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s[0] ==
'-', m_evt_handler->m_curr->pos);
1387 return (m_evt_handler->m_curr->line_contents.indentation == 0u && m_evt_handler->m_curr->at_line_beginning() && _is_doc_begin_token(s));
// True when `s` (whose first character is asserted to be '.') is a
// document-end token in a position where it counts: the current line has
// zero indentation, the parser is at the beginning of the line, and
// _is_doc_end_token(s) accepts the text.
1390template<
class EventHandler>
1391bool ParseEngine<EventHandler>::_is_doc_end(
csubstr s)
1393 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s[0] ==
'.', m_evt_handler->m_curr->pos);
1394 return (m_evt_handler->m_curr->line_contents.indentation == 0u && m_evt_handler->m_curr->at_line_beginning() && _is_doc_end_token(s));
// Scans a plain (unquoted) scalar in block context, starting at the unread
// remainder of the current line and possibly continuing on following lines.
//   sc:          receives the scanned text (sc->scalar) and whether it needs
//                filtering (sc->needs_filter).
//   indentation: reference indentation; a following line indented less than
//                this ends the scalar.
// Preconditions (asserted): RFLOW and RSEQIMAP are clear, at least one of
// RBLCK|RUNK|USTY is set, and the remainder is non-empty and does not start
// with a space.
// NOTE(review): the return statements are on lines not shown here;
// presumably the bool reports whether a scalar was actually scanned - confirm.
1397template<
class EventHandler>
1398bool ParseEngine<EventHandler>::_scan_scalar_plain_blck(ScannedScalar *C4_RESTRICT sc,
size_t indentation)
1400 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RFLOW), m_evt_handler->m_curr->pos);
1401 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQIMAP), m_evt_handler->m_curr->pos);
1402 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RBLCK|
RUNK|
USTY), m_evt_handler->m_curr->pos);
1404 substr s = m_evt_handler->m_curr->line_contents.rem;
1405 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !s.begins_with(
' '), m_evt_handler->m_curr->pos);
1406 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.len > 0, m_evt_handler->m_curr->pos);
// Check first whether the text is a block token or a document begin/end
// marker (what happens in each case is on lines not shown here).
1411 if(_is_blck_token(s))
1415 else if(_is_doc_begin(s))
1417 _c4dbgp(
"token is doc start");
1423 if(_is_blck_token(s))
1438 _c4dbgp(
"token is doc end");
1444 _c4dbgpf(
"plain scalar! indentation={}", indentation);
// Record where the scalar starts: the offset delimits the final buffer
// range, the line is compared below to detect a scalar spanning lines.
1446 const size_t start_offset = m_evt_handler->m_curr->pos.offset;
1447 const size_t start_line = m_evt_handler->m_curr->pos.line;
1449 bool needs_filter =
false;
1452 _c4dbgpf(
"plain scalar line: {}", _prs(s));
// Walk the current line looking for a character that ends the scalar.
1453 for(
size_t i = 0; i < s.len; ++i)
1455 const char curr = s.str[i];
1460 _c4dbgpf(
"[{}]: got suspicious ':'", i);
1464 _c4dbgpf(
"followed by '{}'", i+1 == s.len ?
csubstr(
"\\n") : _c4prc(s.str[i+1]));
1465 _line_progressed(i);
// A terminating colon on the starting line ends the scalar; the error below
// is for the other case, since a multiline scalar cannot be used as a key.
1467 if(C4_LIKELY(m_evt_handler->m_curr->pos.line == start_line))
1469 _c4dbgp(
"start line. scalar ends here");
1474 _c4err(
"multiline scalars cannot be used as keys");
// Step over a run of consecutive colons.
1480 while(j + 1 < s.len && s.str[j+1] ==
':')
1482 _c4dbgp(
"skip colon");
1485 i = j > i ? j-1 : i;
1486 _c4dbgp(
"nothing to see here");
1490 _c4dbgp(
"got suspicious '#'");
// A hash only opens a comment at the very start of the line or right after
// a space or tab.
1491 if(!i || (s.str[i-1] ==
' ' || s.str[i-1] ==
'\t'))
1493 _c4dbgp(
"comment! scalar ends here");
1494 _line_progressed(i);
1499 _c4dbgp(
"nothing to see here");
// The whole line was consumed: peek at the next line (without advancing)
// to decide whether the scalar goes on.
1504 _line_progressed(s.len);
1505 csubstr next_peeked = _peek_next_line(m_evt_handler->m_curr->pos.offset);
1506 next_peeked = next_peeked.trimr(
"\n\r");
1507 const size_t next_indentation = next_peeked.first_not_of(
' ');
1508 _c4dbgpf(
"indentation curr={} next={}", indentation, next_indentation);
// A next line indented less than the reference ends the scalar.
1509 if(next_indentation < indentation)
1511 _c4dbgp(
"smaller indentation! scalar ended");
// At zero indentation the next line may be a document begin/end marker,
// which also ends the scalar.
1514 else if(next_indentation == 0 && next_peeked.len > 0)
1516 const char first = next_peeked.str[0];
1520 _c4dbgpf(
"doc begin? peeked={}", _prs(next_peeked,
size_t(3)));
1521 if(_is_doc_begin_token(next_peeked))
1523 _c4dbgp(
"doc begin! scalar ended");
1528 _c4dbgpf(
"doc end? peeked={}", _prs(next_peeked,
size_t(3)));
1529 if(_is_doc_end_token(next_peeked))
1531 _c4dbgp(
"doc end! scalar ended");
1538 _c4dbgp(
"next line!");
1539 if(!_finished_file())
1541 _c4dbgp(
"next line!");
1547 _c4dbgp(
"file finished!");
// Continue with the remainder of the newly loaded line; a scalar that
// reaches this point is flagged as needing filtering.
1550 s = m_evt_handler->m_curr->line_contents.rem;
1551 needs_filter =
true;
// The scalar is the buffer range from the recorded start to the current
// offset, with trailing spaces, tabs and line breaks removed.
1556 sc->scalar = _buf().range(start_offset, m_evt_handler->m_curr->pos.offset).trimr(
" \n\r\t");
1557 sc->needs_filter = needs_filter;
1559 _c4dbgpf(
"scalar was {}", _prs(sc->scalar));
// Scans a plain scalar while in the block-sequence value state.
// Asserts the state flags (RSEQ, RBLCK and RVAL set; RMAP, RFLOW and
// RSEQIMAP clear) and delegates to _scan_scalar_plain_blck(), using one
// column past the current indref as the reference indentation.
// Returns whatever _scan_scalar_plain_blck() returns.
1564template<
class EventHandler>
1565C4_ALWAYS_INLINE
bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_blck(ScannedScalar *C4_RESTRICT sc)
1567 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RMAP), m_evt_handler->m_curr->pos);
1568 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RFLOW), m_evt_handler->m_curr->pos);
1569 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQIMAP), m_evt_handler->m_curr->pos);
1570 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RSEQ), m_evt_handler->m_curr->pos);
1571 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RBLCK), m_evt_handler->m_curr->pos);
1572 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL), m_evt_handler->m_curr->pos);
1573 return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref + 1u);
// Scans a plain scalar while in a block-map state.
// Asserts the state flags (RMAP and RBLCK set, at least one of
// RKEY|RVAL|QMRK set; RSEQ and RFLOW clear) and delegates to
// _scan_scalar_plain_blck(), using one column past the current indref as
// the reference indentation.
// Returns whatever _scan_scalar_plain_blck() returns.
1576template<
class EventHandler>
1577C4_ALWAYS_INLINE
bool ParseEngine<EventHandler>::_scan_scalar_plain_map_blck(ScannedScalar *C4_RESTRICT sc)
1579 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RSEQ), m_evt_handler->m_curr->pos);
1580 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RFLOW), m_evt_handler->m_curr->pos);
1581 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RMAP), m_evt_handler->m_curr->pos);
1582 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RBLCK), m_evt_handler->m_curr->pos);
1583 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RVAL|
QMRK), m_evt_handler->m_curr->pos);
1584 return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref + 1u);
// Scans a plain scalar while the container type is still unknown
// (asserts that RUNK or USTY is set). Unlike the seq/map variants, the
// current indref itself, not indref + 1, is the reference indentation.
// Returns whatever _scan_scalar_plain_blck() returns.
1587template<
class EventHandler>
1588C4_ALWAYS_INLINE
bool ParseEngine<EventHandler>::_scan_scalar_plain_unk(ScannedScalar *C4_RESTRICT sc)
1590 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RUNK|
USTY), m_evt_handler->m_curr->pos);
1591 return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref);
// Returns a view of the line that follows buffer position `pos`, without
// changing the parser position (the method is const).
//   pos: buffer offset to look from; npos selects the current offset.
// NOTE(review): the return statements are on lines not shown here; the
// second trace message suggests an empty result when `pos` is at or past
// the end of the buffer - confirm.
1597template<
class EventHandler>
1598substr ParseEngine<EventHandler>::_peek_next_line(
size_t pos)
const
// npos means: start from the current offset of the parser.
1602 pos = pos ==
npos ? m_evt_handler->m_curr->pos.offset : pos;
1603 if(pos >= _buf().len)
// presumably _from_next_line() drops the rest of the current line - confirm
1607 rem = _from_next_line(_buf().sub(pos));
// Cut the view after the first line break; the adjustment returned by
// _extend_from_combined_newline() presumably covers a two-character
// line break - confirm.
1612 nlpos = rem.first_of(
"\r\n");
1614 nlpos += _extend_from_combined_newline(rem[nlpos], rem[nlpos+1]);
1615 rem = rem.left_of(nlpos,
true);
1617 _c4dbgpf(
"peek next line @ {}: (len={})'{}'", pos, rem.len, rem.trimr(
"\r\n"));
1621 _c4dbgpf(
"peek next line @ {}: (len=0)''", pos);
// Loads the current state's line_contents from the buffer at the current
// offset. The second call resets it with an empty tail of the buffer
// (last(0)) and offset 0; presumably this is the branch taken once the
// offset has reached the end of the buffer (the else is not shown here).
1627template<
class EventHandler>
1628void ParseEngine<EventHandler>::_scan_line()
1630 if(C4_LIKELY(m_evt_handler->m_curr->pos.offset < _buf().len))
1631 m_evt_handler->m_curr->line_contents.reset_with_next_line(_buf(), m_evt_handler->m_curr->pos.offset);
1633 m_evt_handler->m_curr->line_contents.reset_with_next_line(_buf().last(0), 0);
// Advances the parser position within the current line by `ahead`
// characters: offset and column both grow by `ahead`, and the same number
// of characters is dropped from the front of the unread remainder.
// Asserts that the new column is at most one past the number of columns
// of the line.
1636template<
class EventHandler>
1637void ParseEngine<EventHandler>::_line_progressed(
size_t ahead)
1639 _c4dbgpf(
"line[{}] ({} cols) progressed by {}: col {}-->{} offset {}-->{}",
1640 m_evt_handler->m_curr->pos.line,
1641 m_evt_handler->m_curr->line_contents.full.len,
1642 ahead, m_evt_handler->m_curr->pos.col,
1643 m_evt_handler->m_curr->pos.col+ahead,
1644 m_evt_handler->m_curr->pos.offset,
1645 m_evt_handler->m_curr->pos.offset+ahead);
1646 m_evt_handler->m_curr->pos.offset += ahead;
1647 m_evt_handler->m_curr->pos.col += ahead;
1648 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col <= m_evt_handler->m_curr->line_contents.num_cols+1, m_evt_handler->m_curr->pos);
1649 m_evt_handler->m_curr->line_contents.rem = m_evt_handler->m_curr->line_contents.rem.sub(ahead);
// Moves the parser position from the end of the current line to the start
// of the next one. Asserts that the column is exactly one past the last
// column, i.e. the whole line was consumed. The offset then skips the part
// of the full line beyond its columns (full.len - num_cols; presumably the
// line-break characters), the line counter is incremented and the column
// is reset to 1.
1652template<
class EventHandler>
1653void ParseEngine<EventHandler>::_line_ended()
1655 _c4dbgpf(
"line[{}] ({} cols) ended! offset {}-->{} / col {}-->{}",
1656 m_evt_handler->m_curr->pos.line,
1657 m_evt_handler->m_curr->line_contents.full.len,
1658 m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset + m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols,
1659 m_evt_handler->m_curr->pos.col, 1);
1660 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col == m_evt_handler->m_curr->line_contents.num_cols + 1, m_evt_handler->m_curr->pos);
1661 m_evt_handler->m_curr->pos.offset += m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols;
1662 ++m_evt_handler->m_curr->pos.line;
1663 m_evt_handler->m_curr->pos.col = 1;
// Reverts the position changes made by _line_ended(): the offset moves
// back by full.len - num_cols, the line counter is decremented and the
// column becomes one past the last column of the line.
// Asserts that the parser is at column 1, that the line counter is
// positive and that the offset is large enough to be moved back.
1666template<
class EventHandler>
1667void ParseEngine<EventHandler>::_line_ended_undo()
1669 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col == 1u, m_evt_handler->m_curr->pos);
1670 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.line > 0u, m_evt_handler->m_curr->pos);
1671 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.offset >= m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols, m_evt_handler->m_curr->pos);
1672 const size_t delta = m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols;
1673 _c4dbgpf(
"line[{}] undo ended! line {}-->{}, offset {}-->{}", m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->pos.line - 1, m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset - delta);
1674 m_evt_handler->m_curr->pos.offset -= delta;
1675 --m_evt_handler->m_curr->pos.line;
1676 m_evt_handler->m_curr->pos.col = m_evt_handler->m_curr->line_contents.num_cols + 1u;
// The unread remainder becomes a zero-length view at the restored offset.
1679 m_evt_handler->m_curr->line_contents.rem = _buf().sub(m_evt_handler->m_curr->pos.offset, 0);
// Stores `indentation` as the indentation reference (indref) of the
// current parser state.
1684template<
class EventHandler>
1685void ParseEngine<EventHandler>::_set_indentation(
size_t indentation)
noexcept
1687 m_evt_handler->m_curr->indref = indentation;
1688 _c4dbgpf(
"state[{}]: saving indentation: {}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
// Stores the current column of the line, as reported by
// line_contents.current_col(), as the indentation reference (indref) of
// the current parser state. Asserts that the unread remainder is a
// sub-range of the full line.
1691template<
class EventHandler>
1692void ParseEngine<EventHandler>::_save_indentation()
1694 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.is_sub(m_evt_handler->m_curr->line_contents.full), m_evt_handler->m_curr->pos);
1695 m_evt_handler->m_curr->indref = m_evt_handler->m_curr->line_contents.current_col();
1696 _c4dbgpf(
"state[{}]: saving indentation: {}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
// Records the current line number in m_prev_val_end, marking the line on
// which the latest value of a flow sequence ended.
1699template<
class EventHandler>
1700void ParseEngine<EventHandler>::_mark_seqflow_val_end() noexcept
1702 _c4dbgpf(
"SEQFLOW. mark val end at line={}", m_evt_handler->m_curr->pos.line);
1703 m_prev_val_end = m_evt_handler->m_curr->pos.line;
// Handles a flow container that turned out to be followed by a colon.
// Tells the event handler that the value just produced is in fact the
// first key of a new block map, restores `orig_indent` as the indentation
// reference and skips any following whitespace tokens.
1709template<
class EventHandler>
1710void ParseEngine<EventHandler>::_flow_container_was_a_key(
size_t orig_indent)
1712 _c4dbgpf(
"flow container is followed by colon! orig_indent={}", orig_indent);
1713 m_evt_handler->actually_val_is_first_key_of_new_map_block();
1715 _set_indentation(orig_indent);
1716 _maybe_skip_whitespace_tokens();
// Parser-side follow-up after a flow container (map or seq) was closed.
//   orig_indent: indentation reference the container had before it ended;
//                forwarded to _flow_container_was_a_key().
//   multiline:   whether the container spanned more than one line; using
//                such a container as a key is reported as an error.
// NOTE(review): the condition guarding the first branch is on lines not
// shown here.
1719template<
class EventHandler>
1720void ParseEngine<EventHandler>::_end_flow_container(
size_t orig_indent,
bool multiline)
// First case: the container is the key of a plain block map, so it must
// be on a single line and must be followed by a colon.
1726 _c4dbgp(
"flow container: end as vanilla block map key!");
1727 if(C4_UNLIKELY(multiline))
1728 _c4err(
"multiline key is invalid");
1729 if(C4_UNLIKELY(!_maybe_scan_following_colon()))
1730 _c4err(
"could not find ':' colon after key");
1731 _maybe_skip_whitespace_tokens();
// Second case: after closing, the parser is no longer in flow context.
1734 else if(has_none(
RFLOW))
1736 _c4dbgp(
"end_flow_container: now not in flow!");
// A colon right after the container means it was actually a key.
1737 if(has_any(
RUNK|
RSEQ|
RKCL) && _maybe_scan_following_colon())
1739 if(C4_UNLIKELY(multiline))
1740 _c4err(
"multiline key is invalid");
1741 _flow_container_was_a_key(orig_indent);
1745 _c4dbgp(
"end_flow_container: end map as key!");
// Third case: the enclosing state is a seq; per the assertion below it
// is a flow seq, and the end of this value is recorded.
1748 else if(has_any(
RSEQ))
1750 _c4dbgp(
"end_flow_container: now in a flow seq");
1751 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RFLOW), m_evt_handler->m_curr->pos);
1752 _mark_seqflow_val_end();
// Ends the current flow map. The map counts as multiline when the parent
// state is positioned on an earlier line than the current state. The
// handler is told to end the map, with the multiline flag gated by the
// detect_flow_ml() option and with the configured flow_ml_style() type;
// then _end_flow_container() does the follow-up. The indentation
// reference is read before the handler is notified.
1756template<
class EventHandler>
1757void ParseEngine<EventHandler>::_end_map_flow()
1759 bool multiline = m_evt_handler->m_parent->pos.line < m_evt_handler->m_curr->pos.line;
1760 size_t orig_indent = m_evt_handler->m_curr->indref;
1761 _c4dbgpf(
"mapflow: end, multiline={}", multiline);
1762 m_evt_handler->end_map_flow(multiline && m_options.detect_flow_ml(), m_options.flow_ml_style().type);
1763 _end_flow_container(orig_indent, multiline);
// Ends the current flow sequence. The sequence counts as multiline when
// the parent state is positioned on an earlier line than the current
// state. The handler is told to end the sequence, with the multiline flag
// gated by the detect_flow_ml() option and with the configured
// flow_ml_style() type; then _end_flow_container() does the follow-up.
// The indentation reference is read before the handler is notified.
1766template<
class EventHandler>
1767void ParseEngine<EventHandler>::_end_seq_flow()
1769 bool multiline = m_evt_handler->m_parent->pos.line < m_evt_handler->m_curr->pos.line;
1770 size_t orig_indent = m_evt_handler->m_curr->indref;
1771 _c4dbgpf(
"seqflow: end, multiline={}", multiline);
1772 m_evt_handler->end_seq_flow(multiline && m_options.detect_flow_ml(), m_options.flow_ml_style().type);
1773 _end_flow_container(orig_indent, multiline);
// Ends the current block map. Before telling the handler, entries left
// incomplete are filled with empty plain scalars (after applying pending
// annotations): a missing value in the first case, and both key and
// value when the QMRK flag is set.
// NOTE(review): the condition guarding the first case is on a line not
// shown here.
1776template<
class EventHandler>
1777void ParseEngine<EventHandler>::_end_map_blck()
1779 _c4dbgp(
"mapblck: end");
1782 _c4dbgp(
"mapblck: set missing val");
1783 _handle_annotations_before_blck_val_scalar();
1784 m_evt_handler->set_val_scalar_plain_empty();
1786 else if(has_any(
QMRK))
1788 _c4dbgp(
"mapblck: set missing keyval");
1789 _handle_annotations_before_blck_key_scalar();
1790 m_evt_handler->set_key_scalar_plain_empty();
1791 _handle_annotations_before_blck_val_scalar();
1792 m_evt_handler->set_val_scalar_plain_empty();
1794 m_evt_handler->end_map_block();
// Ends the current block sequence. A missing value is first filled with
// an empty plain scalar (after applying pending annotations), then the
// handler is told the sequence ended.
// NOTE(review): the condition guarding the missing-value case is on a
// line not shown here.
1797template<
class EventHandler>
1798void ParseEngine<EventHandler>::_end_seq_blck()
1802 _c4dbgp(
"seqblck: set missing val");
1803 _handle_annotations_before_blck_val_scalar();
1804 m_evt_handler->set_val_scalar_plain_empty();
1806 m_evt_handler->end_seq_block();
// Ends the current map (asserts that RMAP is set).
// NOTE(review): several lines between the first assertion and the
// following ones are not shown here. The visible tail asserts a non-flow
// state with USTY set and pops that state from the handler stack.
1809template<
class EventHandler>
1810void ParseEngine<EventHandler>::_end2_map()
1812 _c4dbgp(
"map: end");
1813 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RMAP), m_evt_handler->m_curr->pos);
1820 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RFLOW), m_evt_handler->m_curr->pos);
1821 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
USTY), m_evt_handler->m_curr->pos);
1822 m_evt_handler->_pop();
// Ends the current sequence (asserts that RSEQ is set).
// NOTE(review): several lines between the first assertion and the
// following ones are not shown here. The visible tail asserts a non-flow
// state with USTY set and pops that state from the handler stack.
1826template<
class EventHandler>
1827void ParseEngine<EventHandler>::_end2_seq()
1829 _c4dbgp(
"seq: end");
1830 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RSEQ), m_evt_handler->m_curr->pos);
1837 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RFLOW), m_evt_handler->m_curr->pos);
1838 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
USTY), m_evt_handler->m_curr->pos);
1839 m_evt_handler->_pop();
// Begins a document through the handler's begin_doc(): clears the
// per-document directive flags first, then resets the indentation
// reference of the new current state to 0.
// NOTE(review): lines between the flag resets and the handler call are
// not shown here.
1843template<
class EventHandler>
1844void ParseEngine<EventHandler>::_begin2_doc()
1846 _c4dbgp(
"begin_doc");
1847 m_has_directives_yaml =
false;
1848 m_has_directives =
false;
1851 m_evt_handler->begin_doc();
1852 m_evt_handler->m_curr->indref = 0;
// Begins a document through the handler's begin_doc_expl(): clears the
// per-document directive flags first, then resets the indentation
// reference of the new current state to 0.
// NOTE(review): lines between the flag resets and the handler call are
// not shown here.
1855template<
class EventHandler>
1856void ParseEngine<EventHandler>::_begin2_doc_expl()
1858 _c4dbgp(
"begin_doc_expl");
1859 m_has_directives_yaml =
false;
1860 m_has_directives =
false;
1863 m_evt_handler->begin_doc_expl();
1864 m_evt_handler->m_curr->indref = 0;
// Ends the current document through the handler's end_doc() (asserts
// that RDOC is set). When the document is empty, or tags or anchors are
// still pending, an empty plain scalar is emitted first so that those
// annotations have a value to attach to.
1867template<
class EventHandler>
1868void ParseEngine<EventHandler>::_end2_doc()
1870 _c4dbgp(
"doc: end");
1871 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RDOC), m_evt_handler->m_curr->pos);
1872 if(m_doc_empty || (m_pending_tags.num_entries || m_pending_anchors.num_entries))
1874 _c4dbgp(
"doc was empty; add empty val");
1875 _handle_annotations_before_blck_val_scalar();
1876 m_evt_handler->set_val_scalar_plain_empty();
1878 m_evt_handler->end_doc();
// Ends the current document through the handler's end_doc_expl(). When
// the document is empty, or tags or anchors are still pending, an empty
// plain scalar is emitted first so that those annotations have a value
// to attach to.
1882template<
class EventHandler>
1883void ParseEngine<EventHandler>::_end2_doc_expl()
1885 _c4dbgp(
"doc: end");
1886 if(m_doc_empty || (m_pending_tags.num_entries || m_pending_anchors.num_entries))
1888 _c4dbgp(
"doc: no children; add empty val");
1889 _handle_annotations_before_blck_val_scalar();
1890 m_evt_handler->set_val_scalar_plain_empty();
1892 m_evt_handler->end_doc_expl();
// Starts a document if one is needed.
// NOTE(review): the condition and the action are on lines not shown
// here; only the trace message of the must-start case is visible.
1896template<
class EventHandler>
1897void ParseEngine<EventHandler>::_maybe_begin_doc()
1901 _c4dbgp(
"doc must be started");
// Finishes the current document if there is one to finish.
// NOTE(review): the first condition and its action are on lines not
// shown here. In the visible second case no document is open but tags
// or anchors are pending: a document holding a single empty plain scalar
// is emitted so the annotations are not lost.
1905template<
class EventHandler>
1906void ParseEngine<EventHandler>::_maybe_end_doc()
1910 _c4dbgp(
"doc must be finished");
1913 else if(m_doc_empty && (m_pending_tags.num_entries || m_pending_anchors.num_entries))
1915 _c4dbgp(
"no doc to finish, but pending annotations");
1916 m_evt_handler->begin_doc();
1917 _handle_annotations_before_blck_val_scalar();
1918 m_evt_handler->set_val_scalar_plain_empty();
1919 m_evt_handler->end_doc();
// Unwinds the state stack down to its bottom entry when a document ends
// abruptly. Asserts that the stack is non-empty and that its bottom
// entry carries RDOC; if the current state is not at level 0, pops down
// to the bottom entry, after which the current state must carry RDOC.
1923template<
class EventHandler>
1924void ParseEngine<EventHandler>::_end_doc_suddenly__pop()
1926 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1, m_evt_handler->m_curr->pos);
1927 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack[0].flags &
RDOC, m_evt_handler->m_curr->pos);
1928 _c4dbgp(
"root is RDOC");
1929 if(m_evt_handler->m_curr->level != 0)
1930 _handle_indentation_pop(&m_evt_handler->m_stack[0]);
1931 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RDOC), m_evt_handler->m_curr->pos);
// Checks for a document token trailing existing content. The condition
// holds when the state is the root (stack size 1) or the handler reports
// DOC, the handler reports any of MAP|SEQ|VAL, and the NDOC flag is not
// set on the current state.
// NOTE(review): the action taken when the condition holds is on a line
// not shown here.
1936template<
class EventHandler>
1937void ParseEngine<EventHandler>::_check_trailing_doc_token()
1939 const bool is_root = (m_evt_handler->m_stack.size() == 1u);
1940 const bool isndoc = (m_evt_handler->m_curr->flags &
NDOC) != 0;
1941 const bool suspicious = m_evt_handler->template _has_any__<MAP|SEQ|VAL>();
1942 _c4dbgpf(
"target={} isroot={} suspicious={} ndoc={}", m_evt_handler->m_curr->node_id, is_root, suspicious, isndoc);
1943 if((is_root || m_evt_handler->template _has_any__<DOC>()) && suspicious && !isndoc)
// Ends the current document abruptly: first unwinds the state stack with
// _end_doc_suddenly__pop().
// NOTE(review): the statements after the pop are on lines not shown here.
1947template<
class EventHandler>
1948void ParseEngine<EventHandler>::_end_doc_suddenly()
1950 _c4dbgp(
"end doc suddenly");
1951 _end_doc_suddenly__pop();
// Validates what remains on the line after a document-end marker.
// Asserts that the remainder does not start with a dot, space or tab;
// the condition below then detects a non-empty remainder that is not a
// comment (does not start with a hash).
// NOTE(review): the action taken in that case is on a line not shown
// here; presumably an error is raised - confirm.
1956template<
class EventHandler>
1957void ParseEngine<EventHandler>::_check_doc_end_tokens()
const
1959 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
1960 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !rem.begins_with_any(
". \t"), m_evt_handler->m_curr->pos);
1961 if(C4_UNLIKELY(rem.len && !rem.begins_with(
'#')))
// Starts a new document abruptly: first unwinds the state stack of the
// current document with _end_doc_suddenly__pop().
// NOTE(review): the statements after the pop are on lines not shown here.
1967template<
class EventHandler>
1968void ParseEngine<EventHandler>::_start_doc_suddenly()
1970 _c4dbgp(
"start doc suddenly");
1971 _end_doc_suddenly__pop();
// Finishes parsing of the whole stream.
// Errors raised here: an unterminated flow sequence or flow map, and
// directives that were never followed by a document.
// NOTE(review): the condition of the first error and several lines in
// the middle of the function are not shown here.
1976template<
class EventHandler>
1977void ParseEngine<EventHandler>::_end_stream()
1979 _c4dbgpf(
"end_stream, level={} node_id={}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->node_id);
1981 _c4err(
"missing terminating ]");
1982 else if(C4_UNLIKELY(has_all(
RMAP|
RFLOW)))
1983 _c4err(
"missing terminating }");
// Unwind every nested state down to the bottom of the stack.
1984 if(m_evt_handler->m_stack.size() > 1)
1985 _handle_indentation_pop(m_evt_handler->m_stack.begin());
// Pending anchors or tags get a document with one empty plain scalar to
// attach to.
1992 if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
1996 m_evt_handler->begin_doc();
1997 _handle_annotations_before_blck_val_scalar();
1998 m_evt_handler->set_val_scalar_plain_empty();
1999 m_evt_handler->end_doc();
2003 m_evt_handler->end_stream();
2004 if(C4_UNLIKELY(m_has_directives))
2005 _c4err(
"directives cannot be used without a document");
// Pops parser states until `popto` becomes the current state.
//   popto: the state to return to.
// Each iteration distinguishes sequences from maps (RMAP is tested in
// the visible branch).
// NOTE(review): the statements that end each container are on lines not
// shown here.
2009template<
class EventHandler>
2010void ParseEngine<EventHandler>::_handle_indentation_pop(
ParserState const* popto)
2012 _c4dbgpf(
"popping {} level{}: from level {}(@ind={}) to level {}(@ind={})", m_evt_handler->m_curr->level - popto->level, (((m_evt_handler->m_curr->level - popto->level) > 1) ?
"s" :
""), m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, popto->level, popto->indref);
2013 while(m_evt_handler->m_curr != popto)
2017 _c4dbgpf(
"popping seq at level {} (indentation={},addr={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, m_evt_handler->m_curr);
2020 else if(has_any(
RMAP))
2022 _c4dbgpf(
"popping map at level {} (indentation={},addr={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, m_evt_handler->m_curr);
2030 _c4dbgpf(
"current level is {} (indentation={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
// Handles a decrease of indentation while in a block sequence: searches
// the state stack, from the state just below the current one down to and
// including the bottom, for a state whose indentation reference equals
// the indentation of the current line, then pops down to it.
// Raises a parse error if no suitable state is found.
// NOTE(review): the statements executed on a match are on lines not
// shown here; presumably they set `popto` and leave the loop - confirm.
2033template<
class EventHandler>
2034void ParseEngine<EventHandler>::_handle_indentation_pop_from_block_seq()
2037 using state_type =
typename EventHandler::state;
2038 state_type
const* popto =
nullptr;
2039 auto &stack = m_evt_handler->m_stack;
// The pointer walk below relies on the states being stored contiguously.
2040 _RYML_ASSERT_PARSE_(stack.m_callbacks, stack.is_contiguous(), m_evt_handler->m_curr->pos);
2041 _RYML_ASSERT_PARSE_(stack.m_callbacks, m_evt_handler->m_curr >= stack.begin() && m_evt_handler->m_curr < stack.end(), m_evt_handler->m_curr->pos);
2042 const size_t ind = m_evt_handler->m_curr->line_contents.indentation;
2044 _print_state_stack();
2046 for(state_type
const* s = m_evt_handler->m_curr-1; s >= stack.begin(); --s)
2048 _c4dbgpf(
"searching for state with indentation {}. curr={} (level={},node={})", ind, s->indref, s->level, s->node_id);
2049 if(s->indref == ind)
2051 _c4dbgpf(
"gotit!!! level={} node={}", s->level, s->node_id);
// The target must exist and must lie strictly below the current state.
2056 if(!popto || popto >= m_evt_handler->m_curr || popto->level >= m_evt_handler->m_curr->level)
2058 _c4err(
"parse error: incorrect indentation?");
2060 _handle_indentation_pop(popto);
// Handles a decrease of indentation while in a block map: searches the
// state stack below the current state for the state to return to, based
// on the indentation of the current line, then pops down to it.
// Unlike the block-seq variant, the loop stops before the bottom state
// (s > stack.begin()).
// Raises a parse error if no suitable state is found.
// NOTE(review): several branches of the search loop are on lines not
// shown here, including the first condition and the declaration of
// `first`.
2063template<
class EventHandler>
2064void ParseEngine<EventHandler>::_handle_indentation_pop_from_block_map()
2067 using state_type =
typename EventHandler::state;
2068 auto &stack = m_evt_handler->m_stack;
// The pointer walk below relies on the states being stored contiguously.
2069 _RYML_ASSERT_PARSE_(stack.m_callbacks, stack.is_contiguous(), m_evt_handler->m_curr->pos);
2070 _RYML_ASSERT_PARSE_(stack.m_callbacks, m_evt_handler->m_curr >= stack.begin() && m_evt_handler->m_curr < stack.end(), m_evt_handler->m_curr->pos);
2071 const size_t ind = m_evt_handler->m_curr->line_contents.indentation;
2072 state_type
const* popto =
nullptr;
2075 _print_state_stack(flagbuf_);
2077 for(state_type
const* s = m_evt_handler->m_curr-1; s > stack.begin(); --s)
2079 _c4dbgpf(
"searching for state with indentation {}. current: ind={},level={},node={},flags={}", ind, s->indref, s->level, s->node_id, detail::_parser_flags_to_str(flagbuf_, s->flags));
2084 else if(s->indref == ind)
2086 _c4dbgpf(
"same indentation!!! level={} node={}", s->level, s->node_id);
2087 if(popto && has_any(
RTOP, s) && has_none(
RMAP|
RSEQ, s))
// Look at the text after the indentation: a dash that forms a block
// token means the parent was a sequence written without extra
// indentation.
2094 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
2096 _RYML_ASSERT_PARSE_(stack.m_callbacks, first == ind || first ==
npos, m_evt_handler->m_curr->pos);
2097 rem = rem.right_of(first,
true);
2098 _c4dbgpf(
"indentless? rem='{}' first={}", rem, first);
2099 if(rem.begins_with(
'-') && _is_blck_token(rem))
2101 _c4dbgp(
"parent was indentless seq");
// The target must exist and must lie strictly below the current state.
2107 if(!popto || popto >= m_evt_handler->m_curr || popto->level >= m_evt_handler->m_curr->level)
2109 _c4err(
"parse error: incorrect indentation?");
2111 _handle_indentation_pop(popto);
// Validates the line on which a quoted scalar continues after a line
// break. Two errors can be raised: a quoted key spanning lines, and a
// continuation line indented less than required that still has content.
// NOTE(review): the condition guarding the first error is on lines not
// shown here.
2116template<
class EventHandler>
2117void ParseEngine<EventHandler>::_check_valid_newline_in_quoted_scalar()
2121 _c4err(
"multiline quoted keys are invalid");
// The minimum indentation is indref, plus one column when inside a block
// map or block seq (the boolean expression contributes 0 or 1).
2125 const size_t minindent = m_evt_handler->m_curr->indref + ((has_any(
RMAP|
RSEQ) && has_any(
RBLCK)));
2126 _c4dbgpf(
"indent={} vs minindent={} indref={}", m_evt_handler->m_curr->line_contents.indentation, minindent, m_evt_handler->m_curr->indref);
2127 if(m_evt_handler->m_curr->line_contents.indentation < minindent)
2129 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks,
2130 m_evt_handler->m_curr->line_contents.indentation == m_evt_handler->m_curr->line_contents.rem.first_not_of(
' '),
2131 m_evt_handler->m_curr->pos);
// An under-indented line is only an error if something follows the
// indentation.
2132 csubstr trimmed = m_evt_handler->m_curr->line_contents.rem.
sub(m_evt_handler->m_curr->line_contents.indentation);
2133 _c4dbgpf(
"trimmed.len={} line={}", trimmed.len, _prs(m_evt_handler->m_curr->line_contents.rem,
true));
2134 if(C4_UNLIKELY(!!trimmed.len))
2136 _c4err(
"bad indentation");
// Scans a single-quoted scalar whose opening quote is at the current
// offset (asserted). Lines are consumed until the closing quote is found.
// Returns the scanned text together with a flag telling whether it still
// needs filtering.
// Errors: a document token at the beginning of a line inside the scalar,
// and end of file before the closing quote.
// NOTE(review): the declaration of `pos` and several conditions inside
// the loop are on lines not shown here.
2144template<
class EventHandler>
2145typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_scan_scalar_squot()
2150 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, _buf().sub(m_evt_handler->m_curr->pos.offset).begins_with(
'\''), m_evt_handler->m_curr->pos);
// s starts right after the opening quote; the quote itself is consumed.
2153 substr s = _buf().
sub(m_evt_handler->m_curr->pos.offset + 1);
2154 _line_progressed(1);
2155 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->at_line_beginning(), m_evt_handler->m_curr->pos);
2157 bool needs_filter =
false;
2159 while( ! _finished_file())
2161 const csubstr line = m_evt_handler->m_curr->line_contents.rem;
2162 _c4dbgpf(
"scanning single quoted scalar @ line[{}]: {}", m_evt_handler->m_curr->pos.line, _prs(line));
2163 if(C4_UNLIKELY(m_evt_handler->m_curr->at_line_beginning() && _is_doc_token(line)))
2164 _c4err(
"token can not appear at line begin");
2165 for(
size_t i = 0; i < line.len; ++i)
2167 const char curr = line.str[i];
// When there is no next character, a placeholder that is not a quote
// is used instead.
2170 const char next = i+1 < line.len ? line.str[i+1] :
'~';
// Consume up to and including the current character, and convert its
// index in the line to an index relative to s.
2173 _line_progressed(i + 1);
2174 pos = i + (size_t)(line.str - s.str);
2179 needs_filter =
true;
// No closing quote on this line: the scalar goes on to the next line,
// so it will need filtering.
2185 needs_filter =
true;
2186 _line_progressed(line.len);
2189 _check_valid_newline_in_quoted_scalar();
2192 _c4err(
"reached end of file while looking for closing quote");
2196 _c4dbgpf(
"found closing quote at: {}", pos);
2197 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, pos !=
npos, m_evt_handler->m_curr->pos);
2198 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, pos >= 0, m_evt_handler->m_curr->pos);
2199 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.end() >= _buf().begin() && s.end() <= _buf().end(), m_evt_handler->m_curr->pos);
2200 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.end() == _buf().end() || *s.end() ==
'\'', m_evt_handler->m_curr->pos);
// presumably trims s to its first pos characters, so that the closing
// quote is excluded - confirm against _set_first_strict()
2201 _set_first_strict(s, pos);
2203 _c4prscalar(
"scanned squoted scalar", s,
true);
2205 return ScannedScalar { s, needs_filter };
// Scans a double-quoted scalar whose opening quote is at the current
// offset (asserted). Lines are consumed until the closing quote is found.
// Returns the scanned text together with a flag telling whether it still
// needs filtering.
// Errors: a document token at the beginning of a line inside the scalar,
// and end of file before the closing quote.
// NOTE(review): the declaration of `pos` and several conditions inside
// the loop are on lines not shown here.
2210template<
class EventHandler>
2211typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_scan_scalar_dquot()
2216 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, _buf().sub(m_evt_handler->m_curr->pos.offset).begins_with(
'"'), m_evt_handler->m_curr->pos);
// s starts right after the opening quote; the quote itself is consumed.
2219 substr s = _buf().
sub(m_evt_handler->m_curr->pos.offset + 1);
2220 _line_progressed(1);
2221 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->at_line_beginning(), m_evt_handler->m_curr->pos);
2223 bool needs_filter =
false;
2225 while( ! _finished_file())
// Compiler-specific guard, applied for GCC 13 only.
// NOTE(review): the reason is not stated in the lines shown here.
2227 #if defined(__GNUC__) && (__GNUC__ == 13)
2228 C4_DONT_OPTIMIZE(m_evt_handler->m_curr->line_contents.rem);
2230 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
2231 _c4dbgpf(
"scanning double quoted scalar @ line[{}]: line='{}'", m_evt_handler->m_curr->pos.line, rem);
2232 if(C4_UNLIKELY(m_evt_handler->m_curr->at_line_beginning() && _is_doc_token(rem)))
2233 _c4err(
"token can not appear at line begin");
2234 for(
size_t i = 0; i < rem.len; ++i)
2236 const char curr = rem.str[i];
// When there is no next character, a placeholder is used that is neither
// a quote nor a backslash.
2240 const char next = i+1 < rem.len ? rem.str[i+1] :
'~';
2241 needs_filter =
true;
// presumably reached after a backslash: a following quote or backslash
// is part of the escape and must not be read as a terminator - confirm
2242 if(next ==
'"' || next ==
'\\')
2245 else if(curr ==
'"')
// Closing quote: consume up to and including it, and convert its index
// in the line to an index relative to s.
2247 _line_progressed(i + 1);
2248 pos = i + (size_t)(rem.str - s.str);
// No closing quote on this line: the scalar goes on to the next line,
// so it will need filtering.
2254 needs_filter =
true;
2255 _line_progressed(rem.len);
2258 _check_valid_newline_in_quoted_scalar();
2261 _c4err(
"reached end of file while looking for closing quote");
2265 _c4dbgpf(
"found closing quote at: {}", pos);
2266 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, pos !=
npos, m_evt_handler->m_curr->pos);
2267 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, pos >= 0, m_evt_handler->m_curr->pos);
2268 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.end() >= _buf().begin() && s.end() <= _buf().end(), m_evt_handler->m_curr->pos);
2269 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.end() == _buf().end() || *s.end() ==
'"', m_evt_handler->m_curr->pos);
// presumably trims s to its first pos characters, so that the closing
// quote is excluded - confirm against _set_first_strict()
2270 _set_first_strict(s, pos);
2272 _c4prscalar(
"scanned dquoted scalar", s,
true);
2274 return ScannedScalar{s, needs_filter};
// Scans a block scalar (introduced by a pipe or a greater-than sign).
//   sb:     receives the raw, unfiltered block text (sb->scalar) and the
//           indentation that was determined for it (sb->indentation).
//   indref: indentation reference of the enclosing context; must not be
//           npos (asserted).
// The header is parsed first: an optional chomping character (dash for
// strip, plus for keep, clip otherwise) and an optional single-digit
// indentation. Then the following lines are collected while they belong
// to the block.
// NOTE(review): many lines of this function, including the declarations
// of `t`, `rest` and `lc` and several branch conditions, are not shown
// here; any further outputs written to `sb` are also outside the lines
// shown.
2279template<
class EventHandler>
2280void ParseEngine<EventHandler>::_scan_block(ScannedBlock *C4_RESTRICT sb,
size_t indref)
2282 _c4dbgpf(
"blck: indref={}", indref);
2283 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, indref !=
npos, m_evt_handler->m_curr->pos);
2286 csubstr s = m_evt_handler->m_curr->line_contents.rem;
2287 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with(
'|') || s.begins_with(
'>'), m_evt_handler->m_curr->pos);
2289 _c4dbgpf(
"blck: specs={}", _prs(s));
// Defaults when the header carries no indicators: clip chomping, and an
// indentation that is not yet known.
2292 BlockChomp_e chomp = CHOMP_CLIP;
2293 size_t indentation =
npos;
2296 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with_any(
"|>"), m_evt_handler->m_curr->pos);
2298 _c4dbgpf(
"blck: spec is multichar: {}", _prs(t));
2299 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, t.len >= 1, m_evt_handler->m_curr->pos);
// Chomping character, if any.
2300 size_t pos = t.first_of(
"-+");
2301 _c4dbgpf(
"blck: spec chomp char: pos={}", pos);
2304 _c4dbgpf(
"blck: spec chomp char: {}", _c4prc(t[pos]));
2307 _c4dbgp(
"blck: chomp=STRIP");
2308 chomp = CHOMP_STRIP;
2310 else if(t[pos] ==
'+')
2312 _c4dbgp(
"blck: chomp=KEEP");
2319 _c4dbgpf(
"blck: spec is now: {}", _prs(t));
// Explicit indentation: at most one decimal digit, and not zero. It is
// relative, so the indref of the current state is added to it.
2322 pos = t.first_not_of(
"0123456789");
2326 _c4dbgpf(
"blck: parse indentation digits: {}", _prs(rest));
2327 if(C4_UNLIKELY(rest.len > 1))
2328 _c4err(
"parse error: invalid indentation");
2329 if(C4_UNLIKELY( !
c4::atou(rest, &indentation)))
2330 _c4err(
"parse error: could not read indentation as decimal");
2331 if(C4_UNLIKELY( ! indentation))
2332 _c4err(
"parse error: null indentation");
2333 _c4dbgpf(
"blck: indentation specified: {}. add {} from curr state -> {}", indentation, m_evt_handler->m_curr->indref, indentation+indref);
2334 indentation += m_evt_handler->m_curr->indref;
// No digits: after optional whitespace only a comment may follow, and
// the comment must not begin immediately (t itself must not start with
// a hash).
2338 rest = t.triml(
" \t");
2339 _c4dbgpf(
"blck: digits empty. t={} trimmed={} iscomm={} t.iscomm={}", _prs(t), _prs(rest), rest.begins_with(
'#'), t.begins_with(
'#'));
2340 if(C4_UNLIKELY(rest.len && (rest.str[0] !=
'#' || t.str[0] ==
'#')))
2341 _c4err(
"parse error: invalid token");
2345 _c4dbgpf(
"blck: style={} chomp={} indentation={}", s.begins_with(
'>') ?
"fold" :
"literal", chomp==CHOMP_CLIP ?
"clip" : (chomp==CHOMP_STRIP ?
"strip" :
"keep"), indentation);
// Consume the header.
2348 _line_progressed(s.len);
// The raw block starts as an empty view at the current offset and grows
// as lines are appended below.
2353 substr raw_block(_buf().data() + m_evt_handler->m_curr->pos.offset,
size_t(0));
2354 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, raw_block.begin() == m_evt_handler->m_curr->line_contents.full.str, m_evt_handler->m_curr->pos);
2362 size_t num_lines = 0;
2363 size_t first = m_evt_handler->m_curr->pos.line;
// Indentation candidate gathered from leading lines while the real
// indentation is still unknown.
2364 size_t provisional_indentation =
npos;
2366 while(( ! _finished_file()))
// Read the next line into a local copy, so the parser state is only
// updated once the line is accepted as part of the block.
2369 lc.reset_with_next_line(_buf(), m_evt_handler->m_curr->pos.offset);
// Compiler-specific guard, applied for GCC 12 and 13 only.
// NOTE(review): the reason is not stated in the lines shown here.
2370 #if defined(__GNUC__) && (__GNUC__ == 12 || __GNUC__ == 13)
2371 C4_DONT_OPTIMIZE(lc.rem);
2373 _c4dbgpf(
"blck: peeking at {}", _prs(lc.rem.trimr(
"\r\n"),
true));
// Case 1: the indentation of the block is already known.
2375 if(indentation !=
npos)
2377 _c4dbgpf(
"blck: indentation={}", indentation);
// A non-blank line indented less than the block ends it.
2379 if(lc.indentation < indentation && ( ! lc.rem.trim(
" \t").empty()))
2383 _c4dbgpf(
"blck: indentation decreased ref={} thisline={}", indentation, lc.indentation);
2387 _c4err(
"indentation decreased without any scalar");
// With zero indentation, only a document token can end the block.
2391 else if(indentation == 0)
2393 _c4dbgpf(
"blck: noindent. lc.rem={}", _prs(lc.rem));
2394 if(_is_doc_token(lc.rem))
2396 _c4dbgp(
"blck: stop. indentation=0 and doc ended");
// Case 2: the indentation is not known yet; derive it from this line.
2403 const size_t fns = lc.rem.first_not_of(
' ');
2404 _c4dbgpf(
"blck: indentation ref not set. firstnonws={}", fns);
2407 _c4dbgpf(
"blck: line not empty. indref={} indprov={} indentation={}", indref, provisional_indentation, lc.indentation);
2408 if(C4_UNLIKELY(lc.full.begins_with(
'\t')))
2410 if(provisional_indentation ==
npos)
2412 if(lc.indentation < indref)
2414 _c4dbgpf(
"blck: block terminated indentation={} < indref={}", lc.indentation, indref);
2415 if(raw_block.len == 0)
2417 _c4dbgp(
"blck: was empty, undo next line");
2422 else if(lc.indentation == m_evt_handler->m_curr->indref)
2426 _c4dbgpf(
"blck: block terminated. reading container and indentation={}==indref={}", lc.indentation, m_evt_handler->m_curr->indref);
2430 _c4dbgpf(
"blck: set indentation ref from this line: ref={}", lc.indentation);
2431 indentation = lc.indentation;
2435 if(lc.indentation >= provisional_indentation)
2437 _c4dbgpf(
"blck: set indentation ref from provisional indentation: provisional_ref={}, thisline={}", provisional_indentation, lc.indentation);
2439 indentation = lc.indentation;
2443 if(lc.indentation >= indref)
2444 _c4err(
"parse error: first non-empty block line should have at least the original indentation");
2445 _c4dbgp(
"blck: finished");
// Blank line, or a line of spaces only: it can only adjust the
// provisional indentation.
2452 _c4dbgpf(
"blck: line empty or {} spaces. line_indentation={} prov_indentation={}", lc.rem.len, lc.indentation, provisional_indentation);
2453 if(provisional_indentation !=
npos)
2455 if(lc.rem.len >= provisional_indentation)
2457 _c4dbgpf(
"blck: increase provisional_ref {} -> {}", provisional_indentation, lc.rem.len);
2458 provisional_indentation = lc.rem.len;
// When the line has no indentation, the boolean result of has_any()
// supplies the value (0 or 1).
2463 provisional_indentation = lc.indentation ? lc.indentation : has_any(
RSEQ|
RVAL);
2464 _c4dbgpf(
"blck: initialize provisional_ref={}", provisional_indentation);
2465 if(provisional_indentation ==
npos)
2467 provisional_indentation = lc.rem.len ? lc.rem.len : has_any(
RSEQ|
RVAL);
2468 _c4dbgpf(
"blck: initialize provisional_ref={}", provisional_indentation);
// The provisional indentation is never smaller than indref.
2470 if(provisional_indentation < indref)
2472 provisional_indentation = indref;
2473 _c4dbgpf(
"blck: initialize provisional_ref={}", provisional_indentation);
// The line belongs to the block: commit it as the current line, extend
// the raw block by the full line and consume the line.
2479 m_evt_handler->m_curr->line_contents = lc;
2480 _c4dbgpf(
"blck: append '{}'", m_evt_handler->m_curr->line_contents.rem);
2481 raw_block.len += m_evt_handler->m_curr->line_contents.full.len;
2482 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
2486 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.line == (first + num_lines) || (raw_block.len == 0), m_evt_handler->m_curr->pos);
2487 C4_UNUSED(num_lines);
// If no line fixed the indentation, fall back to the provisional one.
2490 if(indentation ==
npos)
2492 _c4dbgpf(
"blck: set indentation from provisional: {}", provisional_indentation);
2493 indentation = provisional_indentation;
2499 _c4prscalar(
"scanned block", raw_block,
true);
2501 sb->scalar = raw_block;
2502 sb->indentation = indentation;
// Debug trace helper for the whitespace-filtering functions: the first
// definition prefixes the message with the read and write positions of
// the filter processor (proc.rpos, proc.wpos); the second one expands to
// nothing.
// NOTE(review): the preprocessor conditional that selects between the
// two definitions is on lines not shown here.
2514#define _c4dbgfws(fmt, ...) _c4dbgpf("filt_ws[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2516#define _c4dbgfws(...)
// Whitespace handling step of scalar filtering: the processor is
// positioned on a space or tab (asserted), and the code looks ahead for
// the first character that is not whitespace.
// NOTE(review): the line carrying the function signature is not shown
// here; presumably this is _filter_ws_handle_to_first_non_space(), which
// the two functions that follow call - confirm. The return statements
// are not shown either.
2519template<
class EventHandler>
2520template<
class FilterProcessor>
2523 _c4dbgfws(
"found whitespace '{}'", _c4prc(proc.curr()));
2524 _RYML_ASSERT_PARSE_(this->callbacks(), proc.curr() ==
' ' || proc.curr() ==
'\t', m_evt_handler->m_curr->pos);
// At the very start of the source only spaces are stepped over; anywhere
// else both spaces and tabs are.
2526 const size_t first_pos = proc.rpos > 0 ? proc.src.first_not_of(
" \t", proc.rpos) : proc.src.first_not_of(
' ', proc.rpos);
2527 if(first_pos !=
npos)
2529 const char first_char = proc.src[first_pos];
2530 _c4dbgfws(
"firstnonws='{}'@{}", _c4prc(first_char), first_pos);
// Whitespace directly followed by a line break is trailing on its line
// and is skipped.
2531 if(first_char ==
'\n' || first_char ==
'\r')
2533 _c4dbgfws(
"whitespace is trailing on line",
"");
2534 proc.skip(first_pos - proc.rpos);
2539 _c4dbgfws(
"legit whitespace. sofar={}", _prs(proc.sofar()));
2543 _c4dbgfws(
"whitespace is trailing on line",
"");
2547template<
class EventHandler>
2548template<
class FilterProcessor>
2551 if(!_filter_ws_handle_to_first_non_space(proc))
2553 _c4dbgfws(
"... everything else is trailing whitespace - copy {} chars", proc.src.len - proc.rpos);
2554 proc.copy(proc.src.len - proc.rpos);
2558template<
class EventHandler>
2559template<
class FilterProcessor>
2562 if(!_filter_ws_handle_to_first_non_space(proc))
2564 _c4dbgfws(
"... everything else is trailing whitespace - skip {} chars", proc.src.len - proc.rpos);
2565 proc.skip(proc.src.len - proc.rpos);
2579#define _c4dbgfps(fmt, ...) _c4dbgpf("filt_plain[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2581#define _c4dbgfps(fmt, ...)
2584template<
class EventHandler>
2585template<
class FilterProcessor>
2588 _RYML_ASSERT_PARSE_(this->callbacks(), proc.curr() ==
'\n', m_evt_handler->m_curr->pos);
2590 _c4dbgfps(
"found newline. sofar={}", _prs(proc.sofar()));
2591 size_t ii = proc.rpos;
2592 const size_t numnl_following = _count_following_newlines(proc.src, &ii, indentation);
2595 proc.set(
'\n', numnl_following);
2596 _c4dbgfps(
"{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ?
"in the middle" :
"at the end", proc.rpos-ii);
2600 const size_t ret = proc.src.first_not_of(
" \t", proc.rpos+1);
2604 _c4dbgfps(
"single newline. convert to space. ret={}/{}. sofar={}", ii, proc.src.len, _prs(proc.sofar()));
2608 _c4dbgfps(
"last newline, everything else is whitespace. ii={}/{}", ii, proc.src.len);
2615template<
class EventHandler>
2616template<
class FilterProcessor>
2619 _RYML_ASSERT_PARSE_(this->callbacks(), indentation !=
npos, m_evt_handler->m_curr->pos);
2620 _c4dbgfps(
"before={}", _prs(proc.src));
2622 while(proc.has_more_chars())
2624 const char curr = proc.curr();
2625 _c4dbgfps(
"'{}', sofar={}", _c4prc(curr), _prs(proc.sofar()));
2630 _c4dbgfps(
"whitespace", curr);
2631 _filter_ws_skip_trailing(proc);
2634 _c4dbgfps(
"newline", curr);
2635 _filter_nl_plain(proc, indentation);
2638 _c4dbgfps(
"carriage return, ignore", curr);
2647 _c4dbgfps(
"after={}", _prs(proc.sofar()));
2649 return proc.result();
2655template<
class EventHandler>
2659 return _filter_plain(proc, indentation);
2662template<
class EventHandler>
2666 return _filter_plain(proc, indentation);
2677#define _c4dbgfsq(fmt, ...) _c4dbgpf("filt_squo[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2679#define _c4dbgfsq(fmt, ...)
2682template<
class EventHandler>
2683template<
class FilterProcessor>
2686 _RYML_ASSERT_PARSE_(this->callbacks(), proc.curr() ==
'\n', m_evt_handler->m_curr->pos);
2688 _c4dbgfsq(
"found newline. sofar={}", _prs(proc.sofar()));
2689 size_t ii = proc.rpos;
2690 const size_t numnl_following = _count_following_newlines(proc.src, &ii);
2693 proc.set(
'\n', numnl_following);
2694 _c4dbgfsq(
"{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ?
"in the middle" :
"at the end", proc.rpos-ii);
2698 const size_t ret = proc.src.first_not_of(
" \t", proc.rpos+1);
2702 _c4dbgfsq(
"single newline. convert to space. ret={}/{}. sofar={}", ii, proc.src.len, _prs(proc.sofar()));
2707 _c4dbgfsq(
"single newline. convert to space. ii={}/{}. sofar={}", ii, proc.src.len, _prs(proc.sofar()));
2713template<
class EventHandler>
2714template<
class FilterProcessor>
2717 _c4dbgfsq(
"before={}", _prs(proc.src));
2721 while(proc.has_more_chars())
2723 const char curr = proc.curr();
2724 _c4dbgfsq(
"'{}', sofar={}", _c4prc(curr), _prs(proc.sofar()));
2729 _c4dbgfsq(
"whitespace", curr);
2730 _filter_ws_copy_trailing(proc);
2733 _c4dbgfsq(
"newline", curr);
2734 _filter_nl_squoted(proc);
2737 _c4dbgfsq(
"skip cr", curr);
2741 _c4dbgfsq(
"squote", curr);
2742 if(proc.next() ==
'\'')
2744 _c4dbgfsq(
"two consecutive squotes", curr);
2759 _c4dbgfsq(
": #filteredchars={} after={}", proc.src.len-proc.sofar().len, _prs(proc.sofar()));
2761 return proc.result();
2766template<
class EventHandler>
2770 return _filter_squoted(proc);
2773template<
class EventHandler>
2777 return _filter_squoted(proc);
2788#define _c4dbgfdq(fmt, ...) _c4dbgpf("filt_dquo[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2790#define _c4dbgfdq(...)
2793template<
class EventHandler>
2794template<
class FilterProcessor>
2797 _RYML_ASSERT_PARSE_(this->callbacks(), proc.curr() ==
'\n', m_evt_handler->m_curr->pos);
2799 _c4dbgfdq(
"found newline. sofar={}", _prs(proc.sofar()));
2800 size_t ii = proc.rpos;
2801 const size_t numnl_following = _count_following_newlines(proc.src, &ii);
2804 proc.set(
'\n', numnl_following);
2805 _c4dbgfdq(
"{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ?
"in the middle" :
"at the end", proc.rpos-ii);
2809 const size_t ret = proc.src.first_not_of(
" \t", proc.rpos+1);
2813 _c4dbgfdq(
"single newline. convert to space. ret={}/{}. sofar={}", ii, proc.src.len, _prs(proc.sofar()));
2818 _c4dbgfdq(
"single newline. convert to space. ii={}/{}. sofar={}", ii, proc.src.len, _prs(proc.sofar()));
2820 if(ii < proc.src.len && proc.src.str[ii] ==
'\\')
2822 _c4dbgfdq(
"backslash at [{}]", ii);
2823 const char next = ii+1 < proc.src.len ? proc.src.str[ii+1] :
'\0';
2824 if(next ==
' ' || next ==
'\t')
2826 _c4dbgfdq(
"extend skip to backslash",
"");
2834template<
class EventHandler>
2835template<
class FilterProcessor>
2838 const size_t szp1 = sz + 1u;
2839 if(C4_UNLIKELY(proc.rpos + szp1 >= proc.src.len))
2840 _c4err(
"codepoint requires {} hex digits. scalar pos={}", sz, proc.rpos);
2842 csubstr codepoint = proc.src.
sub(proc.rpos + 2u, sz);
2843 _c4dbgfdq(
"utf8 ~~~{}~~~ rpos={} rem=~~~{}~~~", codepoint, proc.rpos, proc.src.sub(proc.rpos));
2844 uint32_t codepoint_val = {};
2845 if(C4_UNLIKELY(!
read_hex(codepoint, &codepoint_val)))
2846 _c4err(
"failed to parse codepoint. scalar pos={}", proc.rpos);
2847 const size_t numbytes =
decode_code_point((uint8_t*)readbuf,
sizeof(readbuf), codepoint_val);
2848 if(C4_UNLIKELY(numbytes == 0))
2849 _c4err(
"failed to decode code point={}", proc.rpos);
2850 _RYML_ASSERT_PARSE_(callbacks(), numbytes <= 4, m_evt_handler->m_curr->pos);
2851 proc.translate_esc_bulk(readbuf, numbytes, szp1);
2852 _c4dbgfdq(
"utf8 after rpos={} rem=~~~{}~~~", proc.rpos, proc.src.sub(proc.rpos));
2855template<
class EventHandler>
2856template<
class FilterProcessor>
2859 char next = proc.next();
2860 _c4dbgfdq(
"backslash, next='{}'", _c4prc(next));
2863 if(proc.rpos+2 < proc.src.len && proc.src.str[proc.rpos+2] ==
'\n')
2867 _c4dbgfdq(
"[{}]: was \\r\\n, now next='\\n'", proc.rpos);
2873 size_t ii = proc.rpos + 2;
2874 for( ; ii < proc.src.len; ++ii)
2877 if(proc.src.str[ii] ==
' ' || proc.src.str[ii] ==
'\t')
2882 proc.skip(ii - proc.rpos);
2884 else if(next ==
'"' || next ==
'/' || next ==
' ' || next ==
'\t')
2887 proc.translate_esc(next);
2888 _c4dbgfdq(
"here, used '{}'", _c4prc(next));
2890 else if(next ==
'\r')
2894 else if(next ==
'n')
2896 proc.translate_esc(
'\n');
2898 else if(next ==
'r')
2900 proc.translate_esc(
'\r');
2902 else if(next ==
't')
2904 proc.translate_esc(
'\t');
2906 else if(next ==
'\\')
2908 proc.translate_esc(
'\\');
2910 else if(next ==
'x')
2912 _filter_dquoted_backslash_decode(proc, 2u);
2914 else if(next ==
'u')
2916 _filter_dquoted_backslash_decode(proc, 4u);
2918 else if(next ==
'U')
2920 _filter_dquoted_backslash_decode(proc, 8u);
2923 else if(next ==
'0')
2925 proc.translate_esc(
'\0');
2927 else if(next ==
'b')
2929 proc.translate_esc(
'\b');
2931 else if(next ==
'f')
2933 proc.translate_esc(
'\f');
2935 else if(next ==
'a')
2937 proc.translate_esc(
'\a');
2939 else if(next ==
'v')
2941 proc.translate_esc(
'\v');
2943 else if(next ==
'e')
2945 proc.translate_esc(
'\x1b');
2947 else if(next ==
'_')
2950 const char payload[] = {
2951 _RYML_CHCONST(-0x3e, 0xc2),
2952 _RYML_CHCONST(-0x60, 0xa0),
2954 proc.translate_esc_bulk(payload, 2, 1);
2956 else if(next ==
'N')
2959 const char payload[] = {
2960 _RYML_CHCONST(-0x3e, 0xc2),
2961 _RYML_CHCONST(-0x7b, 0x85),
2963 proc.translate_esc_bulk(payload, 2, 1);
2965 else if(next ==
'L')
2968 const char payload[] = {
2969 _RYML_CHCONST(-0x1e, 0xe2),
2970 _RYML_CHCONST(-0x80, 0x80),
2971 _RYML_CHCONST(-0x58, 0xa8),
2973 proc.translate_esc_extending(payload, 3, 1);
2975 else if(next ==
'P')
2978 const char payload[] = {
2979 _RYML_CHCONST(-0x1e, 0xe2),
2980 _RYML_CHCONST(-0x80, 0x80),
2981 _RYML_CHCONST(-0x57, 0xa9),
2983 proc.translate_esc_extending(payload, 3, 1);
2985 else if(next ==
'\0')
2991 _c4err(
"unknown character '{}' after '\\' pos={}", _c4prc(next), proc.rpos);
2993 _c4dbgfdq(
"backslash...sofar={}", _prs(proc.sofar()));
2997template<
class EventHandler>
2998template<
class FilterProcessor>
3001 _c4dbgfdq(
"before={}", _prs(proc.src));
3004 while(proc.has_more_chars())
3006 const char curr = proc.curr();
3007 _c4dbgfdq(
"'{}' sofar={}", _c4prc(curr), _prs(proc.sofar()));
3013 _c4dbgfdq(
"whitespace", curr);
3014 _filter_ws_copy_trailing(proc);
3019 _c4dbgfdq(
"newline", curr);
3020 _filter_nl_dquoted(proc);
3025 _c4dbgfdq(
"carriage return, ignore", curr);
3031 _filter_dquoted_backslash(proc);
3041 _c4dbgfdq(
"after={}", _prs(proc.sofar()));
3042 return proc.result();
3048template<
class EventHandler>
3052 return _filter_dquoted(proc);
3055template<
class EventHandler>
3059 return _filter_dquoted(proc);
// Walks `s` backwards looking for newline characters and, for each one found,
// compares `first` against `indentation`.
// NOTE(review): the declarations of `rem` and `first`, and every return
// statement, are on lines not visible in this excerpt. Presumably `rem` is the
// text after the newline and the function returns the newline position or
// npos — confirm against the full file.
3068C4_NO_INLINE
inline size_t _find_last_newline_and_larger_indentation(
csubstr s,
size_t indentation)
noexcept
// guard: the loop start index below (s.len-indentation-1) would underflow otherwise
3070 if(indentation + 1 > s.len)
// reverse scan; the loop stops when the unsigned index wraps past zero to size_t(-1)
3072 for(
size_t i = s.len-indentation-1; i !=
size_t(-1); --i)
3074 if(s.str[i] ==
'\n')
// when `first` is npos, use the whole length of `rem` instead
3078 first = (first !=
npos) ? first : rem.len;
3079 if(first > indentation)
3086template<
class EventHandler>
3087template<
class FilterProcessor>
3090 _RYML_ASSERT_PARSE_(this->callbacks(), chomp == CHOMP_CLIP || chomp == CHOMP_KEEP || chomp == CHOMP_STRIP, m_evt_handler->m_curr->pos);
3091 _RYML_ASSERT_PARSE_(this->callbacks(), proc.rem().first_not_of(
" \n\r") ==
npos, m_evt_handler->m_curr->pos);
3095 #define _c4dbgchomp(fmt, ...) _c4dbgpf("chomp[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3097 #define _c4dbgchomp(...)
3102 size_t last = _find_last_newline_and_larger_indentation(proc.rem(), indentation);
3105 _c4dbgchomp(
"found newline and larger indentation. last={}", last);
3106 last = proc.rpos + last + size_t(1) + indentation;
3107 _RYML_ASSERT_PARSE_(this->callbacks(), last <= proc.src.len, m_evt_handler->m_curr->pos);
3109 while((proc.rpos < last) && proc.has_more_chars())
3111 const char curr = proc.curr();
3112 _c4dbgchomp(
"curr='{}'", _c4prc(curr));
3117 _c4dbgchomp(
"newline! remlen={}", proc.rem().len);
3120 csubstr at_next_line = proc.rem();
3121 if(at_next_line.begins_with(
' '))
3123 _c4dbgchomp(
"next line begins with spaces. indentation={}", indentation);
3125 size_t first_non_space = at_next_line.first_not_of(
' ');
3126 _c4dbgchomp(
"first_non_space={}", first_non_space);
3127 if(first_non_space ==
npos)
3129 _c4dbgchomp(
"{} spaces, to the end", at_next_line.len);
3130 first_non_space = at_next_line.len;
3132 if(first_non_space <= indentation)
3134 _c4dbgchomp(
"skip spaces={}<=indentation={}", first_non_space, indentation);
3135 proc.skip(first_non_space);
3139 _c4dbgchomp(
"skip indentation={}<spaces={}", indentation, first_non_space);
3140 proc.skip(indentation);
3142 _c4dbgchomp(
"copy {}={}-{} spaces", first_non_space - indentation, first_non_space, indentation);
3143 proc.copy(first_non_space - indentation);
3161 bool had_one =
false;
3162 while(proc.has_more_chars())
3164 const char curr = proc.curr();
3165 _c4dbgchomp(
"CLIP: '{}'", _c4prc(curr));
3170 _c4dbgchomp(
"copy newline!", curr);
3178 _c4dbgchomp(
"skip!", curr);
3185 _c4dbgchomp(
"chomp=CLIP: add missing newline @{}", proc.wpos);
3192 _c4dbgchomp(
"chomp=KEEP: copy all remaining new lines of {} characters", proc.rem().len);
3193 while(proc.has_more_chars())
3195 const char curr = proc.curr();
3196 _c4dbgchomp(
"KEEP: '{}'", _c4prc(curr));
3200 _c4dbgchomp(
"copy newline!", curr);
3205 _c4dbgchomp(
"skip!", curr);
3214 _c4dbgchomp(
"chomp=STRIP: strip {} characters", proc.rem().len);
3226#define _c4dbgfb(fmt, ...) _c4dbgpf("filt_block[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3228#define _c4dbgfb(...)
3231template<
class EventHandler>
3232template<
class FilterProcessor>
3238 size_t first = rem.first_not_of(
' ');
3241 _c4dbgfb(
"{} spaces follow before next nonws character", first);
3242 if(first < indentation)
3244 _c4dbgfb(
"skip {}<{} spaces from indentation", first, indentation);
3249 _c4dbgfb(
"skip {} spaces from indentation", indentation);
3250 proc.skip(indentation);
3253 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
3256 _c4dbgfb(
"all spaces to the end: {} spaces", first);
3260 if(first < indentation)
3262 _c4dbgfb(
"skip everything", first);
3263 proc.skip(proc.src.len - proc.rpos);
3267 _c4dbgfb(
"skip {} spaces from indentation", indentation);
3268 proc.skip(indentation);
3276template<
class EventHandler>
3277template<
class FilterProcessor>
3281 _c4dbgfb(
"ws: contents_len={} wslen={}", contents.len, proc.src.len-contents.len);
3284 _c4dbgfb(
"ws: all whitespace: len={}", proc.src.len);
3285 if(chomp == CHOMP_KEEP && proc.src.len)
3287 _c4dbgfb(
"ws: chomp=KEEP all {} newlines", proc.src.count(
'\n'));
3288 while(proc.has_more_chars())
3290 const char curr = proc.curr();
3302 return contents.len;
3305template<
class EventHandler>
3306template<
class FilterProcessor>
3309 _c4dbgfb(
"contents_len={}", contents_len);
3311 _RYML_ASSERT_PARSE_(this->callbacks(), contents_len > 0u, m_evt_handler->m_curr->pos);
3315 size_t firstnewl = proc.src.first_of(
'\n', contents_len);
3316 if(firstnewl !=
npos)
3318 contents_len = firstnewl;
3319 _c4dbgfb(
"contents_len={} <--- firstnewl={}", contents_len, firstnewl);
3323 contents_len = proc.src.len;
3324 _c4dbgfb(
"contents_len={} <--- src.len={}", contents_len, proc.src.len);
3327 return contents_len;
3339#define _c4dbgfbl(fmt, ...) _c4dbgpf("filt_block_lit[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3341#define _c4dbgfbl(...)
3344template<
class EventHandler>
3345template<
class FilterProcessor>
3348 _c4dbgfbl(
"indentation={} before={}", indentation, _prs(proc.src));
3350 size_t contents_len = _handle_all_whitespace(proc, chomp);
3352 return proc.result();
3354 contents_len = _extend_to_chomp(proc, contents_len);
3356 _c4dbgfbl(
"to filter={}", _prs(proc.src.first(contents_len)));
3358 _filter_block_indentation(proc, indentation);
3361 while(proc.has_more_chars(contents_len))
3363 const char curr = proc.curr();
3364 _c4dbgfbl(
"'{}' sofar={}", _c4prc(curr), _prs(proc.sofar()));
3369 _c4dbgfbl(
"found newline. skip indentation on the next line", curr);
3371 _filter_block_indentation(proc, indentation);
3383 _c4dbgfbl(
"before chomp: #tochomp={} sofar={}", proc.rem().len, _prs(proc.sofar()));
3385 _filter_chomp(proc, chomp, indentation);
3387 _c4dbgfbl(
"final={}", _prs(proc.sofar()));
3389 return proc.result();
3394template<
class EventHandler>
3398 return _filter_block_literal(proc, indentation, chomp);
3401template<
class EventHandler>
3405 return _filter_block_literal(proc, indentation, chomp);
3415#define _c4dbgfbf(fmt, ...) _c4dbgpf("filt_block_folded[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3417#define _c4dbgfbf(...)
3421template<
class EventHandler>
3422template<
class FilterProcessor>
3425 _filter_block_indentation(proc, indentation);
3426 while(proc.has_more_chars(len))
3428 const char curr = proc.curr();
3429 _c4dbgfbf(
"'{}' sofar={}", _c4prc(curr), _prs(proc.sofar()));
3433 _c4dbgfbf(
"newline.", curr);
3435 _filter_block_indentation(proc, indentation);
3443 size_t first = proc.rem().first_not_of(
" \t");
3444 _c4dbgfbf(
"space. first={}", first);
3446 first = proc.rem().len;
3447 _c4dbgfbf(
"... indentation increased to {}", first);
3448 _filter_block_folded_indented_block(proc, indentation, len, first);
3452 _c4dbgfbf(
"newl leading: not space, not newline. stop.", 0);
3458template<
class EventHandler>
3459template<
class FilterProcessor>
3465 _c4dbgfbf(
"... this is the first newline. turn into space. wpos={}", proc.wpos);
3466 wpos_at_first_newl = proc.wpos;
3471 _c4dbgfbf(
"... this is the second newline. prev space (at wpos={}) must be newline", wpos_at_first_newl);
3472 _RYML_ASSERT_PARSE_(this->callbacks(), wpos_at_first_newl !=
npos, m_evt_handler->m_curr->pos);
3473 _RYML_ASSERT_PARSE_(this->callbacks(), proc.sofar()[wpos_at_first_newl] ==
' ', m_evt_handler->m_curr->pos);
3474 _RYML_ASSERT_PARSE_(this->callbacks(), wpos_at_first_newl + 1u == proc.wpos, m_evt_handler->m_curr->pos);
3476 proc.set_at(wpos_at_first_newl,
'\n');
3477 _RYML_ASSERT_PARSE_(this->callbacks(), proc.sofar()[wpos_at_first_newl] ==
'\n', m_evt_handler->m_curr->pos);
3480 _c4dbgfbf(
"... subsequent newline (num_newl={}). copy", num_newl);
3484 return wpos_at_first_newl;
3487template<
class EventHandler>
3488template<
class FilterProcessor>
3491 _RYML_ASSERT_PARSE_(this->callbacks(), proc.curr() ==
'\n', m_evt_handler->m_curr->pos);
3492 size_t num_newl = 0;
3493 size_t wpos_at_first_newl =
npos;
3494 while(proc.has_more_chars(len))
3496 const char curr = proc.curr();
3497 _c4dbgfbf(
"'{}' sofar={}", _c4prc(curr), _prs(proc.sofar()));
3502 _c4dbgfbf(
"newline. sofar={}", num_newl);
3538 wpos_at_first_newl = _filter_block_folded_newlines_compress(proc, ++num_newl, wpos_at_first_newl);
3539 _filter_block_indentation(proc, indentation);
3545 size_t first = proc.rem().first_not_of(
" \t");
3546 _c4dbgfbf(
"space. first={}", first);
3548 first = proc.rem().len;
3549 _c4dbgfbf(
"... indentation increased to {}", first);
3552 _c4dbgfbf(
"... prev space (at wpos={}) must be newline", wpos_at_first_newl);
3553 proc.set_at(wpos_at_first_newl,
'\n');
3557 _c4dbgfbf(
"... add missing newline", wpos_at_first_newl);
3560 _filter_block_folded_indented_block(proc, indentation, len, first);
3562 wpos_at_first_newl =
npos;
3569 _c4dbgfbf(
"not space, not newline. stop.", 0);
3576template<
class EventHandler>
3577template<
class FilterProcessor>
3580 _RYML_ASSERT_PARSE_(this->callbacks(), (proc.rem().first_not_of(
" \t") == curr_indentation) || (proc.rem().first_not_of(
" \t") ==
npos), m_evt_handler->m_curr->pos);
3581 if(curr_indentation)
3582 proc.copy(curr_indentation);
3583 while(proc.has_more_chars(len))
3585 const char curr = proc.curr();
3586 _c4dbgfbf(
"'{}' sofar={}", _c4prc(curr), _prs(proc.sofar()));
3592 _filter_block_indentation(proc, indentation);
3595 _c4dbgfbf(
"newline. firstns={}", first);
3598 const char c = rem[first];
3599 _c4dbgfbf(
"firstns={}='{}'", first, _c4prc(c));
3600 if(c !=
'\n' && c !=
'\r')
3602 _c4dbgfbf(
"done with indented block", first);
3606 else if(first !=
npos)
3609 _c4dbgfbf(
"copy all {} spaces", first);
3627template<
class EventHandler>
3628template<
class FilterProcessor>
3631 _c4dbgfbf(
"indentation={} before={}", indentation, _prs(proc.src));
3633 size_t contents_len = _handle_all_whitespace(proc, chomp);
3635 return proc.result();
3637 contents_len = _extend_to_chomp(proc, contents_len);
3639 _c4dbgfbf(
"to filter={}", _prs(proc.src.first(contents_len)));
3641 _filter_block_folded_newlines_leading(proc, indentation, contents_len);
3644 while(proc.has_more_chars(contents_len))
3646 const char curr = proc.curr();
3647 _c4dbgfbf(
"'{}' sofar={}", _c4prc(curr), _prs(proc.sofar()));
3652 _c4dbgfbf(
"found newline", curr);
3653 _filter_block_folded_newlines(proc, indentation, contents_len);
3665 _c4dbgfbf(
"before chomp: #tochomp={} sofar={}", proc.rem().len, _prs(proc.sofar()));
3667 _filter_chomp(proc, chomp, indentation);
3669 _c4dbgfbf(
"final={}", proc.sofar().len, _prs(proc.sofar()));
3671 return proc.result();
3676template<
class EventHandler>
3680 return _filter_block_folded(proc, indentation, chomp);
3683template<
class EventHandler>
3687 return _filter_block_folded(proc, indentation, chomp);
3695template<
class EventHandler>
3698 _c4dbgpf(
"filtering plain scalar: s={}", _prs(s));
3699 FilterResult r = this->filter_scalar_plain_in_place(s, s.len, indentation);
3700 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, r.valid(), m_evt_handler->m_curr->pos);
3701 _c4dbgpf(
"filtering plain scalar: success! s={}", _prs(r.get()));
3707template<
class EventHandler>
3710 _c4dbgpf(
"filtering squo scalar: s={}", _prs(s));
3711 FilterResult r = this->filter_scalar_squoted_in_place(s, s.len);
3712 _RYML_ASSERT_PARSE_(this->callbacks(), r.valid(), m_evt_handler->m_curr->pos);
3713 _c4dbgpf(
"filtering squo scalar: success! s={}", _prs(r.get()));
3720template<
class EventHandler>
3723 _c4dbgpf(
"filtering dquo scalar: s={}", _prs(s));
3725 if(C4_LIKELY(r.valid()))
3727 _c4dbgpf(
"filtering dquo scalar: success! s={}", _prs(r.get()));
3732 const size_t len = r.required_len();
3733 _c4dbgpf(
"filtering dquo scalar: not enough space: needs {}, have {}", len, s.len);
3734 substr dst = _alloc_arena(len, &s);
3735 _c4dbgpf(
"filtering dquo scalar: dst.len={}", dst.len);
3738 _RYML_ASSERT_PARSE_(this->callbacks(), dst.len == len, m_evt_handler->m_curr->pos);
3739 FilterResult rsd = this->filter_scalar_dquoted(s, dst);
3740 _c4dbgpf(
"filtering dquo scalar: ... result now needs {} was {}", rsd.required_len(), len);
3741 _RYML_ASSERT_PARSE_(this->callbacks(), rsd.required_len() <= len, m_evt_handler->m_curr->pos);
3742 _RYML_CHECK_PARSE_(m_evt_handler->m_stack.m_callbacks, rsd.valid(), m_evt_handler->m_curr->pos);
3743 _c4dbgpf(
"filtering dquo scalar: success! s={}", _prs(rsd.get()));
3753template<
class EventHandler>
3756 if(s.is_sub(_buf()))
3758 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.str > _buf().str, m_evt_handler->m_curr->pos);
3759 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.str-1 >= _buf().str, m_evt_handler->m_curr->pos);
3761 memmove(s.str - 1, s.str, s.len);
3763 s.str[s.len] =
'\n';
3769 substr dst = _alloc_arena(s.len + 1, &s);
3771 memcpy(dst.str, s.str, s.len);
3777template<
class EventHandler>
3780 _c4dbgpf(
"filtering block literal scalar: s={}", _prs(s));
3781 FilterResult r = this->filter_scalar_block_literal_in_place(s, s.len, indentation, chomp);
3783 if(C4_LIKELY(r.valid()))
3789 _c4dbgpf(
"filtering block literal scalar: not enough space: needs {}, have {}", r.required_len(), s.len);
3790 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, r.required_len() == s.len + 1, m_evt_handler->m_curr->pos);
3793 result = _move_scalar_left_and_add_newline(s);
3795 _c4dbgpf(
"filtering block literal scalar: success! s={}", _prs(result));
3801template<
class EventHandler>
3804 _c4dbgpf(
"filtering block folded scalar: s={}", _prs(s));
3805 FilterResult r = this->filter_scalar_block_folded_in_place(s, s.len, indentation, chomp);
3807 if(C4_LIKELY(r.valid()))
3813 _c4dbgpf(
"filtering block folded scalar: not enough space: needs {}, have {}", r.required_len(), s.len);
3814 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, r.required_len() == s.len + 1, m_evt_handler->m_curr->pos);
3817 result = _move_scalar_left_and_add_newline(s);
3819 _c4dbgpf(
"filtering block folded scalar: success! s={}", _prs(result));
3826template<
class EventHandler>
3831 if(m_options.scalar_filtering())
3833 return _filter_scalar_plain(sc.scalar, indentation);
3837 _c4dbgp(
"plain scalar left unfiltered");
3838 m_evt_handler->mark_key_scalar_unfiltered();
3843 _c4dbgp(
"plain scalar doesn't need filtering");
3848template<
class EventHandler>
3853 if(m_options.scalar_filtering())
3855 return _filter_scalar_plain(sc.scalar, indentation);
3859 _c4dbgp(
"plain scalar left unfiltered");
3860 m_evt_handler->mark_val_scalar_unfiltered();
3865 _c4dbgp(
"plain scalar doesn't need filtering");
3873template<
class EventHandler>
3878 if(m_options.scalar_filtering())
3880 return _filter_scalar_squot(sc.scalar);
3884 _c4dbgp(
"squo key scalar left unfiltered");
3885 m_evt_handler->mark_key_scalar_unfiltered();
3890 _c4dbgp(
"squo key scalar doesn't need filtering");
3895template<
class EventHandler>
3900 if(m_options.scalar_filtering())
3902 return _filter_scalar_squot(sc.scalar);
3906 _c4dbgp(
"squo val scalar left unfiltered");
3907 m_evt_handler->mark_val_scalar_unfiltered();
3912 _c4dbgp(
"squo val scalar doesn't need filtering");
3920template<
class EventHandler>
3925 if(m_options.scalar_filtering())
3927 return _filter_scalar_dquot(sc.scalar);
3931 _c4dbgp(
"dquo scalar left unfiltered");
3932 m_evt_handler->mark_key_scalar_unfiltered();
3937 _c4dbgp(
"dquo scalar doesn't need filtering");
3942template<
class EventHandler>
3947 if(m_options.scalar_filtering())
3949 return _filter_scalar_dquot(sc.scalar);
3953 _c4dbgp(
"dquo scalar left unfiltered");
3954 m_evt_handler->mark_val_scalar_unfiltered();
3959 _c4dbgp(
"dquo scalar doesn't need filtering");
3967template<
class EventHandler>
3970 if(m_options.scalar_filtering())
3972 return _filter_scalar_literal(sb.scalar, sb.indentation, sb.chomp);
3976 _c4dbgp(
"literal scalar left unfiltered");
3977 m_evt_handler->mark_key_scalar_unfiltered();
3982template<
class EventHandler>
3985 if(m_options.scalar_filtering())
3987 return _filter_scalar_literal(sb.scalar, sb.indentation, sb.chomp);
3991 _c4dbgp(
"literal scalar left unfiltered");
3992 m_evt_handler->mark_val_scalar_unfiltered();
4000template<
class EventHandler>
4003 if(m_options.scalar_filtering())
4005 return _filter_scalar_folded(sb.scalar, sb.indentation, sb.chomp);
4009 _c4dbgp(
"folded scalar left unfiltered");
4010 m_evt_handler->mark_key_scalar_unfiltered();
4015template<
class EventHandler>
4018 if(m_options.scalar_filtering())
4020 return _filter_scalar_folded(sb.scalar, sb.indentation, sb.chomp);
4024 _c4dbgp(
"folded scalar left unfiltered");
4025 m_evt_handler->mark_val_scalar_unfiltered();
// Adds the parser flags given in `on`. The lines visible here only build the
// debug trace: each flag set is rendered to text in its own 64-byte stack
// buffer through detail::_parser_flags_to_str().
// NOTE(review): the declaration of `s` and the statement that actually updates
// the flags are not visible in this excerpt — confirm against the full file.
4037template<
class EventHandler>
4038void ParseEngine<EventHandler>::add_flags(
ParserFlag_t on)
4041 char buf1_[64], buf2_[64], buf3_[64];
// buf1: the flags being added
4042 csubstr buf1 = detail::_parser_flags_to_str(buf1_, on);
// buf2: the flags before the change
4043 csubstr buf2 = detail::_parser_flags_to_str(buf2_, s->flags);
// buf3: the flags after the change (current flags OR-ed with `on`)
4044 csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags|on);
4045 _c4dbgpf(
"state[{}]: add {}: before={} after={}", s->level, buf1, buf2, buf3);
4049template<
class EventHandler>
4053 char buf1_[64], buf2_[64], buf3_[64], buf4_[64];
4054 csubstr buf1 = detail::_parser_flags_to_str(buf1_, on);
4055 csubstr buf2 = detail::_parser_flags_to_str(buf2_, off);
4056 csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags);
4057 csubstr buf4 = detail::_parser_flags_to_str(buf4_, (~off)&((s->flags|on)));
4058 _c4dbgpf(
"state[{}]: add {} / rem {}: before={} after={}", s->level, buf1, buf2, buf3, buf4);
// Removes the parser flags given in `off`. The lines visible here only build
// the debug trace: each flag set is rendered to text in its own 64-byte stack
// buffer through detail::_parser_flags_to_str().
// NOTE(review): the declaration of `s` and the statement that actually updates
// the flags are not visible in this excerpt — confirm against the full file.
4064template<
class EventHandler>
4065void ParseEngine<EventHandler>::rem_flags(
ParserFlag_t off)
4068 char buf1_[64], buf2_[64], buf3_[64];
// buf1: the flags being removed
4069 csubstr buf1 = detail::_parser_flags_to_str(buf1_, off);
// buf2: the flags before the change
4070 csubstr buf2 = detail::_parser_flags_to_str(buf2_, s->flags);
// buf3: the flags after the change (current flags with the `off` bits cleared)
4071 csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags&(~off));
4072 _c4dbgpf(
"state[{}]: rem {}: before={} after={}", s->level, buf1, buf2, buf3);
4079 bool gotone =
false;
4081 #define _prflag(fl) \
4082 if((flags & fl) == (fl)) \
4086 if(pos + 1 < buf.len) \
4090 csubstr fltxt = #fl; \
4091 if(pos + fltxt.len <= buf.len) \
4092 memcpy(buf.str + pos, fltxt.str, fltxt.len); \
4122 _RYML_CHECK_BASIC(pos <= buf.len);
4124 return buf.first(pos);
4134template<
class EventHandler>
4137 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, loc.offset < _buf().len);
4138 return _buf().sub(loc.offset);
4141template<
class EventHandler>
4144 if(C4_UNLIKELY(val ==
nullptr))
4145 return {m_evt_handler->m_curr->pos.name, 0, 0, 0};
4146 _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_options.locations());
4149 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_options.locations());
4150 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, !_locations_dirty());
4151 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets !=
nullptr);
4152 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_size > 0);
4155 _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, val !=
nullptr || src.str ==
nullptr);
4156 _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, (val >= src.begin() && val <= src.end()) || (src.str ==
nullptr && val ==
nullptr));
4158 using lineptr_type =
size_t const* C4_RESTRICT;
4159 lineptr_type lineptr =
nullptr;
4160 size_t offset = (size_t)(val - src.begin());
4164 for(lineptr_type curr = m_newline_offsets, last = m_newline_offsets + m_newline_offsets_size; curr < last; ++curr)
4179 size_t count = m_newline_offsets_size;
4180 lineptr = m_newline_offsets;
4183 size_t step = count >> 1;
4184 lineptr_type it = lineptr + step;
4196 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, lineptr >= m_newline_offsets);
4197 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, lineptr <= m_newline_offsets + m_newline_offsets_size);
4198 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, *lineptr > offset);
4200 loc.
name = m_evt_handler->m_curr->pos.name;
4201 loc.offset = offset;
4202 loc.line = (size_t)(lineptr - m_newline_offsets);
4203 if(lineptr > m_newline_offsets)
4204 loc.col = (offset - *(lineptr-1) - 1u);
// Builds the table of newline offsets (m_newline_offsets) for `src`: one entry
// per '\n' character, in increasing order, followed by a final entry equal to
// src.len.
// NOTE(review): the declaration of `src` is on a line not visible in this
// excerpt; presumably it is the buffer being parsed — confirm.
4210template<
class EventHandler>
4211void ParseEngine<EventHandler>::_prepare_locations()
// one entry per newline, plus one for the end-of-buffer entry appended below
4214 size_t numnewlines = 1u + src.count(
'\n');
4215 _resize_locations(numnewlines);
// refill the table from scratch
4216 m_newline_offsets_size = 0;
4217 for(
size_t i = 0; i < src.len; i++)
4218 if(src.str[i] ==
'\n')
4219 m_newline_offsets[m_newline_offsets_size++] = i;
// final entry: the end of the buffer
4220 m_newline_offsets[m_newline_offsets_size++] = src.len;
4221 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_size == numnewlines);
// Ensures the newline-offset table can hold at least `numnewlines` entries
// (never fewer than 16). When the current capacity is too small, the old
// array is released and a new one is allocated; in the lines shown here the
// old contents are not copied over.
4224template<
class EventHandler>
4225void ParseEngine<EventHandler>::_resize_locations(
size_t numnewlines)
// enforce a minimum capacity of 16 entries
4227 numnewlines = numnewlines >= 16 ? numnewlines : 16;
4228 if(numnewlines > m_newline_offsets_capacity)
// release the previous array, if there is one
4230 if(m_newline_offsets)
4231 _RYML_CB_FREE(m_evt_handler->m_stack.m_callbacks, m_newline_offsets,
size_t, m_newline_offsets_capacity);
// the previous pointer value is passed on as the allocation hint
4232 m_newline_offsets = _RYML_CB_ALLOC_HINT(m_evt_handler->m_stack.m_callbacks,
size_t, numnewlines, m_newline_offsets);
4233 m_newline_offsets_capacity = numnewlines;
// Returns true when the newline-offset table holds no entries
// (m_newline_offsets_size is zero).
4237template<
class EventHandler>
4238bool ParseEngine<EventHandler>::_locations_dirty()
const
4240 return !m_newline_offsets_size;
// Inspects the start of the remainder of the current line (line_contents.rem):
// reports leading whitespace and, when the remainder begins with '#', treats
// the rest of the line as a comment and consumes it.
// NOTE(review): the statements that follow the "starts with whitespace" trace
// are on lines not visible in this excerpt; presumably they skip the
// whitespace — confirm against the full file.
4248template<
class EventHandler>
4249void ParseEngine<EventHandler>::_handle_flow_skip_whitespace()
// nothing to look at when the remainder is empty
4252 if(m_evt_handler->m_curr->line_contents.rem.len > 0)
// first remaining character is a space or a tab
4254 if(m_evt_handler->m_curr->line_contents.rem.str[0] ==
' ' || m_evt_handler->m_curr->line_contents.rem.str[0] ==
'\t')
4256 _c4dbgpf(
"starts with whitespace: '{}'", _c4prc(m_evt_handler->m_curr->line_contents.rem.str[0]));
// a '#' here starts a comment
4260 if(m_evt_handler->m_curr->line_contents.rem.begins_with(
'#'))
4262 _c4dbgpf(
"it's a comment: {}", m_evt_handler->m_curr->line_contents.rem);
// advance by the full length of the remainder
4263 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
// Checks indentation at the start of a line while in flow context. Must be
// called with the current state at the beginning of a line (asserted below).
// When indentation_lt() reports true and the text after the indentation is
// still non-empty once leading spaces/tabs are trimmed, a "bad indentation"
// error is raised.
4269template<
class EventHandler>
4270void ParseEngine<EventHandler>::_handle_flow_line_beginning()
4272 _c4dbgpf(
"flow: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
4273 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->at_line_beginning(), m_evt_handler->m_curr->pos);
// presumably "line indentation is less than the reference indentation" — confirm against indentation_lt()
4274 if(C4_UNLIKELY(m_evt_handler->m_curr->indentation_lt()))
// the part of the line that comes after the indentation
4276 csubstr trimmed = m_evt_handler->m_curr->line_contents.rem.
sub(m_evt_handler->m_curr->line_contents.indentation);
4277 _c4dbgpf(
"flow: after indentation={}", _prs(trimmed));
// anything left after trimming leading spaces/tabs makes this an error
4278 if(trimmed.len && trimmed.triml(
" \t").len)
// advance past the indentation before raising the error
4280 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
4281 _c4err(
"bad indentation");
/** Skip leading spaces and tabs of the remaining line in block style.
 * When a non-blank character exists, the line is advanced up to it;
 * otherwise the whole remainder is consumed.
 * NOTE(review): the returned value is presumably derived from mark (the
 * offset before skipping) -- confirm against the full definition. */
4286template<
class EventHandler>
4287size_t ParseEngine<EventHandler>::_handle_block_skip_leading_whitespace()
// offset before any whitespace is skipped
4289 const size_t mark = m_evt_handler->m_curr->pos.offset;
4290 const size_t firstpos = m_evt_handler->m_curr->line_contents.rem.first_not_of(
" \t");
4291 _c4dbgpf(
"block: mark={} firstpos={}", mark, firstpos);
4292 if(firstpos !=
npos)
4294 _c4dbgp(
"block: non empty line");
// stop right at the first non-blank character
4295 _line_progressed(firstpos);
4300 _c4dbgp(
"block: rest of line is whitespace");
// nothing but blanks: consume the entire remainder
4301 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
/** Reject tab characters in the leading whitespace of a block line.
 * @param start_mark offset where the leading whitespace starts
 * @param end_mark offset where it ends; must be >= start_mark (asserted)
 * NOTE(review): leading presumably is the buffer range
 * [start_mark, end_mark) -- confirm against the full definition. */
4306template<
class EventHandler>
4307void ParseEngine<EventHandler>::_handle_block_check_leading_tabs(
size_t start_mark,
size_t end_mark)
4309 _c4dbgpf(
"block: start_mark={} end_mark={}", start_mark, end_mark);
4310 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, end_mark >= start_mark, m_evt_handler->m_curr->pos);
// an empty range has nothing to check
4311 if(end_mark != start_mark)
4314 _c4dbgpf(
"block: leading[{}-{}]={}", start_mark, end_mark, _prs(leading,
true));
4315 size_t pos = leading.find(
'\t');
4318 size_t fno = leading.first_not_of(
" \t");
// error when the range is entirely blank, or when the tab comes before
// the first non-blank character
4319 if(fno ==
npos || pos < fno)
4320 _c4err(
"invalid tab character to the left");
/** Record that a colon was handled on the current line, and raise a
 * "two colons on same line" error when the previously recorded colon was
 * on this same line. m_prev_colon holds the line of the last colon, or
 * npos when there is none. */
4330template<
class EventHandler>
4331void ParseEngine<EventHandler>::_handle_colon()
4333 size_t curr = m_evt_handler->m_curr->pos.line;
4334 if(C4_UNLIKELY(m_prev_colon !=
npos && curr == m_prev_colon))
4336 _c4dbgpf(
"colon: prevline={} currline={}", m_prev_colon, curr);
4337 _c4err(
"two colons on same line");
4339 _c4dbgpf(
"colon: set prevline={}->{}", m_prev_colon, curr);
// remember this line for the next check
4340 m_prev_colon = curr;
/** Store an annotation string in the next free slot of dst, with its
 * indentation, line and orig fields value-initialized.
 * @param dst annotation storage; must have a free slot (asserted)
 * @param str the annotation text
 * NOTE(review): the increment of dst->num_entries is presumably done
 * right after these assignments -- confirm. */
4343template<
class EventHandler>
4344void ParseEngine<EventHandler>::_add_annotation(Annotation *C4_RESTRICT dst,
csubstr str)
4346 _c4dbgpf(
"store annotation[{}]: {}", dst->num_entries, _prs(str));
4347 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, dst->num_entries < C4_COUNTOF(dst->annotations), m_evt_handler->m_curr->pos);
4348 dst->annotations[dst->num_entries].str = str;
4349 dst->annotations[dst->num_entries].indentation = {};
4350 dst->annotations[dst->num_entries].line = {};
4351 dst->annotations[dst->num_entries].orig = {};
/** Store an annotation string together with the indentation and line
 * where it was found; the orig field is value-initialized.
 * @param dst annotation storage; must have a free slot (asserted)
 * @param str the annotation text
 * @param indentation indentation recorded for the annotation
 * @param line line recorded for the annotation */
4355template<
class EventHandler>
4356void ParseEngine<EventHandler>::_add_annotation(Annotation *C4_RESTRICT dst,
csubstr str,
size_t indentation,
size_t line)
4358 _c4dbgpf(
"store annotation[{}]: '{}' indentation={} line={}", dst->num_entries, _maybe_null_str(str), indentation, line);
4359 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, dst->num_entries < C4_COUNTOF(dst->annotations), m_evt_handler->m_curr->pos);
// special case: there is already an entry, and its first one is on this
// same line
4360 if(C4_UNLIKELY(dst->num_entries && dst->annotations[0].line == line))
4364 dst->annotations[dst->num_entries].str = str;
4365 dst->annotations[dst->num_entries].indentation = indentation;
4366 dst->annotations[dst->num_entries].line = line;
4367 dst->annotations[dst->num_entries].orig = {};
/** Store an annotation string together with its indentation, line, and
 * the original text it was obtained from.
 * @param dst annotation storage; must have a free slot (asserted)
 * @param str the annotation text
 * @param indentation indentation recorded for the annotation
 * @param line line recorded for the annotation
 * @param orig the original text, stored in the orig field */
4371template<
class EventHandler>
4372void ParseEngine<EventHandler>::_add_annotation(Annotation *C4_RESTRICT dst,
csubstr str,
size_t indentation,
size_t line,
csubstr orig)
4374 _c4dbgpf(
"store annotation[{}]: '{}'->'{}' indentation={} line={}", dst->num_entries, orig, _maybe_null_str(str), indentation, line);
4375 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, dst->num_entries < C4_COUNTOF(dst->annotations), m_evt_handler->m_curr->pos);
// special case: there is already an entry, and its first one is on this
// same line
4376 if(C4_UNLIKELY(dst->num_entries && dst->annotations[0].line == line))
4380 dst->annotations[dst->num_entries].str = str;
4381 dst->annotations[dst->num_entries].indentation = indentation;
4382 dst->annotations[dst->num_entries].line = line;
4383 dst->annotations[dst->num_entries].orig = orig;
/** @return true when more than one tag or more than one anchor is
 * pending. */
4387template<
class EventHandler>
4388bool ParseEngine<EventHandler>::_annotations_require_key_container()
const
4390 return m_pending_tags.num_entries > 1 || m_pending_anchors.num_entries > 1;
/** Apply pending tag/anchor annotations to the key when a flow token
 * shows up where a key was expected. A single pending tag becomes the
 * key tag and a single pending anchor becomes the key anchor; more than
 * one anchor raises "too many anchors". The key and the value are then
 * both set to empty plain scalars.
 * NOTE(review): the early exit taken when nothing is pending, and the
 * final return value, should be confirmed against the full definition. */
4393template<
class EventHandler>
4394bool ParseEngine<EventHandler>::_handle_annotations_before_unexpected_flow_token_rkey()
// bitwise-or of the two counts: zero only when nothing is pending
4396 if(!(m_pending_tags.num_entries | m_pending_anchors.num_entries))
4398 _c4dbgpf(
"handle_annotations_before_unexpected_flow_comma_rkey, node={}", m_evt_handler->m_curr->node_id);
4399 if(m_pending_tags.num_entries)
4401 _c4dbgpf(
"handle_annotations_before_unexpected_flow_comma_rkey, #tags={}", m_pending_tags.num_entries);
4402 if(C4_LIKELY(m_pending_tags.num_entries == 1))
4404 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4405 _clear_annotations(&m_pending_tags);
4412 if(m_pending_anchors.num_entries)
// report the number of pending anchors (this used to print the tag count)
4414 _c4dbgpf(
"handle_annotations_before_unexpected_flow_comma, #anchors={}", m_pending_anchors.num_entries);
4415 if(C4_LIKELY(m_pending_anchors.num_entries == 1))
4417 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4418 _clear_annotations(&m_pending_anchors);
4422 _c4err(
"too many anchors");
// the annotated key and its value are both empty
4425 m_evt_handler->set_key_scalar_plain_empty();
4426 m_evt_handler->set_val_scalar_plain_empty();
/** Apply pending annotations to a key scalar that is about to be set.
 * A single pending tag becomes the key tag and a single pending anchor
 * becomes the key anchor; the corresponding pending list is cleared
 * afterwards. More than one anchor raises "too many anchors". */
4430template<
class EventHandler>
4431void ParseEngine<EventHandler>::_handle_annotations_before_blck_key_scalar()
4433 _c4dbgpf(
"annotations_before_blck_key_scalar, node={}", m_evt_handler->m_curr->node_id);
4434 if(m_pending_tags.num_entries)
4436 _c4dbgpf(
"annotations_before_blck_key_scalar, #tags={}", m_pending_tags.num_entries);
4437 if(C4_LIKELY(m_pending_tags.num_entries == 1))
4439 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4440 _clear_annotations(&m_pending_tags);
4447 if(m_pending_anchors.num_entries)
4449 _c4dbgpf(
"annotations_before_blck_key_scalar, #anchors={}", m_pending_anchors.num_entries);
4450 if(C4_LIKELY(m_pending_anchors.num_entries == 1))
4452 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4453 _clear_annotations(&m_pending_anchors);
4457 _c4err(
"too many anchors");
/** Apply pending annotations to a value scalar that is about to be set.
 * A single pending tag becomes the value tag and a single pending anchor
 * becomes the value anchor; the corresponding pending list is cleared
 * afterwards. More than one anchor raises "too many anchors". */
4462template<
class EventHandler>
4463void ParseEngine<EventHandler>::_handle_annotations_before_blck_val_scalar()
4465 _c4dbgpf(
"annotations_before_blck_val_scalar, node={}", m_evt_handler->m_curr->node_id);
4466 if(m_pending_tags.num_entries)
4468 _c4dbgpf(
"annotations_before_blck_val_scalar, #tags={}", m_pending_tags.num_entries);
4469 if(C4_LIKELY(m_pending_tags.num_entries == 1))
4471 m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4472 _clear_annotations(&m_pending_tags);
4479 if(m_pending_anchors.num_entries)
4481 _c4dbgpf(
"annotations_before_blck_val_scalar, #anchors={}", m_pending_anchors.num_entries);
4482 if(C4_LIKELY(m_pending_anchors.num_entries == 1))
4484 m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4485 _clear_annotations(&m_pending_anchors);
4489 _c4err(
"too many anchors");
/** Decide which pending annotations belong to a block map that is about
 * to start, and set them as value tag/anchor.
 * With two pending entries, entry 0 is applied to the map and the list
 * is not cleared here. With a single entry, it is applied to the map
 * (and the list cleared) only when it was recorded on a line before
 * current_line.
 * @param current_line the line used to decide ownership of a single
 *        pending annotation */
4494template<
class EventHandler>
4495void ParseEngine<EventHandler>::_handle_annotations_before_start_mapblck(
size_t current_line)
4497 _c4dbgpf(
"annotations_before_start_mapblck, current_line={}", current_line);
// tags
4498 if(m_pending_tags.num_entries == 2)
4500 _c4dbgp(
"2 tags, setting entry 0");
4501 m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4503 else if(m_pending_tags.num_entries == 1)
4505 _c4dbgpf(
"1 tag. line={}, curr={}", m_pending_tags.annotations[0].line, current_line);
// a tag from an earlier line is attributed to the map itself
4506 if(m_pending_tags.annotations[0].line < current_line)
4508 _c4dbgp(
"...tag is for the map. setting it.");
4509 m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4510 _clear_annotations(&m_pending_tags);
// anchors: same rules as for tags
4514 if(m_pending_anchors.num_entries == 2)
4516 _c4dbgp(
"2 anchors, setting entry 0");
4517 m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4519 else if(m_pending_anchors.num_entries == 1)
4521 _c4dbgpf(
"1 anchor. line={}, curr={}", m_pending_anchors.annotations[0].line, current_line);
4522 if(m_pending_anchors.annotations[0].line < current_line)
4524 _c4dbgp(
"...anchor is for the map. setting it.");
4525 m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4526 _clear_annotations(&m_pending_anchors);
/** Decide which pending annotations belong to a block map that is
 * starting as a key, and set them as key tag/anchor.
 * With a single pending entry, it is applied (and the list cleared) only
 * when it was recorded on a line different from the current one. With
 * two pending entries, entry 0 is applied and the list is not cleared
 * here. */
4531template<
class EventHandler>
4532void ParseEngine<EventHandler>::_handle_annotations_before_start_mapblck_as_key()
4534 _c4dbgp(
"annotations_before_start_mapblck_as_key");
// tags
4535 switch(m_pending_tags.num_entries)
// the format string now has a placeholder for the current line argument
4538 _c4dbgpf(
"annotations_after_start_mapblck_as_key: 1 tag={} line={} currline={}", _prs(m_pending_tags.annotations[0].str), m_pending_tags.annotations[0].line, m_evt_handler->m_curr->pos.line);
4539 if(m_pending_tags.annotations[0].line != m_evt_handler->m_curr->pos.line)
4541 _c4dbgp(
"annotations_after_start_mapblck_as_key: is map tag");
4542 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4543 _clear_annotations(&m_pending_tags);
4547 _c4dbgpf(
"annotations_after_start_mapblck_as_key: 2 tags: {} -> {}", _prs(m_pending_tags.annotations[0].str), _prs(m_pending_tags.annotations[1].str));
4548 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
// anchors: same rules as for tags
4551 switch(m_pending_anchors.num_entries)
// the format string now has a placeholder for the current line argument
4554 _c4dbgpf(
"annotations_after_start_mapblck_as_key: 1 anchor={} line={} currline={}", m_pending_anchors.annotations[0].str, m_pending_anchors.annotations[0].line, m_evt_handler->m_curr->pos.line);
4555 if(m_pending_anchors.annotations[0].line != m_evt_handler->m_curr->pos.line)
4557 _c4dbgp(
"annotations_after_start_mapblck_as_key: is map anchor");
4558 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4559 _clear_annotations(&m_pending_anchors);
4563 _c4dbgpf(
"annotations_after_start_mapblck_as_key: 2 anchors: {} -> {}", m_pending_anchors.annotations[0].str, m_pending_anchors.annotations[1].str);
4564 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
/** After a block map has started, hand the remaining pending
 * annotations to the first key and set the indentation.
 * At most two tags and two anchors may be pending (asserted). When any
 * annotation is pending, the key indentation is re-selected through
 * _select_indentation_from_annotations(). With one pending entry, entry
 * 0 goes to the key; with two, entry 1 goes to the key. The pending
 * list is cleared in both cases.
 * @param key_indentation indentation of the key
 * @param key_line line of the key */
4569template<
class EventHandler>
4570void ParseEngine<EventHandler>::_handle_annotations_and_indentation_after_start_mapblck(
size_t key_indentation,
size_t key_line)
4572 _c4dbgp(
"annotations_after_start_mapblck");
4573 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries <= 2, m_evt_handler->m_curr->pos);
4574 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.num_entries <= 2, m_evt_handler->m_curr->pos);
4575 if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
// annotations may change which indentation applies to the key
4577 key_indentation = _select_indentation_from_annotations(key_indentation, key_line);
4578 switch(m_pending_tags.num_entries)
4581 _c4dbgpf(
"annotations_after_start_mapblck: 1 tag: {}", _prs(m_pending_tags.annotations[0].str));
4582 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4583 _clear_annotations(&m_pending_tags);
4586 _c4dbgpf(
"annotations_after_start_mapblck: 2 tags: {} -> {}", _prs(m_pending_tags.annotations[0].str), _prs(m_pending_tags.annotations[1].str));
// with two tags, the second one belongs to the key
4587 m_evt_handler->set_key_tag(m_pending_tags.annotations[1].str);
4588 _clear_annotations(&m_pending_tags);
4591 switch(m_pending_anchors.num_entries)
// one anchor, one placeholder (the format used to have two)
4594 _c4dbgpf(
"annotations_after_start_mapblck: 1 anchor: {}", m_pending_anchors.annotations[0].str);
4595 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4596 _clear_annotations(&m_pending_anchors);
4599 _c4dbgpf(
"annotations_after_start_mapblck: 2 anchors: {} -> {}", m_pending_anchors.annotations[0].str, m_pending_anchors.annotations[1].str);
// with two anchors, the second one belongs to the key
4600 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[1].str);
4601 _clear_annotations(&m_pending_anchors);
4605 _set_indentation(key_indentation);
/** Choose the indentation to use for a value that has pending
 * annotations. At least one tag or anchor must be pending (asserted).
 * A reference annotation (curr) starts at the first anchor, or at the
 * first tag when there are no anchors, and is compared against every
 * pending anchor and tag by line and by indentation.
 * @param val_indentation indentation of the value
 * @param val_line line of the value
 * @return val_indentation when the selected annotation is on a line
 *         before val_line; otherwise the selected annotation's own
 *         indentation
 * NOTE(review): the bodies of the line/indentation comparisons
 * presumably reassign curr -- confirm against the full definition. */
4608template<
class EventHandler>
4609size_t ParseEngine<EventHandler>::_select_indentation_from_annotations(
size_t val_indentation,
size_t val_line)
4611 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries | m_pending_anchors.num_entries, m_evt_handler->m_curr->pos);
// start from the first anchor when there is one, else from the first tag
4613 auto const *C4_RESTRICT curr = m_pending_anchors.num_entries ? &m_pending_anchors.annotations[0] : &m_pending_tags.annotations[0];
4614 for(
size_t i = 0; i < m_pending_anchors.num_entries; ++i)
4616 auto const& C4_RESTRICT ann = m_pending_anchors.annotations[i];
4617 if(ann.line > curr->line)
4619 else if(ann.indentation < curr->indentation)
4622 for(
size_t j = 0; j < m_pending_tags.num_entries; ++j)
4624 auto const& C4_RESTRICT ann = m_pending_tags.annotations[j];
4625 if(ann.line > curr->line)
4627 else if(ann.indentation < curr->indentation)
4630 return curr->line < val_line ? val_indentation : curr->indentation;
/** Set an alias as the key reference. Raises an error when any anchor
 * or tag is pending, since an alias cannot carry them.
 * @param alias the alias text to pass to set_key_ref() */
4633template<
class EventHandler>
4634void ParseEngine<EventHandler>::_handle_keyref(
csubstr alias)
// bitwise-or of the two counts: zero only when nothing is pending
4636 if(C4_LIKELY(!(m_pending_anchors.num_entries | m_pending_tags.num_entries)))
4637 m_evt_handler->set_key_ref(alias);
4639 _c4err(
"aliases cannot have anchors or tags");
/** Set an alias as the value reference. Raises an error when any anchor
 * or tag is pending, since an alias cannot carry them.
 * @param alias the alias text to pass to set_val_ref() */
4642template<
class EventHandler>
4643void ParseEngine<EventHandler>::_handle_valref(
csubstr alias)
// bitwise-or of the two counts: zero only when nothing is pending
4645 if(C4_LIKELY(!(m_pending_anchors.num_entries | m_pending_tags.num_entries)))
4646 m_evt_handler->set_val_ref(alias);
4648 _c4err(
"aliases cannot have anchors or tags");
/** Resolve a tag against the tag directives of the current document.
 * A cached resolution is returned directly when one is found. Otherwise
 * the tag is resolved into the handler's arena (growing the arena and
 * resolving again when the first attempt did not fit), optionally
 * copied into the arena when the handler requires strings on buffers,
 * and finally added to the tag cache. */
4651template<
class EventHandler>
4654 _c4dbgpf(
"resolving tag: {} curr_doc={}", _prs(tag), m_evt_handler->m_curr_doc);
4659 _c4dbgpf(
"resolving tag: found in cache[{}]: {}", ret.pos, _prs(ret.resolved));
4660 return ret.resolved;
4662 _c4dbgpf(
"resolving tag: not in cache: {} curr_doc={}", _prs(tag), m_evt_handler->m_curr_doc);
// first attempt: resolve directly into the free part of the arena
4664 substr buf = m_evt_handler->arena_rem();
4665 TagDirectives const& C4_RESTRICT tds = m_evt_handler->tag_directives();
4666 csubstr ttag = tds.resolve(buf, &bufsz, tag, m_evt_handler->m_curr_doc,
4667 m_evt_handler->m_curr->pos,
4668 m_evt_handler->m_stack.m_callbacks);
4669 _c4dbgpf(
"resolving tag: bufsz={} ttag.len={} !!ttag.str={}", bufsz, ttag.len, !!ttag.str);
// a null result string means the buffer was too small, and vice versa
4670 _c4assert((bufsz > buf.len) == (!ttag.str));
4671 _c4assert(!!bufsz == (ttag.len == bufsz));
// NOTE(review): arena.len is printed from arena_rem().len and
// arena.slack from arena().len; these two arguments look swapped --
// confirm
4675 _c4dbgpf(
"tag requires arena, but it was small. arena.len={} arena.slack={} tag.required={}", m_evt_handler->arena_rem().len, m_evt_handler->arena().len, ttag.len);
// &tag is passed along with the allocation
// NOTE(review): presumably so that tag can be relocated if the arena
// moves -- confirm
4677 buf = _alloc_arena(bufsz, &tag);
// second attempt, now with a buffer of the required size
4680 ttag = tds.resolve(buf, &bufsz, tag, m_evt_handler->m_curr_doc,
4681 m_evt_handler->m_curr->pos,
4682 m_evt_handler->m_stack.m_callbacks);
4685 _c4assert(!ttag.str || ttag.is_sub(m_evt_handler->arena()));
4689 _c4dbgp(
"tag required arena. update size");
4692 (void)_alloc_arena(bufsz);
4694 C4_SUPPRESS_WARNING_MSVC_WITH_PUSH(4127)
4695 if C4_IF_CONSTEXPR (EventHandler::requires_strings_on_buffers)
4697 _c4dbgpf(
"handler requires tags in buffers. !!ttag.str={} in_arena={} in_src={}", !!ttag.str, ttag.is_sub(m_evt_handler->arena()), ttag.is_sub(_buf()));
// the resolved tag lives neither in the arena nor in the source buffer:
// copy it into the arena
4699 if(ttag.str && !ttag.is_sub(m_evt_handler->arena()) && !ttag.is_sub(_buf()))
4701 _c4dbgpf(
"copying resolved tag to arena: slack={} required={}", m_evt_handler->arena_rem().len, ttag.len);
4702 buf = _alloc_arena(ttag.len, &tag);
4704 memcpy(buf.str, ttag.str, ttag.len);
4706 _c4assert(!ttag.str || ttag.is_sub(m_evt_handler->arena()));
4709 C4_SUPPRESS_WARNING_MSVC_POP
4710 _c4dbgpf(
"resolved tag: {} --> [{}]~~~{}~~~", _prs(tag), ttag.len, _maybe_null_str(ttag));
// remember the resolution for later lookups of the same tag
4713 m_evt_handler->tag_cache().add(tag, ttag, m_evt_handler->m_curr_doc, ret.pos);
/** Validate a %YAML directive and extract its version.
 * The directive must begin with "%YAML" (asserted). After optional
 * spaces/tabs, a run of digits followed by '.' and another run of digits
 * is expected. On a match, the directive is truncated to end at the
 * minor version and the version range is written out.
 * @param directive in/out: the directive text; shortened on success
 * @param version out: the "major.minor" range of the directive */
4717template<
class EventHandler>
4718bool ParseEngine<EventHandler>::_validate_directive_yaml(
csubstr *C4_RESTRICT directive,
csubstr *C4_RESTRICT
version)
const
4720 _c4assert(directive->begins_with(
"%YAML"));
// 5 is the length of "%YAML"
4721 size_t version_start = directive->first_not_of(
" \t", 5);
4722 if(version_start !=
npos)
4724 csubstr digits =
"0123456789";
// the major version ends at the first non-digit, which must be a dot
4725 size_t major_end = directive->
first_not_of(digits, version_start);
4726 if(major_end !=
npos && directive->str[major_end] ==
'.')
4728 size_t minor_end = directive->first_not_of(digits, major_end + 1);
// digits up to the end of the string: the minor version ends there
4729 if(minor_end ==
npos)
4730 minor_end = directive->len;
4731 _set_first_strict(*directive, minor_end);
4732 *
version = directive->range(version_start, minor_end);
4733 _c4dbgpf(
"%YAML: version={} full={}", *
version, _prs(*directive,
true));
/** Validate a %TAG directive and extract its handle and prefix.
 * The directive must begin with "%TAG" (asserted). After whitespace, a
 * handle starting with '!' is expected, then whitespace, then a prefix
 * that runs up to the next whitespace or the end of the string. On a
 * match, the directive is truncated to end at the prefix.
 * @param directive in/out: the directive text; shortened on success
 * @param handle out: the handle range
 * @param prefix out: the prefix range */
4740template<
class EventHandler>
4741bool ParseEngine<EventHandler>::_validate_directive_tag(
csubstr *C4_RESTRICT directive,
csubstr *C4_RESTRICT handle,
csubstr *C4_RESTRICT prefix)
const
4743 _c4assert(directive->begins_with(
"%TAG"));
// 4 is the length of "%TAG"
4745 size_t handle_start = directive->
first_not_of(whitespace, 4);
4746 if(handle_start !=
npos && directive->str[handle_start] ==
'!')
4748 size_t handle_end = directive->first_of(whitespace, handle_start);
4749 if(handle_end !=
npos)
4751 size_t prefix_start = directive->first_not_of(whitespace, handle_end);
4752 if(prefix_start !=
npos)
4754 size_t prefix_end = directive->first_of(whitespace, prefix_start);
// no whitespace after the prefix: it runs to the end of the string
4755 if(prefix_end ==
npos)
4756 prefix_end = directive->len;
4757 _set_first_strict(*directive, prefix_end);
4758 *handle = directive->range(handle_start, handle_end);
4759 *prefix = directive->range(prefix_start, prefix_end);
4760 _c4dbgpf(
"%TAG: handle={} prefix={} full={}", *handle, *prefix, _prs(*directive,
true));
/** Handle a directive line (one starting with '%').
 * %TAG and %YAML directives are validated and forwarded to the event
 * handler; a second %YAML directive is an error. After the directive,
 * only a comment (text starting with '#') may remain on the line.
 * Failures set err and jump to the directive_error label.
 * @param directive the directive text; must start at the current line
 *        remainder (asserted) */
4769template<
class EventHandler>
4770void ParseEngine<EventHandler>::_handle_directive(
csubstr directive)
4772 _c4dbgpf(
"handle_directive: rem={}", _prs(directive,
true));
4773 _c4assert(m_evt_handler->m_curr->line_contents.rem.begins_with(
'%'));
4774 _c4assert(directive.str == m_evt_handler->m_curr->line_contents.rem.str);
// error message to report; stays null while everything is valid
4775 const char *err =
nullptr;
4779 if(str.begins_with(dir))
// the directive name must be followed by nothing, a space or a tab
4782 return (!rest.len || rest.str[0] ==
' ' || rest.str[0] ==
'\t');
4786 if(isdirective(directive,
"%TAG"))
4790 if(C4_UNLIKELY(!_validate_directive_tag(&directive, &handle, &prefix)))
4792 err =
"invalid %TAG directive";
4793 goto directive_error;
4795 m_evt_handler->add_directive_tag(handle, prefix);
4797 else if(isdirective(directive,
"%YAML"))
4800 if(C4_UNLIKELY(!_validate_directive_yaml(&directive, &
version)))
4802 err =
"invalid %YAML directive";
4803 goto directive_error;
// only one %YAML directive is accepted
4805 if(C4_UNLIKELY(m_has_directives_yaml))
4807 err =
"multiple %YAML directives";
4808 goto directive_error;
4810 m_has_directives_yaml =
true;
4811 m_evt_handler->add_directive_yaml(
version);
4813 m_has_directives =
true;
4814 rem = m_evt_handler->m_curr->line_contents.rem;
// when no position was found, consume the entire remainder
4816 pos = pos !=
npos ? pos : rem.len;
4817 _line_progressed(pos);
4819 _c4dbgpf(
"handle_directive: rest={}", _prs(rem));
// anything left after the directive must be a comment
4820 if(C4_UNLIKELY(rem.len && !rem.begins_with(
'#')))
4822 err =
"invalid tokens after directive";
4823 goto directive_error;
4826 if(C4_UNLIKELY(err !=
nullptr))
/** Detect a byte order mark, or an encoding implied by the position of
 * zero bytes around a first ASCII character, at the start of the
 * current line remainder. The checks run in this order: UTF32BE,
 * UTF32LE, UTF16BE, UTF16LE, UTF8. The matched bytes are consumed from
 * the line: 4 for UTF32, 2 for UTF16, 3 for UTF8.
 * NOTE(review): rest presumably is rem without its first byte --
 * confirm against the full definition. */
4830template<
class EventHandler>
4831bool ParseEngine<EventHandler>::_handle_bom()
4833 const csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// true for a non-zero character in the 7-bit ASCII range
4838 #define _rymlisascii(c) ((c) > '\0' && (c) <= '\x7f')
// UTF32BE: explicit mark, or three zero bytes followed by an ASCII char
4839 if(rem.begins_with(
csubstr{
"\x00\x00\xfe\xff", 4}) || (rem.begins_with(
csubstr{
"\x00\x00\x00", 3}) && rem.len >= 4u && _rymlisascii(rem.str[3])))
4841 _c4dbgp(
"byte order mark: UTF32BE");
4843 _line_progressed(4);
// UTF32LE: explicit mark, or an ASCII first char with three zero bytes
// at the start of rest
4847 else if(rem.begins_with(
csubstr{
"\xff\xfe\x00\x00", 4}) || (rest.begins_with(
csubstr{
"\x00\x00\x00", 3}) && rem.len >= 4u && _rymlisascii(rem.str[0])))
4849 _c4dbgp(
"byte order mark: UTF32LE");
4851 _line_progressed(4);
// UTF16BE: explicit mark, or one zero byte followed by an ASCII char
4855 else if(rem.begins_with(
"\xfe\xff") || (rem.begins_with(
'\x00') && rem.len >= 2u && _rymlisascii(rem.str[1])))
4857 _c4dbgp(
"byte order mark: UTF16BE");
4859 _line_progressed(2);
// UTF16LE: explicit mark, or an ASCII first char with a zero byte at the
// start of rest
4863 else if(rem.begins_with(
"\xff\xfe") || (rest.begins_with(
'\x00') && rem.len >= 2u && _rymlisascii(rem.str[0])))
4865 _c4dbgp(
"byte order mark: UTF16LE");
4867 _line_progressed(2);
// UTF8: explicit three-byte mark only
4871 else if(rem.begins_with(
"\xef\xbb\xbf"))
4873 _c4dbgp(
"byte order mark: UTF8");
4875 _line_progressed(3);
/** Validate a detected encoding against the one already recorded.
 * While no encoding is recorded (m_encoding is NOBOM), the new one is
 * acceptable when it is UTF8 or when the current line remainder starts
 * at the very beginning of the buffer; otherwise an error is raised.
 * When an encoding is already recorded, a different one is an error.
 * @param enc the encoding that was just detected */
4884template<
class EventHandler>
4885void ParseEngine<EventHandler>::_handle_bom(
Encoding_e enc)
4887 if(m_encoding ==
NOBOM)
// the pointer comparison checks that we are at the start of the buffer
4889 if(enc ==
UTF8 || (m_evt_handler->m_curr->line_contents.rem.str == _buf().str))
4892 _c4err(
"non-UTF8 byte order mark can appear only at the beginning of the file");
4894 else if(enc != m_encoding)
4896 _c4err(
"byte order mark can only be set once");
/** Parse the contents of a JSON-style flow sequence.
 * Requires the RSEQ and RFLOW flags, no RKEY, and exactly one of
 * RVAL / RNXT (all asserted).
 * In RVAL a value is read: a double-quoted scalar, a child sequence, a
 * child map, the end of the sequence, or a plain scalar.
 * In RNXT either the next value is announced (a sibling is added) or
 * the sequence ends.
 * Reaching the end of the file first raises "missing terminating ]". */
4903template<
class EventHandler>
4904void ParseEngine<EventHandler>::_handle_seq_json()
4907 _c4dbgpf(
"handle2_seq_json: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
4909 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
4910 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RSEQ), m_evt_handler->m_curr->pos);
4911 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RFLOW), m_evt_handler->m_curr->pos);
4912 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL|
RNXT), m_evt_handler->m_curr->pos);
// exactly one of RVAL / RNXT may be set
4913 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RVAL) != has_all(
RNXT), m_evt_handler->m_curr->pos);
4915 _handle_flow_skip_whitespace();
4916 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// state RVAL: a value is expected
4922 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
4923 const char first = rem.str[0];
// label fixed: this is the sequence handler (it used to say mapjson)
4924 _c4dbgpf(
"seqjson[RVAL]: '{}'", first);
4929 _c4dbgp(
"seqjson[RVAL]: scanning double-quoted scalar");
4930 ScannedScalar sc = _scan_scalar_dquot();
4931 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
4932 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
4938 _c4dbgp(
"seqjson[RVAL]: start child seqjson");
4940 m_evt_handler->begin_seq_val_flow();
4942 _line_progressed(1);
4947 _c4dbgp(
"seqjson[RVAL]: start child mapjson");
4949 m_evt_handler->begin_map_val_flow();
4951 _line_progressed(1);
4952 goto seqjson_finish;
4956 _c4dbgp(
"seqjson[RVAL]: end!");
4959 _line_progressed(1);
4961 goto seqjson_finish;
4967 if(_scan_scalar_seq_json(&sc))
4969 _c4dbgp(
"seqjson[RVAL]: it's a plain scalar.");
4970 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
4971 m_evt_handler->set_val_scalar_plain(maybe_filtered);
// state RNXT: a separator or the end of the sequence is expected
4983 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT), m_evt_handler->m_curr->pos);
4984 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
4985 const char first = rem.str[0];
// label fixed: this is the sequence handler (it used to say mapjson)
4986 _c4dbgpf(
"seqjson[RNXT]: '{}'", first);
4991 _c4dbgp(
"seqjson[RNXT]: expect next val");
4993 m_evt_handler->add_sibling();
4994 _line_progressed(1);
4999 _c4dbgp(
"seqjson[RNXT]: end!");
5001 _line_progressed(1);
5002 goto seqjson_finish;
5010 _c4dbgt(
"seqjson: go again", 0);
5011 if(_finished_line())
5013 if(C4_LIKELY(!_finished_file()))
// the file ended before the sequence was closed
5021 _c4err(
"missing terminating ]");
5027 _c4dbgp(
"seqjson: finish");
/** Parse the contents of a JSON-style flow map.
 * Requires the RMAP and RFLOW flags, no QMRK, and exactly one of
 * RKEY / RKCL / RVAL / RNXT (all asserted).
 * RKEY reads a double-quoted key or the end of the map.
 * RKCL consumes the colon after a key.
 * RVAL reads a value: a double-quoted scalar, a child sequence, a child
 * map, or a plain scalar.
 * RNXT handles ',' (a sibling is added) or '}' (end of the map).
 * Reaching the end of the file first raises "missing terminating }". */
5033template<
class EventHandler>
5034void ParseEngine<EventHandler>::_handle_map_json()
5037 _c4dbgpf(
"handle2_map_json: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5039 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP), m_evt_handler->m_curr->pos);
5040 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RFLOW), m_evt_handler->m_curr->pos);
5041 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5042 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RKCL|
RVAL|
RNXT), m_evt_handler->m_curr->pos);
// exactly one of the four state flags may be set
5043 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(
RKEY) + has_any(
RKCL) + has_any(
RVAL) + has_any(
RNXT)), m_evt_handler->m_curr->pos);
5045 _handle_flow_skip_whitespace();
5046 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// state RKEY: a key is expected
5052 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
5053 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5054 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5055 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5056 const char first = rem.str[0];
5057 _c4dbgpf(
"mapjson[RKEY]: '{}'", first);
5062 _c4dbgp(
"mapjson[RKEY]: scanning double-quoted scalar");
5063 ScannedScalar sc = _scan_scalar_dquot();
5064 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5065 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5071 _c4dbgp(
"mapjson[RKEY]: end!");
5073 _line_progressed(1);
5074 goto mapjson_finish;
// state RVAL: a value is expected
5080 else if(has_any(
RVAL))
5082 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
5083 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
5084 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5085 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5086 const char first = rem.str[0];
5087 _c4dbgpf(
"mapjson[RVAL]: '{}'", first);
5092 _c4dbgp(
"mapjson[RVAL]: scanning double-quoted scalar");
5093 ScannedScalar sc = _scan_scalar_dquot();
5094 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5095 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5101 _c4dbgp(
"mapjson[RVAL]: start val seqjson");
5103 m_evt_handler->begin_seq_val_flow();
// the child container takes over the parent's reference indentation
5104 _set_indentation(m_evt_handler->m_parent->indref);
5106 _line_progressed(1);
5107 goto mapjson_finish;
5111 _c4dbgp(
"mapjson[RVAL]: start val mapjson");
5113 m_evt_handler->begin_map_val_flow();
5114 _set_indentation(m_evt_handler->m_parent->indref);
5116 _line_progressed(1);
5123 if(_scan_scalar_map_json(&sc))
5125 _c4dbgp(
"mapjson[RVAL]: plain scalar.");
5126 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5127 m_evt_handler->set_val_scalar_plain(maybe_filtered);
// state RKCL: the colon after a key is expected
5138 else if(has_any(
RKCL))
5140 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
5141 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5142 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5143 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5144 const char first = rem.str[0];
5145 _c4dbgpf(
"mapjson[RKCL]: '{}'", first);
5148 _c4dbgp(
"mapjson[RKCL]: found the colon");
5150 _line_progressed(1);
// state RNXT: a separator or the end of the map is expected
5157 else if(has_any(
RNXT))
5159 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
5160 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
5161 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5162 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5163 _c4dbgpf(
"mapjson[RNXT]: '{}'", rem.str[0]);
5164 if(rem.begins_with(
','))
5166 _c4dbgp(
"mapjson[RNXT]: expect next keyval");
5167 m_evt_handler->add_sibling();
5169 _line_progressed(1);
5171 else if(rem.begins_with(
'}'))
5173 _c4dbgp(
"mapjson[RNXT]: end!");
5175 _line_progressed(1);
5176 goto mapjson_finish;
5185 _c4dbgt(
"mapjson: go again", 0);
5186 if(_finished_line())
5188 if(C4_LIKELY(!_finished_file()))
// the file ended before the map was closed
5196 _c4err(
"missing terminating }");
5202 _c4dbgp(
"mapjson: finish");
/** Parse a map that is implied by a key/value pair appearing inside a
 * flow sequence.
 * Requires the RSEQIMAP flag, no RKEY, exactly one of
 * RVAL / RNXT / QMRK / RKCL, and a state stack of at least 3 entries
 * (all asserted).
 * RVAL reads the value: a quoted or plain scalar, a child flow
 * container, an empty value before ',' or ']', an alias, an anchor or
 * a tag.
 * RNXT finishes on ',' or ']'.
 * QMRK reads the key: a quoted or plain scalar, a child flow container,
 * an empty key and value before ',' or ']', an anchor or an alias.
 * RKCL consumes ':' after the key, or sets an empty value on ',' or
 * ']'. */
5208template<
class EventHandler>
5209void ParseEngine<EventHandler>::_handle_seq_imap()
5212 _c4dbgpf(
"handle2_seq_imap: node_id={} level={} indref={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5214 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RSEQIMAP), m_evt_handler->m_curr->pos);
5215 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
5216 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL|
RNXT|
QMRK|
RKCL), m_evt_handler->m_curr->pos);
// exactly one of the four state flags may be set
5217 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, 1 == has_all(
RVAL) + has_all(
RNXT) + has_all(
QMRK) + has_all(
RKCL), m_evt_handler->m_curr->pos);
5218 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 3, m_evt_handler->m_curr->pos);
5220 _handle_flow_skip_whitespace();
5221 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// state RVAL: the value of the implied pair is expected
5227 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL), m_evt_handler->m_curr->pos);
5228 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5229 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5230 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
5231 const char first = rem.str[0];
5232 _c4dbgpf(
"seqimap[RVAL]: '{}'", _c4prc(first));
5236 _c4dbgp(
"seqimap[RVAL]: scanning single-quoted scalar");
5237 sc = _scan_scalar_squot();
5238 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
// pending tags/anchors are applied before the scalar is set
5239 _handle_annotations_before_blck_val_scalar();
5240 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
5242 goto seqimap_finish;
5244 else if(first ==
'"')
5246 _c4dbgp(
"seqimap[RVAL]: scanning double-quoted scalar");
5247 sc = _scan_scalar_dquot();
5248 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5249 _handle_annotations_before_blck_val_scalar();
5250 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5252 goto seqimap_finish;
5255 else if(_scan_scalar_plain_map_flow(&sc))
5257 _c4dbgp(
"seqimap[RVAL]: it's a scalar.");
5258 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5259 _handle_annotations_before_blck_val_scalar();
5260 m_evt_handler->set_val_scalar_plain(maybe_filtered);
5262 goto seqimap_finish;
5264 else if(first ==
'[')
5266 _c4dbgp(
"seqimap[RVAL]: start child seqflow");
5268 _handle_annotations_before_blck_val_scalar();
5269 m_evt_handler->begin_seq_val_flow();
// the child container takes over the parent's reference indentation
5271 _set_indentation(m_evt_handler->m_parent->indref);
5272 _line_progressed(1);
5273 goto seqimap_finish;
5275 else if(first ==
'{')
5277 _c4dbgp(
"seqimap[RVAL]: start child mapflow");
5279 _handle_annotations_before_blck_val_scalar();
5280 m_evt_handler->begin_map_val_flow();
5282 _set_indentation(m_evt_handler->m_parent->indref);
5283 _line_progressed(1);
5284 goto seqimap_finish;
// a separator or closing bracket right away: the value is empty
5286 else if(first ==
',' || first ==
']')
5288 _c4dbgp(
"seqimap[RVAL]: finish without val.");
5289 _handle_annotations_before_blck_val_scalar();
5290 m_evt_handler->set_val_scalar_plain_empty();
5292 goto seqimap_finish;
5294 else if(first ==
'*')
5296 csubstr ref = _scan_ref_seq();
5297 _c4dbgpf(
"seqimap[RVAL]: ref! {}", _prs(ref));
5298 _handle_valref(ref);
// anchors and tags are queued until the value they belong to is seen
5301 else if(first ==
'&')
5303 csubstr anchor = _scan_anchor();
5304 _c4dbgpf(
"seqimap[RVAL]: anchor! {}", _prs(anchor));
5305 _add_annotation(&m_pending_anchors, anchor);
5307 else if(first ==
'!')
5310 _c4dbgpf(
"seqimap[RVAL]: tag! {}", _prs(tag));
5311 _add_annotation(&m_pending_tags, tag);
// state RNXT: the implied pair is complete
5318 else if(has_any(
RNXT))
5320 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT), m_evt_handler->m_curr->pos);
5321 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5322 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5323 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
5324 const char first = rem.str[0];
5325 _c4dbgpf(
"seqimap[RNXT]: '{}'", _c4prc(first));
5326 if(first ==
',' || first ==
']')
5330 _c4dbgp(
"seqimap: done");
5332 goto seqimap_finish;
// state QMRK: the key of the implied pair is expected
5339 else if(has_any(
QMRK))
5341 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
QMRK), m_evt_handler->m_curr->pos);
5342 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5343 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5344 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
5345 const char first = rem.str[0];
5346 _c4dbgpf(
"seqimap[QMRK]: '{}'", _c4prc(first));
5350 _c4dbgp(
"seqimap[QMRK]: scanning single-quoted scalar");
5351 sc = _scan_scalar_squot();
5352 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5353 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
5357 else if(first ==
'"')
5359 _c4dbgp(
"seqimap[QMRK]: scanning double-quoted scalar");
5360 sc = _scan_scalar_dquot();
5361 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5362 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5367 else if(_scan_scalar_plain_map_flow(&sc))
5369 _c4dbgp(
"seqimap[QMRK]: it's a scalar.");
5370 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5371 m_evt_handler->set_key_scalar_plain(maybe_filtered);
5375 else if(first ==
'[')
5377 _c4dbgp(
"seqimap[QMRK]: start child seqflow");
5379 m_evt_handler->begin_seq_key_flow();
5381 _set_indentation(m_evt_handler->m_parent->indref);
5382 _line_progressed(1);
5383 goto seqimap_finish;
5385 else if(first ==
'{')
5387 _c4dbgp(
"seqimap[QMRK]: start child mapflow");
5389 m_evt_handler->begin_map_key_flow();
5391 _set_indentation(m_evt_handler->m_parent->indref);
5392 _line_progressed(1);
5393 goto seqimap_finish;
// a separator or closing bracket right away: key and value are empty
5395 else if(first ==
',' || first ==
']')
5397 _c4dbgp(
"seqimap[QMRK]: finish without key.");
5398 m_evt_handler->set_key_scalar_plain_empty();
5399 m_evt_handler->set_val_scalar_plain_empty();
5401 goto seqimap_finish;
// unlike in RVAL, the anchor is set on the key directly, not queued
5403 else if(first ==
'&')
5405 csubstr anchor = _scan_anchor();
5406 _c4dbgp(
"seqimap[QMRK]: anchor!");
5407 m_evt_handler->set_key_anchor(anchor);
5409 else if(first ==
'*')
5411 csubstr ref = _scan_ref_seq();
5412 _c4dbgp(
"seqimap[QMRK]: ref!");
5413 _handle_keyref(ref);
// state RKCL: the colon after the key is expected
5421 else if(has_any(
RKCL))
5423 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5424 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5425 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5426 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RKCL), m_evt_handler->m_curr->pos);
5427 const char first = rem.str[0];
5428 _c4dbgpf(
"seqimap[RKCL]: '{}'", _c4prc(first));
5431 _c4dbgp(
"seqimap[RKCL]: found ':'");
5433 _line_progressed(1);
// no colon before the separator or closing bracket: the value is empty
5436 else if(first ==
',' || first ==
']')
5438 _c4dbgp(
"seqimap[RKCL]: found ','. finish without val");
5439 m_evt_handler->set_val_scalar_plain_empty();
5441 goto seqimap_finish;
5450 _c4dbgt(
"seqimap: go again", 0);
5451 if(_finished_line())
5453 if(C4_LIKELY(!_finished_file()))
5467 _c4dbgp(
"seqimap: finish");
// _handle_seq_flow(): parse step used while the parser is inside a flow
// sequence. The assertions at the top require the current state to have
// RSEQ and RFLOW set, RKEY clear, exactly one of RVAL/RNXT set, and a
// reference indentation different from npos.
// NOTE(review): only part of this function's lines are visible here (braces,
// some branch conditions and some state updates are not shown); the comments
// below describe only the statements that are visible.
5473template<
class EventHandler>
5474void ParseEngine<EventHandler>::_handle_seq_flow()
5477 _c4dbgpf(
"handle_seq_flow: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5479 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
5480 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RSEQ), m_evt_handler->m_curr->pos);
5481 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RFLOW), m_evt_handler->m_curr->pos);
5482 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL|
RNXT), m_evt_handler->m_curr->pos);
// RVAL and RNXT are mutually exclusive
5483 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RVAL) != has_all(
RNXT), m_evt_handler->m_curr->pos);
5484 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref !=
npos, m_evt_handler->m_curr->pos);
// when positioned at the beginning of a line, run the flow line-beginning handling first
5486 if(m_evt_handler->m_curr->at_line_beginning())
5488 _handle_flow_line_beginning();
// skip whitespace, then test whether anything remains on the current line
5491 _handle_flow_skip_whitespace();
5492 if(!m_evt_handler->m_curr->line_contents.rem.len)
// ---- RVAL: a value is expected; dispatch on the first remaining character
5497 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5498 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
// NOTE(review): the condition selecting this first branch is not visible;
// per the debug message it handles a single-quoted scalar -- confirm.
5502 _c4dbgp(
"seqflow[RVAL]: scanning single-quoted scalar");
5503 sc = _scan_scalar_squot();
// filter the scanned scalar, handle the val annotations, then pass the
// filtered scalar to the event handler
5504 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5505 _handle_annotations_before_blck_val_scalar();
5506 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
// presumably records where this val ended (see m_prev_val_end in the RNXT ':' branch) -- confirm
5508 _mark_seqflow_val_end();
// double-quoted scalar: same sequence of steps as above, with the dquot variants
5510 else if(first ==
'"')
5512 _c4dbgp(
"seqflow[RVAL]: scanning double-quoted scalar");
5513 sc = _scan_scalar_dquot();
5514 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5515 _handle_annotations_before_blck_val_scalar();
5516 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5518 _mark_seqflow_val_end();
// plain scalar: taken when _scan_scalar_plain_seq_flow() returns true; the
// plain filter additionally receives the current reference indentation
5521 else if(_scan_scalar_plain_seq_flow(&sc))
5523 _c4dbgp(
"seqflow[RVAL]: it's a scalar.");
5524 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5525 _handle_annotations_before_blck_val_scalar();
5526 m_evt_handler->set_val_scalar_plain(maybe_filtered);
5528 _mark_seqflow_val_end();
// '[': begin a nested flow sequence as the val; the indentation is set from
// the parent state's indref. No goto to seqflow_finish is visible in this branch.
5530 else if(first ==
'[')
5532 _c4dbgp(
"seqflow[RVAL]: start child seqflow");
5534 _handle_annotations_before_blck_val_scalar();
5535 m_evt_handler->begin_seq_val_flow();
5536 _set_indentation(m_evt_handler->m_parent->indref);
5538 _line_progressed(1);
// '{': begin a nested flow map as the val, then leave this handler
5540 else if(first ==
'{')
5542 _c4dbgp(
"seqflow[RVAL]: start child mapflow");
5544 _handle_annotations_before_blck_val_scalar();
5545 m_evt_handler->begin_map_val_flow();
5546 _set_indentation(m_evt_handler->m_parent->indref);
5548 _line_progressed(1);
5549 goto seqflow_finish;
// ']' while a val was expected: end of the sequence
5551 else if(first ==
']')
5553 _c4dbgp(
"seqflow[RVAL]: end!");
// tests whether any anchors or tags are pending; note this uses bitwise '|'
// while the ',' branch further below uses '||' for the same test
5554 if(m_pending_anchors.num_entries | m_pending_tags.num_entries)
5556 _c4dbgp(
"seqflow[RVAL]: add pending annotations");
5557 _handle_annotations_before_blck_val_scalar();
5558 m_evt_handler->set_val_scalar_plain_empty();
5560 _line_progressed(1);
5562 goto seqflow_finish;
// '*': scan a reference and handle it as the val
5564 else if(first ==
'*')
5566 csubstr ref = _scan_ref_seq();
5567 _c4dbgpf(
"seqflow[RVAL]: ref! {}", _prs(ref));
5568 _handle_valref(ref);
// '&': scan an anchor and add it to the pending anchors
5571 else if(first ==
'&')
5573 csubstr anchor = _scan_anchor();
5574 _c4dbgpf(
"seqflow[RVAL]: anchor! {}", _prs(anchor));
5575 _add_annotation(&m_pending_anchors, anchor);
// '!': add a tag to the pending tags
// NOTE(review): the statement defining `tag` is not visible here.
5577 else if(first ==
'!')
5580 _c4dbgpf(
"seqflow[RVAL]: tag! {}", _prs(tag));
5581 _add_annotation(&m_pending_tags, tag);
// ':' where a val was expected: begin a flow map as the val and give it an
// empty plain key (the debug message calls this a "seqimap")
5583 else if(first ==
':')
5585 _c4dbgpf(
"seqflow[RVAL]: actually seqimap at node[{}], with empty key", m_evt_handler->m_curr->node_id);
5587 m_evt_handler->begin_map_val_flow();
5588 _set_indentation(m_evt_handler->m_parent->indref);
5589 _handle_annotations_before_blck_key_scalar();
5590 m_evt_handler->set_key_scalar_plain_empty();
5592 _line_progressed(1);
5593 goto seqflow_finish;
// '?': begin a flow map as the val (explicit key per the debug message)
5595 else if(first ==
'?')
5597 _c4dbgp(
"seqflow[RVAL]: start child mapflow, explicit key");
5599 m_evt_handler->begin_map_val_flow();
5600 _set_indentation(m_evt_handler->m_parent->indref);
5602 _line_progressed(1);
5603 _maybe_skip_whitespace_tokens();
5604 goto seqflow_finish;
// ',' where a val was expected: if anchors/tags are pending, handle them and
// set an empty plain val
5606 else if(first ==
',')
5608 if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
5610 _c4dbgp(
"seqflow[RVAL]: add pending annotations");
5611 _handle_annotations_before_blck_val_scalar();
5612 m_evt_handler->set_val_scalar_plain_empty();
5614 _mark_seqflow_val_end();
// ---- RNXT: a separator or terminator is expected after a val
5628 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT), m_evt_handler->m_curr->pos);
5629 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5630 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
// NOTE(review): the condition of this branch is not visible; the error text
// below suggests it is the ',' case -- confirm.
5633 _c4dbgp(
"seqflow[RNXT]: expect next val");
5635 m_evt_handler->add_sibling();
5636 _line_progressed(1);
// a '#' right after the consumed character is reported as an error
5637 if(m_evt_handler->m_curr->line_contents.rem.begins_with(
'#'))
5639 _c4err(
"parse error: invalid comment after comma");
5641 _mark_seqflow_val_end();
// ']': end of the sequence
5643 else if(first ==
']')
5645 _c4dbgp(
"seqflow[RNXT]: end!");
5646 _line_progressed(1);
5648 goto seqflow_finish;
// ':' after a val: only when m_prev_val_end is set and equals the current
// line is the val turned into the first key of a new flow map
5650 else if(first ==
':')
5652 _c4dbgpf(
"seqflow[RNXT]: line@valend={} line@now={}", m_prev_val_end, m_evt_handler->m_curr->pos.line);
5653 if(m_prev_val_end !=
NONE && m_evt_handler->m_curr->pos.line == m_prev_val_end)
5655 _c4dbgpf(
"seqflow[RNXT]: actually seqimap at node[{}]", m_evt_handler->m_curr->node_id);
5656 m_evt_handler->actually_val_is_first_key_of_new_map_flow();
5657 _set_indentation(m_evt_handler->m_parent->indref);
5658 _line_progressed(1);
5660 goto seqflow_finish;
// ---- end of one step: check for end of line and end of file; the error
// below reports a sequence that was never closed
5674 _c4dbgt(
"seqflow: go again", 0);
5675 if(_finished_line())
5677 if(C4_LIKELY(!_finished_file()))
5685 _c4err(
"missing terminating ]");
5691 _c4dbgp(
"seqflow: finish");
// _handle_map_flow(): parse step used while the parser is inside a flow map.
// The assertions at the top require the current state to have RMAP and RFLOW
// set and exactly one of RKEY, RKCL, RVAL, RNXT, QMRK set. Each of those
// states gets its own section below, and every section dispatches on the
// first remaining character of the current line.
// NOTE(review): only part of this function's lines are visible here (braces,
// some branch conditions and some state updates are not shown); the comments
// below describe only the statements that are visible.
5697template<
class EventHandler>
5698void ParseEngine<EventHandler>::_handle_map_flow()
5701 _c4dbgpf(
"handle_map_flow: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5703 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP), m_evt_handler->m_curr->pos);
5704 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RFLOW), m_evt_handler->m_curr->pos);
5705 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RKCL|
RVAL|
RNXT|
QMRK), m_evt_handler->m_curr->pos);
// exactly one of the five states is active
5706 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(
RKEY) + has_any(
RKCL) + has_any(
RVAL) + has_any(
RNXT) + has_any(
QMRK)), m_evt_handler->m_curr->pos);
// when positioned at the beginning of a line, run the flow line-beginning handling first
5708 if(m_evt_handler->m_curr->at_line_beginning())
5710 _handle_flow_line_beginning();
// skip whitespace, then test whether anything remains on the current line
5713 _handle_flow_skip_whitespace();
5714 if(!m_evt_handler->m_curr->line_contents.rem.len)
// ---- RKEY: a key is expected
5719 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
5720 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5721 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5722 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5723 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5724 _c4dbgpf(
"mapflow[RKEY]: '{}'", first);
// NOTE(review): the condition selecting this first branch is not visible;
// per the debug message it handles a single-quoted scalar -- confirm.
5728 _c4dbgp(
"mapflow[RKEY]: scanning single-quoted scalar");
5729 sc = _scan_scalar_squot();
// filter the scanned scalar, handle the key annotations, then pass the
// filtered scalar to the event handler as the key
5730 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5731 _handle_annotations_before_blck_key_scalar();
5732 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
// double-quoted key
5735 else if(first ==
'"')
5737 _c4dbgp(
"mapflow[RKEY]: scanning double-quoted scalar");
5738 sc = _scan_scalar_dquot();
5739 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5740 _handle_annotations_before_blck_key_scalar();
5741 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
// plain key: taken when _scan_scalar_plain_map_flow() returns true
5745 else if(_scan_scalar_plain_map_flow(&sc))
5747 _c4dbgp(
"mapflow[RKEY]: plain scalar");
5748 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5749 _handle_annotations_before_blck_key_scalar();
5750 m_evt_handler->set_key_scalar_plain(maybe_filtered);
// '?': explicit key marker
5753 else if(first ==
'?')
5755 _c4dbgp(
"mapflow[RKEY]: explicit key");
5756 _handle_annotations_before_blck_key_scalar();
5758 _line_progressed(1);
5759 _maybe_skip_whitespace_tokens();
// ':' where a key was expected: the key is set to an empty plain scalar
5761 else if(first ==
':')
5763 _c4dbgp(
"mapflow[RKEY]: setting empty key");
5764 _handle_annotations_before_blck_key_scalar();
5765 m_evt_handler->set_key_scalar_plain_empty();
5767 _line_progressed(1);
5768 _maybe_skip_whitespace_tokens();
// ',' where a key was expected: an error unless
// _handle_annotations_before_unexpected_flow_token_rkey() returns true
5770 else if(first ==
',')
5772 _c4dbgp(
"mapflow[RKEY]: comma!");
5773 if(!_handle_annotations_before_unexpected_flow_token_rkey())
5774 _c4err(
"unexpected comma");
// '}': end of the map; the helper's return value is deliberately ignored here
5778 else if(first ==
'}')
5780 _c4dbgp(
"mapflow[RKEY]: end!");
5781 (void)_handle_annotations_before_unexpected_flow_token_rkey();
5782 _line_progressed(1);
5784 goto mapflow_finish;
// '&': scan an anchor and add it to the pending anchors
5786 else if(first ==
'&')
5788 csubstr anchor = _scan_anchor();
5789 _c4dbgpf(
"mapflow[RKEY]: key anchor! {}", _prs(anchor));
5790 _add_annotation(&m_pending_anchors, anchor);
// '!': add a tag to the pending tags
// NOTE(review): the statement defining `tag` is not visible here.
5792 else if(first ==
'!')
5795 _c4dbgpf(
"mapflow[RKEY]: tag! {}", _prs(tag));
5796 _add_annotation(&m_pending_tags, tag);
// '*': scan a reference and handle it as the key
5798 else if(first ==
'*')
5800 csubstr ref = _scan_ref_map();
5801 _c4dbgpf(
"mapflow[RKEY]: key ref! {}", _prs(ref));
5802 _handle_keyref(ref);
// '[': a flow sequence used as the key; leaves this handler
5805 else if(first ==
'[')
5810 _c4dbgp(
"mapflow[RKEY]: start child seqflow (!)");
5811 _handle_annotations_before_blck_key_scalar();
5813 m_evt_handler->begin_seq_key_flow();
5815 _set_indentation(m_evt_handler->m_parent->indref);
5816 _line_progressed(1);
5817 goto mapflow_finish;
// '{': a flow map used as the key; no goto to mapflow_finish is visible in this branch
5819 else if(first ==
'{')
5824 _c4dbgp(
"mapflow[RKEY]: start child mapflow (!)");
5825 _handle_annotations_before_blck_key_scalar();
5827 m_evt_handler->begin_map_key_flow();
5829 _set_indentation(m_evt_handler->m_parent->indref);
5830 _line_progressed(1);
// ---- RKCL: the key was read; the colon is expected next
5838 else if(has_any(
RKCL))
5840 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
5841 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5842 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5843 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5844 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5845 _c4dbgpf(
"mapflow[RKCL]: '{}'", first);
// NOTE(review): the condition of this branch is not visible; per the debug
// message it is the ':' case -- confirm.
5848 _c4dbgp(
"mapflow[RKCL]: found the colon");
5850 _line_progressed(1);
// '}' right after the key: the val is set to an empty plain scalar and the map ends
5852 else if(first ==
'}')
5854 _c4dbgp(
"mapflow[RKCL]: end with missing val!");
5856 m_evt_handler->set_val_scalar_plain_empty();
5857 _line_progressed(1);
5859 goto mapflow_finish;
// ',' right after the key: empty plain val, then add a sibling for the next pair
5861 else if(first ==
',')
5863 _c4dbgp(
"mapflow[RKCL]: got comma. val is missing");
5864 m_evt_handler->set_val_scalar_plain_empty();
5865 m_evt_handler->add_sibling();
5867 _line_progressed(1);
// a '#' right after the comma is reported as an error
5868 if(m_evt_handler->m_curr->line_contents.rem.begins_with(
'#'))
5870 _c4err(
"parse error: invalid comment after comma");
// ---- RVAL: a val is expected
5878 else if(has_any(
RVAL))
5880 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
5881 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
5882 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
5883 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5884 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5885 _c4dbgpf(
"mapflow[RVAL]: '{}'", first);
// NOTE(review): the condition selecting this first branch is not visible;
// per the debug message it handles a single-quoted scalar -- confirm.
5889 _c4dbgp(
"mapflow[RVAL]: scanning single-quoted scalar");
5890 sc = _scan_scalar_squot();
5891 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5892 _handle_annotations_before_blck_val_scalar();
5893 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
// double-quoted val
5896 else if(first ==
'"')
5898 _c4dbgp(
"mapflow[RVAL]: scanning double-quoted scalar");
5899 sc = _scan_scalar_dquot();
5900 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5901 _handle_annotations_before_blck_val_scalar();
5902 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
// plain val
5906 else if(_scan_scalar_plain_map_flow(&sc))
5908 _c4dbgp(
"mapflow[RVAL]: plain scalar.");
5909 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5910 _handle_annotations_before_blck_val_scalar();
5911 m_evt_handler->set_val_scalar_plain(maybe_filtered);
// '[': begin a flow sequence as the val; leaves this handler
5914 else if(first ==
'[')
5916 _c4dbgp(
"mapflow[RVAL]: start val seqflow");
5918 _handle_annotations_before_blck_val_scalar();
5919 m_evt_handler->begin_seq_val_flow();
5920 _set_indentation(m_evt_handler->m_parent->indref);
5922 _line_progressed(1);
5923 goto mapflow_finish;
// '{': begin a nested flow map as the val; no goto to mapflow_finish is visible in this branch
5925 else if(first ==
'{')
5927 _c4dbgp(
"mapflow[RVAL]: start val mapflow");
5929 _handle_annotations_before_blck_val_scalar();
5930 m_evt_handler->begin_map_val_flow();
5931 _set_indentation(m_evt_handler->m_parent->indref);
5933 _line_progressed(1);
// '}' where a val was expected: set an empty plain val and end the map
5936 else if(first ==
'}')
5938 _c4dbgp(
"mapflow[RVAL]: end!");
5939 _handle_annotations_before_blck_val_scalar();
5940 m_evt_handler->set_val_scalar_plain_empty();
5941 _line_progressed(1);
5943 goto mapflow_finish;
// ',' where a val was expected: set an empty plain val
5945 else if(first ==
',')
5947 _c4dbgp(
"mapflow[RVAL]: empty val!");
5948 _handle_annotations_before_blck_val_scalar();
5949 m_evt_handler->set_val_scalar_plain_empty();
// '*': scan a reference and handle it as the val
// NOTE(review): the debug text below says "key ref" although this is the
// RVAL section and the reference goes to _handle_valref().
5953 else if(first ==
'*')
5955 csubstr ref = _scan_ref_map();
5956 _c4dbgpf(
"mapflow[RVAL]: key ref! {}", _prs(ref));
5957 _handle_valref(ref);
// '&': scan an anchor and add it to the pending anchors
5960 else if(first ==
'&')
5962 csubstr anchor = _scan_anchor();
5963 _c4dbgpf(
"mapflow[RVAL]: key anchor! {}", _prs(anchor));
5964 _add_annotation(&m_pending_anchors, anchor);
// '!': add a tag to the pending tags
// NOTE(review): the statement defining `tag` is not visible here.
5966 else if(first ==
'!')
5969 _c4dbgpf(
"mapflow[RVAL]: tag! {}", _prs(tag));
5970 _add_annotation(&m_pending_tags, tag);
// ---- RNXT: a separator or terminator is expected after a key/val pair
5977 else if(has_any(
RNXT))
5979 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
5980 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
5981 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
5982 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
5983 _c4dbgpf(
"mapflow[RNXT]: '{}'", m_evt_handler->m_curr->line_contents.rem.str[0]);
// ',': add a sibling for the next key/val pair
5984 if(m_evt_handler->m_curr->line_contents.rem.begins_with(
','))
5986 _c4dbgp(
"mapflow[RNXT]: expect next keyval");
5987 m_evt_handler->add_sibling();
5989 _line_progressed(1);
// a '#' right after the comma is reported as an error
5990 if(m_evt_handler->m_curr->line_contents.rem.begins_with(
'#'))
5992 _c4err(
"parse error: invalid comment after comma");
// '}': end of the map
5995 else if(m_evt_handler->m_curr->line_contents.rem.begins_with(
'}'))
5997 _c4dbgp(
"mapflow[RNXT]: end!");
5998 _line_progressed(1);
6000 goto mapflow_finish;
// ---- QMRK: reading the key that follows an explicit '?' marker
6007 else if(has_any(
QMRK))
6009 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
6010 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
6011 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
6012 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
6013 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
6014 _c4dbgpf(
"mapflow[QMRK]: '{}'", first);
// NOTE(review): the condition selecting this first branch is not visible;
// per the debug message it handles a single-quoted scalar -- confirm.
6018 _c4dbgp(
"mapflow[QMRK]: scanning single-quoted scalar");
6019 sc = _scan_scalar_squot();
6020 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
6021 _handle_annotations_before_blck_key_scalar();
6022 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
// double-quoted key
6025 else if(first ==
'"')
6027 _c4dbgp(
"mapflow[QMRK]: scanning double-quoted scalar");
6028 sc = _scan_scalar_dquot();
6029 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
6030 _handle_annotations_before_blck_key_scalar();
6031 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
// plain key
6035 else if(_scan_scalar_plain_map_flow(&sc))
6037 _c4dbgp(
"mapflow[QMRK]: plain scalar");
6038 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
6039 _handle_annotations_before_blck_key_scalar();
6040 m_evt_handler->set_key_scalar_plain(maybe_filtered);
// ':' with no key read yet: the key is set to an empty plain scalar
6043 else if(first ==
':')
6045 _c4dbgp(
"mapflow[QMRK]: setting empty key");
6046 _handle_annotations_before_blck_key_scalar();
6047 m_evt_handler->set_key_scalar_plain_empty();
6049 _line_progressed(1);
6050 _maybe_skip_whitespace_tokens();
// '}' with no key read yet: both key and val are set empty and the map ends
6052 else if(first ==
'}')
6054 _c4dbgp(
"mapflow[QMRK]: end!");
6055 _handle_annotations_before_blck_key_scalar();
6056 m_evt_handler->set_key_scalar_plain_empty();
6057 m_evt_handler->set_val_scalar_plain_empty();
6059 _line_progressed(1);
6060 goto mapflow_finish;
// ',' with no key read yet: both key and val are set empty
6062 else if(first ==
',')
6064 _c4dbgp(
"mapflow[QMRK]: empty key+val!");
6065 _handle_annotations_before_blck_key_scalar();
6066 m_evt_handler->set_key_scalar_plain_empty();
6067 m_evt_handler->set_val_scalar_plain_empty();
// '&': scan an anchor and add it to the pending anchors
6070 else if(first ==
'&')
6072 csubstr anchor = _scan_anchor();
6073 _c4dbgpf(
"mapflow[QMRK]: key anchor! {}", _prs(anchor));
6074 _add_annotation(&m_pending_anchors, anchor);
// '*': scan a reference and handle it as the key
6076 else if(first ==
'*')
6078 csubstr ref = _scan_ref_map();
6079 _c4dbgpf(
"mapflow[QMRK]: key ref! {}", _prs(ref));
6080 _handle_keyref(ref);
// '[': a flow sequence used as the key; leaves this handler
6083 else if(first ==
'[')
6088 _c4dbgp(
"mapflow[QMRK]: start child seqflow (!)");
6090 _handle_annotations_before_blck_key_scalar();
6091 m_evt_handler->begin_seq_key_flow();
6093 _set_indentation(m_evt_handler->m_parent->indref);
6094 _line_progressed(1);
6095 goto mapflow_finish;
// '{': a flow map used as the key; no goto to mapflow_finish is visible in this branch
6097 else if(first ==
'{')
6102 _c4dbgp(
"mapflow[QMRK]: start child mapflow (!)");
6104 _handle_annotations_before_blck_key_scalar();
6105 m_evt_handler->begin_map_key_flow();
6106 _set_indentation(m_evt_handler->m_parent->indref);
6108 _line_progressed(1);
// '!': add a tag to the pending tags
// NOTE(review): the statement defining `tag` is not visible here.
6111 else if(first ==
'!')
6114 _c4dbgpf(
"mapflow[QMRK]: tag! {}", _prs(tag));
6115 _add_annotation(&m_pending_tags, tag);
// ---- end of one step: check for end of line and end of file; the error
// below reports a map that was never closed
6124 _c4dbgt(
"mapflow: go again", 0);
6125 if(_finished_line())
6127 if(C4_LIKELY(!_finished_file()))
6135 _c4err(
"missing terminating }");
6141 _c4dbgp(
"mapflow: finish");
// _handle_seq_block(): parse step used while the parser is inside a block
// sequence. The assertions at the top require the current state to have RSEQ
// and RBLCK set and exactly one of RVAL/RNXT set.
// NOTE(review): only part of this function's lines are visible here (braces,
// some branch conditions, some local declarations and some state updates are
// not shown); the comments below describe only the statements that are visible.
6147template<
class EventHandler>
6148void ParseEngine<EventHandler>::_handle_seq_block()
6151 _c4dbgpf(
"handle_seq_block: seq_id={} node_id={} level={} indent={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
6153 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RSEQ), m_evt_handler->m_curr->pos);
6154 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RBLCK), m_evt_handler->m_curr->pos);
6155 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RVAL|
RNXT), m_evt_handler->m_curr->pos);
// RVAL and RNXT are mutually exclusive
6156 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(
RVAL) + has_any(
RNXT)), m_evt_handler->m_curr->pos);
// skip a comment if there is one, then test whether anything remains on the line
6158 _maybe_skip_comment_strict();
6159 if(!m_evt_handler->m_curr->line_contents.rem.len)
// ---- RVAL: a value is expected
6164 _c4dbgpf(
"seqblck[RVAL]: col={}", m_evt_handler->m_curr->pos.col);
6165 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
// at the beginning of a line, the line's indentation decides what happens
6166 if(m_evt_handler->m_curr->at_line_beginning())
6168 _c4dbgpf(
"seqblck[RVAL]: indref={} indentation={}", m_evt_handler->m_curr->indref+1, m_evt_handler->m_curr->line_contents.indentation);
// indentation_ge_extra(): consume the line's indentation
6169 if(m_evt_handler->m_curr->indentation_ge_extra())
6171 _c4dbgpf(
"seqblck[RVAL]: skip {} from indentation", m_evt_handler->m_curr->line_contents.indentation);
6172 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6173 if(!m_evt_handler->m_curr->line_contents.rem.len)
// indentation_lt_extra(): no val follows on this line for the current entry
6176 else if(m_evt_handler->m_curr->indentation_lt_extra())
6178 _c4dbgp(
"seqblck[RVAL]: smaller indentation than RVAL!");
// indentation_eq(): the val is set to an empty plain scalar
// NOTE(review): this debug message repeats the one just above.
6179 if(m_evt_handler->m_curr->indentation_eq())
6181 _c4dbgp(
"seqblck[RVAL]: smaller indentation than RVAL!");
6182 _handle_annotations_before_blck_val_scalar();
6183 m_evt_handler->set_val_scalar_plain_empty();
// NOTE(review): the branch structure here is not visible; presumably the
// following lines are the alternative to indentation_eq() -- confirm. They
// assert indentation_lt(), pop out of the block seq and leave the handler.
6189 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_lt(), m_evt_handler->m_curr->pos);
6190 _c4dbgp(
"seqblck[RVAL]: smaller indentation!");
6191 _handle_indentation_pop_from_block_seq();
6192 goto seqblck_finish;
// indentation == npos: per the debug message an empty line; consume the remainder
6195 else if(m_evt_handler->m_curr->line_contents.indentation ==
npos)
6197 _c4dbgp(
"seqblck[RVAL]: empty line!");
6198 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
// the remaining line must be non-empty from here on
6202 _RYML_ASSERT_PARSE_(callbacks(), m_evt_handler->m_curr->line_contents.rem.len, m_evt_handler->m_curr->pos);
// skip leading whitespace; a result of npos is reported as "whitespace only"
6203 const size_t startmark = _handle_block_skip_leading_whitespace();
6204 _c4dbgpf(
"seqblck[RVAL]: startmark={}", startmark);
6205 if(startmark ==
npos)
6207 _c4dbgp(
"seqblck[RVAL]: whitespace only");
// capture the first character plus the line and column where this val starts
6210 const size_t tabmark = _handle_block_get_whitespace_mark();
6211 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
6212 _c4dbgpf(
"seqblck[RVAL]: first='{}' currcol={}", first, m_evt_handler->m_curr->pos.col - 1);
6213 const size_t startline = m_evt_handler->m_curr->pos.line;
// the assertion guards the unsigned subtraction on the next line
6214 _c4assert(m_evt_handler->m_curr->line_contents.current_col() >= m_bom_len);
6215 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col() - m_bom_len;
// NOTE(review): the condition selecting this first branch is not visible;
// per the debug message it handles a single-quoted scalar -- confirm.
6219 _c4dbgp(
"seqblck[RVAL]: single-quoted scalar");
6220 sc = _scan_scalar_squot();
// if _maybe_scan_following_colon() returns false, the scalar is the val
6221 if(!_maybe_scan_following_colon())
6223 _c4dbgp(
"seqblck[RVAL]: set as val");
6224 _handle_annotations_before_blck_val_scalar();
6225 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
6226 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
// otherwise a block map is begun as the val and the scalar becomes its key
6231 _c4dbgp(
"seqblck[RVAL]: start mapblck, set scalar as key");
6232 _handle_block_check_leading_tabs(startmark);
6234 _handle_annotations_before_start_mapblck(startline);
6236 m_evt_handler->begin_map_val_block();
6237 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6238 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
6239 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6241 _maybe_skip_whitespace_tokens();
6242 goto seqblck_finish;
// double-quoted scalar: same val-or-key decision as above
6245 else if(first ==
'"')
6247 _c4dbgp(
"seqblck[RVAL]: double-quoted scalar");
6248 sc = _scan_scalar_dquot();
6249 if(!_maybe_scan_following_colon())
6251 _c4dbgp(
"seqblck[RVAL]: set as val");
6252 _handle_annotations_before_blck_val_scalar();
6253 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
6254 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
6259 _c4dbgp(
"seqblck[RVAL]: start mapblck, set scalar as key");
6261 _handle_block_check_leading_tabs(startmark);
6262 _handle_annotations_before_start_mapblck(startline);
6264 m_evt_handler->begin_map_val_block();
6265 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6266 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
6267 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6269 _maybe_skip_whitespace_tokens();
6270 goto seqblck_finish;
// '|': block-literal scalar, scanned with indref + 1 as the second argument
// NOTE(review): the declaration of `sb` is not visible here.
6276 else if(first ==
'|')
6278 _c4dbgp(
"seqblck[RVAL]: block-literal scalar");
6280 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6281 _handle_annotations_before_blck_val_scalar();
6282 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
6283 m_evt_handler->set_val_scalar_literal(maybe_filtered);
// '>': block-folded scalar, scanned the same way
6286 else if(first ==
'>')
6288 _c4dbgp(
"seqblck[RVAL]: block-folded scalar");
6290 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6291 _handle_annotations_before_blck_val_scalar();
6292 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
6293 m_evt_handler->set_val_scalar_folded(maybe_filtered);
// plain scalar: taken when _scan_scalar_plain_seq_blck() returns true
6296 else if(_scan_scalar_plain_seq_blck(&sc))
6298 _c4dbgp(
"seqblck[RVAL]: plain scalar.");
6299 if(!_maybe_scan_following_colon())
6301 _c4dbgp(
"seqblck[RVAL]: set as val");
6302 _handle_annotations_before_blck_val_scalar();
6303 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
6304 m_evt_handler->set_val_scalar_plain(maybe_filtered);
// a colon followed: the scalar is the key of a new block map; the key must
// start at a column greater than the seq's indref
6309 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref !=
npos, m_evt_handler->m_curr->pos);
6310 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, startindent > m_evt_handler->m_curr->indref, m_evt_handler->m_curr->pos);
6311 _c4dbgp(
"seqblck[RVAL]: start mapblck, set scalar as key");
// unlike the quoted branches above, tabmark is also passed here
6312 _handle_block_check_leading_tabs(startmark, tabmark);
6314 _handle_annotations_before_start_mapblck(startline);
6316 m_evt_handler->begin_map_val_block();
6317 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6318 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
6319 m_evt_handler->set_key_scalar_plain(maybe_filtered);
6321 _maybe_skip_whitespace_tokens();
6322 goto seqblck_finish;
// '[': begin a flow sequence as the val; its indentation is the parent
// state's indref plus one
6325 else if(first ==
'[')
6327 _c4dbgp(
"seqblck[RVAL]: start child seqflow");
6329 _handle_annotations_before_blck_val_scalar();
6330 m_evt_handler->begin_seq_val_flow();
6332 _line_progressed(1);
6333 _set_indentation(m_evt_handler->m_parent->indref + 1u);
6334 goto seqblck_finish;
// '{': begin a flow map as the val, with the same indentation rule
6336 else if(first ==
'{')
6338 _c4dbgp(
"seqblck[RVAL]: start child mapflow");
6340 _handle_annotations_before_blck_val_scalar();
6341 m_evt_handler->begin_map_val_flow();
6343 _line_progressed(1);
6344 _set_indentation(m_evt_handler->m_parent->indref + 1u);
6345 goto seqblck_finish;
// '-': begin a nested block sequence as the val; it must start at a column
// greater than the current indref, and that column becomes its indentation
6347 else if(first ==
'-')
6349 _c4dbgp(
"seqblck[RVAL]: dash");
6350 _handle_block_check_leading_tabs(startmark);
6351 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref !=
npos, m_evt_handler->m_curr->pos);
6352 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, startindent > m_evt_handler->m_curr->indref, m_evt_handler->m_curr->pos);
6353 _c4dbgp(
"seqblck[RVAL]: start child seqblck");
6354 _RYML_ASSERT_PARSE_(this->callbacks(), startindent > m_evt_handler->m_curr->indref, m_evt_handler->m_curr->pos);
6356 _handle_annotations_before_blck_val_scalar();
6357 m_evt_handler->begin_seq_val_block();
6359 _set_indentation(startindent);
6361 _line_progressed(1);
// ':' where a val was expected: begin a block map as the val with an empty plain key
6363 else if(first ==
':')
6365 _c4dbgp(
"seqblck[RVAL]: start child mapblck with empty key");
6367 _handle_annotations_before_start_mapblck(startline);
6369 m_evt_handler->begin_map_val_block();
6370 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6371 m_evt_handler->set_key_scalar_plain_empty();
6373 _line_progressed(1);
6374 _maybe_skip_whitespace_tokens();
6375 goto seqblck_finish;
// '&': scan an anchor and add it to the pending anchors together with the
// column and line where it started
6377 else if(first ==
'&')
6379 const csubstr anchor = _scan_anchor();
6380 _c4dbgpf(
"seqblck[RVAL]: anchor! {}", _prs(anchor));
6383 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
// '*': a reference is either the val or, when a colon follows, the key of a
// new block map
6385 else if(first ==
'*')
6387 csubstr ref = _scan_ref_seq();
6388 _c4dbgpf(
"seqblck[RVAL]: ref! {}", _prs(ref));
6389 if(!_maybe_scan_following_colon())
6391 _c4dbgp(
"seqblck[RVAL]: set ref as val!");
6392 _handle_valref(ref);
6397 _c4dbgp(
"seqblck[RVAL]: ref is key of map");
6399 _handle_annotations_before_start_mapblck(startline);
6400 m_evt_handler->begin_map_val_block();
6401 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6402 _handle_keyref(ref);
6404 _set_indentation(startindent);
6405 _maybe_skip_whitespace_tokens();
6406 goto seqblck_finish;
// '!': add a tag to the pending tags together with its start column and line
// NOTE(review): the statement defining `tag` is not visible here.
6409 else if(first ==
'!')
6412 _c4dbgpf(
"seqblck[RVAL]: val tag! {}", _prs(tag));
6415 _add_annotation(&m_pending_tags, tag, startindent, startline);
// '?': begin a block map as the val (explicit key per the debug message)
6417 else if(first ==
'?')
6419 _c4dbgp(
"seqblck[RVAL]: start child mapblck, explicit key");
6421 m_evt_handler->begin_map_val_block();
6423 _set_indentation(startindent);
6424 _line_progressed(1);
6425 _maybe_skipchars(
' ');
// if what follows may be a block seq token, the key itself is a block sequence
6426 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
6428 _c4dbgp(
"seqblck[RVAL]: seqblck starts after ?");
6430 m_evt_handler->begin_seq_key_block();
6432 _save_indentation();
6433 _line_progressed(1);
6434 _maybe_skipchars(
' ');
6436 goto seqblck_finish;
// ---- RNXT: the next entry (or the end of this seq) is expected
6445 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT), m_evt_handler->m_curr->pos);
6446 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
6450 _c4dbgpf(
"seqblck[RNXT]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
6451 if(C4_LIKELY(m_evt_handler->m_curr->at_line_beginning()))
6453 _c4dbgp(
"seqblck[RNXT]: at line begin");
// indentation_ge(): consume indref characters from the line
6454 if(m_evt_handler->m_curr->indentation_ge())
6456 _c4dbgpf(
"seqblck[RNXT]: skip {} from indref", m_evt_handler->m_curr->indref);
6457 _line_progressed(m_evt_handler->m_curr->indref);
6458 if(!m_evt_handler->m_curr->line_contents.rem.len)
// indentation_lt(): pop out of the block seq
// NOTE(review): the condition separating the "still seqblck" and "no longer
// seqblck" outcomes below is not visible here.
6461 else if(m_evt_handler->m_curr->indentation_lt())
6463 _c4dbgp(
"seqblck[RNXT]: smaller indentation!");
6464 _handle_indentation_pop_from_block_seq();
6467 _c4dbgp(
"seqblck[RNXT]: still seqblck!");
6468 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RNXT), m_evt_handler->m_curr->pos);
6469 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6470 if(!m_evt_handler->m_curr->line_contents.rem.len)
6475 _c4dbgp(
"seqblck[RNXT]: no longer seqblck!");
6476 goto seqblck_finish;
// indentation == npos: per the debug message a blank line; consume the remainder
6479 else if(m_evt_handler->m_curr->line_contents.indentation ==
npos)
6481 _c4dbgpf(
"seqblck[RNXT]: blank line, len={}", m_evt_handler->m_curr->line_contents.rem);
6482 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
6483 if(!m_evt_handler->m_curr->line_contents.rem.len)
// not at the beginning of a line: test for a leading space or tab
6489 _c4dbgp(
"seqblck[RNXT]: NOT at line begin");
6490 if(!m_evt_handler->m_curr->line_contents.rem.begins_with_any(
" \t"))
6497 if(!m_evt_handler->m_curr->line_contents.rem.len)
6499 _c4dbgp(
"seqblck[RNXT]: again");
// dispatch on the first remaining character (asserted to exist)
6507 _c4assert(m_evt_handler->m_curr->line_contents.rem.len > 0);
6508 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
6509 _c4dbgpf(
"seqblck[RNXT]: '{}' node_id={}", _c4prc(first), m_evt_handler->m_curr->node_id);
// NOTE(review): the condition of the enclosing branch is not visible.
// The test below is true unless indref is 0, the line's indentation is 0 and
// the remainder is a doc-begin token.
6512 if(m_evt_handler->m_curr->indref > 0
6513 || m_evt_handler->m_curr->line_contents.indentation > 0
6514 || !_is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem))
// a possible block seq token: add a sibling for the next val
6516 if(C4_LIKELY(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem)))
6518 _c4dbgp(
"seqblck[RNXT]: expect next val");
6520 m_evt_handler->add_sibling();
6521 _line_progressed(1);
// per the debug message, a new document starts here; 3 characters are consumed
6530 _c4dbgp(
"seqblck[RNXT]: start doc");
6531 _start_doc_suddenly();
6532 _line_progressed(3);
6533 _maybe_skip_whitespace_tokens();
6534 goto seqblck_finish;
// ':' with a parent state that has RMAP set: this seq was the '?' key of the
// parent map, so the seq is ended
6537 else if(first ==
':')
6543 if(C4_LIKELY(m_evt_handler->m_parent && (m_evt_handler->m_parent->flags &
RMAP)))
6545 _c4dbgp(
"seqblck[RNXT]: actually this seq was '?' key of parent map");
6546 m_evt_handler->end_seq_block();
6547 goto seqblck_finish;
// '.': possibly a doc-end token; if so, end the document and consume 3 characters
6554 else if(first ==
'.')
6556 _c4dbgp(
"seqblck[RNXT]: maybe doc?");
6557 if(_is_doc_end_token(m_evt_handler->m_curr->line_contents.rem))
6559 _c4dbgp(
"seqblck[RNXT]: end doc");
6560 _end_doc_suddenly();
6561 _line_progressed(3);
6562 _maybe_skip_whitespace_tokens();
6563 _check_doc_end_tokens();
6564 goto seqblck_finish;
// NOTE(review): the condition of the enclosing branch is not visible.
// If the parent state is a block map with the same indref as this seq, the
// seq is ended (the debug message calls it an "indentless seq"): pop to the
// parent and add a sibling there.
6575 _print_state_stack();
6577 if(m_evt_handler->m_parent
6578 && has_all(
RMAP|
RBLCK, m_evt_handler->m_parent)
6579 && m_evt_handler->m_curr->indref == m_evt_handler->m_parent->indref)
6581 _c4dbgpf(
"seqblck[RNXT]: end indentless seq, go to parent={}. node={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id);
6582 _RYML_ASSERT_PARSE_(this->callbacks(), m_evt_handler->m_curr != m_evt_handler->m_parent, m_evt_handler->m_curr->pos);
6583 _handle_indentation_pop(m_evt_handler->m_parent);
6584 _RYML_ASSERT_PARSE_(this->callbacks(), has_all(
RMAP|
RBLCK), m_evt_handler->m_curr->pos);
6585 m_evt_handler->add_sibling();
6587 goto seqblck_finish;
// tab: find the first non-tab character; the visible statement consumes the
// rest of the line
// NOTE(review): the statements that use `pos` are not visible here.
6589 else if(first ==
'\t')
6591 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(
'\t');
6594 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
// ---- end of one step: check for end of line; at end of file leave the handler
6603 _c4dbgt(
"seqblck: go again", 0);
6604 if(_finished_line())
6609 if(_finished_file())
6611 _c4dbgp(
"seqblck: finish!");
6613 goto seqblck_finish;
6620 _c4dbgp(
"seqblck: finish");
6626template<
class EventHandler>
6627void ParseEngine<EventHandler>::_handle_map_block()
6630 _c4dbgpf(
"handle_map_block: map_id={} node_id={} level={} indref={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
6634 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP), m_evt_handler->m_curr->pos);
6635 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RBLCK), m_evt_handler->m_curr->pos);
6636 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY|
RKCL|
RVAL|
RNXT|
QMRK), m_evt_handler->m_curr->pos);
6637 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(
RKEY) + has_any(
RKCL) + has_any(
RVAL) + has_any(
RNXT) + has_any(
QMRK)), m_evt_handler->m_curr->pos);
6639 _maybe_skip_comment();
6640 if(!m_evt_handler->m_curr->line_contents.rem.len)
6645 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
6646 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
6647 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
6648 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
6652 if(m_evt_handler->m_curr->at_line_beginning())
6654 if(m_evt_handler->m_curr->indentation_eq())
6656 _c4dbgpf(
"mapblck[RKEY]: skip {} from indref", m_evt_handler->m_curr->indref);
6657 _line_progressed(m_evt_handler->m_curr->indref);
6658 if(!m_evt_handler->m_curr->line_contents.rem.len)
6661 else if(m_evt_handler->m_curr->indentation_lt())
6663 _c4dbgp(
"mapblck[RKEY]: smaller indentation!");
6664 _handle_indentation_pop_from_block_map();
6665 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6668 _c4dbgp(
"mapblck[RKEY]: still mapblck!");
6669 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(
RKEY), m_evt_handler->m_curr->pos);
6670 if(!m_evt_handler->m_curr->line_contents.rem.len)
6675 _c4dbgp(
"mapblck[RKEY]: no longer mapblck!");
6676 goto mapblck_finish;
6681 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_gt(), m_evt_handler->m_curr->pos);
6682 _c4err(
"invalid indentation");
6688 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
6689 const size_t startline = m_evt_handler->m_curr->pos.line;
6690 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
6691 _c4dbgpf(
"mapblck[RKEY]: '{}'", _c4prc(first));
6695 _c4dbgp(
"mapblck[RKEY]: scanning single-quoted scalar");
6696 sc = _scan_scalar_squot();
6697 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
6698 _handle_annotations_before_blck_key_scalar();
6699 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6701 if(!_maybe_scan_following_colon())
6702 _c4err(
"could not find ':' colon after key");
6704 _maybe_skip_whitespace_tokens();
6706 else if(first ==
'"')
6708 _c4dbgp(
"mapblck[RKEY]: scanning double-quoted scalar");
6709 sc = _scan_scalar_dquot();
6710 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
6711 _handle_annotations_before_blck_key_scalar();
6712 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6714 if(!_maybe_scan_following_colon())
6715 _c4err(
"could not find ':' colon after key");
6717 _maybe_skip_whitespace_tokens();
6721 else if(C4_UNLIKELY(first ==
'|'))
6723 _c4err(
"block map: literal keys must be enclosed in '?'");
6725 else if(C4_UNLIKELY(first ==
'>'))
6727 _c4err(
"block map: folded keys must be enclosed in '?'");
6729 else if(_scan_scalar_plain_map_blck(&sc))
6731 _c4dbgp(
"mapblck[RKEY]: plain scalar");
6732 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
6733 _handle_annotations_before_blck_key_scalar();
6734 m_evt_handler->set_key_scalar_plain(maybe_filtered);
6736 if(!_maybe_scan_following_colon())
6737 _c4err(
"could not find ':' colon after key");
6739 _maybe_skip_whitespace_tokens();
6741 else if(first ==
'?')
6743 _c4dbgp(
"mapblck[RKEY]: key token!");
6745 _line_progressed(1);
6746 _maybe_skipchars(
' ');
6747 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
6749 _c4dbgp(
"mapblck[RKEY]: seqblck starts after ?");
6751 m_evt_handler->begin_seq_key_block();
6753 _save_indentation();
6754 _line_progressed(1);
6755 _maybe_skipchars(
' ');
6756 goto mapblck_finish;
6760 else if(first ==
':')
6762 _c4dbgp(
"mapblck[RKEY]: setting empty key");
6763 _handle_annotations_before_blck_key_scalar();
6764 m_evt_handler->set_key_scalar_plain_empty();
6766 _line_progressed(1);
6768 _maybe_skip_whitespace_tokens();
6770 else if(first ==
'*')
6772 csubstr ref = _scan_ref_map();
6773 _c4dbgpf(
"mapblck[RKEY]: key ref! {}", _prs(ref));
6774 _handle_keyref(ref);
6776 if(!_maybe_scan_following_colon())
6777 _c4err(
"could not find ':' colon after key");
6779 _maybe_skip_whitespace_tokens();
6781 else if(first ==
'&')
6783 csubstr anchor = _scan_anchor();
6784 _c4dbgpf(
"mapblck[RKEY]: key anchor! {}", _prs(anchor));
6785 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
6787 else if(first ==
'!')
6790 _c4dbgpf(
"mapblck[RKEY]: key tag! {}", _prs(tag));
6791 _add_annotation(&m_pending_tags, tag, startindent, startline);
6793 else if(first ==
'[')
6798 _c4dbgp(
"mapblck[RKEY]: start child seqflow (!)");
6799 _handle_annotations_before_blck_key_scalar();
6800 m_evt_handler->begin_seq_key_flow();
6802 _line_progressed(1);
6803 _set_indentation(startindent);
6804 goto mapblck_finish;
6806 else if(first ==
'{')
6811 _c4dbgp(
"mapblck[RKEY]: start child mapflow (!)");
6812 _handle_annotations_before_blck_key_scalar();
6813 m_evt_handler->begin_map_key_flow();
6815 _line_progressed(1);
6816 _set_indentation(startindent);
6817 goto mapblck_finish;
6819 else if(first ==
'-')
6821 _c4dbgp(
"mapblck[RKEY]: maybe doc?");
6822 if(m_evt_handler->m_curr->line_contents.indentation == 0 && _is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem))
6824 _c4dbgp(
"mapblck[RKEY]: end+start doc");
6825 _start_doc_suddenly();
6826 _line_progressed(3);
6827 _maybe_skip_whitespace_tokens();
6828 goto mapblck_finish;
6835 else if(first ==
'.')
6837 _c4dbgp(
"mapblck[RKEY]: maybe end doc?");
6838 if(m_evt_handler->m_curr->line_contents.indentation == 0 && _is_doc_end_token(m_evt_handler->m_curr->line_contents.rem))
6840 _c4dbgp(
"mapblck[RKEY]: end doc");
6841 _end_doc_suddenly();
6842 _line_progressed(3);
6843 _maybe_skip_whitespace_tokens();
6844 _check_doc_end_tokens();
6845 goto mapblck_finish;
6857 else if(has_any(
RVAL))
6859 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
6860 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
6861 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
6862 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
6866 if(m_evt_handler->m_curr->at_line_beginning())
6868 _c4dbgpf(
"mapblck[RVAL]: indref={} indentation={}", m_evt_handler->m_curr->indref+1, m_evt_handler->m_curr->line_contents.indentation);
6869 m_evt_handler->m_curr->more_indented =
false;
6870 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref !=
npos, m_evt_handler->m_curr->pos);
6871 if(m_evt_handler->m_curr->indentation_eq_extra())
6873 _c4dbgp(
"mapblck[RVAL]: skip indentation!");
6874 _line_progressed(m_evt_handler->m_curr->indref + 1);
6875 if(!m_evt_handler->m_curr->line_contents.rem.len)
6878 else if(m_evt_handler->m_curr->indentation_gt_extra())
6880 _c4dbgp(
"mapblck[RVAL]: more indented!");
6881 m_evt_handler->m_curr->more_indented =
true;
6882 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6883 if(!m_evt_handler->m_curr->line_contents.rem.len)
6886 else if(m_evt_handler->m_curr->indentation_lt_extra())
6888 if(m_evt_handler->m_curr->indentation_eq())
6890 _c4dbgp(
"mapblck[RVAL]: smaller indentation than RVAL!");
6892 if(!_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem.sub(m_evt_handler->m_curr->line_contents.indentation)))
6894 _c4dbgp(
"mapblck[RVAL]: smaller indentation than RVAL!");
6895 _handle_annotations_before_blck_val_scalar();
6896 m_evt_handler->set_val_scalar_plain_empty();
6903 _c4dbgp(
"mapblck[RVAL]: smaller indentation than RKEY!");
6904 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_lt(), m_evt_handler->m_curr->pos);
6905 _handle_indentation_pop_from_block_map();
6908 _c4dbgp(
"mapblck[RVAL]: still mapblck!");
6909 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6912 _c4dbgp(
"mapblck[RVAL]: speculatively expect next keyval");
6913 m_evt_handler->add_sibling();
6920 _c4dbgp(
"mapblck[RVAL]: no longer mapblck!");
6921 goto mapblck_finish;
6926 const size_t startcol = _handle_block_skip_leading_whitespace();
6927 if(startcol ==
npos)
6929 _c4dbgp(
"mapblck[RVAL]: whitespace only");
6932 const size_t tabmark = _handle_block_get_whitespace_mark();
6936 _c4assert(m_evt_handler->m_curr->line_contents.rem.len);
6937 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
6938 const size_t startline = m_evt_handler->m_curr->pos.line;
6939 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
6940 _c4dbgpf(
"mapblck[RVAL]: '{}'", _c4prc(first));
6944 _c4dbgp(
"mapblck[RVAL]: scanning single-quoted scalar");
6945 sc = _scan_scalar_squot();
6946 if(!_maybe_scan_following_colon())
6948 _c4dbgp(
"mapblck[RVAL]: set as val");
6949 _handle_annotations_before_blck_val_scalar();
6950 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
6951 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
6957 _c4assert(startindent > m_evt_handler->m_curr->indref);
6958 _c4dbgp(
"mapblck[RVAL]: start new block map, set scalar as key");
6959 _handle_block_check_leading_tabs(startcol);
6960 _handle_annotations_before_start_mapblck(startline);
6963 m_evt_handler->begin_map_val_block();
6964 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6965 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
6966 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6967 _maybe_skip_whitespace_tokens();
6972 else if(first ==
'"')
6974 _c4dbgp(
"mapblck[RVAL]: scanning double-quoted scalar");
6975 sc = _scan_scalar_dquot();
6976 if(!_maybe_scan_following_colon())
6978 _c4dbgp(
"mapblck[RVAL]: set as val");
6979 _handle_annotations_before_blck_val_scalar();
6980 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
6981 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
6987 _c4assert(startindent > m_evt_handler->m_curr->indref);
6988 _c4dbgp(
"mapblck[RVAL]: start new block map, set scalar as key");
6989 _handle_block_check_leading_tabs(startcol);
6990 _handle_annotations_before_start_mapblck(startline);
6993 m_evt_handler->begin_map_val_block();
6994 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6995 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
6996 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6997 _maybe_skip_whitespace_tokens();
7004 else if(first ==
'|')
7006 _c4dbgp(
"mapblck[RVAL]: scanning block-literal scalar");
7008 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
7009 _handle_annotations_before_blck_val_scalar();
7010 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
7011 m_evt_handler->set_val_scalar_literal(maybe_filtered);
7014 else if(first ==
'>')
7016 _c4dbgp(
"mapblck[RVAL]: scanning block-folded scalar");
7018 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
7019 _handle_annotations_before_blck_val_scalar();
7020 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
7021 m_evt_handler->set_val_scalar_folded(maybe_filtered);
7024 else if(_scan_scalar_plain_map_blck(&sc))
7026 _c4dbgp(
"mapblck[RVAL]: plain scalar.");
7027 if(!_maybe_scan_following_colon())
7029 _c4dbgp(
"mapblck[RVAL]: set as val");
7030 _handle_annotations_before_blck_val_scalar();
7031 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
7032 m_evt_handler->set_val_scalar_plain(maybe_filtered);
7038 _c4assert(startindent > m_evt_handler->m_curr->indref);
7039 _c4dbgpf(
"mapblck[RVAL]: start new block map, set scalar as key {}", m_evt_handler->m_curr->indref);
7040 _handle_block_check_leading_tabs(startcol, tabmark);
7042 _handle_annotations_before_start_mapblck(startline);
7044 m_evt_handler->begin_map_val_block();
7045 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7046 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
7047 m_evt_handler->set_key_scalar_plain(maybe_filtered);
7048 _maybe_skip_whitespace_tokens();
7053 else if(first ==
'-' && _is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7055 if(C4_UNLIKELY(!m_evt_handler->m_curr->at_first_token()))
7057 _c4dbgp(
"mapblck[RVAL]: start val seqblck");
7058 _handle_block_check_leading_tabs(startcol);
7060 _handle_annotations_before_blck_val_scalar();
7061 m_evt_handler->begin_seq_val_block();
7063 _set_indentation(startindent);
7064 _line_progressed(1);
7065 _maybe_skip_whitespace_tokens();
7066 goto mapblck_finish;
7068 else if(first ==
'[')
7070 _c4dbgp(
"mapblck[RVAL]: start val seqflow");
7072 _handle_annotations_before_blck_val_scalar();
7073 m_evt_handler->begin_seq_val_flow();
7075 _set_indentation(m_evt_handler->m_parent->indref + 1u);
7076 _line_progressed(1);
7077 goto mapblck_finish;
7079 else if(first ==
'{')
7081 _c4dbgp(
"mapblck[RVAL]: start val mapflow");
7083 _handle_annotations_before_blck_val_scalar();
7084 m_evt_handler->begin_map_val_flow();
7086 m_evt_handler->m_curr->scalar_col = m_evt_handler->m_curr->line_contents.indentation;
7087 _set_indentation(m_evt_handler->m_parent->indref + 1u);
7088 _line_progressed(1);
7089 goto mapblck_finish;
7091 else if(first ==
'*')
7093 csubstr ref = _scan_ref_map();
7094 _c4dbgpf(
"mapblck[RVAL]: ref! {}", _prs(ref));
7095 if(_maybe_scan_following_colon())
7097 _c4dbgp(
"mapblck[RVAL]: start child map, block");
7099 _handle_annotations_before_blck_val_scalar();
7100 m_evt_handler->begin_map_val_block();
7101 _handle_keyref(ref);
7102 _set_indentation(startindent);
7108 _c4dbgp(
"mapblck[RVAL]: was val ref");
7109 _handle_valref(ref);
7112 _maybe_skip_whitespace_tokens();
7114 else if(first ==
'&')
7116 csubstr anchor = _scan_anchor();
7117 _c4dbgpf(
"mapblck[RVAL]: anchor! {}", _prs(anchor));
7120 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
7122 else if(first ==
'!')
7125 _c4dbgpf(
"mapblck[RVAL]: tag! {}", _prs(tag));
7128 _add_annotation(&m_pending_tags, tag, startindent, startline);
7130 else if(first ==
'?')
7132 if(C4_UNLIKELY(!m_evt_handler->m_curr->at_first_token()))
7134 _c4dbgp(
"mapblck[RVAL]: start val mapblck");
7136 _handle_annotations_before_blck_val_scalar();
7137 m_evt_handler->begin_map_val_block();
7139 _set_indentation(startindent);
7140 _line_progressed(1);
7141 _maybe_skipchars(
' ');
7142 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7144 _c4dbgp(
"mapblck[RVAL]: seqblck starts after ?");
7146 m_evt_handler->begin_seq_key_block();
7148 _save_indentation();
7149 _line_progressed(1);
7150 _maybe_skipchars(
' ');
7151 goto mapblck_finish;
7155 else if(first ==
':')
7157 _c4dbgp(
"mapblck[RVAL]: start val mapblck");
7159 _handle_annotations_before_start_mapblck(startline);
7161 m_evt_handler->begin_map_val_block();
7162 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7163 m_evt_handler->set_key_scalar_plain_empty();
7166 _line_progressed(1);
7167 _maybe_skip_whitespace_tokens();
7175 else if(has_any(
RNXT))
7177 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
7178 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
7179 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
7180 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
7184 if(m_evt_handler->m_curr->at_line_beginning())
7186 _c4dbgpf(
"mapblck[RNXT]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
7187 if(m_evt_handler->m_curr->indentation_eq())
7189 _c4dbgpf(
"mapblck[RNXT]: skip {} from indref", m_evt_handler->m_curr->indref);
7190 _line_progressed(m_evt_handler->m_curr->indref);
7191 _c4dbgp(
"mapblck[RNXT]: speculatively expect next keyval");
7192 m_evt_handler->add_sibling();
7196 else if(m_evt_handler->m_curr->indentation_lt())
7198 _c4dbgp(
"mapblck[RNXT]: smaller indentation!");
7199 _handle_indentation_pop_from_block_map();
7202 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
7205 _c4dbgp(
"mapblck[RNXT]: speculatively expect next keyval");
7206 m_evt_handler->add_sibling();
7213 goto mapblck_finish;
7219 _c4dbgp(
"mapblck[RNXT]: NOT at line begin");
7220 if(!m_evt_handler->m_curr->line_contents.rem.begins_with_any(
" \t"))
7227 if(!m_evt_handler->m_curr->line_contents.rem.len)
7229 _c4dbgp(
"seqblck[RNXT]: again");
7237 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.len > 0, m_evt_handler->m_curr->pos);
7238 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
7239 _c4dbgpf(
"mapblck[RNXT]: '{}'", _c4prc(first));
7242 _c4dbgp(
"mapblck[RNXT]: skip spaces");
7243 _maybe_skip_whitespace_tokens();
7250 else if(has_any(
QMRK))
7252 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
7253 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKCL), m_evt_handler->m_curr->pos);
7254 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
7255 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
7256 if(_handle_map_block_qmrk())
7259 goto mapblck_finish;
7261 else if(has_any(
RKCL))
7263 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RKEY), m_evt_handler->m_curr->pos);
7264 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RVAL), m_evt_handler->m_curr->pos);
7265 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT), m_evt_handler->m_curr->pos);
7266 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
QMRK), m_evt_handler->m_curr->pos);
7267 if(_handle_map_block_rkcl())
7270 goto mapblck_finish;
7274 _c4dbgt(
"mapblck: again", 0);
7275 if(_finished_line())
7279 if(_finished_file())
7281 _c4dbgp(
"mapblck: file finished!");
7283 goto mapblck_finish;
7290 _c4dbgp(
"mapblck: finish");
// QMRK sub-state of the block-map handler; _handle_map_block() calls this when
// the QMRK flag is set and branches on the returned bool.
//
// At the start of a line it first reconciles the line indentation with indref,
// then dispatches on the first remaining character to emit a key (scalar,
// reference, nested container) or to record an anchor/tag annotation.
//
// NOTE(review): this listing is a sparse extraction (braces, some conditions,
// declarations and every return statement are not visible), so the meaning of
// the returned bool cannot be stated from here.
7297template<
class EventHandler>
7298bool ParseEngine<EventHandler>::_handle_map_block_qmrk()
// at line start the *_extra() predicates are used; the eq branch skips
// indref + 1 columns, so they presumably compare against indref + 1
7303 if(m_evt_handler->m_curr->at_line_beginning())
7305 _c4dbgpf(
"mapblck[QMRK]: at line beginning. ind={} indref={}", m_evt_handler->m_curr->line_contents.indentation, m_evt_handler->m_curr->indref);
7306 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.indentation !=
npos, m_evt_handler->m_curr->pos);
7307 if(m_evt_handler->m_curr->indentation_eq_extra())
7309 _c4dbgpf(
"mapblck[QMRK]: skip {} from indref", m_evt_handler->m_curr->indref + 1);
7310 _line_progressed(m_evt_handler->m_curr->indref + 1);
7311 if(!m_evt_handler->m_curr->line_contents.rem.len)
7315 else if(m_evt_handler->m_curr->indentation_gt_extra())
7317 _c4dbgp(
"mapblck[QMRK]: larger indentation !");
7318 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
7319 if(!m_evt_handler->m_curr->line_contents.rem.len)
// remaining case (asserted below): indentation smaller than indref + 1
7324 _c4dbgp(
"mapblck[QMRK]: smaller indentation!");
7325 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_lt_extra(), m_evt_handler->m_curr->pos);
7326 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.len > 0, m_evt_handler->m_curr->pos);
// same indentation as the map and not a '-': the '?' key ends here and
// is emitted as an empty plain scalar
7327 if(m_evt_handler->m_curr->indentation_eq()
7329 && m_evt_handler->m_curr->line_contents.rem.str[0] !=
'-')
7331 _c4dbgp(
"mapblck[QMRK]: QMRK finished!");
7332 _handle_annotations_before_blck_key_scalar();
7333 m_evt_handler->set_key_scalar_plain_empty();
7337 else if(m_evt_handler->m_curr->indentation_lt())
7339 _c4dbgp(
"mapblck[QMRK]: indentation pop!");
7340 _handle_indentation_pop_from_block_map();
7341 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
7344 _c4dbgp(
"mapblck[QMRK]: still mapblck!");
7349 _c4dbgp(
"mapblck[QMRK]: no longer mapblck!");
// dispatch on the first remaining character; start line/column are kept
// for annotations and for setting the indentation of nested containers
7358 _c4assert(m_evt_handler->m_curr->line_contents.rem.len);
7359 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
7360 const size_t startline = m_evt_handler->m_curr->pos.line;
7361 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
7362 _c4dbgpf(
"mapblck[QMRK]: '{}'", _c4prc(first));
// quoted scalar: without a following ':' it is the key itself; with one,
// a nested block map is started as the key and the scalar is its first key
7366 _c4dbgp(
"mapblck[QMRK]: scanning single-quoted scalar");
7367 sc = _scan_scalar_squot();
7368 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
7370 if(!_maybe_scan_following_colon())
7372 _c4dbgp(
"mapblck[QMRK]: set as key");
7373 _handle_annotations_before_blck_key_scalar();
7374 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7378 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), set scalar as key");
7379 _handle_annotations_before_start_mapblck_as_key();
7380 m_evt_handler->begin_map_key_block();
7381 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7382 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7383 _maybe_skip_whitespace_tokens();
7384 _set_indentation(startindent);
7389 else if(first ==
'"')
7391 _c4dbgp(
"mapblck[QMRK]: scanning double-quoted scalar");
7392 sc = _scan_scalar_dquot();
7393 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
7395 if(!_maybe_scan_following_colon())
7397 _c4dbgp(
"mapblck[QMRK]: set as key");
7398 _handle_annotations_before_blck_key_scalar();
7399 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7403 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), set scalar as key");
7404 _handle_annotations_before_start_mapblck_as_key();
7405 m_evt_handler->begin_map_key_block();
7406 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7407 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7408 _maybe_skip_whitespace_tokens();
7409 _set_indentation(startindent);
// block scalars are accepted as keys here: '|' literal, '>' folded
7414 else if(first ==
'|')
7416 _c4dbgp(
"mapblck[QMRK]: scanning block-literal scalar");
7418 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
7419 csubstr maybe_filtered = _maybe_filter_key_scalar_literal(sb);
7420 _handle_annotations_before_blck_key_scalar();
7421 m_evt_handler->set_key_scalar_literal(maybe_filtered);
7424 else if(first ==
'>')
7426 _c4dbgp(
"mapblck[QMRK]: scanning block-folded scalar");
7428 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
7429 csubstr maybe_filtered = _maybe_filter_key_scalar_folded(sb);
7430 _handle_annotations_before_blck_key_scalar();
7431 m_evt_handler->set_key_scalar_folded(maybe_filtered);
// plain scalar: same key-or-nested-map decision as for quoted scalars
7434 else if(_scan_scalar_plain_map_blck(&sc))
7436 _c4dbgp(
"mapblck[QMRK]: plain scalar");
7437 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
7439 if(!_maybe_scan_following_colon())
7441 _c4dbgp(
"mapblck[QMRK]: set as key");
7442 _handle_annotations_before_blck_key_scalar();
7443 m_evt_handler->set_key_scalar_plain(maybe_filtered);
7447 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), set scalar as key");
7448 _handle_annotations_before_start_mapblck_as_key();
7449 m_evt_handler->begin_map_key_block();
7450 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7451 m_evt_handler->set_key_scalar_plain(maybe_filtered);
7452 _maybe_skip_whitespace_tokens();
7453 _set_indentation(startindent);
// ':' : nested block map as key, with an empty first key
7458 else if(first ==
':')
7460 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), empty key");
7462 _handle_annotations_before_start_mapblck_as_key();
7463 m_evt_handler->begin_map_key_block();
7464 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7465 m_evt_handler->set_key_scalar_plain_empty();
7466 _line_progressed(1);
7467 _maybe_skip_whitespace_tokens();
7468 _set_indentation(startindent);
// '*': reference as key, or as first key of a nested block map when a
// ':' follows
7472 else if(first ==
'*')
7474 csubstr ref = _scan_ref_map();
7475 _c4dbgpf(
"mapblck[QMRK]: key ref! {}", _prs(ref));
7477 if(!_maybe_scan_following_colon())
7479 _c4dbgp(
"mapblck[QMRK]: set ref as key");
7480 _handle_keyref(ref);
7484 _c4dbgp(
"mapblck[QMRK]: start new block map as key (!), set ref as key");
7485 _handle_annotations_before_start_mapblck_as_key();
7486 m_evt_handler->begin_map_key_block();
7487 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7488 _handle_keyref(ref);
7489 _set_indentation(startindent);
7493 _maybe_skip_whitespace_tokens();
// '&' / '!': store anchor / tag as pending annotation
7495 else if(first ==
'&')
7497 csubstr anchor = _scan_anchor();
7498 _c4dbgpf(
"mapblck[QMRK]: key anchor! {}", _prs(anchor));
7499 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
7501 else if(first ==
'!')
7504 _c4dbgpf(
"mapblck[QMRK]: key tag! {}", _prs(tag));
7505 _add_annotation(&m_pending_tags, tag, startindent, startline);
// '-': either a block sequence used as key, or (asserted) a doc-begin token
7507 else if(first ==
'-')
7509 _c4dbgp(
"mapblck[QMRK]: maybe seq or doc?");
7510 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7512 _c4dbgp(
"mapblck[QMRK]: start child seqblck (!)");
7514 _handle_annotations_before_blck_key_scalar();
7515 m_evt_handler->begin_seq_key_block();
7517 _set_indentation(startindent);
7518 _line_progressed(1);
7522 _c4dbgp(
"mapblck[QMRK]: end+start doc");
7523 _c4assert(_is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem));
7524 _start_doc_suddenly();
7525 _line_progressed(3);
7527 _maybe_skip_whitespace_tokens();
// '[' / '{': flow container as key; indentation becomes parent indref + 1
7530 else if(first ==
'[')
7532 _c4dbgp(
"mapblck[QMRK]: start child seqflow (!)");
7534 _handle_annotations_before_blck_key_scalar();
7535 m_evt_handler->begin_seq_key_flow();
7537 _set_indentation(m_evt_handler->m_parent->indref + 1);
7538 _line_progressed(1);
7541 else if(first ==
'{')
7543 _c4dbgp(
"mapblck[QMRK]: start child mapflow (!)");
7545 _handle_annotations_before_blck_key_scalar();
7546 m_evt_handler->begin_map_key_flow();
7548 _set_indentation(m_evt_handler->m_parent->indref + 1);
7549 _line_progressed(1);
// another '?', asserted to be more indented than indref: a nested block
// map is started as the key, optionally followed by a block sequence
7552 else if(first ==
'?')
7554 _c4dbgpf(
"mapblck[QMRK]: another QMRK '?'. ind={} indref={}", startindent, m_evt_handler->m_curr->indref);
7555 _RYML_ASSERT_PARSE_(callbacks(), startindent > m_evt_handler->m_curr->indref, m_evt_handler->m_curr->pos);
7556 _c4dbgp(
"mapblck[QMRK]: ? indent gt - start child mapblck (!)");
7558 _handle_annotations_before_blck_key_scalar();
7559 m_evt_handler->begin_map_key_block();
7561 _set_indentation(startindent);
7563 _line_progressed(1);
7564 _maybe_skipchars(
' ');
7565 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7567 _c4dbgp(
"mapblck[QMRK]: seqblck starts after ?");
7569 m_evt_handler->begin_seq_key_block();
7571 _save_indentation();
7572 _line_progressed(1);
7573 _maybe_skipchars(
' ');
// RKCL sub-state of the block-map handler; _handle_map_block() calls this when
// the RKCL flag is set and branches on the returned bool.
//
// The visible branches look for the ':' that follows a key, and otherwise
// treat a new '?', a document token ('-' / '.'), or anything else as the end
// of a key whose value is empty.
//
// NOTE(review): this listing is a sparse extraction (braces, some conditions
// and every return statement are not visible), so the meaning of the returned
// bool cannot be stated from here.
7588template<
class EventHandler>
7589bool ParseEngine<EventHandler>::_handle_map_block_rkcl()
// at line start: equal indentation is skipped, smaller indentation is an error
7594 if(m_evt_handler->m_curr->at_line_beginning())
7596 if(m_evt_handler->m_curr->indentation_eq())
7598 _c4dbgpf(
"mapblck[RKCL]: skip {} from indref", m_evt_handler->m_curr->indref);
7599 _line_progressed(m_evt_handler->m_curr->indref);
7600 if(!m_evt_handler->m_curr->line_contents.rem.len)
7603 else if(C4_UNLIKELY(m_evt_handler->m_curr->indentation_lt()))
7605 _c4err(
"invalid indentation");
7608 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
7609 _c4dbgpf(
"mapblck[RKCL]: '{}'", _c4prc(first));
// the colon was found: consume it and any following spaces
7612 _c4dbgp(
"mapblck[RKCL]: found the colon");
7613 _line_progressed(1);
7614 _maybe_skipchars(
' ');
// compiler-specific guard: on the listed GCC versions/targets the remaining
// line is passed through C4_DONT_OPTIMIZE before it is inspected
// NOTE(review): the reason for the guard is not shown here
7615 #if defined(__GNUC__) && ( \
7616 ((__GNUC__ >= 12) && ((C4_WORDSIZE == 4) || defined(C4_CPU_S390_X) || defined(C4_CPU_PPC64))) \
7618 (__GNUC__ == 16 && defined(C4_CPU_X86_64)))
7619 C4_DONT_OPTIMIZE(m_evt_handler->m_curr->line_contents.rem);
// a block-seq token right after the colon starts a sequence as the value
7622 if(!_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7629 _c4dbgp(
"mapblck[RKCL]: start val seqblck");
7631 m_evt_handler->begin_seq_val_block();
7633 _save_indentation();
7634 _line_progressed(1);
7635 _maybe_skipchars(
' ');
// '?' instead of ':': the pending key gets an empty value and a new
// sibling entry is started for the next explicit key
7639 else if(first ==
'?')
7641 _c4dbgp(
"mapblck[RKCL]: got '?'. val was empty");
7642 m_evt_handler->set_val_scalar_plain_empty();
7643 m_evt_handler->add_sibling();
7645 _line_progressed(1);
7646 _maybe_skipchars(
' ');
7647 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7649 _c4dbgp(
"mapblck[RKCL]: seqblck starts after ?");
7651 m_evt_handler->begin_seq_key_block();
7653 _save_indentation();
7654 _line_progressed(1);
7655 _maybe_skipchars(
' ');
// '-': must be a doc-begin token (enforced by the check below)
7659 else if(first ==
'-')
7661 if(m_evt_handler->m_curr->indref == 0 || m_evt_handler->m_curr->line_contents.indentation == 0 || _is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem))
7663 _c4dbgp(
"mapblck[RKCL]: end+start doc");
7664 _RYML_CHECK_PARSE_(m_evt_handler->m_stack.m_callbacks, _is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem), m_evt_handler->m_curr->pos);
7665 _start_doc_suddenly();
7666 _line_progressed(3);
7667 _maybe_skip_whitespace_tokens();
// '.': the two characters after it are matched by hand against ".." / ".. "
// to recognise the "..." document-end marker
7675 else if(first ==
'.')
7677 _c4dbgp(
"mapblck[RKCL]: maybe end doc?");
7678 csubstr rs = m_evt_handler->m_curr->line_contents.rem.
sub(1);
7679 if(rs ==
".." || rs.begins_with(
".. "))
7681 _c4dbgp(
"mapblck[RKCL]: end doc");
7682 _end_doc_suddenly();
7683 _line_progressed(3);
7684 _maybe_skip_whitespace_tokens();
7685 _check_doc_end_tokens();
// no colon found: the key gets an empty value and a sibling entry is added
7695 _c4dbgp(
"mapblck[RKCL]: missing :");
7696 if(C4_UNLIKELY(!m_evt_handler->m_curr->indentation_eq()))
7698 m_evt_handler->set_val_scalar_plain_empty();
7699 m_evt_handler->add_sibling();
// Handler for the top-level "unknown" state when parsing JSON.
//
// Asserts that none of RNXT/RSEQ/RMAP is set and that RTOP is set, skips a
// comment and leading indentation, and then: starts a flow sequence on '[',
// starts a flow map on '{', consumes a byte order mark, or scans a
// double-quoted or plain scalar and emits it as a value.
//
// NOTE(review): this listing is a sparse extraction (braces, some conditions
// and declarations are not visible); comments describe only what is shown.
7708template<
class EventHandler>
7709void ParseEngine<EventHandler>::_handle_unk_json()
7711 _c4dbgpf(
"handle_unk_json indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
// preconditions: top level, not yet inside any container
7713 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT|
RSEQ|
RMAP), m_evt_handler->m_curr->pos);
7714 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RTOP), m_evt_handler->m_curr->pos);
7716 _maybe_skip_comment();
7717 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// pos is computed on a line not shown here; npos is mapped to the whole
// remaining length, then that many characters are skipped as indentation
7724 pos = pos !=
npos ? pos : rem.len;
7725 _c4dbgpf(
"skipping indentation of {}", pos);
7726 _line_progressed(pos);
7727 rem = m_evt_handler->m_curr->line_contents.rem;
7730 _c4dbgpf(
"rem is now {}", _prs(rem));
// '[' : flow sequence; indentation is set to the column of the bracket
7733 if(rem.begins_with(
'['))
7735 _c4dbgp(
"it's a seq");
7736 _check_trailing_doc_token();
7738 m_evt_handler->begin_seq_val_flow();
7740 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7741 m_doc_empty =
false;
7742 _line_progressed(1);
// '{' : flow map; indentation is set to the column of the brace
7744 else if(rem.begins_with(
'{'))
7746 _c4dbgp(
"it's a map");
7747 _check_trailing_doc_token();
7749 m_evt_handler->begin_map_val_flow();
7751 m_doc_empty =
false;
7752 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7753 _line_progressed(1);
7755 else if(_handle_bom())
7757 _c4dbgp(
"byte order mark");
// otherwise a scalar is expected; SSCL must not be set at this point
7761 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(
SSCL), m_evt_handler->m_curr->pos);
7762 _maybe_skip_whitespace_tokens();
7763 csubstr s = m_evt_handler->m_curr->line_contents.rem;
7766 const size_t startindent = m_evt_handler->m_curr->line_contents.indentation;
7767 const char first = s.
str[0];
// double-quoted scalar: when no ':' follows it is emitted as the value
7771 _c4dbgp(
"runk_json: scanning double-quoted scalar");
7772 _check_trailing_doc_token();
7775 m_doc_empty =
false;
7776 sc = _scan_scalar_dquot();
7777 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
7778 if(!_maybe_scan_following_colon())
7780 _c4dbgp(
"runk_json: set as val");
7781 _handle_annotations_before_blck_val_scalar();
7782 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
// plain scalar: when no ':' follows it is filtered with the line
// indentation and emitted as the value
7789 else if(_scan_scalar_plain_unk(&sc))
7791 _c4dbgp(
"runk_json: got a plain scalar");
7792 _check_trailing_doc_token();
7795 m_doc_empty =
false;
7796 if(!_maybe_scan_following_colon())
7798 _c4dbgp(
"runk_json: set as val");
7799 _handle_annotations_before_blck_val_scalar();
7800 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
7801 m_evt_handler->set_val_scalar_plain(maybe_filtered);
// Handle the next token while the parser is at the top level (RTOP is
// asserted) and the kind of the upcoming node is not yet known (RNXT, RSEQ
// and RMAP are asserted to be unset; debug messages are tagged "runk").
//
// The decision is taken from the first character of the remaining line:
//  - at zero indentation: BOM, document begin/end tokens and '%' directives;
//  - '[' / '{': flow seq / flow map (or, when the pending annotations
//    require a key container, a new block map whose first key is that flow
//    container);
//  - '-', '?', ':' followed by a block token: block seq, complex key,
//    empty key;
//  - '&', '*', '!': anchor, reference, tag;
//  - block scalars, quoted scalars and plain scalars.
// A reference or scalar that is followed by a colon starts a new block map
// and becomes its first key; otherwise it is set as a value.
//
// NOTE(review): this listing omits brace-only lines and several statements
// of the original file; the comments below describe only what is visible.
7818template<
class EventHandler>
7819void ParseEngine<EventHandler>::_handle_unk()
7821 _c4dbgpf(
"handle_unk indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
7823 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RNXT|
RSEQ|
RMAP), m_evt_handler->m_curr->pos);
7824 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RTOP), m_evt_handler->m_curr->pos);
// skip leading spaces and any comment before looking at the line
7826 _maybe_skipchars(
' ');
7827 _maybe_skip_comment();
// nothing left on this line after skipping
7829 if(!m_evt_handler->m_curr->line_contents.rem.len)
7832 _c4dbgpf(
"runk: rem is now {}", _prs(m_evt_handler->m_curr->line_contents.rem));
// zero indentation at the beginning of a line (or on the line where a BOM
// was seen): check for BOM, document tokens and directives
7834 if(m_evt_handler->m_curr->line_contents.indentation == 0u && (m_evt_handler->m_curr->at_line_beginning() || (m_bom_len && (m_evt_handler->m_curr->pos.line == m_bom_line))))
7836 _c4dbgpf(
"runk: rtop: zero indent + at line begin. offset={}", m_evt_handler->m_curr->pos.offset);
7837 _c4dbgp(
"runk: check BOM");
// remember the line where the BOM was found
7840 m_bom_line = m_evt_handler->m_curr->pos.line;
7841 _c4dbgpf(
"runk: byte order mark! line={} offset={}", m_bom_line, m_evt_handler->m_curr->pos.offset);
7844 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
7845 _c4dbgpf(
"runk: rtop: first={}", _c4prc(first));
7848 _c4dbgp(
"runk: rtop: suspecting doc");
7849 if(_is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem))
7851 _c4dbgp(
"runk: rtop: begin doc");
7854 _set_indentation(0);
// consume the 3-character document token
7856 _line_progressed(3u);
7857 _maybe_skip_whitespace_tokens();
7861 else if(first ==
'.')
7863 _c4dbgp(
"runk: rtop: suspecting doc end");
7864 if(_is_doc_end_token(m_evt_handler->m_curr->line_contents.rem))
7866 _c4dbgp(
"runk: rtop: end doc");
7873 _c4dbgp(
"runk: rtop: ignore end doc");
7876 _line_progressed(3u);
7877 _maybe_skip_whitespace_tokens();
7878 _check_doc_end_tokens();
7882 else if(first ==
'%')
7884 _c4dbgpf(
"directive: {}", m_evt_handler->m_curr->line_contents.rem);
// a directive is rejected while RDOC is set, or when the document already
// has content and NDOC is not set
7885 if(C4_UNLIKELY(has_any(
RDOC) || (!m_doc_empty && has_none(
NDOC))))
7886 _c4err(
"need document footer before directives");
7887 _handle_directive(m_evt_handler->m_curr->line_contents.rem);
// indentation of the line, and column of the remaining text
7894 size_t startindent = m_evt_handler->m_curr->line_contents.indentation;
7895 size_t remindent = m_evt_handler->m_curr->line_contents.current_col(m_evt_handler->m_curr->line_contents.rem);
7898 _c4dbgpf(
"runk: prev BOMlen={}", m_bom_len);
// on the BOM line, the BOM bytes do not count towards the column
7899 if(m_evt_handler->m_curr->pos.line == m_bom_line)
7901 _c4dbgpf(
"runk: BOM remindent={} offset={}", remindent, m_evt_handler->m_curr->pos.offset);
7902 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, remindent >= m_bom_len, m_evt_handler->m_curr->pos);
7903 remindent -= m_bom_len;
7911 size_t startcol = _handle_block_skip_leading_whitespace();
7912 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
// flow sequence: either a plain flow seq value, or (when the pending
// annotations require a key container) the first key of a new block map
7916 _c4dbgp(
"runk: flow seq?");
7917 _handle_unk_begin_doc();
7918 if(C4_LIKELY( ! _annotations_require_key_container()))
7920 _c4dbgp(
"runk: it's a seq, flow");
7921 _handle_annotations_before_blck_val_scalar();
7922 m_evt_handler->begin_seq_val_flow();
7924 _set_indentation(0);
7928 _c4dbgp(
"runk: start new block map, set flow seq as key (!)");
7929 _handle_annotations_before_start_mapblck(m_evt_handler->m_curr->pos.line);
7930 m_evt_handler->begin_map_val_block();
7932 _handle_annotations_and_indentation_after_start_mapblck(remindent, m_evt_handler->m_curr->pos.line);
7933 m_evt_handler->begin_seq_key_flow();
7935 _set_indentation(0);
7937 _line_progressed(1);
// flow map: same two possibilities as for the flow sequence
7939 else if(first ==
'{')
7941 _c4dbgp(
"runk: flow map?");
7942 _handle_unk_begin_doc();
7943 if(C4_LIKELY( ! _annotations_require_key_container()))
7945 _c4dbgp(
"runk: it's a map, flow");
7946 _handle_annotations_before_blck_val_scalar();
7947 m_evt_handler->begin_map_val_flow();
7949 _set_indentation(0);
7953 _c4dbgp(
"runk: start new block map, set flow map as key (!)");
7954 _handle_annotations_before_start_mapblck(m_evt_handler->m_curr->pos.line);
7955 m_evt_handler->begin_map_val_block();
7957 _handle_annotations_and_indentation_after_start_mapblck(remindent, m_evt_handler->m_curr->pos.line);
7958 m_evt_handler->begin_map_key_flow();
7960 _set_indentation(0);
7962 _line_progressed(1);
// block sequence entry
7964 else if(first ==
'-' && _is_blck_token(m_evt_handler->m_curr->line_contents.rem))
7966 _c4dbgp(
"runk: it's a seq, block");
// when this is not the first token on the line, re-derive the indentation
// from the tokens to the left
7967 if(C4_UNLIKELY(!m_evt_handler->m_curr->at_first_token()))
7968 startindent = _handle_unk_check_left_tokens(startindent, m_evt_handler->m_curr->pos.col,
false);
7969 _handle_unk_begin_doc();
7970 _handle_annotations_before_blck_val_scalar();
7971 m_evt_handler->begin_seq_val_block();
7973 _set_indentation(startindent);
7974 _line_progressed(1);
7975 _maybe_skipchars(
' ');
// explicit ("complex") key: starts a block map
7977 else if(first ==
'?' && _is_blck_token(m_evt_handler->m_curr->line_contents.rem))
7979 _c4dbgp(
"runk: it's a map + this key is complex");
7980 if(C4_UNLIKELY(!m_evt_handler->m_curr->at_first_token()))
7981 startindent = _handle_unk_check_left_tokens(startindent, m_evt_handler->m_curr->pos.col,
false);
7982 _handle_block_check_leading_tabs(startcol);
7983 _handle_unk_begin_doc();
7984 _handle_annotations_before_blck_val_scalar();
7985 m_evt_handler->begin_map_val_block();
7987 _set_indentation(startindent);
7988 _line_progressed(1);
7989 _maybe_skipchars(
' ');
// the complex key may itself be a block sequence
7990 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7992 _c4dbgp(
"runk: seqblck key starts after ?");
7994 m_evt_handler->begin_seq_key_block();
7996 _save_indentation();
7997 _line_progressed(1);
7998 _maybe_skipchars(
' ');
// leading colon: a block map with an empty key, accepted only when the
// document is still empty or there are pending anchors/tags
8001 else if(first ==
':' && _is_blck_token(m_evt_handler->m_curr->line_contents.rem))
8003 if(m_doc_empty || (m_pending_anchors.num_entries | m_pending_tags.num_entries))
8005 _c4dbgp(
"runk: it's a map with an empty key");
8006 if(C4_UNLIKELY(!m_evt_handler->m_curr->at_first_token()))
8007 startindent = _handle_unk_check_left_tokens(startindent, m_evt_handler->m_curr->pos.col);
8008 _handle_block_check_leading_tabs(startcol);
8009 const size_t startline = m_evt_handler->m_curr->pos.line;
8010 _handle_unk_begin_doc();
8011 _handle_annotations_before_start_mapblck(startline);
8013 m_evt_handler->begin_map_val_block();
8014 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8015 m_evt_handler->set_key_scalar_plain_empty();
8016 _set_indentation(startindent);
8020 _c4err(
"block colon cannot occur on a new line unless ? is used");
8023 _line_progressed(1);
8024 _maybe_skip_whitespace_tokens();
// anchor: stored as a pending annotation together with its column and line
8026 else if(first ==
'&')
8028 csubstr anchor = _scan_anchor();
8029 _c4dbgpf(
"anchor! {}", _prs(anchor));
8030 const size_t line = m_evt_handler->m_curr->pos.line;
8031 _handle_unk_begin_doc();
8032 _add_annotation(&m_pending_anchors, anchor, remindent, line);
8033 _set_indentation(0);
// reference: a value, or (when followed by a colon) the first key of a new
// block map
8035 else if(first ==
'*')
8037 csubstr ref = _scan_ref_map();
8038 _c4dbgpf(
"runk: ref! {}", _prs(ref));
8039 _handle_unk_begin_doc();
8040 if(!_maybe_scan_following_colon())
8042 _c4dbgp(
"runk: set val ref");
8043 _handle_valref(ref);
8047 _c4dbgp(
"runk: start new block map, set ref as key");
8048 _handle_block_check_leading_tabs(startcol);
8049 const size_t startline = m_evt_handler->m_curr->pos.line;
8050 _handle_annotations_before_start_mapblck(startline);
8051 m_evt_handler->begin_map_val_block();
8052 _handle_keyref(ref);
8053 _maybe_skip_whitespace_tokens();
8054 _set_indentation(0);
// tag: stored as a pending annotation together with its column and line
8058 else if(first ==
'!')
8061 csubstr tag = _scan_tag(&tag_orig);
8062 _c4dbgpf(
"runk: val tag! {}", _prs(tag));
8065 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(m_evt_handler->m_curr->line_contents.rem);
8066 const size_t line = m_evt_handler->m_curr->pos.line;
8067 _add_annotation(&m_pending_tags, tag, indentation, line, tag_orig);
// anything else is handled as a scalar
8071 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(
SSCL), m_evt_handler->m_curr->pos);
8072 const size_t startscalar = _handle_block_get_whitespace_mark();
8073 const size_t startline = m_evt_handler->m_curr->pos.line;
// helper: start a block map whose first key is the scalar just scanned;
// a scalar that ended on a later line than it started is rejected as key
8074 auto beginmap = [&](
size_t startindent_){
8075 if(C4_UNLIKELY(m_evt_handler->m_curr->pos.line > startline))
8076 _c4err(
"multiline scalars cannot be used as implicit keys");
8077 _handle_block_check_leading_tabs(startcol, startscalar);
8078 _handle_annotations_before_start_mapblck(startline);
8080 m_evt_handler->begin_map_val_block();
8081 _handle_annotations_and_indentation_after_start_mapblck(startindent_, startline);
// helper: run after the key was set on the new block map
8083 auto after_beginmap = [&](
size_t startindent_){
8084 _maybe_skip_whitespace_tokens();
8085 _set_indentation(startindent_);
// block-literal scalar (the branch condition is not visible in this listing)
8090 _c4dbgp(
"runk: block-literal scalar");
8091 _handle_unk_begin_doc();
8093 _scan_block(&sb, startindent);
8094 _handle_annotations_before_blck_val_scalar();
8095 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
8096 m_evt_handler->set_val_scalar_literal(maybe_filtered);
// block-folded scalar
8098 else if(first ==
'>')
8100 _c4dbgp(
"runk: block-folded scalar");
8101 _handle_unk_begin_doc();
8103 _scan_block(&sb, startindent);
8104 _handle_annotations_before_blck_val_scalar();
8105 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
8106 m_evt_handler->set_val_scalar_folded(maybe_filtered);
// single-quoted scalar: value, or key of a new block map when a colon follows
8108 else if(first ==
'\'')
8110 _c4dbgp(
"runk: single-quoted scalar");
8111 _handle_unk_begin_doc();
8112 bool firsttoken = m_evt_handler->m_curr->at_first_token();
8113 size_t col = m_evt_handler->m_curr->pos.col;
8114 ScannedScalar sc = _scan_scalar_squot();
8115 if(!_maybe_scan_following_colon())
8117 _c4dbgp(
"runk: set as val");
8118 _handle_annotations_before_blck_val_scalar();
8119 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
8120 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
8124 _c4dbgp(
"runk: start new block map, set single-quoted scalar as key");
8125 if(C4_UNLIKELY(m_evt_handler->m_curr->pos.line > startline))
8128 startindent = _handle_unk_check_left_tokens(startindent, col);
8129 beginmap(startindent);
// the scalar is a key: use the key variant of the filter, as _handle_usty()
// does before set_key_scalar_squoted()
8130 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
8131 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
8132 after_beginmap(startindent);
// double-quoted scalar: value, or key of a new block map when a colon follows
8135 else if(first ==
'"')
8137 _c4dbgp(
"runk: double-quoted scalar");
8138 _handle_unk_begin_doc();
8139 bool firsttoken = m_evt_handler->m_curr->at_first_token();
8140 size_t col = m_evt_handler->m_curr->pos.col;
8141 ScannedScalar sc = _scan_scalar_dquot();
8142 if(!_maybe_scan_following_colon())
8144 _c4dbgp(
"runk: set as val");
8145 _handle_annotations_before_blck_val_scalar();
8146 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
8147 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
8151 _c4dbgp(
"runk: start new block map, set double-quoted scalar as key");
8152 if(C4_UNLIKELY(m_evt_handler->m_curr->pos.line > startline))
8155 startindent = _handle_unk_check_left_tokens(startindent, col);
8156 beginmap(startindent);
// the scalar is a key: use the key variant of the filter, as _handle_usty()
// does before set_key_scalar_dquoted()
8157 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
8158 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
8159 after_beginmap(startindent);
// last resort: try to scan a plain scalar
8164 bool firsttoken = m_evt_handler->m_curr->at_first_token();
8165 size_t col = m_evt_handler->m_curr->pos.col;
8167 if(_scan_scalar_plain_unk(&sc))
8169 _c4dbgp(
"runk: plain scalar");
8170 _handle_unk_begin_doc();
8171 if(!_maybe_scan_following_colon())
8173 _c4dbgp(
"runk: set as val");
8174 _handle_annotations_before_blck_val_scalar();
8175 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
8176 m_evt_handler->set_val_scalar_plain(maybe_filtered);
8180 _c4dbgp(
"runk: start new block map, set plain scalar as key");
8185 _c4assert(m_evt_handler->m_curr->pos.line == startline);
8187 startindent = _handle_unk_check_left_tokens(startindent, col);
8188 beginmap(startindent);
// the scalar is a key: use the key variant of the filter, as _handle_usty()
// does before set_key_scalar_plain()
8189 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
8190 m_evt_handler->set_key_scalar_plain(maybe_filtered);
8191 after_beginmap(startindent);
// BOM bookkeeping once the parser has left the unknown state: the BOM length
// is cleared when the current line is no longer the BOM line, or when the
// line has been fully consumed
8201 if(m_bom_len && has_none(
RUNK))
8203 _c4dbgpf(
"runk: BOMlen={} BOMline={} now={} at_end={}", m_bom_len, m_bom_line, m_evt_handler->m_curr->pos.line, !m_evt_handler->m_curr->line_contents.rem.len);
8204 if(m_evt_handler->m_curr->pos.line != m_bom_line || !m_evt_handler->m_curr->line_contents.rem.len)
8206 _c4dbgp(
"runk: clear BOMlen");
// Bookkeeping shared by the branches of _handle_unk() that are about to emit
// document content: checks for a trailing document token and records that
// the current document is no longer empty.
// NOTE(review): original lines 8217-8218 are not visible in this listing, so
// any additional work done between the two visible statements is unknown.
8212template<
class EventHandler>
8213void ParseEngine<EventHandler>::_handle_unk_begin_doc()
8215 _c4dbgp(
"runk: begin doc");
8216 _check_trailing_doc_token();
// from now on the document has content
8219 m_doc_empty =
false;
// Inspect the part of the current line that lies to the left of column
// `col` (after any BOM bytes) and derive the indentation to use for a token
// that is not the first one on its line.
//
// @param realindent       indentation known so far; may be replaced
// @param col              column of the token being handled
// @param skip_annotations when true, the same-line pending annotations are
//                         skipped via _handle_unk_get_first_non_pending_token_pos(),
//                         which may update realindent and the scan position;
//                         when false, realindent is set to the position of
//                         the first non-space character
// @return a size_t; callers assign it to their indentation variable
//         (presumably realindent — the return statement is not visible in
//         this listing)
//
// NOTE(review): the declarations of `pos` and the statement guarded by the
// `m_bom_len + firstns != col` check are not visible in this listing.
8222template<
class EventHandler>
8223size_t ParseEngine<EventHandler>::_handle_unk_check_left_tokens(
size_t realindent,
size_t col,
bool skip_annotations)
// text between the end of the BOM and the token's column
8228 csubstr s = m_evt_handler->m_curr->line_contents.full.
range(m_bom_len, col);
8230 _c4dbgpf(
"runk: check left tokens: s={}", _prs(s,
true));
8231 if(skip_annotations)
8233 _handle_unk_get_first_non_pending_token_pos(s, &realindent, &pos);
8234 _c4dbgpf(
"runk: skip annotations: realindent={} pos={}", realindent, pos);
// first non-space character at or after pos
8236 size_t firstns = s.first_not_of(
' ', pos);
8239 _c4dbgpf(
"runk: check left tokens:\n"
8240 " tokens={} skipped={}\n"
8241 " bomlen={} first={} col={}\n"
8242 " (bomlen+first)={} vs {}=col\n"
8243 " startindent={} lineindent={}"
8244 , _prs(s,
true), _prs(s.sub(firstns),
true)
8245 , m_bom_len, firstns, col
8246 , m_bom_len+firstns, col,
8247 realindent, m_evt_handler->m_curr->line_contents.indentation);
// the first non-space position (offset by the BOM) is compared with col
8248 if(m_bom_len + firstns != col)
8250 if(!skip_annotations)
8251 realindent = firstns;
8252 _c4dbgpf(
"runk: pos={} firstns={} -> realindent={}", pos, firstns, realindent);
// Walk over the leading whitespace and the (up to two) pending annotations
// found on the current line inside `s`, and report the resulting positions.
//
// @param s                   text to the left of the token being handled
// @param indent              [out] receives the position of the first
//                            character that is neither space nor tab
// @param first_non_token_pos [out] receives the same position initially,
//                            and is written again with the position reached
//                            after the annotations were skipped
//
// NOTE(review): the declarations of `first`, `second` and `more`, the
// conditions around each step and the statements that advance `pos` are not
// visible in this listing.
8258template<
class EventHandler>
8259void ParseEngine<EventHandler>::_handle_unk_get_first_non_pending_token_pos(
csubstr s,
size_t *indent,
size_t *first_non_token_pos)
// annotations pending on this line, as strings located inside s
8262 uint32_t total = _get_annotations_same_line(s, &first, &second);
8263 _c4dbgpf(
"runk: before skip: {}", _prs(s,
true));
// skip leading spaces and tabs
8264 size_t pos = s.first_not_of(
" \t");
8269 *indent = *first_non_token_pos = pos;
8272 _c4assert(!s.sub(pos).begins_with_any(
" \t"));
8273 _c4dbgpf(
"runk: after skip leading {} whitespace: {}", pos, _prs(s.sub(pos),
true));
8274 _c4dbgpf(
"runk: first annotation: {}", first);
// the text at pos is expected to start with the first annotation
8278 _c4assert(s.sub(pos).begins_with(first));
8281 _c4dbgpf(
"runk: after skip first annotation: pos={} {}", pos, _prs(s.sub(pos),
true));
8284 _c4dbgpf(
"runk: second annotation: {}", second);
8292 _c4dbgpf(
"runk: next nonspace: {}", pos + more);
8294 _c4dbgpf(
"runk: after skip annotation whitespace: pos={} {}", pos, _prs(s.sub(pos),
true));
// likewise for the second annotation
8295 _c4assert(s.sub(pos).begins_with(second));
8297 _c4dbgpf(
"runk: after skip annotation 2: pos={} {}", pos, _prs(s.sub(pos),
true));
8299 *first_non_token_pos = pos;
// Find the pending anchors/tags that were registered on the current line and
// return them as strings.
//
// @param token_soup text of the current line to the left of the token being
//                   handled (an assertion below compares an anchor's pointer
//                   against the start of this text)
// @param first_     [out] string for the first same-line annotation, or a
//                   null value when there is none
// @param second_    [out] string for the second same-line annotation, or a
//                   null value when there is none
// @return a uint32_t count (presumably the number of same-line annotations;
//         the return statement is not visible in this listing)
//
// Must not be called when the parser is at the first token of the line
// (asserted).
//
// NOTE(review): several statements (early exits, the body of extract_string,
// the swap itself) are not visible in this listing.
8303template<
class EventHandler>
8304uint32_t ParseEngine<EventHandler>::_get_annotations_same_line(
csubstr token_soup,
csubstr *first_,
csubstr *second_)
const
8306 _c4assert(!m_evt_handler->m_curr->at_first_token());
8308 using EntryPtr =
typename Annotation::Entry
const* C4_RESTRICT;
8309 EntryPtr first =
nullptr;
8310 EntryPtr second =
nullptr;
// all pending annotations, regardless of line
8311 uint32_t total = (uint32_t)(m_pending_anchors.num_entries + m_pending_tags.num_entries);
8314 _c4dbgpf(
"there are {} pending annotations: {} anchors + {} tags", total, m_pending_anchors.num_entries, m_pending_tags.num_entries);
// yields the entry when it was registered on the current line, else nullptr
8315 auto valid_if_same_line = [
this](EntryPtr entry){
8316 _c4dbgpf(
"pending: {} indent={} line={} vs currline={}", _maybe_null_str(entry->str), entry->indentation, entry->line, m_evt_handler->m_curr->pos.line);
8317 return (entry->line == m_evt_handler->m_curr->pos.line) ? entry :
nullptr;
// add one to total for every anchor and tag on the current line
8321 for(
size_t i = 0; i < m_pending_anchors.num_entries; ++i)
8322 total += !!valid_if_same_line(&m_pending_anchors.annotations[i]);
8323 for(
size_t i = 0; i < m_pending_tags.num_entries; ++i)
8324 total += !!valid_if_same_line(&m_pending_tags.annotations[i]);
8325 _c4dbgpf(
"{} annotations on same line", total);
// first same-line entry other than not_this_one; anchors are searched
// before tags
8330 auto get_first_on_same_line = [
this](EntryPtr not_this_one){
8331 for(
size_t i = 0; i < m_pending_anchors.num_entries; ++i)
8332 if(&m_pending_anchors.annotations[i] != not_this_one
8333 && m_pending_anchors.annotations[i].line == m_evt_handler->m_curr->pos.line)
8334 return &m_pending_anchors.annotations[i];
8335 for(
size_t i = 0; i < m_pending_tags.num_entries; ++i)
8336 if(&m_pending_tags.annotations[i] != not_this_one
8337 && m_pending_tags.annotations[i].line == m_evt_handler->m_curr->pos.line)
8338 return &m_pending_tags.annotations[i];
8340 return (EntryPtr)
nullptr;
8344 first = get_first_on_same_line(
nullptr);
8346 _c4dbgpf(
"first annotation: {} indent={} line={}", _maybe_null_str(first->str), first->indentation, first->line);
// the second one is the first same-line entry that is not `first`
8351 second = get_first_on_same_line(first);
8353 _c4dbgpf(
"second annotation: {} indent={} line={}", _maybe_null_str(second->str), second->indentation, second->line);
// turns an entry into the string reported to the caller; entries with a null
// string or starting with '!' or '<' take the tag path
8355 auto extract_string = [&](EntryPtr e){
8357 if(!e->str.str || e->str.begins_with_any(
"!<"))
8363 _c4dbgpf(
"tag: {} -> {}", _maybe_null_str(e->str), tag);
8371 _c4assert(anchor.str - token_soup.str > 0);
8376 _c4dbgpf(
"anchor: {} -> {}", e->str, anchor);
8379 *first_ = first ? extract_string(first) : nullptr;
8380 *second_ = second ? extract_string(second) : nullptr;
// with more than one annotation, compare their positions in the source so
// that they can be reported in source order
8381 if(total > 1 && (first_->str > second_->str))
8386 _c4dbgpf(
"swap first and second: {} -> {}", *first_, *second_);
// Handle the next token in the state whose debug messages are tagged "usty"
// (RBLCK and RFLOW are asserted to be unset).
//
// Three families of branches are visible, tagged by their debug messages:
//  - usty[RSEQ]: a flow or block sequence start is accepted and the handler
//    is advanced with _push(); anything else is the error "can only parse a
//    seq into an existing seq";
//  - usty[RMAP]: a map start (flow map, '?', ':', or a reference/scalar
//    followed by a colon) is accepted with _push(); a scalar or reference
//    without a following colon is the error "cannot read a VAL to a map",
//    and a sequence start is the error "cannot parse a seq into an existing
//    map";
//  - usty[UNK]: containers are started with the begin_*() handler events,
//    and scalars/references are set as values unless a colon follows, in
//    which case a new block map is started with them as first key.
//
// NOTE(review): this listing omits brace-only lines and several statements
// of the original file (including some branch conditions); the comments
// below describe only what is visible.
8395template<
class EventHandler>
8396C4_COLD
void ParseEngine<EventHandler>::_handle_usty()
// one placeholder per argument, same layout as the message in _handle_unk()
8398 _c4dbgpf(
"handle_usty indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
8400 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(
RBLCK|
RFLOW), m_evt_handler->m_curr->pos);
8402 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
8405 _c4dbgp(
"usty[RNXT]: finishing!");
8410 _maybe_skip_comment();
8411 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// skip the indentation; when no position was found the whole remainder is
// skipped (the computation of pos is not visible in this listing)
8418 pos = pos !=
npos ? pos : rem.len;
8419 _c4dbgpf(
"skipping indentation of {}", pos);
8420 _line_progressed(pos);
8421 rem = m_evt_handler->m_curr->line_contents.rem;
8424 _c4dbgpf(
"rem is now {}", _prs(rem));
8427 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, rem.len > 0, m_evt_handler->m_curr->pos);
8428 size_t startindent = m_evt_handler->m_curr->line_contents.indentation;
8429 char first = rem.str[0];
// ---- usty[RSEQ] branches ----
8432 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(
RMAP), m_evt_handler->m_curr->pos);
8433 _c4dbgpf(
"usty[RSEQ]: first='{}'", _c4prc(first));
8436 _c4dbgp(
"usty[RSEQ]: it's a flow seq. merging it");
8438 m_evt_handler->_push();
8440 _set_indentation(startindent);
8441 _line_progressed(1);
8442 _maybe_skip_whitespace_tokens();
8444 else if(first ==
'-' && _is_blck_token(rem))
8446 _c4dbgp(
"usty[RSEQ]: it's a block seq. merging it");
8448 m_evt_handler->_push();
8450 _set_indentation(startindent);
8451 _line_progressed(1);
8452 _maybe_skip_whitespace_tokens();
8456 _c4err(
"can only parse a seq into an existing seq");
// ---- usty[RMAP] branches ----
8459 else if(has_any(
RMAP))
8461 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(
RSEQ), m_evt_handler->m_curr->pos);
8462 _c4dbgpf(
"usty[RMAP]: first='{}'", _c4prc(first));
8465 _c4dbgp(
"usty[RMAP]: it's a flow map. merging it");
8467 _handle_annotations_before_blck_val_scalar();
8468 m_evt_handler->_push();
8470 _set_indentation(startindent);
8471 _line_progressed(1);
8472 _maybe_skip_whitespace_tokens();
8474 else if(first ==
'?' && _is_blck_token(rem))
8476 _c4dbgp(
"usty[RMAP]: it's a block map + this key is complex");
8478 _handle_annotations_before_blck_val_scalar();
8479 m_evt_handler->_push();
8481 _save_indentation();
8482 _line_progressed(1);
8483 _maybe_skip_whitespace_tokens();
8485 else if(first ==
':' && _is_blck_token(rem))
8487 _c4dbgp(
"usty[RMAP]: it's a map with an empty key");
8489 _handle_annotations_before_blck_val_scalar();
8490 m_evt_handler->_push();
8491 m_evt_handler->set_key_scalar_plain_empty();
8493 _save_indentation();
8494 _line_progressed(1);
8495 _maybe_skip_whitespace_tokens();
// anchor: stored as a pending annotation with its column and line
8497 else if(rem.begins_with(
'&'))
8499 csubstr anchor = _scan_anchor();
8500 _c4dbgpf(
"usty[RMAP]: anchor! {}", _prs(anchor));
8501 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8502 const size_t line = m_evt_handler->m_curr->pos.line;
8503 _add_annotation(&m_pending_anchors, anchor, indentation, line);
8504 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
// reference: only acceptable as a key (i.e. followed by a colon)
8506 else if(first ==
'*')
8508 csubstr ref = _scan_ref_map();
8509 _c4dbgpf(
"usty[RMAP]: ref! {}", _prs(ref));
8510 if(!_maybe_scan_following_colon())
8512 _c4err(
"cannot read a VAL to a map");
8516 _c4dbgp(
"usty[RMAP]: start new block map, set ref as key");
8517 const size_t startline = m_evt_handler->m_curr->pos.line;
8519 _handle_annotations_before_start_mapblck(startline);
8520 m_evt_handler->_push();
8521 _handle_keyref(ref);
8522 _maybe_skip_whitespace_tokens();
8523 _set_indentation(startindent);
// tag: stored as a pending annotation with its column and line
8527 else if(first ==
'!')
8530 _c4dbgpf(
"usty[RMAP]: val tag! {}", _prs(tag));
8533 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8534 const size_t line = m_evt_handler->m_curr->pos.line;
8535 _add_annotation(&m_pending_tags, tag, indentation, line);
8537 else if(first ==
'[' || (first ==
'-' && _is_blck_token(rem)))
8539 _c4err(
"cannot parse a seq into an existing map");
// anything else is handled as a scalar, which must be a key
8543 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(
SSCL), m_evt_handler->m_curr->pos);
8544 startindent = m_evt_handler->m_curr->line_contents.indentation;
8545 const size_t startline = m_evt_handler->m_curr->pos.line;
8547 _c4dbgpf(
"usty[RMAP]: maybe scalar. first='{}'", _c4prc(first));
8550 _c4dbgp(
"usty[RMAP]: scanning single-quoted scalar");
8551 sc = _scan_scalar_squot();
8552 if(!_maybe_scan_following_colon())
8554 _c4err(
"cannot read a VAL to a map");
8558 _c4dbgp(
"usty[RMAP]: start new block map, set scalar as key");
8560 _handle_annotations_before_start_mapblck(startline);
8561 m_evt_handler->_push();
8562 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8563 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
8564 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
8565 _set_indentation(startindent);
8567 _maybe_skip_whitespace_tokens();
8570 else if(first ==
'"')
8572 _c4dbgp(
"usty[RMAP]: scanning double-quoted scalar");
8573 sc = _scan_scalar_dquot();
8574 if(!_maybe_scan_following_colon())
8576 _c4err(
"cannot read a VAL to a map");
8580 _c4dbgp(
"usty[RMAP]: start new block map, set double-quoted scalar as key");
8582 _handle_annotations_before_start_mapblck(startline);
8583 m_evt_handler->_push();
8584 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8585 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
8586 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
8587 _set_indentation(startindent);
8589 _maybe_skip_whitespace_tokens();
// block scalars are rejected as implicit keys
8592 else if(first ==
'|')
8594 _c4err(
"block literal keys must be enclosed in '?'");
8596 else if(first ==
'>')
8598 _c4err(
"block literal keys must be enclosed in '?'");
8600 else if(_scan_scalar_plain_unk(&sc))
8602 _c4dbgp(
"usty[RMAP]: got a plain scalar");
8603 if(!_maybe_scan_following_colon())
8605 _c4err(
"cannot read a VAL to a map");
8609 _c4dbgp(
"usty[RMAP]: start new block map, set scalar as key");
8611 _handle_annotations_before_start_mapblck(startline);
8612 m_evt_handler->_push();
8613 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8614 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
8615 m_evt_handler->set_key_scalar_plain(maybe_filtered);
8616 _set_indentation(startindent);
8618 _maybe_skip_whitespace_tokens();
// ---- usty[UNK] branches ----
8629 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(
RSEQ), m_evt_handler->m_curr->pos);
8630 _c4dbgpf(
"usty[UNK]: first='{}'", _c4prc(first));
8633 _c4dbgp(
"usty[UNK]: it's a flow seq");
8635 _handle_annotations_before_blck_val_scalar();
8636 m_evt_handler->begin_seq_val_flow();
8638 _set_indentation(startindent);
8639 _line_progressed(1);
8640 _maybe_skip_whitespace_tokens();
8642 else if(first ==
'-' && _is_blck_token(rem))
8644 _c4dbgp(
"usty[UNK]: it's a block seq");
8646 _handle_annotations_before_blck_val_scalar();
8647 m_evt_handler->begin_seq_val_block();
8649 _set_indentation(startindent);
8650 _line_progressed(1);
8651 _maybe_skip_whitespace_tokens();
8653 else if(first ==
'{')
8655 _c4dbgp(
"usty[UNK]: it's a flow map");
8657 _handle_annotations_before_blck_val_scalar();
8658 m_evt_handler->begin_map_val_flow();
8660 _set_indentation(startindent);
8661 _line_progressed(1);
8662 _maybe_skip_whitespace_tokens();
8664 else if(first ==
'?' && _is_blck_token(rem))
8666 _c4dbgp(
"usty[UNK]: it's a map + this key is complex");
8668 _handle_annotations_before_blck_val_scalar();
8669 m_evt_handler->begin_map_val_block();
8671 _save_indentation();
8672 _line_progressed(1);
8673 _maybe_skip_whitespace_tokens();
8675 else if(first ==
':' && _is_blck_token(rem))
8677 _c4dbgp(
"usty[UNK]: it's a map with an empty key");
8679 _handle_annotations_before_blck_val_scalar();
8680 m_evt_handler->begin_map_val_block();
8681 m_evt_handler->set_key_scalar_plain_empty();
8683 _save_indentation();
8684 _line_progressed(1);
8685 _maybe_skip_whitespace_tokens();
// anchor: stored as a pending annotation with its column and line
8687 else if(first ==
'&')
8689 csubstr anchor = _scan_anchor();
8690 _c4dbgpf(
"usty[UNK]: anchor! {}", _prs(anchor));
8691 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8692 const size_t line = m_evt_handler->m_curr->pos.line;
8693 _add_annotation(&m_pending_anchors, anchor, indentation, line);
8694 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
// reference: a value, or the first key of a new block map when a colon follows
8696 else if(first ==
'*')
8698 csubstr ref = _scan_ref_map();
8699 _c4dbgpf(
"usty[UNK]: ref! {}", _prs(ref));
8700 if(!_maybe_scan_following_colon())
8702 _c4dbgp(
"usty[UNK]: set val ref");
8703 _handle_valref(ref);
8707 _c4dbgp(
"usty[UNK]: start new block map, set ref as key");
8708 const size_t startline = m_evt_handler->m_curr->pos.line;
8710 _handle_annotations_before_start_mapblck(startline);
8711 m_evt_handler->begin_map_val_block();
8712 _handle_keyref(ref);
8713 _maybe_skip_whitespace_tokens();
8714 _set_indentation(startindent);
// tag: stored as a pending annotation with its column and line
8718 else if(first ==
'!')
8721 _c4dbgpf(
"usty[UNK]: val tag! {}", _prs(tag));
8724 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8725 const size_t line = m_evt_handler->m_curr->pos.line;
8726 _add_annotation(&m_pending_tags, tag, indentation, line);
// anything else is handled as a scalar: a value, or a key when a colon follows
8730 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(
SSCL), m_evt_handler->m_curr->pos);
8731 startindent = m_evt_handler->m_curr->line_contents.indentation;
8732 const size_t startline = m_evt_handler->m_curr->pos.line;
8735 _c4dbgpf(
"usty[UNK]: maybe scalar. first='{}'", _c4prc(first));
8738 _c4dbgp(
"usty[UNK]: scanning single-quoted scalar");
8739 sc = _scan_scalar_squot();
8740 if(!_maybe_scan_following_colon())
8742 _c4dbgp(
"usty[UNK]: set as val");
8743 _handle_annotations_before_blck_val_scalar();
8744 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
8745 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
8750 _c4dbgp(
"usty[UNK]: start new block map, set scalar as key");
8752 _handle_annotations_before_start_mapblck(startline);
8753 m_evt_handler->begin_map_val_block();
8754 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8755 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
8756 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
8757 _set_indentation(startindent);
8759 _maybe_skip_whitespace_tokens();
8762 else if(first ==
'"')
8764 _c4dbgp(
"usty[UNK]: scanning double-quoted scalar");
8765 sc = _scan_scalar_dquot();
8766 if(!_maybe_scan_following_colon())
8768 _c4dbgp(
"usty[UNK]: set as val");
8769 _handle_annotations_before_blck_val_scalar();
8770 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
8771 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
8776 _c4dbgp(
"usty[UNK]: start new block map, set double-quoted scalar as key");
8778 _handle_annotations_before_start_mapblck(startline);
8779 m_evt_handler->begin_map_val_block();
8780 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8781 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
8782 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
8783 _set_indentation(startindent);
8785 _maybe_skip_whitespace_tokens();
// block-literal scalar: always a value
8788 else if(first ==
'|')
8790 _c4dbgp(
"usty[UNK]: scanning block-literal scalar");
8792 _scan_block(&sb, startindent);
8793 _c4dbgp(
"usty[UNK]: set as val");
8794 _handle_annotations_before_blck_val_scalar();
8795 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
8796 m_evt_handler->set_val_scalar_literal(maybe_filtered);
// block-folded scalar: always a value
8799 else if(first ==
'>')
8801 _c4dbgp(
"usty[UNK]: scanning block-folded scalar");
8803 _scan_block(&sb, startindent);
8804 _c4dbgp(
"usty[UNK]: set as val");
8805 _handle_annotations_before_blck_val_scalar();
8806 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
8807 m_evt_handler->set_val_scalar_folded(maybe_filtered);
8810 else if(_scan_scalar_plain_unk(&sc))
8812 _c4dbgp(
"usty[UNK]: got a plain scalar");
8813 if(!_maybe_scan_following_colon())
8815 _c4dbgp(
"usty[UNK]: set as val");
8816 _handle_annotations_before_blck_val_scalar();
8817 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
8818 m_evt_handler->set_val_scalar_plain(maybe_filtered);
8823 _c4dbgp(
"usty[UNK]: start new block map, set scalar as key");
8825 _handle_annotations_before_start_mapblck(startline);
8826 m_evt_handler->begin_map_val_block();
8827 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8828 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
8829 m_evt_handler->set_key_scalar_plain(maybe_filtered);
8830 _set_indentation(startindent);
8832 _maybe_skip_whitespace_tokens();
// Parse driver that only dispatches on the RMAP and RUNK states.
// NOTE(review): the signature line is not visible in this listing; this is
// presumably parse_json_in_place_ev(csubstr filename, substr src) — confirm.
//
// Visible flow: requires a handler stack with at least one entry, notifies
// the handler of parse and stream start, then processes the source line by
// line, dispatching on the parser state until each line is consumed, and
// finally notifies the handler that parsing finished. A state matching none
// of the visible branches is reported as "internal error".
8846template<
class EventHandler>
8849 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
8851 m_evt_handler->start_parse(filename.str, src);
8852 m_evt_handler->begin_stream();
// outer loop: one iteration per line of the source
8854 while( ! _finished_file())
// inner loop: keep dispatching until the current line is consumed
8857 while( ! _finished_line())
8860 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! m_evt_handler->m_curr->line_contents.rem.empty(), m_evt_handler->m_curr->pos);
8865 else if(has_any(
RMAP))
8869 else if(has_any(
RUNK))
8875 _c4err(
"internal error");
8878 if(_finished_file())
8883 m_evt_handler->finish_parse();
// Parse driver that dispatches on the block (RBLCK), RMAP, RUNK and USTY
// states.
// NOTE(review): the signature line is not visible in this listing; this is
// presumably parse_in_place_ev(csubstr filename, substr src) — confirm.
//
// Visible flow: requires a handler stack with at least one entry, notifies
// the handler of parse and stream start, then processes the source line by
// line, dispatching on the parser state until each line is consumed, and
// finally notifies the handler that parsing finished. A state matching none
// of the visible branches is reported as "internal error".
8889template<
class EventHandler>
8892 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
8894 m_evt_handler->start_parse(filename.str, src);
8895 m_evt_handler->begin_stream();
// outer loop: one iteration per line of the source
8897 while( ! _finished_file())
// inner loop: keep dispatching until the current line is consumed
8900 while( ! _finished_line())
8903 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! m_evt_handler->m_curr->line_contents.rem.empty(), m_evt_handler->m_curr->pos);
8914 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP), m_evt_handler->m_curr->pos);
// block containers: sequence or map
8923 else if(has_any(
RBLCK))
8927 _handle_seq_block();
8931 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(
RMAP), m_evt_handler->m_curr->pos);
8932 _handle_map_block();
8935 else if(has_any(
RUNK))
8939 else if(has_any(
USTY))
8945 _c4err(
"internal error");
8948 if(_finished_file())
8953 m_evt_handler->finish_parse();
8962#undef _c4dbgnextline
8966C4_SUPPRESS_WARNING_MSVC_POP
8967C4_SUPPRESS_WARNING_GCC_CLANG_POP
Lightweight generic type-safe wrappers for converting individual values to/from strings.
This is the main driver of parsing logic: it scans the YAML or JSON source for tokens,...
FilterResult filter_scalar_plain(csubstr scalar, substr dst, size_t indentation)
filter a plain scalar
csubstr location_contents(Location const &loc) const
Get the string starting at a particular location, to the end of the parsed source buffer.
FilterResult filter_scalar_squoted(csubstr scalar, substr dst)
filter a single-quoted scalar
FilterResult filter_scalar_dquoted(csubstr scalar, substr dst)
filter a double-quoted scalar
void parse_json_in_place_ev(csubstr filename, substr src)
parse JSON in place, emitting events to the current handler
Location val_location(const char *val) const
Given a pointer to a buffer position, get the location.
FilterResult filter_scalar_plain_in_place(substr scalar, size_t cap, size_t indentation)
filter a plain scalar in place
FilterResult filter_scalar_squoted_in_place(substr scalar, size_t cap)
filter a single-quoted scalar in place
FilterResultExtending filter_scalar_dquoted_in_place(substr scalar, size_t cap)
filter a double-quoted scalar in place
void parse_in_place_ev(csubstr filename, substr src)
parse YAML in place, emitting events to the current handler
FilterResult filter_scalar_block_literal_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
filter a block-literal scalar in place
ParseEngine(EventHandler *evt_handler, ParserOptions const &opts={})
FilterResult filter_scalar_block_literal(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
filter a block-literal scalar
FilterResult filter_scalar_block_folded_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
filter a block-folded scalar in place
ParseEngine & operator=(ParseEngine &&) noexcept
FilterResult filter_scalar_block_folded(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
filter a block-folded scalar
#define RYML_LOCATIONS_SMALL_THRESHOLD
threshold at which a location search will switch from linear to binary search.
#define RYML_NOEXCEPT
Conditionally expands to noexcept when RYML_USE_ASSERT is 0 and is empty otherwise.
bool atou(csubstr str, T *v) noexcept
Convert a trimmed string to an unsigned integral value.
void err_parse(ErrorDataParse const &errdata, const char *msg)
trigger a parse error to its respective handler, with a non-formatted error message.
bool read_hex(csubstr s, I *v) noexcept
read a hexadecimal integer from a string.
basic_substring< char > substr
a mutable string view
basic_substring< const char > csubstr
an immutable string view
bool is_valid_tag_handle(csubstr handle)
bool is_custom_tag(csubstr tag)
is a tag of the form !handle!tag?
substr decode_code_point(substr out, csubstr code_point)
decode the given code_point, writing into the output string in out.
@ npos
a null string position
int ParserFlag_t
data type for ParserState_e
@ RTOP
reading at top level
@ RSET
the (implicit) map being read is a !!set.
@ RUNK
reading unknown state (when starting): must determine whether scalar, map or seq
@ RKCL
reading the key colon (i.e. the : after the key in the map)
@ NDOC
no document mode. a document has ended and another has not started yet.
@ QSCL
stored scalar was quoted
@ RBLCK
reading in block mode
@ USTY
reading in unknown style mode - must determine FLOW or BLCK reading an implicit map nested in an expl...
@ QMRK
reading an explicit key (? key)
@ SSCL
there's a stored scalar
@ RFLOW
reading is inside explicit flow chars: [] or {}
size_t adjust_pos_with_escapes(csubstr scalar, size_t pos, bool keep_newlines=false)
Adjust a position in a scalar, increasing it to account for any escaped characters.
size_t to_chars(substr buf, escaped_scalar e)
formatting implementation to escape a scalar with escape_scalar()
RYML_ID_TYPE id_type
The type of a node id in the YAML tree; to override the default type, define the macro RYML_ID_TYPE t...
@ UTF16BE
UTF16, Big-Endian.
@ UTF16LE
UTF16, Little-Endian.
@ NOBOM
No Byte Order Mark was found.
@ UTF32BE
UTF32, Big-Endian.
@ UTF32LE
UTF32, Little-Endian.
enum c4::yml::Encoding_ Encoding_e
(Undefined by default) Use shorter error message from checks/asserts: do not show the check condition...
#define _ryml_relocate(s)
#define _RYML_SAVE_TEST_YAML(filename, src)
#define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without)
#define _RYML_WITH_TAB_TOKENS(...)
#define _RYML_SAVE_TEST_JSON(filename, src)
basic_substring range(size_t first, size_t last=npos) const noexcept
return [first,last[.
size_t first_not_of(const C c) const
basic_substring triml(const C c) const
trim left
size_t first_of(const C c, size_t start=0) const
basic_substring first(size_t num) const noexcept
return the first num elements: [0,num[
basic_substring sub(size_t first) const noexcept
return [first,len[
basic_substring trimr(const C c) const
trim the character c from the right
C * str
a restricted pointer to the first character of the substring
Filters an input string into a different output string.
Abstracts the fact that a scalar filter result may not fit in the intended memory.
Abstracts the fact that a scalar filter result may not fit in the intended memory.
Helper to control the line contents while parsing a buffer.
holds a source or yaml file position, for example when an error is detected; See also location_format...
csubstr name
name of the file
Options to give to the ParseEngine to control its behavior.
Accelerator structure to reduce memory requirements by enabling reuse of resolved tags.
formatting helper to escape a scalar with escape_scalar_fn()
utilities for UTF and Byte Order Mark