rapidyaml 0.15.2
parse and emit YAML, and do it fast
Loading...
Searching...
No Matches
parse_engine.def.hpp
Go to the documentation of this file.
1#ifndef C4_YML_PARSE_ENGINE_DEF_HPP_
2#define C4_YML_PARSE_ENGINE_DEF_HPP_
3
4#ifndef C4_YML_PARSE_ENGINE_HPP_
6#endif
7#ifndef C4_CHARCONV_HPP_
8#include "c4/charconv.hpp"
9#endif
10#ifndef C4_UTF_HPP_
11#include "c4/utf.hpp"
12#endif
13#ifndef C4_YML_FILTER_PROCESSOR_HPP_
15#endif
16#ifndef C4_YML_TAG_HPP_
17#include "c4/yml/tag.hpp"
18#endif
19#ifndef C4_YML_NODE_TYPE_HPP_
20#include "c4/yml/node_type.hpp"
21#endif
22
23#ifndef C4_YML_DETAIL_DBGPRINT_HPP_
24#include "c4/yml/detail/dbgprint.hpp"
25#endif
26
// Error/assert helpers used by the parse engine member functions.
// _c4err(...): forwards to this->_err() together with RYML_LOC_HERE().
// When RYML_DBG is defined it first invokes RYML_DEBUG_BREAK().
27#ifdef RYML_DBG
28#ifndef C4_DUMP_HPP_
29#include <c4/dump.hpp>
30#endif
31#define _c4err(...) \
32 do { RYML_DEBUG_BREAK(); this->_err(RYML_LOC_HERE(), __VA_ARGS__); } while(0)
33#else
34#define _c4err(...) \
35 this->_err(RYML_LOC_HERE(), __VA_ARGS__)
36#endif
// _c4assert(...): forwards to RYML_ASSERT_PARSE_CB_ with the event
// handler's callbacks and the position of the current parser state.
37#define _c4assert(...) \
38 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, __VA_ARGS__, m_evt_handler->m_curr->pos)
39
40
// Conditional-code helpers keyed on RYML_WITH_TAB_TOKENS:
//  - RYML_WITH_TAB_TOKENS_(...) expands to its arguments only when the
//    option is defined, and to nothing otherwise;
//  - RYML_WITHOUT_TAB_TOKENS_(...) is the opposite;
//  - RYML_WITH_OR_WITHOUT_TAB_TOKENS_(with, without) selects one of the
//    two arguments.
41#if defined(RYML_WITH_TAB_TOKENS)
42#define RYML_WITH_TAB_TOKENS_(...) __VA_ARGS__
43#define RYML_WITHOUT_TAB_TOKENS_(...)
44#define RYML_WITH_OR_WITHOUT_TAB_TOKENS_(with, without) with
45#else
46#define RYML_WITH_TAB_TOKENS_(...)
47#define RYML_WITHOUT_TAB_TOKENS_(...) __VA_ARGS__
48#define RYML_WITH_OR_WITHOUT_TAB_TOKENS_(with, without) without
49#endif
50
51// helper to export cases to the YAML test suite
// When RYML_SAVE_TEST_YAML is not defined both macros expand to nothing.
// When it is defined they call c4::yml::ryml_save_test_yaml() /
// ryml_save_test_json(), which are only declared here (their definitions
// are not in this part of the file).
52#ifndef RYML_SAVE_TEST_YAML
53#define RYML_SAVE_TEST_YAML_(filename, src)
54#define RYML_SAVE_TEST_JSON_(filename, src)
55#else
56#define RYML_SAVE_TEST_YAML_(filename, src) c4::yml::ryml_save_test_yaml(filename, src)
57#define RYML_SAVE_TEST_JSON_(filename, src) c4::yml::ryml_save_test_json(filename, src)
58namespace c4 {
59namespace yml {
60void ryml_save_test_yaml(csubstr filename, csubstr src);
61void ryml_save_test_json(csubstr filename, csubstr src);
62} // namespace yml
63} // namespace c4
64#endif
65
66
67// scaffold:
// Debug helper: emits a separator through _c4dbgq and then, through
// _c4dbgt, the line and byte offset of the current parser state.
68#define _c4dbgnextline() \
69 do { \
70 _c4dbgq("\n-----------"); \
71 _c4dbgt("handling line={}, offset={}B", \
72 m_evt_handler->m_curr->pos.line, \
73 m_evt_handler->m_curr->pos.offset); \
74 } while(0)
75
76
77C4_SUPPRESS_WARNING_MSVC_PUSH
78C4_SUPPRESS_WARNING_MSVC(4296) // expression is always 'boolean_value'
79C4_SUPPRESS_WARNING_MSVC(4702) // unreachable code
80C4_SUPPRESS_WARNING_GCC_CLANG_PUSH
81C4_SUPPRESS_WARNING_GCC_CLANG("-Wtype-limits") // to remove a warning on an assertion that a size_t >= 0. Later on, this size_t will turn into a template argument, and then it can become < 0.
82C4_SUPPRESS_WARNING_GCC_CLANG("-Wformat-nonliteral")
83C4_SUPPRESS_WARNING_GCC_CLANG("-Wold-style-cast")
84#if defined(__GNUC__) && (__GNUC__ >= 6)
85C4_SUPPRESS_WARNING_GCC("-Wnull-dereference")
86#endif
87#if defined(__GNUC__) && (__GNUC__ >= 7)
88C4_SUPPRESS_WARNING_GCC("-Wduplicated-branches")
89#endif
90
91// NOLINTBEGIN(hicpp-signed-bitwise,cppcoreguidelines-avoid-goto,hicpp-avoid-goto,hicpp-multiway-paths-covered,modernize-avoid-c-style-cast)
92
93namespace c4 {
94namespace yml {
95
96namespace { // NOLINT
97
98C4_HOT C4_ALWAYS_INLINE void _set_first(substr &C4_RESTRICT subject, size_t pos) noexcept
99{
100 // avoids reassigning the ptr in substr
101 subject.len = pos != npos ? pos : subject.len;
102}
103C4_HOT C4_ALWAYS_INLINE void _set_first(csubstr &C4_RESTRICT subject, size_t pos) noexcept
104{
105 // avoids reassigning the ptr in substr
106 subject.len = pos != npos ? pos : subject.len;
107}
108C4_HOT C4_ALWAYS_INLINE void _set_first_strict(substr &C4_RESTRICT subject, size_t pos) RYML_NOEXCEPT
109{
110 // avoids reassigning the ptr in substr
111 RYML_ASSERT_BASIC_(pos != npos); // LCOV_EXCL_LINE
112 subject.len = pos;
113}
114C4_HOT C4_ALWAYS_INLINE void _set_first_strict(csubstr &C4_RESTRICT subject, size_t pos) RYML_NOEXCEPT
115{
116 // avoids reassigning the ptr in substr
117 RYML_ASSERT_BASIC_(pos != npos); // LCOV_EXCL_LINE
118 subject.len = pos;
119}
120
121C4_HOT C4_ALWAYS_INLINE bool _is_blck_token(csubstr s) RYML_NOEXCEPT
122{
123 RYML_ASSERT_BASIC_(s.len > 0);
124 RYML_ASSERT_BASIC_(s.str[0] == '-' || s.str[0] == ':' || s.str[0] == '?');
125 return ((s.len == 1) || ((s.str[1] == ' ') RYML_WITH_TAB_TOKENS_( || (s.str[1] == '\t'))));
126}
127
128C4_HOT C4_ALWAYS_INLINE bool _is_blck_seq_token_maybe(csubstr const& C4_RESTRICT s) noexcept
129{
130 return ((s.len >= 1) && (s.str[0] == '-') && ((s.len == 1) || ((s.str[1] == ' ') RYML_WITH_TAB_TOKENS_( || (s.str[1] == '\t')))));
131}
132
133inline bool _is_doc_begin_token(csubstr s) RYML_NOEXCEPT
134{
135 RYML_ASSERT_BASIC_(s.begins_with('-'));
136 RYML_ASSERT_BASIC_(!s.ends_with("\n"));
137 RYML_ASSERT_BASIC_(!s.ends_with("\r"));
138 return (s.len >= 3 && s.str[1] == '-' && s.str[2] == '-')
139 && (s.len == 3 || (s.str[3] == ' ' RYML_WITH_TAB_TOKENS_(|| s.str[3] == '\t')));
140}
141
142inline bool _is_doc_end_token(csubstr s) RYML_NOEXCEPT
143{
144 RYML_ASSERT_BASIC_(s.begins_with('.'));
145 RYML_ASSERT_BASIC_(!s.ends_with("\n"));
146 RYML_ASSERT_BASIC_(!s.ends_with("\r"));
147 return (s.len >= 3 && s.str[1] == '.' && s.str[2] == '.')
148 && (s.len == 3 || (s.str[3] == ' ' RYML_WITH_TAB_TOKENS_(|| s.str[3] == '\t')));
149}
150
151inline bool _is_doc_token(csubstr s) noexcept
152{
153 if(s.len >= 3)
154 {
155 switch(s.str[0])
156 {
157 case '-':
158 //return _is_doc_begin_token(s); // this was failing with gcc -O2
159 return (s.str[1] == '-' && s.str[2] == '-')
160 && (s.len == 3 || (s.str[3] == ' ' RYML_WITH_TAB_TOKENS_(|| s.str[3] == '\t')));
161 case '.':
162 //return _is_doc_end_token(s); // this was failing with gcc -O2
163 return (s.str[1] == '.' && s.str[2] == '.')
164 && (s.len == 3 || (s.str[3] == ' ' RYML_WITH_TAB_TOKENS_(|| s.str[3] == '\t')));
165 }
166 }
167 return false;
168}
169
170inline size_t _begins_with_special_json_scalar(csubstr s) RYML_NOEXCEPT
171{
172 RYML_ASSERT_BASIC_(s.len);
173 switch(s.str[0])
174 {
175 case 'f':
176 return s.begins_with("false") ? 5u : 0u;
177 case 't':
178 return s.begins_with("true") ? 4u : 0u;
179 case 'n':
180 return s.begins_with("null") ? 4u : 0u;
181 }
182 return 0u;
183}
184
185
186//-----------------------------------------------------------------------------
187
188C4_ALWAYS_INLINE size_t _extend_from_combined_newline(char nl, char following)
189{
190 return (nl == '\n' && following == '\r') || (nl == '\r' && following == '\n');
191}
192
193//! look for the next newline chars, and jump to the right of those
194inline substr _from_next_line(substr rem)
195{
196 size_t nlpos = rem.first_of("\r\n");
197 if(nlpos == csubstr::npos)
198 return {};
199 const char nl = rem[nlpos];
200 rem = rem.right_of(nlpos);
201 if(rem.empty())
202 return {};
203 if(_extend_from_combined_newline(nl, rem.front()))
204 rem = rem.sub(1);
205 return rem;
206}
207
208
209//-----------------------------------------------------------------------------
210
211inline size_t _count_following_newlines(csubstr r, size_t *C4_RESTRICT i)
212{
213 RYML_ASSERT_BASIC_(r[*i] == '\n');
214 size_t numnl_following = 0;
215 ++(*i);
216 for( ; *i < r.len; ++(*i))
217 {
218 if(r.str[*i] == '\n')
219 ++numnl_following;
220 // skip leading whitespace
221 else if(r.str[*i] == ' ' || r.str[*i] == '\t' || r.str[*i] == '\r')
222 ;
223 else
224 break;
225 }
226 return numnl_following;
227}
228
229/** @p i is set to the first non whitespace character after the line
230 * @return the number of empty lines after the initial position */
231inline size_t _count_following_newlines(csubstr r, size_t *C4_RESTRICT i, size_t indentation)
232{
233 RYML_ASSERT_BASIC_(r[*i] == '\n');
234 size_t numnl_following = 0;
235 ++(*i);
236 if(indentation == 0)
237 {
238 for( ; *i < r.len; ++(*i))
239 {
240 const char c = r.str[*i];
241 if(c == '\n')
242 ++numnl_following;
243 // skip leading whitespace
244 else if(c != ' ' && c != '\t' && c != '\r')
245 break;
246 }
247 }
248 else
249 {
250 for( ; *i < r.len; ++(*i))
251 {
252 char c = r.str[*i];
253 if(c == '\n')
254 {
255 ++numnl_following;
256 // skip the indentation after the newline
257 size_t stop = *i + indentation;
258 for( ; *i < r.len; ++(*i))
259 {
260 c = r.str[*i];
261 if(c != ' ' && c != '\r')
262 break;
263 RYML_ASSERT_BASIC_(*i < stop); // LCOV_EXCL_LINE
264 }
265 C4_UNUSED(stop);
266 }
267 // skip leading whitespace
268 else if(c != ' ' && c != '\t' && c != '\r')
269 {
270 break;
271 }
272 }
273 }
274 return numnl_following;
275}
276
277} // anon namespace
278
279
280//-----------------------------------------------------------------------------
281//-----------------------------------------------------------------------------
282//-----------------------------------------------------------------------------
283
284template<class EventHandler>
// NOTE(review): the declarator line is absent from this listing between
// the template header and the body; presumably this is the destructor -
// confirm against the class declaration.
// Releases the newline-offsets buffer via _free(), then resets all
// members via _clr().
286{
287 _free();
288 _clr();
289}
290
// Construct an engine bound to @p evt_handler with a copy of @p opts.
// All parsing state starts cleared: no pending anchors/tags, no
// directives seen, empty document, no newline offsets allocated.
// @p evt_handler is checked with RYML_CHECK_BASIC_ (must be non-null).
291template<class EventHandler>
292ParseEngine<EventHandler>::ParseEngine(EventHandler *evt_handler, ParserOptions const& opts)
293 : m_options(opts)
294 , m_evt_handler(evt_handler)
295 , m_pending_anchors()
296 , m_pending_tags()
297 , m_has_directives_yaml(false)
298 , m_has_directives(false)
299 , m_doc_empty(true)
300 , m_prev_colon(npos)
301 , m_prev_val_end(npos)
302 , m_encoding(NOBOM)
303 , m_newline_offsets()
304 , m_newline_offsets_size(0)
305 , m_newline_offsets_capacity(0)
306{
307 RYML_CHECK_BASIC_(evt_handler);
308}
309
310template<class EventHandler>
// NOTE(review): the declarator line is absent from this listing;
// presumably this is the move constructor taking `that` - confirm.
// Takes over `that`'s members, including the m_newline_offsets pointer
// (no copy of the buffer is made), and then clears `that` with _clr() so
// the buffer has a single owner. m_prev_colon, m_prev_val_end and
// m_encoding are reset to their initial values rather than taken from
// `that`.
312 : m_options(that.m_options)
313 , m_evt_handler(that.m_evt_handler)
314 , m_pending_anchors(that.m_pending_anchors)
315 , m_pending_tags(that.m_pending_tags)
316 , m_has_directives_yaml(that.m_has_directives_yaml)
317 , m_has_directives(that.m_has_directives)
318 , m_doc_empty(that.m_doc_empty)
319 , m_prev_colon(npos)
320 , m_prev_val_end(npos)
321 , m_encoding(NOBOM)
322 , m_newline_offsets(that.m_newline_offsets)
323 , m_newline_offsets_size(that.m_newline_offsets_size)
324 , m_newline_offsets_capacity(that.m_newline_offsets_capacity)
325{
326 that._clr();
327}
328
329template<class EventHandler>
// NOTE(review): the declarator line is absent from this listing;
// presumably this is the copy constructor taking `that` - confirm.
// Copies the options, handler pointer, pending annotations and the
// directive/doc flags. m_prev_colon, m_prev_val_end and m_encoding are
// reset to their initial values. The newline offsets are deep-copied
// into a freshly sized buffer, but only when `that` has a non-zero
// capacity.
331 : m_options(that.m_options)
332 , m_evt_handler(that.m_evt_handler)
333 , m_pending_anchors(that.m_pending_anchors)
334 , m_pending_tags(that.m_pending_tags)
335 , m_has_directives_yaml(that.m_has_directives_yaml)
336 , m_has_directives(that.m_has_directives)
337 , m_doc_empty(that.m_doc_empty)
338 , m_prev_colon(npos)
339 , m_prev_val_end(npos)
340 , m_encoding(NOBOM)
341 , m_newline_offsets()
342 , m_newline_offsets_size()
343 , m_newline_offsets_capacity()
344{
345 if(that.m_newline_offsets_capacity)
346 {
347 _resize_locations(that.m_newline_offsets_capacity);
348 RYML_CHECK_BASIC_CB_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity == that.m_newline_offsets_capacity);
349 memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size * sizeof(size_t));
350 m_newline_offsets_size = that.m_newline_offsets_size;
351 }
352}
353
354template<class EventHandler>
// NOTE(review): the declarator line is absent from this listing;
// presumably this is the move assignment operator - confirm.
// Frees the current newline-offsets buffer, takes over every member of
// `that` (including its buffer pointer) and then clears `that`.
// NOTE(review): there is no `&that != this` guard here, unlike the
// sibling assignment that deep-copies. On self-assignment the buffer is
// freed and _clr() then resets this object, m_evt_handler included.
// Consider adding the guard.
356{
357 _free();
358 m_options = (that.m_options);
359 m_evt_handler = that.m_evt_handler;
360 m_pending_anchors = that.m_pending_anchors;
361 m_pending_tags = that.m_pending_tags;
362 m_has_directives_yaml = that.m_has_directives_yaml;
363 m_has_directives = that.m_has_directives;
364 m_doc_empty = that.m_doc_empty;
365 m_prev_colon = that.m_prev_colon;
366 m_prev_val_end = that.m_prev_val_end;
367 m_encoding = that.m_encoding;
368 m_newline_offsets = (that.m_newline_offsets);
369 m_newline_offsets_size = (that.m_newline_offsets_size);
370 m_newline_offsets_capacity = (that.m_newline_offsets_capacity);
371 that._clr();
372 return *this;
373}
374
375template<class EventHandler>
// NOTE(review): the declarator line is absent from this listing;
// presumably this is the copy assignment operator - confirm.
// Self-assignment is a no-op. Otherwise the current newline-offsets
// buffer is freed (using the current handler's callbacks, before
// m_evt_handler is overwritten), the scalar members are copied, and the
// newline offsets are deep-copied into a buffer resized to `that`'s
// capacity.
// NOTE(review): when `that` has capacity 0, _free() has left
// m_newline_offsets null and no resize happens, so memcpy is called
// with null pointers and size 0. Consider guarding on the capacity as
// the constructor that deep-copies does.
377{
378 if(&that != this)
379 {
380 _free();
381 m_options = (that.m_options);
382 m_evt_handler = that.m_evt_handler;
383 m_pending_anchors = that.m_pending_anchors;
384 m_pending_tags = that.m_pending_tags;
385 m_has_directives_yaml = that.m_has_directives_yaml;
386 m_has_directives = that.m_has_directives;
387 m_doc_empty = that.m_doc_empty;
388 m_prev_colon = that.m_prev_colon;
389 m_prev_val_end = that.m_prev_val_end;
390 m_encoding = that.m_encoding;
391 if(that.m_newline_offsets_capacity > m_newline_offsets_capacity)
392 _resize_locations(that.m_newline_offsets_capacity);
393 RYML_CHECK_BASIC_CB_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_capacity);
394 RYML_CHECK_BASIC_CB_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_size);
395 memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size * sizeof(size_t));
396 m_newline_offsets_size = that.m_newline_offsets_size;
397 }
398 return *this;
399}
400
401template<class EventHandler>
402void ParseEngine<EventHandler>::_clr()
403{
404 m_options = {};
405 m_evt_handler = {};
406 m_pending_anchors = {};
407 m_pending_tags = {};
408 m_has_directives_yaml = false;
409 m_has_directives = false;
410 m_doc_empty = true;
411 m_prev_colon = npos;
412 m_prev_val_end = npos;
413 m_encoding = NOBOM;
414 m_newline_offsets = {};
415 m_newline_offsets_size = {};
416 m_newline_offsets_capacity = {};
417}
418
419template<class EventHandler>
420void ParseEngine<EventHandler>::_free()
421{
422 if(m_newline_offsets)
423 {
424 RYML_CB_FREE_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets, size_t, m_newline_offsets_capacity);
425 m_newline_offsets = nullptr;
426 m_newline_offsets_size = 0u;
427 m_newline_offsets_capacity = 0u;
428 }
429}
430
431
432//-----------------------------------------------------------------------------
433
434template<class EventHandler>
435void ParseEngine<EventHandler>::_reset()
436{
437 m_pending_anchors = {};
438 m_pending_tags = {};
439 m_has_directives_yaml = false;
440 m_has_directives = false;
441 m_doc_empty = true;
442 m_prev_colon = npos;
443 m_prev_val_end = npos;
444 m_bom_len = 0;
445 m_encoding = NOBOM;
446 m_bom_line = 0;
447 if(m_options.locations())
448 {
449 _prepare_locations();
450 }
451}
452
453
454//-----------------------------------------------------------------------------
455
// Rebase string views after the arena moved from @p prev_arena to
// @p next_arena. Every view whose pointer lies within
// [prev_arena.str, prev_arena.str + prev_arena.len] (end included) gets
// its pointer moved to the same offset inside @p next_arena; lengths are
// untouched, and views pointing elsewhere are left alone.
// Views visited: the line contents of every parser state on the stack,
// the handler's source buffer, the pending tags and anchors, the tag
// directives, the tag cache entries and, when non-null, @p other.
456template<class EventHandler>
457void ParseEngine<EventHandler>::_relocate_arena(csubstr prev_arena, substr next_arena, substr *other)
458{
459 _c4dbgp("relocate to new arena");
460 const char *pb = prev_arena.str;
461 const char *pe = prev_arena.str + prev_arena.len;
// local helper macro: rebase one view if it points into the old arena.
// The trailing ((void)0) lets call sites end with a semicolon.
462 #define _ryml_relocate(s) \
463 if((s).str >= pb && (s).str <= pe) \
464 { \
465 (s).str = next_arena.str + ((s).str - pb); \
466 } \
467 ((void)0)
468 for(ParserState &st : m_evt_handler->m_stack)
469 {
470 _ryml_relocate(st.line_contents.rem);
471 _ryml_relocate(st.line_contents.full);
472 }
473 _ryml_relocate(m_evt_handler->m_src);
474 for(size_t i = 0; i < m_pending_tags.num_entries; ++i)
475 {
476 _ryml_relocate(m_pending_tags.annotations[i].str); // LCOV_EXCL_LINE
477 _ryml_relocate(m_pending_tags.annotations[i].orig); // LCOV_EXCL_LINE
478 }
479 for(size_t i = 0; i < m_pending_anchors.num_entries; ++i)
480 {
481 _ryml_relocate(m_pending_anchors.annotations[i].str);
482 _ryml_relocate(m_pending_anchors.annotations[i].orig);
483 }
484 {
485 TagDirectives &tds = m_evt_handler->tag_directives();
486 for(size_t i = 0, sz = tds.size(); i < sz; ++i)
487 {
488 _ryml_relocate(tds.m_directives[i].handle);
489 _ryml_relocate(tds.m_directives[i].prefix);
490 }
491 }
492 {
493 TagCache &tch = m_evt_handler->tag_cache();
494 for(id_type i = 0, sz = tch.m_entries.size(); i < sz; ++i)
495 {
496 _ryml_relocate(tch.m_entries[i].tag);
497 _ryml_relocate(tch.m_entries[i].resolved);
498 }
499 }
500 if(other)
501 {
502 _ryml_relocate(*other);
503 }
504 #undef _ryml_relocate
505}
506
507/** @cond dev */
508template<class EventHandler>
// NOTE(review): the declarator line is absent from this listing; the
// body uses `len` and `other`, presumably its parameters - confirm.
// Allocates `len` from the handler's arena. If the arena's base pointer
// changed as a result, the views held by the parser (and `other`) are
// rebased via _relocate_arena(). Returns the newly allocated range.
510{
511 csubstr prev = m_evt_handler->arena();
512 substr out = m_evt_handler->alloc_arena(len);
513 substr curr = m_evt_handler->arena();
514 if(curr.str != prev.str)
515 _relocate_arena(prev, curr, other);
516 return out;
517}
518/** @endcond */
519
520
521//-----------------------------------------------------------------------------
522
523#ifdef RYML_DBG
// Debug-only: describe the current parser state through @p dumpfn.
// When the current line is non-empty it prints
//   [file:]line:col: <escaped line, at most 80 chars>[...] (size=N)
// followed by a marker line with '^' and '~' under the remaining portion
// of the line, and the 1-based column range. Otherwise it prints a bare
// newline. In both cases it ends with the flags of the top state.
524template<class EventHandler>
525template<class DumpFn>
526C4_NO_INLINE void ParseEngine<EventHandler>::_fmt_msg(DumpFn &&dumpfn) const
527{
528 ParserState const *const C4_RESTRICT st = m_evt_handler->m_curr;
529 LineContents const& C4_RESTRICT lc = st->line_contents;
530 csubstr contents = lc.full.first(lc.num_cols);
531 if(contents.len)
532 {
533 // print the yaml src line
// offs accumulates the width of the "[file:]line:col: " prefix so the
// marker line below can be aligned with the printed source line
534 size_t offs = 3u + to_chars(substr{}, st->pos.line) + to_chars(substr{}, st->pos.col);
535 csubstr m_file = m_evt_handler->m_curr->pos.name;
536 if(m_file.len)
537 {
538 dbg_dump_(std::forward<DumpFn>(dumpfn), "{}:", m_file);
539 offs += m_file.len + 1;
540 }
541 dbg_dump_(std::forward<DumpFn>(dumpfn), "{}:{}: ", st->pos.line, st->pos.col);
// long lines are cut at 80 characters and flagged with an ellipsis
542 csubstr maybe_full_content = (contents.len < 80u ? contents : contents.first(80u));
543 csubstr maybe_ellipsis = (contents.len < 80u ? csubstr{} : csubstr("..."));
544 dbg_dump_(std::forward<DumpFn>(dumpfn), "{}{} (size={})\n", escaped_scalar(maybe_full_content, /*escape*/true), maybe_ellipsis, contents.len);
545 // highlight the remaining portion of the previous line
546 size_t firstcol = (size_t)(lc.rem.str - lc.full.str);
547 size_t lastcol = firstcol + lc.rem.len;
548 size_t firstcol_adj = adjust_pos_with_escapes(lc.full, firstcol);
549 size_t len = adjust_pos_with_escapes(lc.rem, lc.rem.len);
550 for(size_t i = 0; i < offs + firstcol_adj; ++i)
551 std::forward<DumpFn>(dumpfn)(" ");
552 std::forward<DumpFn>(dumpfn)("^");
553 for(size_t i = 1, e = (len < 80u ? len : 80u); i < e; ++i)
554 std::forward<DumpFn>(dumpfn)("~");
555 dbg_dump_(std::forward<DumpFn>(dumpfn), "{} (cols {}-{})\n", maybe_ellipsis, firstcol+1, lastcol+1);
556 }
557 else
558 {
559 std::forward<DumpFn>(dumpfn)("\n");
560 }
561 // next line: print the state flags
562 {
563 char flagbuf_[128];
564 dbg_dump_(std::forward<DumpFn>(dumpfn), "top state: {}\n", detail::_parser_flags_to_str(flagbuf_, m_evt_handler->m_curr->flags));
565 }
566}
567
568template<class EventHandler>
// NOTE(review): the declarator line is absent from this listing; the
// body uses `buf`, presumably a caller-provided scratch buffer for the
// flags string - confirm.
// Debug-only: when debug output is enabled, prints one line per parser
// state on the stack (level, indref, node id and flags).
570{
571 if(dbg_enabled_())
572 {
573 for(ParserState const& s : m_evt_handler->m_stack)
574 dbg_printf_("state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(buf, s.flags));
575 }
576}
577
578template<class EventHandler>
// NOTE(review): the declarator line is absent from this listing.
// Convenience form: provides a 128-char local buffer and forwards to the
// _print_state_stack(buf) call below.
580{
581 char buf[128];
582 _print_state_stack(buf);
583}
584#endif
585
586
587//-----------------------------------------------------------------------------
588
// Report a parse error at an explicit YAML location. The handler is told
// to cancel the parse first, then err_parse() is invoked with the
// handler's callbacks, both locations (@p cpploc and @p ymlloc) and the
// format arguments. Declared C4_NORETURN.
589template<class EventHandler>
590template<class ...Args>
591C4_NORETURN C4_NO_INLINE void ParseEngine<EventHandler>::_err(Location const& cpploc, Location const& ymlloc, const char* fmt, Args const& ...args) const
592{
593 m_evt_handler->cancel_parse();
594 err_parse(m_evt_handler->m_stack.m_callbacks, ErrorDataParse{cpploc, ymlloc}, fmt, args...);
595}
596
// Report a parse error at the position of the current parser state. The
// handler is told to cancel the parse first, then err_parse() is invoked
// with the handler's callbacks, @p cpploc, the current position and the
// format arguments. Declared C4_NORETURN.
597template<class EventHandler>
598template<class ...Args>
599C4_NORETURN C4_NO_INLINE void ParseEngine<EventHandler>::_err(Location const& cpploc, const char *fmt, Args const& ...args) const
600{
601 m_evt_handler->cancel_parse();
602 err_parse(m_evt_handler->m_stack.m_callbacks, ErrorDataParse{cpploc, m_evt_handler->m_curr->pos}, fmt, args...);
603}
604
605
606//-----------------------------------------------------------------------------
607#ifdef RYML_DBG
608template<class EventHandler>
609template<class ...Args>
610void ParseEngine<EventHandler>::_dbg(csubstr fmt, Args const& ...args) const
611{
612 if(dbg_enabled_())
613 {
614 dbg_printf_(fmt, args...);
615 dbg_dumper_("\n");
616 _fmt_msg(dbg_dumper_);
617 }
618}
619#endif
620
621
622//-----------------------------------------------------------------------------
623template<class EventHandler>
624bool ParseEngine<EventHandler>::_finished_file() const
625{
626 bool ret = m_evt_handler->m_curr->pos.offset >= _buf().len;
627 #ifdef RYML_DBG
628 if(ret)
629 {
630 _c4dbgp("finished file!!!");
631 }
632 #endif
633 return ret;
634}
635
636template<class EventHandler>
637C4_HOT C4_ALWAYS_INLINE bool ParseEngine<EventHandler>::_finished_line() const // LCOV_EXCL_LINE
638{
639 return m_evt_handler->m_curr->line_contents.rem.empty();
640}
641
642
643//-----------------------------------------------------------------------------
644
645template<class EventHandler>
646void ParseEngine<EventHandler>::_maybe_skip_whitespace_tokens()
647{
648 if(m_evt_handler->m_curr->line_contents.rem.len && (m_evt_handler->m_curr->line_contents.rem.str[0] == ' ' RYML_WITH_TAB_TOKENS_(|| m_evt_handler->m_curr->line_contents.rem.str[0] == '\t')))
649 {
650 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(RYML_WITH_OR_WITHOUT_TAB_TOKENS_(" \t", ' '));
651 if(pos == npos)
652 pos = m_evt_handler->m_curr->line_contents.rem.len; // maybe the line is just all whitespace
653 _c4dbgpf("skip {} whitespace characters", pos);
654 _line_progressed(pos);
655 }
656}
657
658template<class EventHandler>
659void ParseEngine<EventHandler>::_maybe_skipchars(char c)
660{
661 if(m_evt_handler->m_curr->line_contents.rem.len && m_evt_handler->m_curr->line_contents.rem.str[0] == c)
662 {
663 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(c);
664 if(pos == npos)
665 pos = m_evt_handler->m_curr->line_contents.rem.len; // maybe the line is just all c
666 _c4dbgpf("skip {}x'{}'", pos, _c4prc(c));
667 _line_progressed(pos);
668 }
669}
670
671template<class EventHandler>
672template<size_t N>
673void ParseEngine<EventHandler>::_skipchars(const char (&chars)[N])
674{
675 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.begins_with_any(chars), m_evt_handler->m_curr->pos);
676 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(chars);
677 if(pos == npos)
678 pos = m_evt_handler->m_curr->line_contents.rem.len; // maybe the line is just whitespace
679 _c4dbgpf("skip {} characters", pos);
680 _line_progressed(pos);
681}
682
683template<class EventHandler>
684void ParseEngine<EventHandler>::_skip_comment()
685{
686 LineContents const& C4_RESTRICT lc = m_evt_handler->m_curr->line_contents;
687 const size_t col = m_evt_handler->m_curr->pos.col - 1u;
688 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, lc.rem.begins_with('#'), m_evt_handler->m_curr->pos);
689 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, lc.rem.is_sub(lc.full), m_evt_handler->m_curr->pos);
690 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col >= 1, m_evt_handler->m_curr->pos); // 1-based
691 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, col == ((size_t)(lc.rem.str - lc.full.str)), m_evt_handler->m_curr->pos);
692 // raise an error if the comment is not preceded by whitespace
693 if(lc.rem.str != lc.full.str) // not at line beginning
694 {
695 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, col > 0, m_evt_handler->m_curr->pos);
696 const char prev = lc.full.str[col - 1u];
697 if C4_UNLIKELY(prev != ' ' && prev != '\t')
698 _c4err("comment not preceded by whitespace");
699 }
700 _c4dbgpf("comment was '{}'", m_evt_handler->m_curr->line_contents.rem);
701 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
702}
703
704template<class EventHandler>
705void ParseEngine<EventHandler>::_maybe_skip_comment_strict()
706{
707 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(" \t");
708 if(pos != npos)
709 {
710 if('#' == m_evt_handler->m_curr->line_contents.rem[pos])
711 {
712 _line_progressed(pos);
713 _skip_comment();
714 }
715 }
716}
717
718template<class EventHandler>
719void ParseEngine<EventHandler>::_maybe_skip_comment()
720{
721 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(" \t");
722 if(pos != npos)
723 {
724 if('#' == m_evt_handler->m_curr->line_contents.rem[pos])
725 {
726 _line_progressed(pos);
727 _skip_comment();
728 }
729 }
730 else
731 {
732 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
733 }
734}
735
736template<class EventHandler>
737bool ParseEngine<EventHandler>::_maybe_scan_following_colon() noexcept
738{
739 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(" \t");
740 if(pos != npos)
741 {
742 if(':' == m_evt_handler->m_curr->line_contents.rem[pos])
743 {
744 // bump pos to skip the colon as well, and check the colon
745 // is followed by space or tab
746 if(++pos < m_evt_handler->m_curr->line_contents.rem.len)
747 {
748 const char next = m_evt_handler->m_curr->line_contents.rem.str[pos];
749 if(next == ' ' RYML_WITH_TAB_TOKENS_(|| next == '\t'))
750 ++pos;
751 else
752 return false;
753 }
754 _line_progressed(pos);
755 return true;
756 }
757 }
758 else
759 {
760 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
761 }
762 return false;
763}
764
765
766//-----------------------------------------------------------------------------
767
768template<class EventHandler>
769csubstr ParseEngine<EventHandler>::_scan_anchor()
770{
771 csubstr s = m_evt_handler->m_curr->line_contents.rem;
772 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, s.begins_with('&'), m_evt_handler->m_curr->pos);
773 csubstr anchor = s.range(1, s.first_of(" ,]}\t"));
774 _line_progressed(1u + anchor.len);
775 _maybe_skipchars(' ');
776 return anchor;
777}
778
779template<class EventHandler>
780csubstr ParseEngine<EventHandler>::_scan_ref_seq()
781{
782 csubstr s = m_evt_handler->m_curr->line_contents.rem;
783 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, s.begins_with('*'), m_evt_handler->m_curr->pos);
784 _set_first(s, s.first_of(" ,]\t"));
785 _line_progressed(s.len);
786 return s;
787}
788
789template<class EventHandler>
790csubstr ParseEngine<EventHandler>::_scan_ref_map()
791{
792 csubstr s = m_evt_handler->m_curr->line_contents.rem;
793 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, s.begins_with('*'), m_evt_handler->m_curr->pos);
794 _set_first(s, s.first_of(" ,}\t"));
795 _line_progressed(s.len);
796 return s;
797}
798
799template<class EventHandler>
800csubstr ParseEngine<EventHandler>::_scan_tag()
801{
802 csubstr t = m_evt_handler->m_curr->line_contents.rem;
803 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, t.begins_with('!'), m_evt_handler->m_curr->pos);
804 if(!t.begins_with("!<"))
805 {
806 _c4dbgp("begins with '!'");
807 _set_first(t, t.first_of(" ,]}\t"));
808 if C4_UNLIKELY(t.first_of("[{") != npos)
809 _c4err("invalid tag");
810 _line_progressed(t.len);
811 if(m_options.resolve_tags_all() || (m_options.resolve_tags() && is_custom_tag(t)))
812 t = _resolve_tag(t);
813 }
814 else
815 {
816 _c4dbgp("begins with '!<'");
817 size_t pos = t.find('>');
818 if C4_UNLIKELY(pos == npos)
819 _c4err("invalid tag");
820 _set_first_strict(t, pos+1);
821 _line_progressed(t.len);
822 t = t.sub(1);
823 }
824 _maybe_skip_whitespace_tokens();
825 return t;
826}
827
828template<class EventHandler>
829csubstr ParseEngine<EventHandler>::_scan_tag(csubstr *orig)
830{
831 csubstr t = m_evt_handler->m_curr->line_contents.rem;
832 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, t.begins_with('!'), m_evt_handler->m_curr->pos);
833 if(!t.begins_with("!<"))
834 {
835 _c4dbgp("begins with '!'");
836 _set_first(t, t.first_of(" ,\t"));
837 if C4_UNLIKELY(t.first_of("[{") != npos)
838 _c4err("invalid tag");
839 _line_progressed(t.len);
840 *orig = t;
841 if(m_options.resolve_tags_all() || (m_options.resolve_tags() && is_custom_tag(t)))
842 t = _resolve_tag(t);
843 }
844 else
845 {
846 _c4dbgp("begins with '!<'");
847 size_t pos = t.find('>');
848 if C4_UNLIKELY(pos == npos)
849 _c4err("invalid tag");
850 _set_first_strict(t, pos+1);
851 _line_progressed(t.len);
852 *orig = t;
853 t = t.sub(1);
854 }
855 _maybe_skip_whitespace_tokens();
856 return t;
857}
858
859
860//-----------------------------------------------------------------------------
861
862template<class EventHandler>
863bool ParseEngine<EventHandler>::_is_valid_start_scalar_plain_flow_check_block_token(csubstr s)
864{
865 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, s.len > 0, m_evt_handler->m_curr->pos);
866 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, s.begins_with_any(":-"), m_evt_handler->m_curr->pos);
867 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, s.count('\n') == 0, m_evt_handler->m_curr->pos);
868 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, s.count('\r') == 0, m_evt_handler->m_curr->pos);
869 if(s.len > 1)
870 {
871 switch(s.str[1])
872 {
873 case ' ':
874 case ',':
875 case '}':
876 case ']':
877 case '\t':
878 if(s.str[0] == ':')
879 {
880 _c4dbgpf("not a scalar: found non-scalar token '{}{}'", s.str[0], s.str[1]);
881 return false;
882 }
883 else
884 {
885 _c4err("invalid scalar");
886 }
887 break;
888 case '{':
889 case '[':
890 _c4err("invalid token \":{}\"", _c4prc(s.str[1]));
891 break;
892 default:
893 break;
894 }
895 }
896 else
897 {
898 if(s.str[0] == '-')
899 _c4err("invalid scalar");
900 return false;
901 }
902 return true;
903}
904
905template<class EventHandler>
906bool ParseEngine<EventHandler>::_is_valid_start_scalar_plain_flow_check_qmrk(csubstr s)
907{
908 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, s.len > 0, m_evt_handler->m_curr->pos);
909 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, s[0] == '?', m_evt_handler->m_curr->pos);
910 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, s.count('\n') == 0, m_evt_handler->m_curr->pos);
911 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, s.count('\r') == 0, m_evt_handler->m_curr->pos);
912 if(s.len > 1)
913 {
914 switch(s.str[1])
915 {
916 case ' ':
917 case '\t':
918 _c4dbgpf("not a scalar: found non-scalar token '?{}'", _c4prc(s.str[1]));
919 return false;
920 case '{':
921 case '}':
922 case '[':
923 case ']':
924 _c4err("invalid token \"?{}\"", _c4prc(s.str[1]));
925 break;
926 default:
927 break;
928 }
929 }
930 else
931 {
932 return false;
933 }
934 return true;
935}
936
937
938template<class EventHandler>
939bool ParseEngine<EventHandler>::_is_valid_start_scalar_plain_flow(csubstr s)
940{
941 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, !s.empty(), m_evt_handler->m_curr->pos);
942 // it's not a scalar if it starts with any of these characters:
943 switch(s.str[0])
944 {
945 // these are all legal tokens which mean no scalar is starting:
946 case '[':
947 case ']':
948 case '{':
949 case '}':
950 case '&':
951 case '*':
952 case '!':
953 case '|':
954 case '>':
955 case '#':
956 case ',':
957 _c4dbgpf("not a scalar: found non-scalar token '{}'", _c4prc(s.str[0]));
958 return false;
959 // '-' and ':' are illegal at the beginning if not followed by a scalar character
960 case '-':
961 case ':':
962 _c4dbgpf("suspicious token='{}' len={}", _c4prc(s.str[0]), s.len);
963 return _is_valid_start_scalar_plain_flow_check_block_token(s);
964 case '?':
965 _c4dbgpf("qmrk='{}' len={}", _c4prc(s.str[0]), s.len);
966 return _is_valid_start_scalar_plain_flow_check_qmrk(s);
967 // everything else is a legal starting character
968 default:
969 return true;
970 }
971}
972
973
// Called while scanning a plain scalar when a newline was found at
// offset @p offs of @p s. Inspects the line after the newline to decide
// whether the scalar goes on:
//  - returns false, without advancing, when the next line's first
//    non-blank character is ',', ']' or '#', or a ':' that ends the line
//    or forms a block token;
//  - raises a parse error when the next line has content but is indented
//    less than the current indref;
//  - otherwise consumes the rest of the current line, ends it, scans the
//    next line and returns true.
974template<class EventHandler>
975bool ParseEngine<EventHandler>::_scan_scalar_plain_handle_newline(csubstr s, size_t offs)
976{
977 _c4dbgpf("newl[PLAIN]: found '\\n'. offs={} line={} sofar={}", offs, m_evt_handler->m_curr->pos.line, prs_(s.first(offs), true));
// is there anything in the buffer after the newline?
978 if(s.len > offs + 1)
979 {
980 _c4dbgp("newl[PLAIN]: buffer continues");
981 csubstr next_line = s.sub(offs + 1);
982 size_t next_line_indentation = next_line.first_not_of(' ');
983 if(next_line_indentation != npos)
984 {
985 _c4dbgpf("newl[PLAIN]: line={} indentation={} indref={}", m_evt_handler->m_curr->pos.line + 1, next_line_indentation, m_evt_handler->m_curr->indref);
// trim next_line to that single line
986 next_line = next_line.first(next_line.first_of("\n\r"));
987 _c4dbgpf("newl[PLAIN]: has indentation. next_line={}", prs_(next_line));
988 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, next_line_indentation <= next_line.len, m_evt_handler->m_curr->pos);
989 if C4_LIKELY(next_line_indentation >= m_evt_handler->m_curr->indref)
990 {
991 _c4dbgp("newl[PLAIN]: larger indentation");
992 next_line = next_line.sub(next_line_indentation);
993 }
994 else if C4_UNLIKELY(next_line.len && next_line.triml(' ').len)
995 {
996 _c4dbgp("newl[PLAIN]: err, smaller indentation");
// move onto the offending line before raising the error, so that the
// reported position refers to it
997 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
998 _line_ended();
999 _scan_line();
1000 if(m_evt_handler->m_curr->line_contents.indentation != npos)
1001 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
1002 _c4err("parse error"); // cannot reduce indentation here
1003 }
1004 _c4dbgpf("newl[PLAIN]: next_line.len={}", next_line.len);
1005 if(next_line.len)
1006 {
1007 size_t fno = next_line.first_not_of(" \t");
1008 if(fno != csubstr::npos)
1009 {
1010 _c4assert(fno < next_line.len);
// a terminating character at the start of the next line ends the scalar
1011 switch(next_line.str[fno])
1012 {
1013 case ',': case ']': case '#':
1014 _c4dbgpf("newl[PLAIN]: found terminating character beginning next line: '{}'", next_line.str[fno]);
1015 return false;
1016 case ':': // cannot be succeeded by whitespace
1017 _c4dbgp("newl[PLAIN]: found :");
1018 if(fno + 1 == next_line.len || _is_blck_token(next_line.sub(fno)))
1019 {
1020 _c4dbgpf("newl[PLAIN]: found terminating character beginning next line: '{}'", next_line.str[fno]);
1021 return false;
1022 }
1023 break;
1024 }
1025 }
1026 }
1027 }
1028 }
// the scalar continues: move on to the next line
1029 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
1030 _line_ended();
1031 _scan_line();
1032 return true;
1033}
1034
/** Scan a plain (unquoted) scalar that is a value of a flow sequence.
 *
 * Scanning starts at the current parse position and walks the source
 * buffer character by character, possibly across several lines (each
 * '\n' is handed to _scan_scalar_plain_handle_newline()). The scalar
 * ends at:
 *  - ',' or ']'
 *  - ':' followed by ' ', ',', '\n' or ']' (also '\t' when built with
 *    RYML_WITH_TAB_TOKENS); '\r' characters right after the ':' are
 *    skipped before this check
 *  - '#' preceded by ' ' (also '\t' when built with RYML_WITH_TAB_TOKENS)
 *  - a '\n' for which _scan_scalar_plain_handle_newline() returns false
 *  - the end of the buffer
 *
 * A parse error is raised on '[', '{' or '}', on a ':' that is the last
 * character of the buffer, and when the scalar starts at line beginning
 * with three consecutive '-' or three consecutive '.'.
 *
 * @param sc receives the scanned scalar with trailing spaces removed
 *        (and trailing tabs when built with RYML_WITH_TAB_TOKENS), and
 *        needs_filter, which is set when a '\r' was seen or a line break
 *        was crossed
 * @return false if the remainder of the line is empty or does not start
 *         a plain scalar per _is_valid_start_scalar_plain_flow(); true
 *         otherwise
 */
1035 template<class EventHandler>
1036bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_flow(ScannedScalar *C4_RESTRICT sc)
1037{
1038 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RMAP), m_evt_handler->m_curr->pos);
1039 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RBLCK), m_evt_handler->m_curr->pos);
1040 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ|RSEQIMAP), m_evt_handler->m_curr->pos);
1041 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(RFLOW), m_evt_handler->m_curr->pos);
1042 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(RVAL), m_evt_handler->m_curr->pos);
1043
1044 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->line_contents.rem.begins_with(' '), m_evt_handler->m_curr->pos);
1045 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->line_contents.rem.begins_with('\n'), m_evt_handler->m_curr->pos);
1046
 // nothing to scan, or the first character cannot start a plain scalar
1047 if(!m_evt_handler->m_curr->line_contents.rem.len || !_is_valid_start_scalar_plain_flow(m_evt_handler->m_curr->line_contents.rem))
1048 return false;
1049
 // scan over the whole remaining buffer, not only the current line
1050 substr s = _buf().sub(m_evt_handler->m_curr->pos.offset);
1051 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, s.begins_with(m_evt_handler->m_curr->line_contents.rem), m_evt_handler->m_curr->pos);
1052
1053 _c4dbgp("scanning seqflow scalar...");
1054
1055 bool needs_filter = false;
 // col: characters to progress on the current line; offs: offset into s
1056 size_t col = 0; // zero-based column
1057 size_t offs = 0; // offset
1058 for( ; offs < s.len; ++offs, ++col)
1059 {
1060 const char c = s.str[offs];
1061 switch(c)
1062 {
1063 case ',':
1064 case ']':
1065 _c4dbgpf("found terminating character at {}: '{}'", offs, c);
1066 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, offs > 0, m_evt_handler->m_curr->pos);
1067 goto ended_scalar;
1068 case '\n':
1069 _c4dbgpf("found '\\n' at col={}", col);
 // a false return means the scalar ends at this newline
1070 if(!_scan_scalar_plain_handle_newline(s, offs))
1071 goto ended_scalar;
1072 col = (size_t)-1; // so that col is 0 in the next loop iteration
1073 needs_filter = true;
1074 break;
1075 case '\r':
1076 --col; // don't count \r when calling _line_progressed()
1077 needs_filter = true;
1078 break;
1079 case ':':
1080 _c4dbgp("found suspicious ':'");
1081 if(s.len > offs + 1)
1082 {
1083 char next = s.str[offs + 1];
1084 _c4dbgpf("next char is '{}'", _c4prc(next));
 // look past any '\r' characters to the character after them
1085 if(next == '\r')
1086 {
1087 csubstr after = s.sub(offs + 1).triml('\r');
1088 if(after.len)
1089 {
1090 next = after.str[0];
1091 _c4dbgpf("skip \\r to '{}'", _c4prc(next));
1092 }
1093 }
1094 // no else here.
1095 if(next == ' ' RYML_WITH_TAB_TOKENS_(|| next == '\t') || next == ',' || next == '\n' || next == ']')
1096 {
1097 _c4dbgp("map starting!");
1098 goto ended_scalar;
1099 }
1100 else
1101 {
1102 _c4dbgp("':' nothing to see here");
1103 }
1104 }
1105 else
1106 {
 // the ':' is the last character of the buffer
1107 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, s.len == offs + 1, m_evt_handler->m_curr->pos);
1108 _line_progressed(col);
1109 _c4err("missing termination: '{}'", c); // noreturn
1110 }
1111 break;
1112 case '#':
1113 {
1114 _c4dbgp("found suspicious '#'");
1115 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, offs > 0, m_evt_handler->m_curr->pos);
 // '#' ends the scalar only when preceded by whitespace
1116 char prev = s.str[offs - 1];
1117 if(prev == ' ' RYML_WITH_TAB_TOKENS_(|| prev == '\t'))
1118 {
1119 _c4dbgpf("found terminating character at {}: '{}'", offs, c);
1120 goto ended_scalar;
1121 }
1122 }
1123 break;
1124 case '[':
1125 case '{':
1126 case '}':
1127 _line_progressed(col); // advance to report the proper position in the error
1128 _c4err("invalid character: '{}'", c); // noreturn
1129 case '-':
1130 case '.':
1131 _c4dbgpf("doc token character: '{}', offs={}", c, offs);
 // three equal characters ("---" or "...") starting the scalar at line beginning are an error
1132 if(offs == 0 && m_evt_handler->m_curr->at_line_beginning())
1133 {
1134 _c4dbgp("at line beginning");
1135 if(s.len >= 3 && s.str[1] == c && s.str[2] == c)
1136 {
1137 _c4err("parse error"); // no return
1138 }
1139 }
1140 break;
1141 default:
1142 ;
1143 }
1144 }
1145
1146ended_scalar:
1147
 // advance the parse position by the characters consumed on the current line
1148 _line_progressed(col);
 // NOTE(review): _set_first() is defined elsewhere; presumably it shortens s to its first offs characters -- confirm
1149 _set_first(s, offs);
1150 sc->scalar = s.trimr(RYML_WITH_OR_WITHOUT_TAB_TOKENS_(" \t", ' '));
1151 sc->needs_filter = needs_filter;
1152
1153 _c4prscalar("scanned plain scalar", sc->scalar, /*keep_newlines*/true);
1154
1155 return true;
1156}
1157
1158template<class EventHandler>
1159bool ParseEngine<EventHandler>::_scan_scalar_plain_map_flow(ScannedScalar *C4_RESTRICT sc)
1160{
1161 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RSEQ) || has_any(RSEQIMAP), m_evt_handler->m_curr->pos);
1162 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RBLCK), m_evt_handler->m_curr->pos);
1163 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(RMAP|RSEQIMAP), m_evt_handler->m_curr->pos);
1164 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(RFLOW), m_evt_handler->m_curr->pos);
1165 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RVAL|QMRK), m_evt_handler->m_curr->pos);
1166
1167 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->line_contents.rem.begins_with(' '), m_evt_handler->m_curr->pos);
1168 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->line_contents.rem.begins_with('\n'), m_evt_handler->m_curr->pos);
1169
1170 if(!m_evt_handler->m_curr->line_contents.rem.len || !_is_valid_start_scalar_plain_flow(m_evt_handler->m_curr->line_contents.rem))
1171 return false;
1172
1173 substr s = _buf().sub(m_evt_handler->m_curr->pos.offset);
1174 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, s.begins_with(m_evt_handler->m_curr->line_contents.rem), m_evt_handler->m_curr->pos);
1175
1176 _c4dbgp("scanning mapflow scalar...");
1177
1178 bool needs_filter = false;
1179 size_t col = 0; // zero-based column
1180 size_t offs = 0; // offset
1181 for( ; offs < s.len; ++offs, ++col)
1182 {
1183 const char c = s.str[offs];
1184 switch(c)
1185 {
1186 case ',':
1187 case '}':
1188 _c4dbgpf("found terminating character at {}: '{}'", offs, c);
1189 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, offs > 0, m_evt_handler->m_curr->pos);
1190 goto ended_scalar;
1191 case '\n':
1192 _c4dbgpf("found '\\n' at col={}", col);
1193 if(!_scan_scalar_plain_handle_newline(s, offs))
1194 goto ended_scalar;
1195 col = (size_t)-1; // so that col is 0 in the next loop iteration
1196 needs_filter = true;
1197 break;
1198 case '\r':
1199 --col; // don't count \r when calling _line_progressed()
1200 needs_filter = true;
1201 break;
1202 case ':':
1203 _c4dbgpf("found ':'", c);
1204 if(s.len == offs+1)
1205 break;
1206 {
1207 const char next = s.str[offs+1];
1208 _c4dbgpf("next='{}'", c);
1209 if(next == ' ' || next == ',' || next == '}' || next == '\n' || next == '\r' RYML_WITH_TAB_TOKENS_(|| next == '\t'))
1210 {
1211 _c4dbgpf("found terminating character: '{}'", c);
1212 goto ended_scalar;
1213 }
1214 }
1215 break;
1216 case '{':
1217 case '[':
1218 _line_progressed(col);
1219 _c4err("invalid character: '{}'", c); // noreturn
1220 break;
1221 case ']':
1222 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(RSEQIMAP), m_evt_handler->m_curr->pos);
1223 goto ended_scalar;
1224 default:
1225 ;
1226 }
1227 }
1228
1229ended_scalar:
1230
1231 _line_progressed(col);
1232 s = s.first(offs);
1233 sc->scalar = s.trimr(RYML_WITH_OR_WITHOUT_TAB_TOKENS_(" \t", ' '));
1234 sc->needs_filter = needs_filter;
1235
1236 _c4prscalar("scanned plain scalar", sc->scalar, /*keep_newlines*/true);
1237
1238 return sc->scalar.len > 0u;
1239}
1240
1241template<class EventHandler>
1242bool ParseEngine<EventHandler>::_scan_scalar_seq_json(ScannedScalar *C4_RESTRICT sc)
1243{
1244 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RMAP), m_evt_handler->m_curr->pos);
1245 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RBLCK), m_evt_handler->m_curr->pos);
1246 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ), m_evt_handler->m_curr->pos);
1247 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(RFLOW), m_evt_handler->m_curr->pos);
1248
1249 substr s = m_evt_handler->m_curr->line_contents.rem;
1250 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '), m_evt_handler->m_curr->pos);
1251 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, s.len > 0, m_evt_handler->m_curr->pos);
1252
1253 _c4dbgp("seq_json: scanning scalar...");
1254
1255 switch(s.str[0])
1256 {
1257 case ']':
1258 case '{':
1259 case ',':
1260 _c4dbgp("seq_json: not a scalar.");
1261 return false;
1262 }
1263
1264 {
1265 const size_t len = _begins_with_special_json_scalar(s);
1266 if(len)
1267 {
1268 char c = s.len > len ? s.str[len] : ',';
1269 if(c == ',' || c == ']' || c == ' ' || c == '\n' || c == '\t' || c == '\r')
1270 {
1271 sc->scalar = s.first(len);
1272 sc->needs_filter = false;
1273 _c4dbgpf("seq_json: special scalar: '{}'", sc->scalar);
1274 _line_progressed(len);
1275 return true;
1276 }
1277 else
1278 {
1279 return false;
1280 }
1281 }
1282 }
1283
1284 // must be a number or special scalar
1285 size_t i = 0;
1286 for( ; i < s.len; ++i)
1287 {
1288 const char c = s.str[i];
1289 switch(c)
1290 {
1291 case ',':
1292 case ']':
1293 case ' ':
1294 case '\t':
1295 _c4dbgpf("seq_json: found terminating character: '{}'", c);
1296 goto ended_scalar;
1297 default:
1298 ;
1299 }
1300 }
1301
1302ended_scalar:
1303
1304 _line_progressed(i);
1305 sc->scalar = s.first(i);
1306 sc->needs_filter = false;
1307 _c4dbgpf("seq_json: scalar was {}", prs_(sc->scalar, /*escape*/true));
1308
1309 return true;
1310}
1311
1312template<class EventHandler>
1313bool ParseEngine<EventHandler>::_scan_scalar_map_json(ScannedScalar *C4_RESTRICT sc)
1314{
1315 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RSEQ), m_evt_handler->m_curr->pos);
1316 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RBLCK), m_evt_handler->m_curr->pos);
1317 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(RMAP), m_evt_handler->m_curr->pos);
1318 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(RFLOW), m_evt_handler->m_curr->pos);
1319 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RVAL), m_evt_handler->m_curr->pos);
1320
1321 substr s = m_evt_handler->m_curr->line_contents.rem;
1322 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '), m_evt_handler->m_curr->pos);
1323 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, s.len > 0, m_evt_handler->m_curr->pos);
1324
1325 _c4dbgp("scanning scalar...");
1326
1327 {
1328 const size_t len = _begins_with_special_json_scalar(s);
1329 if(len)
1330 {
1331 char c = s.len > len ? s.str[len] : ',';
1332 _c4dbgpf("begins with special scalar: {} next='{}'", s.first(len), _c4prc(c));
1333 if(c == ',' || c == '}' || c == ' ' || c == '\n' || c == '\t' || c == '\r')
1334 {
1335 sc->scalar = s.first(len);
1336 sc->needs_filter = false;
1337 _c4dbgpf("special json scalar: '{}'", prs_(sc->scalar));
1338 _line_progressed(len);
1339 return true;
1340 }
1341 else
1342 {
1343 return false;
1344 }
1345 }
1346 }
1347
1348 // must be a number
1349 size_t i = 0;
1350 for( ; i < s.len; ++i)
1351 {
1352 const char c = s.str[i];
1353 switch(c)
1354 {
1355 case ',':
1356 case '}':
1357 case ' ':
1358 case '\t':
1359 _c4dbgpf("found terminating character: '{}'", c);
1360 goto ended_scalar;
1361 default:
1362 ;
1363 }
1364 }
1365
1366ended_scalar:
1367
1368 if C4_LIKELY(i > 0)
1369 {
1370 _line_progressed(i);
1371 sc->scalar = s.first(i);
1372 sc->needs_filter = false;
1373 _c4dbgpf("scalar was {}", prs_(sc->scalar));
1374 return true;
1375 }
1376
1377 return false;
1378}
1379
1380template<class EventHandler>
1381bool ParseEngine<EventHandler>::_is_doc_begin(csubstr s)
1382{
1383 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, s[0] == '-', m_evt_handler->m_curr->pos);
1384 return (m_evt_handler->m_curr->line_contents.indentation == 0u && m_evt_handler->m_curr->at_line_beginning() && _is_doc_begin_token(s));
1385}
1386
1387template<class EventHandler>
1388bool ParseEngine<EventHandler>::_is_doc_end(csubstr s)
1389{
1390 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, s[0] == '.', m_evt_handler->m_curr->pos);
1391 return (m_evt_handler->m_curr->line_contents.indentation == 0u && m_evt_handler->m_curr->at_line_beginning() && _is_doc_end_token(s));
1392}
1393
/** Scan a plain (unquoted) scalar outside of flow context, possibly
 * spanning several lines.
 *
 * Returns false without consuming anything when the remainder of the
 * current line cannot start a plain scalar:
 *  - it starts with '-', ':' or '?' and _is_blck_token() holds
 *  - it starts with '-' and is a document begin (_is_doc_begin())
 *  - it starts with '.' and is a document end (_is_doc_end())
 *  - it starts with any of '[', '{', '&', '*', '!', '\t', ',' or '%'
 *
 * Otherwise lines are consumed one at a time. Within a line the scalar
 * ends at:
 *  - ':' that is last on the line or followed by ' ' (also '\t' when
 *    built with RYML_WITH_TAB_TOKENS); this is accepted only on the line
 *    where the scalar started, and is a parse error on later lines
 *  - '#' at the start of the scanned text or preceded by ' ' or '\t'
 * After a line is consumed, the scalar ends if the next line has fewer
 * leading spaces than `indentation`, if the next line has no leading
 * spaces and is a document begin/end token, or if the file is finished.
 *
 * @param sc receives the scalar (the buffer range from the starting
 *        offset to the final offset, with trailing spaces, tabs and line
 *        terminators removed) and needs_filter, which is set when the
 *        scan moved on to a further line
 * @param indentation minimum number of leading spaces for a following
 *        line to be part of the scalar
 * @return false if no scalar starts here (see above); true otherwise
 */
1394 template<class EventHandler>
1395bool ParseEngine<EventHandler>::_scan_scalar_plain_blck(ScannedScalar *C4_RESTRICT sc, size_t indentation)
1396{
1397 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RFLOW), m_evt_handler->m_curr->pos);
1398 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RSEQIMAP), m_evt_handler->m_curr->pos);
1399 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(RBLCK|RUNK|USTY), m_evt_handler->m_curr->pos);
1400
1401 substr s = m_evt_handler->m_curr->line_contents.rem;
1402 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '), m_evt_handler->m_curr->pos);
1403 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, s.len > 0, m_evt_handler->m_curr->pos);
1404
 // reject first characters that cannot start a plain scalar here
1405 switch(s.str[0])
1406 {
1407 case '-':
1408 if(_is_blck_token(s))
1409 {
1410 return false;
1411 }
1412 else if(_is_doc_begin(s))
1413 {
1414 _c4dbgp("token is doc start");
1415 return false;
1416 }
1417 break;
1418 case ':':
1419 case '?':
1420 if(_is_blck_token(s))
1421 return false;
1422 break;
1423 case '[':
1424 case '{':
1425 case '&':
1426 case '*':
1427 case '!':
1428 case '\t':
1429 case ',':
1430 case '%':
1431 return false;
1432 case '.':
1433 if(_is_doc_end(s))
1434 {
1435 _c4dbgp("token is doc end");
1436 return false;
1437 }
1438 break;
1439 }
1440
1441 _c4dbgpf("plain scalar! indentation={}", indentation);
1442
 // remember where the scalar starts: the offset delimits the result, the line decides whether ':' may end it
1443 const size_t start_offset = m_evt_handler->m_curr->pos.offset;
1444 const size_t start_line = m_evt_handler->m_curr->pos.line;
1445
1446 bool needs_filter = false;
 // one iteration per line of the scalar
1447 while(true)
1448 {
1449 _c4dbgpf("plain scalar line: {}", prs_(s));
1450 for(size_t i = 0; i < s.len; ++i)
1451 {
1452 const char curr = s.str[i];
1453 //_c4dbgpf("[{}]='{}'", i, _c4prc(curr));
1454 switch(curr)
1455 {
1456 case ':':
1457 _c4dbgpf("[{}]: got suspicious ':'", i);
1458 // are there more characters?
1459 if((i + 1 == s.len) || ((s.str[i+1] == ' ') RYML_WITH_TAB_TOKENS_( || (s.str[i+1] == '\t'))))
1460 {
1461 _c4dbgpf("followed by '{}'", i+1 == s.len ? csubstr("\\n") : _c4prc(s.str[i+1]));
1462 _line_progressed(i);
1463 // ': ' is accepted only on the first line
1464 if C4_LIKELY(m_evt_handler->m_curr->pos.line == start_line)
1465 {
1466 _c4dbgp("start line. scalar ends here");
1467 goto ended_scalar;
1468 }
1469 else
1470 {
1471 _c4err("multiline scalars cannot be used as keys");
1472 }
1473 }
1474 else
1475 {
 // walk over a run of consecutive ':' and resume so that the loop's ++i lands on the last colon of the run
1476 size_t j = i;
1477 while(j + 1 < s.len && s.str[j+1] == ':')
1478 {
1479 _c4dbgp("skip colon");
1480 ++j;
1481 }
1482 i = j > i ? j-1 : i;
1483 _c4dbgp("nothing to see here");
1484 }
1485 break;
1486 case '#':
1487 _c4dbgp("got suspicious '#'");
 // '#' starts a comment only at the start of the scanned text or after a space/tab
1488 if(!i || (s.str[i-1] == ' ' || s.str[i-1] == '\t'))
1489 {
1490 _c4dbgp("comment! scalar ends here");
1491 _line_progressed(i);
1492 goto ended_scalar;
1493 }
1494 else
1495 {
1496 _c4dbgp("nothing to see here");
1497 }
1498 break;
1499 }
1500 }
 // the whole line belongs to the scalar; look at the next line before moving to it
1501 _line_progressed(s.len);
1502 csubstr next_peeked = _peek_next_line(m_evt_handler->m_curr->pos.offset);
1503 next_peeked = next_peeked.trimr("\n\r");
 // number of leading spaces of the next line (tabs are not counted here)
1504 const size_t next_indentation = next_peeked.first_not_of(' ');
1505 _c4dbgpf("indentation curr={} next={}", indentation, next_indentation);
1506 if(next_indentation < indentation)
1507 {
1508 _c4dbgp("smaller indentation! scalar ended");
1509 goto ended_scalar;
1510 }
 // a document begin/end token on an unindented next line also ends the scalar
1511 else if(next_indentation == 0 && next_peeked.len > 0)
1512 {
1513 const char first = next_peeked.str[0];
1514 switch(first)
1515 {
1516 case '-':
1517 _c4dbgpf("doc begin? peeked={}", prs_(next_peeked, size_t(3)));
1518 if(_is_doc_begin_token(next_peeked))
1519 {
1520 _c4dbgp("doc begin! scalar ended");
1521 goto ended_scalar;
1522 }
1523 break;
1524 case '.':
1525 _c4dbgpf("doc end? peeked={}", prs_(next_peeked, size_t(3)));
1526 if(_is_doc_end_token(next_peeked))
1527 {
1528 _c4dbgp("doc end! scalar ended");
1529 goto ended_scalar;
1530 }
1531 break;
1532 }
1533 }
1534 // load with next line
 // NOTE(review): the "next line!" debug message is printed here and again inside the branch below
1535 _c4dbgp("next line!");
1536 if(!_finished_file())
1537 {
1538 _c4dbgp("next line!");
1539 _line_ended();
1540 _scan_line();
1541 }
1542 else
1543 {
1544 _c4dbgp("file finished!");
1545 goto ended_scalar;
1546 }
 // continue scanning on the newly loaded line
1547 s = m_evt_handler->m_curr->line_contents.rem;
1548 needs_filter = true;
1549 }
1550
1551ended_scalar:
1552
 // the scalar is everything consumed since the start, minus trailing whitespace and line terminators
1553 sc->scalar = _buf().range(start_offset, m_evt_handler->m_curr->pos.offset).trimr(" \n\r\t");
1554 sc->needs_filter = needs_filter;
1555
1556 _c4dbgpf("scalar was {}", prs_(sc->scalar));
1557
1558 return true;
1559}
1560
1561template<class EventHandler>
1562C4_ALWAYS_INLINE bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_blck(ScannedScalar *C4_RESTRICT sc) // LCOV_EXCL_LINE
1563{
1564 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RMAP), m_evt_handler->m_curr->pos);
1565 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RFLOW), m_evt_handler->m_curr->pos);
1566 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RSEQIMAP), m_evt_handler->m_curr->pos);
1567 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ), m_evt_handler->m_curr->pos);
1568 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(RBLCK), m_evt_handler->m_curr->pos);
1569 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(RVAL), m_evt_handler->m_curr->pos);
1570 return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref + 1u);
1571}
1572
1573template<class EventHandler>
1574C4_ALWAYS_INLINE bool ParseEngine<EventHandler>::_scan_scalar_plain_map_blck(ScannedScalar *C4_RESTRICT sc) // LCOV_EXCL_LINE
1575{
1576 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RSEQ), m_evt_handler->m_curr->pos);
1577 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RFLOW), m_evt_handler->m_curr->pos);
1578 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(RMAP), m_evt_handler->m_curr->pos);
1579 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(RBLCK), m_evt_handler->m_curr->pos);
1580 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RVAL|QMRK), m_evt_handler->m_curr->pos);
1581 return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref + 1u);
1582}
1583
1584template<class EventHandler>
1585C4_ALWAYS_INLINE bool ParseEngine<EventHandler>::_scan_scalar_plain_unk(ScannedScalar *C4_RESTRICT sc) // LCOV_EXCL_LINE
1586{
1587 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(RUNK|USTY), m_evt_handler->m_curr->pos);
1588 return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref);
1589}
1590
1591
1592//-----------------------------------------------------------------------------
1593
1594template<class EventHandler>
1595substr ParseEngine<EventHandler>::_peek_next_line(size_t pos) const
1596{
1597 substr rem{}; // declare here because of the goto
1598 size_t nlpos{}; // declare here because of the goto
1599 pos = pos == npos ? m_evt_handler->m_curr->pos.offset : pos;
1600 if(pos >= _buf().len)
1601 goto next_is_empty;
1602
1603 // look for the next newline chars, and jump to the right of those
1604 rem = _from_next_line(_buf().sub(pos));
1605 if(rem.empty())
1606 goto next_is_empty;
1607
1608 // now get everything up to and including the following newline chars
1609 nlpos = rem.first_of("\r\n");
1610 if((nlpos != csubstr::npos) && (nlpos + 1 < rem.len))
1611 nlpos += _extend_from_combined_newline(rem[nlpos], rem[nlpos+1]);
1612 rem = rem.left_of(nlpos, /*include_pos*/true);
1613
1614 _c4dbgpf("peek next line @ {}: (len={})'{}'", pos, rem.len, rem.trimr("\r\n"));
1615 return rem;
1616
1617next_is_empty:
1618 _c4dbgpf("peek next line @ {}: (len=0)''", pos);
1619 return rem;
1620}
1621
1622//-----------------------------------------------------------------------------
1623
1624template<class EventHandler>
1625void ParseEngine<EventHandler>::_scan_line()
1626{
1627 if C4_LIKELY(m_evt_handler->m_curr->pos.offset < _buf().len)
1628 m_evt_handler->m_curr->line_contents.reset_with_next_line(_buf(), m_evt_handler->m_curr->pos.offset);
1629 else
1630 m_evt_handler->m_curr->line_contents.reset_with_next_line(_buf().last(0), 0);
1631}
1632
1633template<class EventHandler>
1634void ParseEngine<EventHandler>::_line_progressed(size_t ahead)
1635{
1636 _c4dbgpf("line[{}] ({} cols) progressed by {}: col {}-->{} offset {}-->{}",
1637 m_evt_handler->m_curr->pos.line,
1638 m_evt_handler->m_curr->line_contents.full.len,
1639 ahead, m_evt_handler->m_curr->pos.col,
1640 m_evt_handler->m_curr->pos.col+ahead,
1641 m_evt_handler->m_curr->pos.offset,
1642 m_evt_handler->m_curr->pos.offset+ahead);
1643 m_evt_handler->m_curr->pos.offset += ahead;
1644 m_evt_handler->m_curr->pos.col += ahead;
1645 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col <= m_evt_handler->m_curr->line_contents.num_cols+1, m_evt_handler->m_curr->pos);
1646 m_evt_handler->m_curr->line_contents.rem = m_evt_handler->m_curr->line_contents.rem.sub(ahead);
1647}
1648
1649template<class EventHandler>
1650void ParseEngine<EventHandler>::_line_ended()
1651{
1652 _c4dbgpf("line[{}] ({} cols) ended! offset {}-->{} / col {}-->{}",
1653 m_evt_handler->m_curr->pos.line,
1654 m_evt_handler->m_curr->line_contents.full.len,
1655 m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset + m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols,
1656 m_evt_handler->m_curr->pos.col, 1);
1657 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col == m_evt_handler->m_curr->line_contents.num_cols + 1, m_evt_handler->m_curr->pos);
1658 m_evt_handler->m_curr->pos.offset += m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols;
1659 ++m_evt_handler->m_curr->pos.line;
1660 m_evt_handler->m_curr->pos.col = 1;
1661}
1662
1663template<class EventHandler>
1664void ParseEngine<EventHandler>::_line_ended_undo()
1665{
1666 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col == 1u, m_evt_handler->m_curr->pos);
1667 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.line > 0u, m_evt_handler->m_curr->pos);
1668 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.offset >= m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols, m_evt_handler->m_curr->pos);
1669 const size_t delta = m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols;
1670 _c4dbgpf("line[{}] undo ended! line {}-->{}, offset {}-->{}", m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->pos.line - 1, m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset - delta);
1671 m_evt_handler->m_curr->pos.offset -= delta;
1672 --m_evt_handler->m_curr->pos.line;
1673 m_evt_handler->m_curr->pos.col = m_evt_handler->m_curr->line_contents.num_cols + 1u;
1674 // don't forget to undo also the changes to the remainder of the line
1675 //RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.offset >= _buf().len || _buf()[m_evt_handler->m_curr->pos.offset] == '\n' || _buf()[m_evt_handler->m_curr->pos.offset] == '\r', m_evt_handler->m_curr->pos);
1676 m_evt_handler->m_curr->line_contents.rem = _buf().sub(m_evt_handler->m_curr->pos.offset, 0);
1677}
1678
1679
1680//-----------------------------------------------------------------------------
1681template<class EventHandler>
1682void ParseEngine<EventHandler>::_set_indentation(size_t indentation) noexcept
1683{
1684 m_evt_handler->m_curr->indref = indentation;
1685 _c4dbgpf("state[{}]: saving indentation: {}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
1686}
1687
1688template<class EventHandler>
1689void ParseEngine<EventHandler>::_save_indentation()
1690{
1691 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.is_sub(m_evt_handler->m_curr->line_contents.full), m_evt_handler->m_curr->pos);
1692 m_evt_handler->m_curr->indref = m_evt_handler->m_curr->line_contents.current_col();
1693 _c4dbgpf("state[{}]: saving indentation: {}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
1694}
1695
1696template<class EventHandler>
1697void ParseEngine<EventHandler>::_mark_seqflow_val_end() noexcept
1698{
1699 _c4dbgpf("SEQFLOW. mark val end at line={}", m_evt_handler->m_curr->pos.line);
1700 m_prev_val_end = m_evt_handler->m_curr->pos.line;
1701}
1702
1703
1704//-----------------------------------------------------------------------------
1705
1706template<class EventHandler>
1707void ParseEngine<EventHandler>::_flow_container_was_a_key(size_t orig_indent)
1708{
1709 _c4dbgpf("flow container is followed by colon! orig_indent={}", orig_indent);
1710 m_evt_handler->actually_val_is_first_key_of_new_map_block();
1711 addrem_flags(RMAP|RVAL|RBLCK, RKCL|RSEQ|RUNK);
1712 _set_indentation(orig_indent);
1713 _maybe_skip_whitespace_tokens();
1714}
1715
1716template<class EventHandler>
1717void ParseEngine<EventHandler>::_end_flow_container(size_t orig_indent, bool multiline)
1718{
1719 // this is called AFTER ending the flow container,
1720 // so now we're at the parent container's scope
1721 if(has_all(RMAP|RBLCK) && has_none(RKCL|RVAL|RNXT))
1722 {
1723 _c4dbgp("flow container: end as vanilla block map key!");
1724 if C4_UNLIKELY(multiline)
1725 _c4err("multiline key is invalid");
1726 if C4_UNLIKELY(!_maybe_scan_following_colon())
1727 _c4err("could not find ':' colon after key");
1728 _maybe_skip_whitespace_tokens();
1729 addrem_flags(RVAL, RKEY|RKCL|RNXT);
1730 }
1731 else if(has_none(RFLOW))
1732 {
1733 _c4dbgp("end_flow_container: now not in flow!");
1734 if(has_any(RUNK|RSEQ|RKCL) && _maybe_scan_following_colon())
1735 {
1736 if C4_UNLIKELY(multiline)
1737 _c4err("multiline key is invalid");
1738 _flow_container_was_a_key(orig_indent);
1739 }
1740 else
1741 {
1742 _c4dbgp("end_flow_container: end map as key!");
1743 }
1744 }
1745 else if(has_any(RSEQ))
1746 {
1747 _c4dbgp("end_flow_container: now in a flow seq");
1748 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(RFLOW), m_evt_handler->m_curr->pos);
1749 _mark_seqflow_val_end();
1750 }
1751}
1752
1753template<class EventHandler>
1754void ParseEngine<EventHandler>::_end_map_flow()
1755{
1756 bool multiline = m_evt_handler->m_parent->pos.line < m_evt_handler->m_curr->pos.line;
1757 size_t orig_indent = m_evt_handler->m_curr->indref;
1758 _c4dbgpf("mapflow: end, multiline={}", multiline);
1759 m_evt_handler->end_map_flow(multiline && m_options.detect_flow_ml(), m_options.flow_ml_style().m_bits);
1760 _end_flow_container(orig_indent, multiline);
1761}
1762
1763template<class EventHandler>
1764void ParseEngine<EventHandler>::_end_seq_flow()
1765{
1766 bool multiline = m_evt_handler->m_parent->pos.line < m_evt_handler->m_curr->pos.line;
1767 size_t orig_indent = m_evt_handler->m_curr->indref;
1768 _c4dbgpf("seqflow: end, multiline={}", multiline);
1769 m_evt_handler->end_seq_flow(multiline && m_options.detect_flow_ml(), m_options.flow_ml_style().m_bits);
1770 _end_flow_container(orig_indent, multiline);
1771}
1772
1773template<class EventHandler>
1774void ParseEngine<EventHandler>::_end_map_blck()
1775{
1776 _c4dbgp("mapblck: end");
1777 if(has_any(RKCL|RVAL))
1778 {
1779 _c4dbgp("mapblck: set missing val");
1780 _handle_annotations_before_blck_val_scalar();
1781 m_evt_handler->set_val_scalar_plain_empty();
1782 }
1783 else if(has_any(QMRK))
1784 {
1785 _c4dbgp("mapblck: set missing keyval");
1786 _handle_annotations_before_blck_key_scalar();
1787 m_evt_handler->set_key_scalar_plain_empty();
1788 _handle_annotations_before_blck_val_scalar();
1789 m_evt_handler->set_val_scalar_plain_empty();
1790 }
1791 m_evt_handler->end_map_block();
1792}
1793
1794template<class EventHandler>
1795void ParseEngine<EventHandler>::_end_seq_blck()
1796{
1797 if(has_any(RVAL))
1798 {
1799 _c4dbgp("seqblck: set missing val");
1800 _handle_annotations_before_blck_val_scalar();
1801 m_evt_handler->set_val_scalar_plain_empty();
1802 }
1803 m_evt_handler->end_seq_block();
1804}
1805
1806template<class EventHandler>
1807void ParseEngine<EventHandler>::_end2_map()
1808{
1809 _c4dbgp("map: end");
1810 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(RMAP), m_evt_handler->m_curr->pos);
1811 if(has_any(RBLCK))
1812 {
1813 _end_map_blck();
1814 }
1815 else
1816 {
1817 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RFLOW), m_evt_handler->m_curr->pos);
1818 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(USTY), m_evt_handler->m_curr->pos);
1819 m_evt_handler->_pop();
1820 }
1821}
1822
1823template<class EventHandler>
1824void ParseEngine<EventHandler>::_end2_seq()
1825{
1826 _c4dbgp("seq: end");
1827 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ), m_evt_handler->m_curr->pos);
1828 if(has_any(RBLCK))
1829 {
1830 _end_seq_blck();
1831 }
1832 else
1833 {
1834 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RFLOW), m_evt_handler->m_curr->pos);
1835 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(USTY), m_evt_handler->m_curr->pos);
1836 m_evt_handler->_pop();
1837 }
1838}
1839
1840template<class EventHandler>
1841void ParseEngine<EventHandler>::_begin2_doc()
1842{
1843 _c4dbgp("begin_doc");
1844 m_has_directives_yaml = false;
1845 m_has_directives = false;
1846 m_doc_empty = true;
1847 add_flags(RDOC);
1848 m_evt_handler->begin_doc();
1849 m_evt_handler->m_curr->indref = 0; // ?
1850}
1851
1852template<class EventHandler>
1853void ParseEngine<EventHandler>::_begin2_doc_expl()
1854{
1855 _c4dbgp("begin_doc_expl");
1856 m_has_directives_yaml = false;
1857 m_has_directives = false;
1858 m_doc_empty = true;
1859 add_flags(RDOC);
1860 m_evt_handler->begin_doc_expl();
1861 m_evt_handler->m_curr->indref = 0; // ?
1862}
1863
1864template<class EventHandler>
1865void ParseEngine<EventHandler>::_end2_doc()
1866{
1867 _c4dbgp("doc: end");
1868 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(RDOC), m_evt_handler->m_curr->pos);
1869 if(m_doc_empty || (m_pending_tags.num_entries || m_pending_anchors.num_entries))
1870 {
1871 _c4dbgp("doc was empty; add empty val");
1872 _handle_annotations_before_blck_val_scalar();
1873 m_evt_handler->set_val_scalar_plain_empty();
1874 }
1875 m_evt_handler->end_doc();
1876 m_bom_len = 0;
1877}
1878
1879template<class EventHandler>
1880void ParseEngine<EventHandler>::_end2_doc_expl()
1881{
1882 _c4dbgp("doc: end");
1883 if(m_doc_empty || (m_pending_tags.num_entries || m_pending_anchors.num_entries))
1884 {
1885 _c4dbgp("doc: no children; add empty val");
1886 _handle_annotations_before_blck_val_scalar();
1887 m_evt_handler->set_val_scalar_plain_empty();
1888 }
1889 m_evt_handler->end_doc_expl();
1890 m_bom_len = 0;
1891}
1892
1893template<class EventHandler>
1894void ParseEngine<EventHandler>::_maybe_begin_doc()
1895{
1896 if(has_none(RDOC))
1897 {
1898 _c4dbgp("doc must be started");
1899 _begin2_doc();
1900 }
1901}
1902template<class EventHandler>
1903void ParseEngine<EventHandler>::_maybe_end_doc()
1904{
1905 if(has_any(RDOC))
1906 {
1907 _c4dbgp("doc must be finished");
1908 _end2_doc();
1909 }
1910 else if(m_doc_empty && (m_pending_tags.num_entries || m_pending_anchors.num_entries))
1911 {
1912 _c4dbgp("no doc to finish, but pending annotations");
1913 m_evt_handler->begin_doc();
1914 _handle_annotations_before_blck_val_scalar();
1915 m_evt_handler->set_val_scalar_plain_empty();
1916 m_evt_handler->end_doc();
1917 }
1918}
1919
1920template<class EventHandler>
1921void ParseEngine<EventHandler>::_end_doc_suddenly__pop()
1922{
1923 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1, m_evt_handler->m_curr->pos);
1924 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack[0].flags & RDOC, m_evt_handler->m_curr->pos);
1925 _c4dbgp("root is RDOC");
1926 if(m_evt_handler->m_curr->level != 0)
1927 _handle_indentation_pop(&m_evt_handler->m_stack[0]);
1928 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(RDOC), m_evt_handler->m_curr->pos);
1929}
1930
1931/** Check whether the current parse tokens are trailing on the
1932 * previous doc, and raise an error if they are */
1933template<class EventHandler>
1934void ParseEngine<EventHandler>::_check_trailing_doc_token()
1935{
1936 const bool is_root = (m_evt_handler->m_stack.size() == 1u);
1937 const bool isndoc = (m_evt_handler->m_curr->flags & NDOC) != 0;
1938 const bool suspicious = m_evt_handler->template has_any_<MAP|SEQ|VAL>();
1939 _c4dbgpf("target={} isroot={} suspicious={} ndoc={}", m_evt_handler->m_curr->node_id, is_root, suspicious, isndoc);
1940 if((is_root || m_evt_handler->template has_any_<DOC>()) && suspicious && !isndoc)
1941 _c4err("parse error");
1942}
1943
1944template<class EventHandler>
1945void ParseEngine<EventHandler>::_end_doc_suddenly()
1946{
1947 _c4dbgp("end doc suddenly");
1948 _end_doc_suddenly__pop();
1949 _end2_doc_expl();
1950 addrem_flags(RUNK|RTOP|NDOC, RMAP|RSEQ|RDOC);
1951}
1952
1953template<class EventHandler>
1954void ParseEngine<EventHandler>::_check_doc_end_tokens() const
1955{
1956 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
1957 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, !rem.begins_with_any(". \t"), m_evt_handler->m_curr->pos);
1958 if C4_UNLIKELY(rem.len && !rem.begins_with('#'))
1959 {
1960 _c4err("parse error");
1961 }
1962}
1963
1964template<class EventHandler>
1965void ParseEngine<EventHandler>::_start_doc_suddenly()
1966{
1967 _c4dbgp("start doc suddenly");
1968 _end_doc_suddenly__pop();
1969 _end2_doc();
1970 _begin2_doc_expl();
1971}
1972
1973template<class EventHandler>
1974void ParseEngine<EventHandler>::_end_stream()
1975{
1976 _c4dbgpf("end_stream, level={} node_id={}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->node_id);
1977 if C4_UNLIKELY(has_all(RSEQ|RFLOW))
1978 _c4err("missing terminating ]");
1979 else if C4_UNLIKELY(has_all(RMAP|RFLOW))
1980 _c4err("missing terminating }");
1981 if(m_evt_handler->m_stack.size() > 1)
1982 _handle_indentation_pop(m_evt_handler->m_stack.begin());
1983 if(has_all(RDOC))
1984 {
1985 _end2_doc();
1986 }
1987 else if(has_all(RTOP|RUNK))
1988 {
1989 if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
1990 {
1991 if(m_doc_empty)
1992 {
1993 m_evt_handler->begin_doc();
1994 _handle_annotations_before_blck_val_scalar();
1995 m_evt_handler->set_val_scalar_plain_empty();
1996 m_evt_handler->end_doc();
1997 }
1998 }
1999 }
2000 m_evt_handler->end_stream();
2001 if C4_UNLIKELY(m_has_directives)
2002 _c4err("directives cannot be used without a document");
2003}
2004
2005
2006template<class EventHandler>
2007void ParseEngine<EventHandler>::_handle_indentation_pop(ParserState const* popto)
2008{
2009 _c4dbgpf("popping {} level{}: from level {}(@ind={}) to level {}(@ind={})", m_evt_handler->m_curr->level - popto->level, (((m_evt_handler->m_curr->level - popto->level) > 1) ? "s" : ""), m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, popto->level, popto->indref);
2010 while(m_evt_handler->m_curr != popto)
2011 {
2012 if(has_any(RSEQ))
2013 {
2014 _c4dbgpf("popping seq at level {} (indentation={},addr={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, m_evt_handler->m_curr);
2015 _end2_seq();
2016 }
2017 else if(has_any(RMAP))
2018 {
2019 _c4dbgpf("popping map at level {} (indentation={},addr={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, m_evt_handler->m_curr);
2020 _end2_map();
2021 }
2022 else
2023 {
2024 break;
2025 }
2026 }
2027 _c4dbgpf("current level is {} (indentation={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
2028}
2029
2030template<class EventHandler>
2031void ParseEngine<EventHandler>::_handle_indentation_pop_from_block_seq()
2032{
2033 // search the stack frame to jump to based on its indentation
2034 using state_type = typename EventHandler::state;
2035 state_type const* popto = nullptr;
2036 auto &stack = m_evt_handler->m_stack;
2037 RYML_ASSERT_PARSE_CB_(stack.m_callbacks, stack.is_contiguous(), m_evt_handler->m_curr->pos); // this search relies on the stack being contiguous
2038 RYML_ASSERT_PARSE_CB_(stack.m_callbacks, m_evt_handler->m_curr >= stack.begin() && m_evt_handler->m_curr < stack.end(), m_evt_handler->m_curr->pos);
2039 const size_t ind = m_evt_handler->m_curr->line_contents.indentation;
2040 #ifdef RYML_DBG
2041 _print_state_stack();
2042 #endif
2043 for(state_type const* s = m_evt_handler->m_curr-1; s >= stack.begin(); --s)
2044 {
2045 _c4dbgpf("searching for state with indentation {}. curr={} (level={},node={})", ind, s->indref, s->level, s->node_id);
2046 if(s->indref == ind)
2047 {
2048 _c4dbgpf("gotit!!! level={} node={}", s->level, s->node_id);
2049 popto = s;
2050 break;
2051 }
2052 }
2053 if(!popto || popto >= m_evt_handler->m_curr || popto->level >= m_evt_handler->m_curr->level)
2054 {
2055 _c4err("parse error: incorrect indentation?");
2056 }
2057 _handle_indentation_pop(popto);
2058}
2059
2060template<class EventHandler>
2061void ParseEngine<EventHandler>::_handle_indentation_pop_from_block_map()
2062{
2063 // search the stack frame to jump to based on its indentation
2064 using state_type = typename EventHandler::state;
2065 auto &stack = m_evt_handler->m_stack;
2066 RYML_ASSERT_PARSE_CB_(stack.m_callbacks, stack.is_contiguous(), m_evt_handler->m_curr->pos); // this search relies on the stack being contiguous
2067 RYML_ASSERT_PARSE_CB_(stack.m_callbacks, m_evt_handler->m_curr >= stack.begin() && m_evt_handler->m_curr < stack.end(), m_evt_handler->m_curr->pos);
2068 const size_t ind = m_evt_handler->m_curr->line_contents.indentation;
2069 state_type const* popto = nullptr;
2070 #ifdef RYML_DBG
2071 char flagbuf_[128];
2072 _print_state_stack(flagbuf_);
2073 #endif
2074 for(state_type const* s = m_evt_handler->m_curr-1; s > stack.begin(); --s) // never go to the stack bottom. that's the root
2075 {
2076 _c4dbgpf("searching for state with indentation {}. current: ind={},level={},node={},flags={}", ind, s->indref, s->level, s->node_id, detail::_parser_flags_to_str(flagbuf_, s->flags));
2077 if(s->indref < ind)
2078 {
2079 break;
2080 }
2081 else if(s->indref == ind)
2082 {
2083 _c4dbgpf("same indentation!!! level={} node={}", s->level, s->node_id);
2084 if(popto && has_any(RTOP, s) && has_none(RMAP|RSEQ, s))
2085 {
2086 break;
2087 }
2088 popto = s;
2089 if(has_all(RSEQ|RBLCK, s))
2090 {
2091 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
2092 const size_t first = rem.first_not_of(' ');
2093 RYML_ASSERT_PARSE_CB_(stack.m_callbacks, first == ind || first == npos, m_evt_handler->m_curr->pos);
2094 rem = rem.right_of(first, true);
2095 _c4dbgpf("indentless? rem='{}' first={}", rem, first);
2096 if(rem.begins_with('-') && _is_blck_token(rem))
2097 {
2098 _c4dbgp("parent was indentless seq");
2099 break;
2100 }
2101 }
2102 }
2103 }
2104 if(!popto || popto >= m_evt_handler->m_curr || popto->level >= m_evt_handler->m_curr->level)
2105 {
2106 _c4err("parse error: incorrect indentation?");
2107 }
2108 _handle_indentation_pop(popto);
2109}
2110
2111
2112//-----------------------------------------------------------------------------
2113template<class EventHandler>
2114void ParseEngine<EventHandler>::_check_valid_newline_in_quoted_scalar()
2115{
2116 if C4_UNLIKELY(has_all(RMAP|RBLCK|RKEY))
2117 {
2118 _c4err("multiline quoted keys are invalid");
2119 }
2120 else // check contextual indentation
2121 {
2122 const size_t minindent = m_evt_handler->m_curr->indref + ((has_any(RMAP|RSEQ) && has_any(RBLCK)));
2123 _c4dbgpf("indent={} vs minindent={} indref={}", m_evt_handler->m_curr->line_contents.indentation, minindent, m_evt_handler->m_curr->indref);
2124 if(m_evt_handler->m_curr->line_contents.indentation < minindent)
2125 {
2126 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks,
2127 m_evt_handler->m_curr->line_contents.indentation == m_evt_handler->m_curr->line_contents.rem.first_not_of(' '),
2128 m_evt_handler->m_curr->pos);
2129 csubstr trimmed = m_evt_handler->m_curr->line_contents.rem.sub(m_evt_handler->m_curr->line_contents.indentation);
2130 _c4dbgpf("trimmed.len={} line={}", trimmed.len, prs_(m_evt_handler->m_curr->line_contents.rem, true));
2131 if C4_UNLIKELY(!!trimmed.len)
2132 {
2133 _c4err("bad indentation");
2134 }
2135 }
2136 }
2137}
2138
2139
2140//-----------------------------------------------------------------------------
2141template<class EventHandler>
2142ScannedScalar ParseEngine<EventHandler>::_scan_scalar_squot()
2143{
2144 // quoted scalars can spread over multiple lines!
2145 // nice explanation here: http://yaml-multiline.info/
2146
2147 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, _buf().sub(m_evt_handler->m_curr->pos.offset).begins_with('\''), m_evt_handler->m_curr->pos);
2148
2149 // a span to the end of the file, skipping the opening quote
2150 substr s = _buf().sub(m_evt_handler->m_curr->pos.offset + 1);
2151 _line_progressed(1); // advance over the opening quote
2152 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->at_line_beginning(), m_evt_handler->m_curr->pos);
2153
2154 bool needs_filter = false;
2155 size_t pos = npos; // find the pos of the matching quote
2156 while( ! _finished_file())
2157 {
2158 const csubstr line = m_evt_handler->m_curr->line_contents.rem;
2159 _c4dbgpf("scanning single quoted scalar @ line[{}]: {}", m_evt_handler->m_curr->pos.line, prs_(line));
2160 if C4_UNLIKELY(m_evt_handler->m_curr->at_line_beginning() && _is_doc_token(line))
2161 _c4err("token can not appear at line begin");
2162 for(size_t i = 0; i < line.len; ++i)
2163 {
2164 const char curr = line.str[i];
2165 if(curr == '\'') // single quotes are escaped with two single quotes
2166 {
2167 const char next = i+1 < line.len ? line.str[i+1] : '~';
2168 if(next != '\'') // so just look for the first quote
2169 { // without another after it
2170 _line_progressed(i + 1); // progress beyond the quote
2171 pos = i + (size_t)(line.str - s.str); // set pos to before the quote
2172 goto found_close;
2173 }
2174 else
2175 {
2176 needs_filter = true; // needs filter to remove escaped quotes
2177 ++i; // skip the escaped quote
2178 }
2179 }
2180 }
2181
2182 needs_filter = true;
2183 _line_progressed(line.len);
2184 _line_ended();
2185 _scan_line();
2186 _check_valid_newline_in_quoted_scalar();
2187 }
2188
2189 _c4err("reached end of file while looking for closing quote");
2190
2191found_close:
2192
2193 _c4dbgpf("found closing quote at: {}", pos);
2194 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, pos != npos, m_evt_handler->m_curr->pos);
2195 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, pos >= 0, m_evt_handler->m_curr->pos);
2196 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, s.end() >= _buf().begin() && s.end() <= _buf().end(), m_evt_handler->m_curr->pos);
2197 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, s.end() == _buf().end() || *s.end() == '\'', m_evt_handler->m_curr->pos);
2198 _set_first_strict(s, pos);
2199
2200 _c4prscalar("scanned squoted scalar", s, /*keep_newlines*/true);
2201
2202 return ScannedScalar { s, needs_filter };
2203}
2204
2205
2206//-----------------------------------------------------------------------------
2207template<class EventHandler>
2208ScannedScalar ParseEngine<EventHandler>::_scan_scalar_dquot()
2209{
2210 // quoted scalars can spread over multiple lines!
2211 // nice explanation here: http://yaml-multiline.info/
2212
2213 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, _buf().sub(m_evt_handler->m_curr->pos.offset).begins_with('"'), m_evt_handler->m_curr->pos);
2214
2215 // a span to the end of the file, skipping the opening quote
2216 substr s = _buf().sub(m_evt_handler->m_curr->pos.offset + 1);
2217 _line_progressed(1); // advance over the opening quote
2218 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->at_line_beginning(), m_evt_handler->m_curr->pos);
2219
2220 bool needs_filter = false;
2221 size_t pos = npos; // find the pos of the matching quote
2222 while( ! _finished_file())
2223 {
2224 #if defined(__GNUC__) && (/*__GNUC__ == 12 || */__GNUC__ == 13)
2225 C4_DONT_OPTIMIZE(m_evt_handler->m_curr->line_contents.rem); // prevent hoisting
2226 #endif
2227 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
2228 _c4dbgpf("scanning double quoted scalar @ line[{}]: line='{}'", m_evt_handler->m_curr->pos.line, rem);
2229 if C4_UNLIKELY(m_evt_handler->m_curr->at_line_beginning() && _is_doc_token(rem))
2230 _c4err("token can not appear at line begin");
2231 for(size_t i = 0; i < rem.len; ++i)
2232 {
2233 const char curr = rem.str[i];
2234 // every \ is an escape
2235 if(curr == '\\')
2236 {
2237 const char next = i+1 < rem.len ? rem.str[i+1] : '~';
2238 needs_filter = true;
2239 if(next == '"' || next == '\\')
2240 ++i;
2241 }
2242 else if(curr == '"')
2243 {
2244 _line_progressed(i + 1); // progress beyond the quote
2245 pos = i + (size_t)(rem.str - s.str); // set pos to before the quote
2246 goto found_close;
2247 }
2248 }
2249
2250 // leading whitespace also needs filtering
2251 needs_filter = true;
2252 _line_progressed(rem.len);
2253 _line_ended();
2254 _scan_line();
2255 _check_valid_newline_in_quoted_scalar();
2256 }
2257
2258 _c4err("reached end of file while looking for closing quote");
2259
2260found_close:
2261
2262 _c4dbgpf("found closing quote at: {}", pos);
2263 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, pos != npos, m_evt_handler->m_curr->pos);
2264 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, pos >= 0, m_evt_handler->m_curr->pos);
2265 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, s.end() >= _buf().begin() && s.end() <= _buf().end(), m_evt_handler->m_curr->pos);
2266 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, s.end() == _buf().end() || *s.end() == '"', m_evt_handler->m_curr->pos);
2267 _set_first_strict(s, pos);
2268
2269 _c4prscalar("scanned dquoted scalar", s, /*keep_newlines*/true);
2270
2271 return ScannedScalar{s, needs_filter};
2272}
2273
2274
2275//-----------------------------------------------------------------------------
/** Scan a block scalar whose header (`|` or `>`, optionally followed
 * by a chomping indicator and/or an indentation digit) is at the
 * start of the remainder of the current line.
 *
 * The header is parsed first: `-` selects CHOMP_STRIP, `+` selects
 * CHOMP_KEEP, and CHOMP_CLIP is used when neither is present. An
 * explicit indentation must be a single non-zero digit. Anything
 * else on the header line must be a comment preceded by whitespace.
 *
 * The following lines are then appended, whole, to a raw block until
 * a terminating line or the end of the file is found. When the
 * header gives no indentation, it is taken from the first non-empty
 * line; empty lines seen before that only provide a provisional
 * value.
 *
 * @param sb receives the raw block (sb->scalar), the indentation
 *        that was found (sb->indentation) and the chomping mode
 *        (sb->chomp)
 * @param indref reference indentation used to detect the end of the
 *        block while the block indentation is unknown; must not be
 *        npos
 */
2276template<class EventHandler>
2277void ParseEngine<EventHandler>::_scan_block(ScannedBlock *C4_RESTRICT sb, size_t indref)
2278{
2279 _c4dbgpf("blck: indref={}", indref);
2280 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, indref != npos, m_evt_handler->m_curr->pos);
2281
2282 // nice explanation here: http://yaml-multiline.info/
2283 csubstr s = m_evt_handler->m_curr->line_contents.rem;
2284 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, s.begins_with('|') || s.begins_with('>'), m_evt_handler->m_curr->pos);
2285
2286 _c4dbgpf("blck: specs={}", prs_(s));
2287
2288 // parse the spec
2289 BlockChomp_e chomp = CHOMP_CLIP; // default to clip unless + or - are used
2290 size_t indentation = npos; // have to find out if no spec is given
2291 if(s.len > 1)
2292 {
2293 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, s.begins_with_any("|>"), m_evt_handler->m_curr->pos);
 // t: what follows the style character
2294 csubstr t = s.sub(1);
2295 _c4dbgpf("blck: spec is multichar: {}", prs_(t));
2296 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, t.len >= 1, m_evt_handler->m_curr->pos);
2297 size_t pos = t.first_of("-+");
2298 _c4dbgpf("blck: spec chomp char: pos={}", pos);
2299 if(pos != npos)
2300 {
2301 _c4dbgpf("blck: spec chomp char: {}", _c4prc(t[pos]));
2302 if(t[pos] == '-')
2303 {
2304 _c4dbgp("blck: chomp=STRIP");
2305 chomp = CHOMP_STRIP;
2306 }
2307 else if(t[pos] == '+')
2308 {
2309 _c4dbgp("blck: chomp=KEEP");
2310 chomp = CHOMP_KEEP;
2311 }
 // keep only the part of the spec on the other side of the chomp char
2312 if(pos == 0)
2313 t = t.sub(1);
2314 else
2315 t = t.first(pos);
2316 _c4dbgpf("blck: spec is now: {}", prs_(t));
2317 }
2318 // from here to the end, only digits are considered
2319 pos = t.first_not_of("0123456789");
2320 csubstr rest = t.first(pos);
2321 if( ! rest.empty())
2322 {
2323 _c4dbgpf("blck: parse indentation digits: {}", prs_(rest));
 // the explicit indentation must be exactly one non-zero digit
2324 if C4_UNLIKELY(rest.len > 1)
2325 _c4err("parse error: invalid indentation");
2326 if C4_UNLIKELY( ! c4::atou(rest, &indentation))
2327 _c4err("parse error: could not read indentation as decimal"); // LCOV_EXCL_LINE
2328 if C4_UNLIKELY( ! indentation)
2329 _c4err("parse error: null indentation");
 // NOTE(review): the message below prints indentation+indref (the
 // parameter), while the statement after it adds
 // m_evt_handler->m_curr->indref -- confirm that both always agree
2330 _c4dbgpf("blck: indentation specified: {}. add {} from curr state -> {}", indentation, m_evt_handler->m_curr->indref, indentation+indref);
2331 indentation += m_evt_handler->m_curr->indref;
2332 }
2333 else
2334 {
 // no digits: the only thing accepted is a comment preceded by whitespace
2335 rest = t.triml(" \t");
2336 _c4dbgpf("blck: digits empty. t={} trimmed={} iscomm={} t.iscomm={}", prs_(t), prs_(rest), rest.begins_with('#'), t.begins_with('#'));
2337 if C4_UNLIKELY(rest.len && (rest.str[0] != '#' || t.str[0] == '#'))
2338 _c4err("parse error: invalid token");
2339 }
2340 }
2341
2342 _c4dbgpf("blck: style={} chomp={} indentation={}", s.begins_with('>') ? "fold" : "literal", chomp==CHOMP_CLIP ? "clip" : (chomp==CHOMP_STRIP ? "strip" : "keep"), indentation);
2343
2344 // finish the current line
2345 _line_progressed(s.len);
2346 _line_ended();
2347 _scan_line();
2348
2349 // start with a zero-length block, already pointing at the right place
2350 substr raw_block(_buf().data() + m_evt_handler->m_curr->pos.offset, size_t(0));
2351 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, raw_block.begin() == m_evt_handler->m_curr->line_contents.full.str, m_evt_handler->m_curr->pos);
2352
2353 // read every full line into a raw block,
2354 // from which newlines are to be stripped as needed.
2355 //
2356 // If no explicit indentation was given, pick it from the first
2357 // non-empty line. See
2358 // https://yaml.org/spec/1.2.2/#8111-block-indentation-indicator
2359 size_t num_lines = 0;
2360 size_t first = m_evt_handler->m_curr->pos.line;
2361 size_t provisional_indentation = npos;
2362 LineContents lc;
2363 while(( ! _finished_file()))
2364 {
2365 // peek next line, but do not advance immediately
2366 lc.reset_with_next_line(_buf(), m_evt_handler->m_curr->pos.offset);
2367 #if defined(__GNUC__) && (__GNUC__ == 12 || __GNUC__ == 13)
2368 C4_DONT_OPTIMIZE(lc.rem);
2369 #endif
2370 _c4dbgpf("blck: peeking at {}", prs_(lc.rem.trimr("\r\n"), true));
2371 // evaluate termination conditions
 // case 1: the block indentation is already known
2372 if(indentation != npos)
2373 {
2374 _c4dbgpf("blck: indentation={}", indentation);
2375 // stop when the line is deindented and not empty
2376 if(lc.indentation < indentation && ( ! lc.rem.trim(" \t").empty()))
2377 {
2378 if(raw_block.len)
2379 {
2380 _c4dbgpf("blck: indentation decreased ref={} thisline={}", indentation, lc.indentation);
2381 }
2382 else
2383 {
2384 _c4err("indentation decreased without any scalar");
2385 }
2386 break;
2387 }
2388 else if(indentation == 0)
2389 {
2390 _c4dbgpf("blck: noindent. lc.rem={}", prs_(lc.rem));
2391 if(_is_doc_token(lc.rem))
2392 {
2393 _c4dbgp("blck: stop. indentation=0 and doc ended");
2394 break;
2395 }
2396 }
2397 }
 // case 2: the block indentation is still to be found
2398 else
2399 {
2400 const size_t fns = lc.rem.first_not_of(' ');
2401 _c4dbgpf("blck: indentation ref not set. firstnonws={}", fns);
2402 if(fns != npos) // non-empty line
2403 {
2404 _c4dbgpf("blck: line not empty. indref={} indprov={} indentation={}", indref, provisional_indentation, lc.indentation);
2405 if C4_UNLIKELY(lc.full.begins_with('\t'))
2406 _c4err("parse error");
 // no empty line was seen so far
2407 if(provisional_indentation == npos)
2408 {
2409 if(lc.indentation < indref)
2410 {
2411 _c4dbgpf("blck: block terminated indentation={} < indref={}", lc.indentation, indref);
2412 if(raw_block.len == 0)
2413 {
2414 _c4dbgp("blck: was empty, undo next line");
2415 _line_ended_undo();
2416 }
2417 break;
2418 }
2419 else if(lc.indentation == m_evt_handler->m_curr->indref)
2420 {
2421 if(has_any(RSEQ|RMAP))
2422 {
2423 _c4dbgpf("blck: block terminated. reading container and indentation={}==indref={}", lc.indentation, m_evt_handler->m_curr->indref);
2424 break;
2425 }
2426 }
2427 _c4dbgpf("blck: set indentation ref from this line: ref={}", lc.indentation);
2428 indentation = lc.indentation;
2429 }
 // empty lines came first and left a provisional indentation
2430 else
2431 {
2432 if(lc.indentation >= provisional_indentation)
2433 {
2434 _c4dbgpf("blck: set indentation ref from provisional indentation: provisional_ref={}, thisline={}", provisional_indentation, lc.indentation);
2435 //indentation = provisional_indentation ? provisional_indentation : lc.indentation;
2436 indentation = lc.indentation;
2437 }
2438 else
2439 {
2440 if(lc.indentation >= indref)
2441 _c4err("parse error: first non-empty block line should have at least the original indentation");
2442 _c4dbgp("blck: finished");
2443 break;
2444 }
2445 }
2446 }
2447 else // empty line
2448 {
2449 _c4dbgpf("blck: line empty or {} spaces. line_indentation={} prov_indentation={}", lc.rem.len, lc.indentation, provisional_indentation);
2450 if(provisional_indentation != npos)
2451 {
 // a longer run of spaces raises the provisional value
2452 if(lc.rem.len >= provisional_indentation)
2453 {
2454 _c4dbgpf("blck: increase provisional_ref {} -> {}", provisional_indentation, lc.rem.len);
2455 provisional_indentation = lc.rem.len;
2456 }
2457 }
2458 else
2459 {
 // first empty line: initialize the provisional value, which is
 // never left below indref
2460 provisional_indentation = lc.indentation ? lc.indentation : has_any(RSEQ|RVAL);
2461 _c4dbgpf("blck: initialize provisional_ref={}", provisional_indentation);
2462 if(provisional_indentation == npos)
2463 {
2464 provisional_indentation = lc.rem.len ? lc.rem.len : has_any(RSEQ|RVAL);
2465 _c4dbgpf("blck: initialize provisional_ref={}", provisional_indentation);
2466 }
2467 if(provisional_indentation < indref)
2468 {
2469 provisional_indentation = indref;
2470 _c4dbgpf("blck: initialize provisional_ref={}", provisional_indentation);
2471 }
2472 }
2473 }
2474 }
2475 // advance now that we know the folded scalar continues
2476 m_evt_handler->m_curr->line_contents = lc;
2477 _c4dbgpf("blck: append '{}'", m_evt_handler->m_curr->line_contents.rem);
2478 raw_block.len += m_evt_handler->m_curr->line_contents.full.len;
2479 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
2480 _line_ended();
2481 ++num_lines;
2482 }
2483 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.line == (first + num_lines) || (raw_block.len == 0), m_evt_handler->m_curr->pos);
2484 C4_UNUSED(num_lines);
2485 C4_UNUSED(first);
2486
 // no non-empty line fixed the indentation: fall back to the provisional value
2487 if(indentation == npos)
2488 {
2489 _c4dbgpf("blck: set indentation from provisional: {}", provisional_indentation);
2490 indentation = provisional_indentation;
2491 }
2492
 // NOTE(review): presumably this undoes the _line_ended() of the last
 // appended line -- confirm against _line_ended_undo()
2493 if(num_lines)
2494 _line_ended_undo();
2495
2496 _c4prscalar("scanned block", raw_block, /*keep_newlines*/true);
2497
 // hand the result over to the caller
2498 sb->scalar = raw_block;
2499 sb->indentation = indentation;
2500 sb->chomp = chomp;
2501}
2502
2503
2504//-----------------------------------------------------------------------------
2505//-----------------------------------------------------------------------------
2506//-----------------------------------------------------------------------------
2507/** @cond dev */
2508
2509// a debugging scaffold:
2510#if 0
2511#define _c4dbgfws(fmt, ...) _c4dbgpf("filt_ws[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2512#else
2513#define _c4dbgfws(...)
2514#endif
2515
/** Handle whitespace found at the read position of @p proc.
 *
 * Looks for the first character at or after the read position that
 * is not whitespace: spaces and tabs are skipped over when the read
 * position is past the start of the source, only spaces when it is
 * at the start. If that character is a newline or carriage return,
 * the whitespace up to it is skipped; otherwise proc.copy() is
 * called.
 *
 * Returns true when such a character exists, and false when only
 * whitespace remains until the end of the source; in that case the
 * processor is left untouched.
 *
 * NOTE(review): the declarator line between the template headers and
 * the opening brace is missing from this listing. Judging from the
 * callers further down, this is presumably
 * _filter_ws_handle_to_first_non_space(), taking the filter
 * processor as `proc` and returning bool -- confirm against the
 * class declaration.
 */
2516template<class EventHandler>
2517template<class FilterProcessor>
2519{
2520 _c4dbgfws("found whitespace '{}'", _c4prc(proc.curr()));
2521 RYML_ASSERT_PARSE_CB_(this->callbacks(), proc.curr() == ' ' || proc.curr() == '\t', m_evt_handler->m_curr->pos);
2522
2523 const size_t first_pos = proc.rpos > 0 ? proc.src.first_not_of(" \t", proc.rpos) : proc.src.first_not_of(' ', proc.rpos);
2524 if(first_pos != npos)
2525 {
2526 const char first_char = proc.src[first_pos];
2527 _c4dbgfws("firstnonws='{}'@{}", _c4prc(first_char), first_pos);
2528 if(first_char == '\n' || first_char == '\r') // skip trailing whitespace
2529 {
2530 _c4dbgfws("whitespace is trailing on line", "");
2531 proc.skip(first_pos - proc.rpos);
2532 }
2533 else // a legit whitespace
2534 {
2535 proc.copy();
2536 _c4dbgfws("legit whitespace. sofar={}", prs_(proc.sofar()));
2537 }
2538 return true;
2539 }
 // nothing but whitespace until the end of the source: the caller decides
2540 _c4dbgfws("whitespace is trailing on line", "");
2541 return false;
2542}
2543
/** Handle whitespace at the read position of @p proc; when nothing
 * but whitespace remains in the source, copy all the remaining
 * characters.
 *
 * NOTE(review): the declarator line between the template headers and
 * the opening brace is missing from this listing. Judging from the
 * debug message and the callers further down, this is presumably
 * _filter_ws_copy_trailing() -- confirm against the class
 * declaration.
 */
2544template<class EventHandler>
2545template<class FilterProcessor>
2547{
2548 if(!_filter_ws_handle_to_first_non_space(proc))
2549 {
2550 _c4dbgfws("... everything else is trailing whitespace - copy {} chars", proc.src.len - proc.rpos);
2551 proc.copy(proc.src.len - proc.rpos);
2552 }
2553}
2554
/** Handle whitespace at the read position of @p proc; when nothing
 * but whitespace remains in the source, skip all the remaining
 * characters.
 *
 * NOTE(review): the declarator line between the template headers and
 * the opening brace is missing from this listing. Judging from the
 * debug message and the callers further down, this is presumably
 * _filter_ws_skip_trailing() -- confirm against the class
 * declaration.
 */
2555template<class EventHandler>
2556template<class FilterProcessor>
2558{
2559 if(!_filter_ws_handle_to_first_non_space(proc))
2560 {
2561 _c4dbgfws("... everything else is trailing whitespace - skip {} chars", proc.src.len - proc.rpos);
2562 proc.skip(proc.src.len - proc.rpos);
2563 }
2564}
2565
2566#undef _c4dbgfws
2567
2568
2569//-----------------------------------------------------------------------------
2570//-----------------------------------------------------------------------------
2571//-----------------------------------------------------------------------------
2572/* plain scalars */
2573
2574// a debugging scaffold:
2575#if 0
2576#define _c4dbgfps(fmt, ...) _c4dbgpf("filt_plain[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2577#else
2578#define _c4dbgfps(fmt, ...)
2579#endif
2580
2581template<class EventHandler>
2582template<class FilterProcessor>
2583void ParseEngine<EventHandler>::_filter_nl_plain(FilterProcessor &C4_RESTRICT proc, size_t indentation)
2584{
2585 RYML_ASSERT_PARSE_CB_(this->callbacks(), proc.curr() == '\n', m_evt_handler->m_curr->pos);
2586
2587 _c4dbgfps("found newline. sofar={}", prs_(proc.sofar()));
2588 size_t ii = proc.rpos;
2589 const size_t numnl_following = _count_following_newlines(proc.src, &ii, indentation);
2590 if(numnl_following)
2591 {
2592 proc.set('\n', numnl_following);
2593 _c4dbgfps("{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ? "in the middle" : "at the end", proc.rpos-ii);
2594 }
2595 else
2596 {
2597 const size_t ret = proc.src.first_not_of(" \t", proc.rpos+1);
2598 if(ret != npos)
2599 {
2600 proc.set(' ');
2601 _c4dbgfps("single newline. convert to space. ret={}/{}. sofar={}", ii, proc.src.len, prs_(proc.sofar()));
2602 }
2603 else
2604 {
2605 _c4dbgfps("last newline, everything else is whitespace. ii={}/{}", ii, proc.src.len);
2606 ii = proc.src.len;
2607 }
2608 }
2609 proc.rpos = ii;
2610}
2611
2612template<class EventHandler>
2613template<class FilterProcessor>
2614auto ParseEngine<EventHandler>::_filter_plain(FilterProcessor &C4_RESTRICT proc, size_t indentation) -> decltype(proc.result())
2615{
2616 RYML_ASSERT_PARSE_CB_(this->callbacks(), indentation != npos, m_evt_handler->m_curr->pos);
2617 _c4dbgfps("before={}", prs_(proc.src));
2618
2619 while(proc.has_more_chars())
2620 {
2621 const char curr = proc.curr();
2622 _c4dbgfps("'{}', sofar={}", _c4prc(curr), prs_(proc.sofar()));
2623 switch(curr)
2624 {
2625 case ' ':
2626 RYML_WITH_TAB_TOKENS_(case '\t':)
2627 _c4dbgfps("whitespace", curr);
2628 _filter_ws_skip_trailing(proc);
2629 break;
2630 case '\n':
2631 _c4dbgfps("newline", curr);
2632 _filter_nl_plain(proc, /*indentation*/indentation);
2633 break;
2634 case '\r': // skip \r --- https://stackoverflow.com/questions/1885900
2635 _c4dbgfps("carriage return, ignore", curr);
2636 proc.skip();
2637 break;
2638 default:
2639 proc.copy();
2640 break;
2641 }
2642 }
2643
2644 _c4dbgfps("after={}", prs_(proc.sofar()));
2645
2646 return proc.result();
2647}
2648
2649#undef _c4dbgfps
2650
2651
/** Filter a plain scalar from `scalar` into `dst`, by running
 * _filter_plain() on a FilterProcessorSrcDst built from both.
 *
 * NOTE(review): the declarator line between the template header and
 * the opening brace is missing from this listing; the body uses
 * `scalar`, `dst` and `indentation`, which are presumably its
 * parameters -- confirm the name and signature against the class
 * declaration.
 */
2652template<class EventHandler>
2654{
2655 FilterProcessorSrcDst proc(scalar, dst);
2656 return _filter_plain(proc, indentation);
2657}
2658
/** Filter a plain scalar by running _filter_plain() on `proc`.
 *
 * NOTE(review): two lines are missing from this listing: the
 * declarator line between the template header and the opening brace,
 * and the line that constructs `proc`. Presumably this is the
 * in-place counterpart of the source/destination overload above --
 * confirm against the class declaration.
 */
2659template<class EventHandler>
2661{
2663 return _filter_plain(proc, indentation);
2664}
2665
2666
2667//-----------------------------------------------------------------------------
2668//-----------------------------------------------------------------------------
2669//-----------------------------------------------------------------------------
2670/* single quoted */
2671
2672// a debugging scaffold:
2673#if 0
2674#define _c4dbgfsq(fmt, ...) _c4dbgpf("filt_squo[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2675#else
2676#define _c4dbgfsq(fmt, ...)
2677#endif
2678
2679template<class EventHandler>
2680template<class FilterProcessor>
2681void ParseEngine<EventHandler>::_filter_nl_squoted(FilterProcessor &C4_RESTRICT proc)
2682{
2683 RYML_ASSERT_PARSE_CB_(this->callbacks(), proc.curr() == '\n', m_evt_handler->m_curr->pos);
2684
2685 _c4dbgfsq("found newline. sofar={}", prs_(proc.sofar()));
2686 size_t ii = proc.rpos;
2687 const size_t numnl_following = _count_following_newlines(proc.src, &ii);
2688 if(numnl_following)
2689 {
2690 proc.set('\n', numnl_following);
2691 _c4dbgfsq("{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ? "in the middle" : "at the end", proc.rpos-ii);
2692 }
2693 else
2694 {
2695 const size_t ret = proc.src.first_not_of(" \t", proc.rpos+1);
2696 if(ret != npos)
2697 {
2698 proc.set(' ');
2699 _c4dbgfsq("single newline. convert to space. ret={}/{}. sofar={}", ii, proc.src.len, prs_(proc.sofar()));
2700 }
2701 else
2702 {
2703 proc.set(' ');
2704 _c4dbgfsq("single newline. convert to space. ii={}/{}. sofar={}", ii, proc.src.len, prs_(proc.sofar()));
2705 }
2706 }
2707 proc.rpos = ii;
2708}
2709
2710template<class EventHandler>
2711template<class FilterProcessor>
2712auto ParseEngine<EventHandler>::_filter_squoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result())
2713{
2714 _c4dbgfsq("before={}", prs_(proc.src));
2715
2716 // from the YAML spec for double-quoted scalars:
2717 // https://yaml.org/spec/1.2-old/spec.html#style/flow/single-quoted
2718 while(proc.has_more_chars())
2719 {
2720 const char curr = proc.curr();
2721 _c4dbgfsq("'{}', sofar={}", _c4prc(curr), prs_(proc.sofar()));
2722 switch(curr)
2723 {
2724 case ' ':
2725 case '\t':
2726 _c4dbgfsq("whitespace", curr);
2727 _filter_ws_copy_trailing(proc);
2728 break;
2729 case '\n':
2730 _c4dbgfsq("newline", curr);
2731 _filter_nl_squoted(proc);
2732 break;
2733 case '\r': // skip \r --- https://stackoverflow.com/questions/1885900
2734 _c4dbgfsq("skip cr", curr);
2735 proc.skip();
2736 break;
2737 case '\'':
2738 _c4dbgfsq("squote", curr);
2739 if(proc.next() == '\'')
2740 {
2741 _c4dbgfsq("two consecutive squotes", curr);
2742 proc.skip();
2743 proc.copy();
2744 }
2745 else
2746 {
2747 _c4err("filter error");
2748 }
2749 break;
2750 default:
2751 proc.copy();
2752 break;
2753 }
2754 }
2755
2756 _c4dbgfsq(": #filteredchars={} after={}", proc.src.len-proc.sofar().len, prs_(proc.sofar()));
2757
2758 return proc.result();
2759}
2760
2761#undef _c4dbgfsq
2762
2763template<class EventHandler>
2765{
2766 FilterProcessorSrcDst proc(scalar, dst);
2767 return _filter_squoted(proc);
2768}
2769
2770template<class EventHandler>
2772{
2774 return _filter_squoted(proc);
2775}
2776
2777
2778//-----------------------------------------------------------------------------
2779//-----------------------------------------------------------------------------
2780//-----------------------------------------------------------------------------
2781/* double quoted */
2782
2783// a debugging scaffold:
2784#if 0
2785#define _c4dbgfdq(fmt, ...) _c4dbgpf("filt_dquo[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2786#else
2787#define _c4dbgfdq(...)
2788#endif
2789
2790template<class EventHandler>
2791template<class FilterProcessor>
2792void ParseEngine<EventHandler>::_filter_nl_dquoted(FilterProcessor &C4_RESTRICT proc)
2793{
2794 RYML_ASSERT_PARSE_CB_(this->callbacks(), proc.curr() == '\n', m_evt_handler->m_curr->pos);
2795
2796 _c4dbgfdq("found newline. sofar={}", prs_(proc.sofar()));
2797 size_t ii = proc.rpos;
2798 const size_t numnl_following = _count_following_newlines(proc.src, &ii);
2799 if(numnl_following)
2800 {
2801 proc.set('\n', numnl_following);
2802 _c4dbgfdq("{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ? "in the middle" : "at the end", proc.rpos-ii);
2803 }
2804 else
2805 {
2806 const size_t ret = proc.src.first_not_of(" \t", proc.rpos+1);
2807 if(ret != npos)
2808 {
2809 proc.set(' ');
2810 _c4dbgfdq("single newline. convert to space. ret={}/{}. sofar={}", ii, proc.src.len, prs_(proc.sofar()));
2811 }
2812 else
2813 {
2814 proc.set(' ');
2815 _c4dbgfdq("single newline. convert to space. ii={}/{}. sofar={}", ii, proc.src.len, prs_(proc.sofar()));
2816 }
2817 if(ii < proc.src.len && proc.src.str[ii] == '\\')
2818 {
2819 _c4dbgfdq("backslash at [{}]", ii);
2820 const char next = ii+1 < proc.src.len ? proc.src.str[ii+1] : '\0';
2821 if(next == ' ' || next == '\t')
2822 {
2823 _c4dbgfdq("extend skip to backslash", "");
2824 ++ii;
2825 }
2826 }
2827 }
2828 proc.rpos = ii;
2829}
2830
2831template<class EventHandler>
2832template<class FilterProcessor>
2833void ParseEngine<EventHandler>::_filter_dquoted_backslash_decode(FilterProcessor &C4_RESTRICT proc, size_t sz)
2834{
2835 const size_t szp1 = sz + 1u;
2836 if C4_UNLIKELY(proc.rpos + szp1 >= proc.src.len)
2837 _c4err("codepoint requires {} hex digits. scalar pos={}", sz, proc.rpos);
2838 char readbuf[8];
2839 csubstr codepoint = proc.src.sub(proc.rpos + 2u, sz);
2840 _c4dbgfdq("utf8 ~~~{}~~~ rpos={} rem=~~~{}~~~", codepoint, proc.rpos, proc.src.sub(proc.rpos));
2841 uint32_t codepoint_val = {};
2842 if C4_UNLIKELY(!read_hex(codepoint, &codepoint_val))
2843 _c4err("failed to parse codepoint. scalar pos={}", proc.rpos);
2844 const size_t numbytes = decode_code_point((uint8_t*)readbuf, sizeof(readbuf), codepoint_val);
2845 if C4_UNLIKELY(numbytes == 0)
2846 _c4err("failed to decode code point={}", proc.rpos);
2847 RYML_ASSERT_PARSE_CB_(callbacks(), numbytes <= 4, m_evt_handler->m_curr->pos);
2848 proc.translate_esc_bulk(readbuf, numbytes, /*nread*/szp1);
2849 _c4dbgfdq("utf8 after rpos={} rem=~~~{}~~~", proc.rpos, proc.src.sub(proc.rpos));
2850}
2851
2852template<class EventHandler>
2853template<class FilterProcessor>
2854void ParseEngine<EventHandler>::_filter_dquoted_backslash(FilterProcessor &C4_RESTRICT proc)
2855{
2856 char next = proc.next();
2857 _c4dbgfdq("backslash, next='{}'", _c4prc(next));
2858 if(next == '\r')
2859 {
2860 if(proc.rpos+2 < proc.src.len && proc.src.str[proc.rpos+2] == '\n')
2861 {
2862 proc.skip(); // newline escaped with \ -- skip both (add only one as i is loop-incremented)
2863 next = '\n';
2864 _c4dbgfdq("[{}]: was \\r\\n, now next='\\n'", proc.rpos);
2865 }
2866 }
2867
2868 if(next == '\n')
2869 {
2870 size_t ii = proc.rpos + 2;
2871 for( ; ii < proc.src.len; ++ii)
2872 {
2873 // skip leading whitespace
2874 if(proc.src.str[ii] == ' ' || proc.src.str[ii] == '\t')
2875 ;
2876 else
2877 break;
2878 }
2879 proc.skip(ii - proc.rpos);
2880 }
2881 else if(next == '"' || next == '/' || next == ' ' || next == '\t')
2882 {
2883 // escapes for json compatibility
2884 proc.translate_esc(next);
2885 _c4dbgfdq("here, used '{}'", _c4prc(next));
2886 }
2887 else if(next == '\r')
2888 {
2889 proc.skip();
2890 }
2891 else if(next == 'n')
2892 {
2893 proc.translate_esc('\n');
2894 }
2895 else if(next == 'r')
2896 {
2897 proc.translate_esc('\r');
2898 }
2899 else if(next == 't')
2900 {
2901 proc.translate_esc('\t');
2902 }
2903 else if(next == '\\')
2904 {
2905 proc.translate_esc('\\');
2906 }
2907 else if(next == 'x') // 2-digit Unicode escape (\xXX), code point 0x00–0xFF
2908 {
2909 _filter_dquoted_backslash_decode(proc, 2u);
2910 }
2911 else if(next == 'u') // 4-digit Unicode escape (\uXXXX), code point 0x0000–0xFFFF
2912 {
2913 _filter_dquoted_backslash_decode(proc, 4u);
2914 }
2915 else if(next == 'U') // 8-digit Unicode escape (\UXXXXXXXX), full 32-bit code point
2916 {
2917 _filter_dquoted_backslash_decode(proc, 8u);
2918 }
2919 // https://yaml.org/spec/1.2.2/#rule-c-ns-esc-char
2920 else if(next == '0')
2921 {
2922 proc.translate_esc('\0');
2923 }
2924 else if(next == 'b') // backspace
2925 {
2926 proc.translate_esc('\b');
2927 }
2928 else if(next == 'f') // form feed
2929 {
2930 proc.translate_esc('\f');
2931 }
2932 else if(next == 'a') // bell character
2933 {
2934 proc.translate_esc('\a');
2935 }
2936 else if(next == 'v') // vertical tab
2937 {
2938 proc.translate_esc('\v');
2939 }
2940 else if(next == 'e') // escape character
2941 {
2942 proc.translate_esc('\x1b');
2943 }
2944 else if(next == '_') // unicode non breaking space \u00a0
2945 {
2946 // https://www.compart.com/en/unicode/U+00a0
2947 const char payload[] = {
2948 RYML_CHCONST_(-0x3e, 0xc2),
2949 RYML_CHCONST_(-0x60, 0xa0),
2950 };
2951 proc.translate_esc_bulk(payload, /*nwrite*/2, /*nread*/1);
2952 }
2953 else if(next == 'N') // unicode next line \u0085
2954 {
2955 // https://www.compart.com/en/unicode/U+0085
2956 const char payload[] = {
2957 RYML_CHCONST_(-0x3e, 0xc2),
2958 RYML_CHCONST_(-0x7b, 0x85),
2959 };
2960 proc.translate_esc_bulk(payload, /*nwrite*/2, /*nread*/1);
2961 }
2962 else if(next == 'L') // unicode line separator \u2028
2963 {
2964 // https://www.utf8-chartable.de/unicode-utf8-table.pl?start=8192&number=1024&names=-&utf8=0x&unicodeinhtml=hex
2965 const char payload[] = {
2966 RYML_CHCONST_(-0x1e, 0xe2),
2967 RYML_CHCONST_(-0x80, 0x80),
2968 RYML_CHCONST_(-0x58, 0xa8),
2969 };
2970 proc.translate_esc_extending(payload, /*nwrite*/3, /*nread*/1);
2971 }
2972 else if(next == 'P') // unicode paragraph separator \u2029
2973 {
2974 // https://www.utf8-chartable.de/unicode-utf8-table.pl?start=8192&number=1024&names=-&utf8=0x&unicodeinhtml=hex
2975 const char payload[] = {
2976 RYML_CHCONST_(-0x1e, 0xe2),
2977 RYML_CHCONST_(-0x80, 0x80),
2978 RYML_CHCONST_(-0x57, 0xa9),
2979 };
2980 proc.translate_esc_extending(payload, /*nwrite*/3, /*nread*/1);
2981 }
2982 else if(next == '\0')
2983 {
2984 proc.skip();
2985 }
2986 else
2987 {
2988 _c4err("unknown character '{}' after '\\' pos={}", _c4prc(next), proc.rpos);
2989 }
2990 _c4dbgfdq("backslash...sofar={}", prs_(proc.sofar()));
2991}
2992
2993
2994template<class EventHandler>
2995template<class FilterProcessor>
2996auto ParseEngine<EventHandler>::_filter_dquoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result())
2997{
2998 _c4dbgfdq("before={}", prs_(proc.src));
2999 // from the YAML spec for double-quoted scalars:
3000 // https://yaml.org/spec/1.2-old/spec.html#style/flow/double-quoted
3001 while(proc.has_more_chars())
3002 {
3003 const char curr = proc.curr();
3004 _c4dbgfdq("'{}' sofar={}", _c4prc(curr), prs_(proc.sofar()));
3005 switch(curr)
3006 {
3007 case ' ':
3008 case '\t':
3009 {
3010 _c4dbgfdq("whitespace", curr);
3011 _filter_ws_copy_trailing(proc);
3012 break;
3013 }
3014 case '\n':
3015 {
3016 _c4dbgfdq("newline", curr);
3017 _filter_nl_dquoted(proc);
3018 break;
3019 }
3020 case '\r': // skip \r --- https://stackoverflow.com/questions/1885900
3021 {
3022 _c4dbgfdq("carriage return, ignore", curr);
3023 proc.skip();
3024 break;
3025 }
3026 case '\\':
3027 {
3028 _filter_dquoted_backslash(proc);
3029 break;
3030 }
3031 default:
3032 {
3033 proc.copy();
3034 break;
3035 }
3036 }
3037 }
3038 _c4dbgfdq("after={}", prs_(proc.sofar()));
3039 return proc.result();
3040}
3041
3042#undef _c4dbgfdq
3043
3044
3045template<class EventHandler>
3047{
3048 FilterProcessorSrcDst proc(scalar, dst);
3049 return _filter_dquoted(proc);
3050}
3051
3052template<class EventHandler>
3054{
3056 return _filter_dquoted(proc);
3057}
3058
3059
3060//-----------------------------------------------------------------------------
3061//-----------------------------------------------------------------------------
3062//-----------------------------------------------------------------------------
3063// block filtering helpers
3064
// Apply the chomping mode to the tail of a block scalar.
//
// On entry the unread remainder of proc must contain only spaces,
// newlines and carriage returns (asserted below). The function works
// in two steps:
//   1. if _find_last_newline_and_larger_indentation() reports a
//      position, the tail up to that position is consumed, copying
//      newlines and any spaces beyond the indentation, and skipping
//      carriage returns and the indentation spaces themselves;
//   2. what is left is handled according to chomp:
//      CLIP copies one newline (adding one if none was found),
//      KEEP copies every newline, STRIP does nothing.
//
// proc: the filter processor being read from and written to
// chomp: one of CHOMP_CLIP, CHOMP_KEEP, CHOMP_STRIP (asserted)
// indentation: number of leading spaces per line to be removed
3065template<class EventHandler>
3066template<class FilterProcessor>
3067void ParseEngine<EventHandler>::_filter_chomp(FilterProcessor &C4_RESTRICT proc, BlockChomp_e chomp, size_t indentation)
3068{
3069 RYML_ASSERT_PARSE_CB_(this->callbacks(), chomp == CHOMP_CLIP || chomp == CHOMP_KEEP || chomp == CHOMP_STRIP, m_evt_handler->m_curr->pos);
3070 RYML_ASSERT_PARSE_CB_(this->callbacks(), proc.rem().first_not_of(" \n\r") == npos, m_evt_handler->m_curr->pos);
3071
3072 // a debugging scaffold:
3073 #if 0
3074 #define _c4dbgchomp(fmt, ...) _c4dbgpf("chomp[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3075 #else
3076 #define _c4dbgchomp(...)
3077 #endif
3078
3079 // advance to the last line having spaces beyond the indentation
3080 {
3081 size_t last = _find_last_newline_and_larger_indentation(proc.rem(), indentation);
3082 if(last != npos)
3083 {
3084 _c4dbgchomp("found newline and larger indentation. last={}", last);
 // convert from an offset into rem() to a position in proc.src,
 // placed after the newline and the indentation that follows it
3085 last = proc.rpos + last + size_t(1) + indentation; // last started at to-be-read.
3086 RYML_ASSERT_PARSE_CB_(this->callbacks(), last <= proc.src.len, m_evt_handler->m_curr->pos);
3087 // remove indentation spaces, copy the rest
3088 while((proc.rpos < last) && proc.has_more_chars())
3089 {
3090 const char curr = proc.curr();
3091 _c4dbgchomp("curr='{}'", _c4prc(curr));
 // only '\n' and '\r' are handled here: spaces are consumed
 // inside the '\n' case, right after the newline is copied
3092 switch(curr)
3093 {
3094 case '\n':
3095 {
3096 _c4dbgchomp("newline! remlen={}", proc.rem().len);
3097 proc.copy();
3098 // are there spaces after the newline?
3099 csubstr at_next_line = proc.rem();
3100 if(at_next_line.begins_with(' '))
3101 {
3102 _c4dbgchomp("next line begins with spaces. indentation={}", indentation);
3103 // there are spaces.
3104 size_t first_non_space = at_next_line.first_not_of(' ');
3105 _c4dbgchomp("first_non_space={}", first_non_space);
3106 if(first_non_space == npos)
3107 {
3108 _c4dbgchomp("{} spaces, to the end", at_next_line.len);
3109 first_non_space = at_next_line.len;
3110 }
 // the line has no more spaces than the indentation: drop them all
3111 if(first_non_space <= indentation)
3112 {
3113 _c4dbgchomp("skip spaces={}<=indentation={}", first_non_space, indentation);
3114 proc.skip(first_non_space);
3115 }
3116 else
3117 {
3118 _c4dbgchomp("skip indentation={}<spaces={}", indentation, first_non_space);
3119 proc.skip(indentation);
3120 // copy the spaces after the indentation
3121 _c4dbgchomp("copy {}={}-{} spaces", first_non_space - indentation, first_non_space, indentation);
3122 proc.copy(first_non_space - indentation);
3123 }
3124 }
3125 break;
3126 }
3127 case '\r':
3128 proc.skip();
3129 break;
3130 }
3131 }
3132 }
3133 }
3134
3135 // from now on, we only have line ends (or indentation spaces)
3136 switch(chomp)
3137 {
3138 case CHOMP_CLIP:
3139 {
3140 bool had_one = false;
3141 while(proc.has_more_chars())
3142 {
3143 const char curr = proc.curr();
3144 _c4dbgchomp("CLIP: '{}'", _c4prc(curr));
3145 switch(curr)
3146 {
3147 case '\n':
3148 {
3149 _c4dbgchomp("copy newline!", curr);
3150 proc.copy();
 // NOTE(review): set_at_end() presumably moves the read position
 // to the end so that the loop stops after this first newline
 // -- confirm against the filter processor
3151 proc.set_at_end();
3152 had_one = true;
3153 break;
3154 }
3155 case ' ':
3156 case '\r':
3157 _c4dbgchomp("skip!", curr);
3158 proc.skip();
3159 break;
3160 }
3161 }
3162 if(!had_one) // there were no newline characters. add one.
3163 {
3164 _c4dbgchomp("chomp=CLIP: add missing newline @{}", proc.wpos);
3165 proc.set('\n');
3166 }
3167 break;
3168 }
3169 case CHOMP_KEEP:
3170 {
3171 _c4dbgchomp("chomp=KEEP: copy all remaining new lines of {} characters", proc.rem().len);
3172 while(proc.has_more_chars())
3173 {
3174 const char curr = proc.curr();
3175 _c4dbgchomp("KEEP: '{}'", _c4prc(curr));
3176 switch(curr)
3177 {
3178 case '\n':
3179 _c4dbgchomp("copy newline!", curr);
3180 proc.copy();
3181 break;
3182 case ' ':
3183 case '\r':
3184 _c4dbgchomp("skip!", curr);
3185 proc.skip();
3186 break;
3187 }
3188 }
3189 break;
3190 }
3191 case CHOMP_STRIP:
3192 {
3193 _c4dbgchomp("chomp=STRIP: strip {} characters", proc.rem().len);
3194 // nothing to do!
3195 break;
3196 }
3197 }
3198
3199 #undef _c4dbgchomp
3200}
3201
3202
3203// a debugging scaffold:
3204#if 0
3205#define _c4dbgfb(fmt, ...) _c4dbgpf("filt_block[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3206#else
3207#define _c4dbgfb(...)
3208#endif
3209
3210template<class EventHandler>
3211template<class FilterProcessor>
3212void ParseEngine<EventHandler>::_filter_block_indentation(FilterProcessor &C4_RESTRICT proc, size_t indentation)
3213{
3214 csubstr rem = proc.rem(); // remaining
3215 if(rem.len)
3216 {
3217 size_t first = rem.first_not_of(' ');
3218 if(first != npos)
3219 {
3220 _c4dbgfb("{} spaces follow before next nonws character", first);
3221 if(first < indentation)
3222 {
3223 _c4dbgfb("skip {}<{} spaces from indentation", first, indentation);
3224 proc.skip(first);
3225 }
3226 else
3227 {
3228 _c4dbgfb("skip {} spaces from indentation", indentation);
3229 proc.skip(indentation);
3230 }
3231 }
3232 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
3233 else
3234 {
3235 _c4dbgfb("all spaces to the end: {} spaces", first);
3236 first = rem.len;
3237 if(first)
3238 {
3239 if(first < indentation)
3240 {
3241 _c4dbgfb("skip everything", first);
3242 proc.skip(proc.src.len - proc.rpos);
3243 }
3244 else
3245 {
3246 _c4dbgfb("skip {} spaces from indentation", indentation);
3247 proc.skip(indentation);
3248 }
3249 }
3250 }
3251 #endif
3252 }
3253}
3254
3255template<class EventHandler>
3256template<class FilterProcessor>
3257size_t ParseEngine<EventHandler>::_handle_all_whitespace(FilterProcessor &C4_RESTRICT proc, BlockChomp_e chomp)
3258{
3259 csubstr contents = proc.src.trimr(" \n\r");
3260 _c4dbgfb("ws: contents_len={} wslen={}", contents.len, proc.src.len-contents.len);
3261 if(!contents.len)
3262 {
3263 _c4dbgfb("ws: all whitespace: len={}", proc.src.len);
3264 if(chomp == CHOMP_KEEP && proc.src.len)
3265 {
3266 _c4dbgfb("ws: chomp=KEEP all {} newlines", proc.src.count('\n'));
3267 while(proc.has_more_chars())
3268 {
3269 const char curr = proc.curr();
3270 if(curr == '\n')
3271 proc.copy();
3272 else
3273 proc.skip();
3274 }
3275 if(!proc.wpos)
3276 {
3277 proc.set('\n');
3278 }
3279 }
3280 }
3281 return contents.len;
3282}
3283
3284template<class EventHandler>
3285template<class FilterProcessor>
3286size_t ParseEngine<EventHandler>::_extend_to_chomp(FilterProcessor &C4_RESTRICT proc, size_t contents_len)
3287{
3288 _c4dbgfb("contents_len={}", contents_len);
3289
3290 RYML_ASSERT_PARSE_CB_(this->callbacks(), contents_len > 0u, m_evt_handler->m_curr->pos);
3291
3292 // extend contents to just before the first newline at the end,
3293 // in case it is preceded by spaces
3294 size_t firstnewl = proc.src.first_of('\n', contents_len);
3295 if(firstnewl != npos)
3296 {
3297 contents_len = firstnewl;
3298 _c4dbgfb("contents_len={} <--- firstnewl={}", contents_len, firstnewl);
3299 }
3300 else
3301 {
3302 contents_len = proc.src.len;
3303 _c4dbgfb("contents_len={} <--- src.len={}", contents_len, proc.src.len);
3304 }
3305
3306 return contents_len;
3307}
3308
3309#undef _c4dbgfb
3310
3311
3312//-----------------------------------------------------------------------------
3313//-----------------------------------------------------------------------------
3314//-----------------------------------------------------------------------------
3315
3316// a debugging scaffold:
3317#if 0
3318#define _c4dbgfbl(fmt, ...) _c4dbgpf("filt_block_lit[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3319#else
3320#define _c4dbgfbl(...)
3321#endif
3322
3323template<class EventHandler>
3324template<class FilterProcessor>
3325auto ParseEngine<EventHandler>::_filter_block_literal(FilterProcessor &C4_RESTRICT proc, size_t indentation, BlockChomp_e chomp) -> decltype(proc.result())
3326{
3327 _c4dbgfbl("indentation={} before={}", indentation, prs_(proc.src));
3328
3329 size_t contents_len = _handle_all_whitespace(proc, chomp);
3330 if(!contents_len)
3331 return proc.result();
3332
3333 contents_len = _extend_to_chomp(proc, contents_len);
3334
3335 _c4dbgfbl("to filter={}", prs_(proc.src.first(contents_len)));
3336
3337 _filter_block_indentation(proc, indentation);
3338
3339 // now filter the bulk
3340 while(proc.has_more_chars(/*maxpos*/contents_len))
3341 {
3342 const char curr = proc.curr();
3343 _c4dbgfbl("'{}' sofar={}", _c4prc(curr), prs_(proc.sofar()));
3344 switch(curr)
3345 {
3346 case '\n':
3347 {
3348 _c4dbgfbl("found newline. skip indentation on the next line", curr);
3349 proc.copy(); // copy the newline
3350 _filter_block_indentation(proc, indentation);
3351 break;
3352 }
3353 case '\r':
3354 proc.skip();
3355 break;
3356 default:
3357 proc.copy();
3358 break;
3359 }
3360 }
3361
3362 _c4dbgfbl("before chomp: #tochomp={} sofar={}", proc.rem().len, prs_(proc.sofar()));
3363
3364 _filter_chomp(proc, chomp, indentation);
3365
3366 _c4dbgfbl("final={}", prs_(proc.sofar()));
3367
3368 return proc.result();
3369}
3370
3371#undef _c4dbgfbl
3372
3373template<class EventHandler>
3374FilterResult ParseEngine<EventHandler>::filter_scalar_block_literal(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
3375{
3376 FilterProcessorSrcDst proc(scalar, dst);
3377 return _filter_block_literal(proc, indentation, chomp);
3378}
3379
3380template<class EventHandler>
3381FilterResult ParseEngine<EventHandler>::filter_scalar_block_literal_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
3382{
3383 FilterProcessorInplaceEndExtending proc(scalar, cap);
3384 return _filter_block_literal(proc, indentation, chomp);
3385}
3386
3387
3388//-----------------------------------------------------------------------------
3389//-----------------------------------------------------------------------------
3390//-----------------------------------------------------------------------------
3391
3392// a debugging scaffold:
3393#if 0
3394#define _c4dbgfbf(fmt, ...) _c4dbgpf("filt_block_folded[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3395#else
3396#define _c4dbgfbf(...)
3397#endif
3398
3399
3400template<class EventHandler>
3401template<class FilterProcessor>
3402void ParseEngine<EventHandler>::_filter_block_folded_newlines_leading(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len)
3403{
3404 _filter_block_indentation(proc, indentation);
3405 while(proc.has_more_chars(len))
3406 {
3407 const char curr = proc.curr();
3408 _c4dbgfbf("'{}' sofar={}", _c4prc(curr), prs_(proc.sofar()));
3409 switch(curr)
3410 {
3411 case '\n':
3412 _c4dbgfbf("newline.", curr);
3413 proc.copy();
3414 _filter_block_indentation(proc, indentation);
3415 break;
3416 case '\r':
3417 proc.skip();
3418 break;
3419 case ' ':
3420 case '\t':
3421 {
3422 size_t first = proc.rem().first_not_of(" \t");
3423 _c4dbgfbf("space. first={}", first);
3424 if(first == npos)
3425 first = proc.rem().len;
3426 _c4dbgfbf("... indentation increased to {}", first);
3427 _filter_block_folded_indented_block(proc, indentation, len, first);
3428 break;
3429 }
3430 default:
3431 _c4dbgfbf("newl leading: not space, not newline. stop.", 0);
3432 return;
3433 }
3434 }
3435}
3436
// Consume one newline of a run of newlines in a folded block scalar.
//
// The action depends on the ordinal of the newline within the run:
//   1st: the newline is skipped and a space is written in its place;
//        the write position of that space is remembered.
//   2nd: the newline is skipped and the remembered space is
//        overwritten with a newline.
//   3rd and later: the newline is copied.
//
// proc: the filter processor being read from and written to
// num_newl: 1-based ordinal of this newline in the current run
// wpos_at_first_newl: write position of the space written for the
//   first newline of the run (must not be npos when num_newl is 2)
// returns the (possibly updated) write position of that space
3437template<class EventHandler>
3438template<class FilterProcessor>
3439size_t ParseEngine<EventHandler>::_filter_block_folded_newlines_compress(FilterProcessor &C4_RESTRICT proc, size_t num_newl, size_t wpos_at_first_newl)
3440{
3441 switch(num_newl)
3442 {
3443 case 1u:
3444 _c4dbgfbf("... this is the first newline. turn into space. wpos={}", proc.wpos);
3445 wpos_at_first_newl = proc.wpos;
3446 proc.skip();
3447 proc.set(' ');
3448 break;
3449 case 2u:
3450 _c4dbgfbf("... this is the second newline. prev space (at wpos={}) must be newline", wpos_at_first_newl);
3451 RYML_ASSERT_PARSE_CB_(this->callbacks(), wpos_at_first_newl != npos, m_evt_handler->m_curr->pos);
3452 RYML_ASSERT_PARSE_CB_(this->callbacks(), proc.sofar()[wpos_at_first_newl] == ' ', m_evt_handler->m_curr->pos);
3453 RYML_ASSERT_PARSE_CB_(this->callbacks(), wpos_at_first_newl + 1u == proc.wpos, m_evt_handler->m_curr->pos);
3454 proc.skip();
3455 proc.set_at(wpos_at_first_newl, '\n');
3456 RYML_ASSERT_PARSE_CB_(this->callbacks(), proc.sofar()[wpos_at_first_newl] == '\n', m_evt_handler->m_curr->pos);
3457 break;
3458 default:
3459 _c4dbgfbf("... subsequent newline (num_newl={}). copy", num_newl);
3460 proc.copy();
3461 break;
3462 }
3463 return wpos_at_first_newl;
3464}
3465
// Fold a run of newlines in a folded block scalar.
//
// The processor must be positioned at a '\n' (asserted). Each newline
// is handed to _filter_block_folded_newlines_compress() and followed
// by indentation removal. A line starting with a space or tab ends the
// run: the space written for the first newline (if any) is turned back
// into a newline, a further newline is added when the run had more
// than one, and the line is passed to
// _filter_block_folded_indented_block(), after which the run counters
// are reset. Carriage returns are skipped. The function returns at the
// first character which is none of these, or when len is reached.
//
// proc: the filter processor being read from and written to
// indentation: the number of indentation spaces of the block
// len: the read limit passed to proc.has_more_chars()
3466template<class EventHandler>
3467template<class FilterProcessor>
3468void ParseEngine<EventHandler>::_filter_block_folded_newlines(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len)
3469{
3470 RYML_ASSERT_PARSE_CB_(this->callbacks(), proc.curr() == '\n', m_evt_handler->m_curr->pos);
3471 size_t num_newl = 0;
3472 size_t wpos_at_first_newl = npos;
3473 while(proc.has_more_chars(len))
3474 {
3475 const char curr = proc.curr();
3476 _c4dbgfbf("'{}' sofar={}", _c4prc(curr), prs_(proc.sofar()));
3477 switch(curr)
3478 {
3479 case '\n':
3480 {
3481 _c4dbgfbf("newline. sofar={}", num_newl);
3482 // NOTE: vs2022-32bit-release builds were giving wrong
3483 // results in this block, if it was written either
3484 // as a switch(num_newl) or as its equivalent if-form.
3485 //
3486 // For this reason, we're using a dedicated function
3487 // (**_compress), which seems to work around the issue.
3488 //
3489 // The manifested problem was that somewhere between the
3490 // assignment to curr and this point, proc.wpos (the
3491 // write-position of the processor) jumped to npos, which
3492 // made the write wrap around! To make things worse,
3493 // enabling prints via _c4dbgpf() and _c4dbgfbf() made the
3494 // problem go away!
3495 //
3496 // The only way to make the problem appear with prints
3497 // enabled was by disabling all prints in this function
3498 // (including in the block which was moved to the compress
3499 // function) and then selectively enabling only some of
3500 // those prints.
3501 //
3502 // This may be due to some bug in the cl-x86 optimizer; or
3503 // it may be triggered by some UB which may be
3504 // inadvertently present in this function or in the filter
3505 // processor. This is despite our best efforts to weed out
3506 // any such UB problem: neither clang-tidy nor any of the
3507 // sanitizers, nor gcc's -fanalyzer pointed to any problems
3508 // in this code.
3509 //
3510 // In the end, moving this block to a separate function
3511 // was the only way to bury the problem. But it may
3512 // resurface again, as The Undead, rising from the
3513 // grave to haunt us with his terrible presence.
3514 //
3515 // We may have to revisit this. With a stake, and lots of
3516 // garlic.
3517 wpos_at_first_newl = _filter_block_folded_newlines_compress(proc, ++num_newl, wpos_at_first_newl);
3518 _filter_block_indentation(proc, indentation);
3519 break;
3520 }
3521 case ' ':
3522 case '\t':
3523 {
 // count the whitespace up to the next non-whitespace character,
 // or up to the end of the remainder when there is none
3524 size_t first = proc.rem().first_not_of(" \t");
3525 _c4dbgfbf("space. first={}", first);
3526 if(first == npos)
3527 first = proc.rem().len;
3528 _c4dbgfbf("... indentation increased to {}", first);
3529 if(num_newl)
3530 {
3531 _c4dbgfbf("... prev space (at wpos={}) must be newline", wpos_at_first_newl);
3532 proc.set_at(wpos_at_first_newl, '\n');
3533 }
3534 if(num_newl > 1u)
3535 {
3536 _c4dbgfbf("... add missing newline", wpos_at_first_newl);
3537 proc.set('\n');
3538 }
3539 _filter_block_folded_indented_block(proc, indentation, len, first);
 // the indented block ended the run of newlines: start counting anew
3540 num_newl = 0;
3541 wpos_at_first_newl = npos;
3542 break;
3543 }
3544 case '\r':
3545 proc.skip();
3546 break;
3547 default:
3548 _c4dbgfbf("not space, not newline. stop.", 0);
3549 return;
3550 }
3551 }
3552}
3553
3554
3555template<class EventHandler>
3556template<class FilterProcessor>
3557void ParseEngine<EventHandler>::_filter_block_folded_indented_block(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len, size_t curr_indentation) noexcept
3558{
3559 RYML_ASSERT_PARSE_CB_(this->callbacks(), (proc.rem().first_not_of(" \t") == curr_indentation) || (proc.rem().first_not_of(" \t") == npos), m_evt_handler->m_curr->pos);
3560 if(curr_indentation)
3561 proc.copy(curr_indentation);
3562 while(proc.has_more_chars(len))
3563 {
3564 const char curr = proc.curr();
3565 _c4dbgfbf("'{}' sofar={}", _c4prc(curr), prs_(proc.sofar()));
3566 switch(curr)
3567 {
3568 case '\n':
3569 {
3570 proc.copy();
3571 _filter_block_indentation(proc, indentation);
3572 csubstr rem = proc.rem();
3573 const size_t first = rem.first_not_of(' ');
3574 _c4dbgfbf("newline. firstns={}", first);
3575 if(first == 0)
3576 {
3577 const char c = rem[first];
3578 _c4dbgfbf("firstns={}='{}'", first, _c4prc(c));
3579 if(c != '\n' && c != '\r')
3580 {
3581 _c4dbgfbf("done with indented block", first);
3582 goto endloop;
3583 }
3584 }
3585 else if(first != npos)
3586 {
3587 proc.copy(first);
3588 _c4dbgfbf("copy all {} spaces", first);
3589 }
3590 break;
3591 }
3592 break;
3593 case '\r':
3594 proc.skip();
3595 break;
3596 default:
3597 proc.copy();
3598 break;
3599 }
3600 }
3601 endloop:
3602 return;
3603}
3604
3605
3606template<class EventHandler>
3607template<class FilterProcessor>
3608auto ParseEngine<EventHandler>::_filter_block_folded(FilterProcessor &C4_RESTRICT proc, size_t indentation, BlockChomp_e chomp) -> decltype(proc.result())
3609{
3610 _c4dbgfbf("indentation={} before={}", indentation, prs_(proc.src));
3611
3612 size_t contents_len = _handle_all_whitespace(proc, chomp);
3613 if(!contents_len)
3614 return proc.result();
3615
3616 contents_len = _extend_to_chomp(proc, contents_len);
3617
3618 _c4dbgfbf("to filter={}", prs_(proc.src.first(contents_len)));
3619
3620 _filter_block_folded_newlines_leading(proc, indentation, contents_len);
3621
3622 // now filter the bulk
3623 while(proc.has_more_chars(/*maxpos*/contents_len))
3624 {
3625 const char curr = proc.curr();
3626 _c4dbgfbf("'{}' sofar={}", _c4prc(curr), prs_(proc.sofar()));
3627 switch(curr)
3628 {
3629 case '\n':
3630 {
3631 _c4dbgfbf("found newline", curr);
3632 _filter_block_folded_newlines(proc, indentation, contents_len);
3633 break;
3634 }
3635 case '\r':
3636 proc.skip();
3637 break;
3638 default:
3639 proc.copy();
3640 break;
3641 }
3642 }
3643
3644 _c4dbgfbf("before chomp: #tochomp={} sofar={}", proc.rem().len, prs_(proc.sofar()));
3645
3646 _filter_chomp(proc, chomp, indentation);
3647
3648 _c4dbgfbf("final={}", proc.sofar().len, prs_(proc.sofar()));
3649
3650 return proc.result();
3651}
3652
3653#undef _c4dbgfbf
3654
3655template<class EventHandler>
3656FilterResult ParseEngine<EventHandler>::filter_scalar_block_folded(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
3657{
3658 FilterProcessorSrcDst proc(scalar, dst);
3659 return _filter_block_folded(proc, indentation, chomp);
3660}
3661
3662template<class EventHandler>
3663FilterResult ParseEngine<EventHandler>::filter_scalar_block_folded_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
3664{
3665 FilterProcessorInplaceEndExtending proc(scalar, cap);
3666 return _filter_block_folded(proc, indentation, chomp);
3667}
3668
3669
3670//-----------------------------------------------------------------------------
3671//-----------------------------------------------------------------------------
3672//-----------------------------------------------------------------------------
3673
3674template<class EventHandler>
3676{
3677 _c4dbgpf("filtering plain scalar: s={}", prs_(s));
3678 FilterResult r = this->filter_scalar_plain_in_place(s, s.len, indentation);
3679 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, r.valid(), m_evt_handler->m_curr->pos);
3680 _c4dbgpf("filtering plain scalar: success! s={}", prs_(r.get()));
3681 return r.get();
3682}
3683
3684//-----------------------------------------------------------------------------
3685
3686template<class EventHandler>
3688{
3689 _c4dbgpf("filtering squo scalar: s={}", prs_(s));
3690 FilterResult r = this->filter_scalar_squoted_in_place(s, s.len);
3691 RYML_ASSERT_PARSE_CB_(this->callbacks(), r.valid(), m_evt_handler->m_curr->pos);
3692 _c4dbgpf("filtering squo scalar: success! s={}", prs_(r.get()));
3693 return r.get();
3694}
3695
3696
3697//-----------------------------------------------------------------------------
3698
3699template<class EventHandler>
3701{
3702 _c4dbgpf("filtering dquo scalar: s={}", prs_(s));
3703 FilterResultExtending r = this->filter_scalar_dquoted_in_place(s, s.len);
3704 if C4_LIKELY(r.valid())
3705 {
3706 _c4dbgpf("filtering dquo scalar: success! s={}", prs_(r.get()));
3707 return r.get();
3708 }
3709 else
3710 {
3711 const size_t len = r.required_len();
3712 _c4dbgpf("filtering dquo scalar: not enough space: needs {}, have {}", len, s.len);
3713 substr dst = _alloc_arena(len, &s);
3714 _c4dbgpf("filtering dquo scalar: dst.len={}", dst.len);
3715 if(dst.str)
3716 {
3717 RYML_ASSERT_PARSE_CB_(this->callbacks(), dst.len == len, m_evt_handler->m_curr->pos);
3718 FilterResult rsd = this->filter_scalar_dquoted(s, dst);
3719 _c4dbgpf("filtering dquo scalar: ... result now needs {} was {}", rsd.required_len(), len);
3720 RYML_ASSERT_PARSE_CB_(this->callbacks(), rsd.required_len() <= len, m_evt_handler->m_curr->pos); // may be smaller!
3721 RYML_CHECK_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, rsd.valid(), m_evt_handler->m_curr->pos);
3722 _c4dbgpf("filtering dquo scalar: success! s={}", prs_(rsd.get()));
3723 return rsd.get();
3724 }
3725 return dst;
3726 }
3727}
3728
3729
3730//-----------------------------------------------------------------------------
3731
3732template<class EventHandler>
3734{
3735 if(s.is_sub(_buf()))
3736 {
3737 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, s.str > _buf().str, m_evt_handler->m_curr->pos);
3738 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, s.str-1 >= _buf().str, m_evt_handler->m_curr->pos);
3739 if(s.len)
3740 memmove(s.str - 1, s.str, s.len);
3741 --s.str;
3742 s.str[s.len] = '\n';
3743 ++s.len;
3744 return s;
3745 }
3746 else
3747 {
3748 substr dst = _alloc_arena(s.len + 1, &s);
3749 if(s.len)
3750 memcpy(dst.str, s.str, s.len);
3751 dst[s.len] = '\n';
3752 return dst;
3753 }
3754}
3755
3756template<class EventHandler>
3757csubstr ParseEngine<EventHandler>::_filter_scalar_literal(substr s, size_t indentation, BlockChomp_e chomp)
3758{
3759 _c4dbgpf("filtering block literal scalar: s={}", prs_(s));
3760 FilterResult r = this->filter_scalar_block_literal_in_place(s, s.len, indentation, chomp);
3761 csubstr result;
3762 if C4_LIKELY(r.valid())
3763 {
3764 result = r.get();
3765 }
3766 else
3767 {
3768 _c4dbgpf("filtering block literal scalar: not enough space: needs {}, have {}", r.required_len(), s.len);
3769 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, r.required_len() == s.len + 1, m_evt_handler->m_curr->pos);
3770 // this can only happen when adding a single newline in clip mode.
3771 // so we shift left the scalar by one place
3772 result = _move_scalar_left_and_add_newline(s);
3773 }
3774 _c4dbgpf("filtering block literal scalar: success! s={}", prs_(result));
3775 return result;
3776}
3777
3778
3779//-----------------------------------------------------------------------------
3780template<class EventHandler>
3781csubstr ParseEngine<EventHandler>::_filter_scalar_folded(substr s, size_t indentation, BlockChomp_e chomp)
3782{
3783 _c4dbgpf("filtering block folded scalar: s={}", prs_(s));
3784 FilterResult r = this->filter_scalar_block_folded_in_place(s, s.len, indentation, chomp);
3785 csubstr result;
3786 if C4_LIKELY(r.valid())
3787 {
3788 result = r.get();
3789 }
3790 else
3791 {
3792 _c4dbgpf("filtering block folded scalar: not enough space: needs {}, have {}", r.required_len(), s.len);
3793 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, r.required_len() == s.len + 1, m_evt_handler->m_curr->pos);
3794 // this can only happen when adding a single newline in clip mode.
3795 // so we shift left the scalar by one place
3796 result = _move_scalar_left_and_add_newline(s);
3797 }
3798 _c4dbgpf("filtering block folded scalar: success! s={}", prs_(result));
3799 return result;
3800}
3801
3802
3803//-----------------------------------------------------------------------------
3804
3805template<class EventHandler>
3806csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_plain(ScannedScalar const& C4_RESTRICT sc, size_t indentation)
3807{
3808 if(sc.needs_filter)
3809 {
3810 if(m_options.scalar_filtering())
3811 {
3812 return _filter_scalar_plain(sc.scalar, indentation);
3813 }
3814 else
3815 {
3816 _c4dbgp("plain scalar left unfiltered");
3817 m_evt_handler->mark_key_scalar_unfiltered();
3818 }
3819 }
3820 else
3821 {
3822 _c4dbgp("plain scalar doesn't need filtering");
3823 }
3824 return sc.scalar;
3825}
3826
3827template<class EventHandler>
3828csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_plain(ScannedScalar const& C4_RESTRICT sc, size_t indentation)
3829{
3830 if(sc.needs_filter)
3831 {
3832 if(m_options.scalar_filtering())
3833 {
3834 return _filter_scalar_plain(sc.scalar, indentation);
3835 }
3836 else
3837 {
3838 _c4dbgp("plain scalar left unfiltered");
3839 m_evt_handler->mark_val_scalar_unfiltered();
3840 }
3841 }
3842 else
3843 {
3844 _c4dbgp("plain scalar doesn't need filtering");
3845 }
3846 return sc.scalar;
3847}
3848
3849
3850//-----------------------------------------------------------------------------
3851
3852template<class EventHandler>
3853csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_squot(ScannedScalar const& C4_RESTRICT sc)
3854{
3855 if(sc.needs_filter)
3856 {
3857 if(m_options.scalar_filtering())
3858 {
3859 return _filter_scalar_squot(sc.scalar);
3860 }
3861 else
3862 {
3863 _c4dbgp("squo key scalar left unfiltered");
3864 m_evt_handler->mark_key_scalar_unfiltered();
3865 }
3866 }
3867 else
3868 {
3869 _c4dbgp("squo key scalar doesn't need filtering");
3870 }
3871 return sc.scalar;
3872}
3873
3874template<class EventHandler>
3875csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_squot(ScannedScalar const& C4_RESTRICT sc)
3876{
3877 if(sc.needs_filter)
3878 {
3879 if(m_options.scalar_filtering())
3880 {
3881 return _filter_scalar_squot(sc.scalar);
3882 }
3883 else
3884 {
3885 _c4dbgp("squo val scalar left unfiltered");
3886 m_evt_handler->mark_val_scalar_unfiltered();
3887 }
3888 }
3889 else
3890 {
3891 _c4dbgp("squo val scalar doesn't need filtering");
3892 }
3893 return sc.scalar;
3894}
3895
3896
3897//-----------------------------------------------------------------------------
3898
3899template<class EventHandler>
3900csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_dquot(ScannedScalar const& C4_RESTRICT sc)
3901{
3902 if(sc.needs_filter)
3903 {
3904 if(m_options.scalar_filtering())
3905 {
3906 return _filter_scalar_dquot(sc.scalar);
3907 }
3908 else
3909 {
3910 _c4dbgp("dquo scalar left unfiltered");
3911 m_evt_handler->mark_key_scalar_unfiltered();
3912 }
3913 }
3914 else
3915 {
3916 _c4dbgp("dquo scalar doesn't need filtering");
3917 }
3918 return sc.scalar;
3919}
3920
3921template<class EventHandler>
3922csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_dquot(ScannedScalar const& C4_RESTRICT sc)
3923{
3924 if(sc.needs_filter)
3925 {
3926 if(m_options.scalar_filtering())
3927 {
3928 return _filter_scalar_dquot(sc.scalar);
3929 }
3930 else
3931 {
3932 _c4dbgp("dquo scalar left unfiltered");
3933 m_evt_handler->mark_val_scalar_unfiltered();
3934 }
3935 }
3936 else
3937 {
3938 _c4dbgp("dquo scalar doesn't need filtering");
3939 }
3940 return sc.scalar;
3941}
3942
3943
3944//-----------------------------------------------------------------------------
3945
// Returns the literal block scalar held in `sb` for use as a key: filtered
// through _filter_scalar_literal() when scalar filtering is enabled in
// m_options, otherwise returned unchanged after telling the event handler
// that the key scalar is unfiltered.
// NOTE(review): the declarator line (return type, name, parameter list) is
// absent from this listing; presumably this is the key/literal member of the
// _maybe_filter_*() family -- confirm against the real file.
3946template<class EventHandler>
3948{
3949 if(m_options.scalar_filtering())
3950 {
3951 return _filter_scalar_literal(sb.scalar, sb.indentation, sb.chomp);
3952 }
3953 else
3954 {
3955 _c4dbgp("literal scalar left unfiltered");
3956 m_evt_handler->mark_key_scalar_unfiltered();
3957 }
// only reached when filtering is disabled
3958 return sb.scalar;
3959}
3960
// Returns the literal block scalar held in `sb` for use as a val: filtered
// through _filter_scalar_literal() when scalar filtering is enabled in
// m_options, otherwise returned unchanged after telling the event handler
// that the val scalar is unfiltered.
// NOTE(review): the declarator line (return type, name, parameter list) is
// absent from this listing; presumably this is the val/literal member of the
// _maybe_filter_*() family -- confirm against the real file.
3961template<class EventHandler>
3963{
3964 if(m_options.scalar_filtering())
3965 {
3966 return _filter_scalar_literal(sb.scalar, sb.indentation, sb.chomp);
3967 }
3968 else
3969 {
3970 _c4dbgp("literal scalar left unfiltered");
3971 m_evt_handler->mark_val_scalar_unfiltered();
3972 }
// only reached when filtering is disabled
3973 return sb.scalar;
3974}
3975
3976
3977//-----------------------------------------------------------------------------
3978
// Returns the folded block scalar held in `sb` for use as a key: filtered
// through _filter_scalar_folded() when scalar filtering is enabled in
// m_options, otherwise returned unchanged after telling the event handler
// that the key scalar is unfiltered.
// NOTE(review): the declarator line (return type, name, parameter list) is
// absent from this listing; presumably this is the key/folded member of the
// _maybe_filter_*() family -- confirm against the real file.
3979template<class EventHandler>
3981{
3982 if(m_options.scalar_filtering())
3983 {
3984 return _filter_scalar_folded(sb.scalar, sb.indentation, sb.chomp);
3985 }
3986 else
3987 {
3988 _c4dbgp("folded scalar left unfiltered");
3989 m_evt_handler->mark_key_scalar_unfiltered();
3990 }
// only reached when filtering is disabled
3991 return sb.scalar;
3992}
3993
// Returns the folded block scalar held in `sb` for use as a val: filtered
// through _filter_scalar_folded() when scalar filtering is enabled in
// m_options, otherwise returned unchanged after telling the event handler
// that the val scalar is unfiltered.
// NOTE(review): the declarator line (return type, name, parameter list) is
// absent from this listing; presumably this is the val/folded member of the
// _maybe_filter_*() family -- confirm against the real file.
3994template<class EventHandler>
3996{
3997 if(m_options.scalar_filtering())
3998 {
3999 return _filter_scalar_folded(sb.scalar, sb.indentation, sb.chomp);
4000 }
4001 else
4002 {
4003 _c4dbgp("folded scalar left unfiltered");
4004 m_evt_handler->mark_val_scalar_unfiltered();
4005 }
// only reached when filtering is disabled
4006 return sb.scalar;
4007}
4008
4009
4010//-----------------------------------------------------------------------------
4011//-----------------------------------------------------------------------------
4012//-----------------------------------------------------------------------------
4013
4014#ifdef RYML_DBG // !!! <----------------------------------
4015
4016template<class EventHandler>
4017void ParseEngine<EventHandler>::add_flags(ParserFlag_t on)
4018{
4019 ParserState *s = m_evt_handler->m_curr;
4020 char buf1_[64], buf2_[64], buf3_[64];
4021 csubstr buf1 = detail::_parser_flags_to_str(buf1_, on);
4022 csubstr buf2 = detail::_parser_flags_to_str(buf2_, s->flags);
4023 csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags|on);
4024 _c4dbgpf("state[{}]: add {}: before={} after={}", s->level, buf1, buf2, buf3);
4025 s->flags |= on;
4026}
4027
4028template<class EventHandler>
4029void ParseEngine<EventHandler>::addrem_flags(ParserFlag_t on, ParserFlag_t off)
4030{
4031 ParserState *s = m_evt_handler->m_curr;
4032 char buf1_[64], buf2_[64], buf3_[64], buf4_[64];
4033 csubstr buf1 = detail::_parser_flags_to_str(buf1_, on);
4034 csubstr buf2 = detail::_parser_flags_to_str(buf2_, off);
4035 csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags);
4036 csubstr buf4 = detail::_parser_flags_to_str(buf4_, (~off)&((s->flags|on)));
4037 _c4dbgpf("state[{}]: add {} / rem {}: before={} after={}", s->level, buf1, buf2, buf3, buf4);
4038 RYML_ASSERT_BASIC_((on & off) == ParserFlag_t(0));
4039 s->flags &= ~off;
4040 s->flags |= on;
4041}
4042
4043template<class EventHandler>
4044void ParseEngine<EventHandler>::rem_flags(ParserFlag_t off)
4045{
4046 ParserState *s = m_evt_handler->m_curr;
4047 char buf1_[64], buf2_[64], buf3_[64];
4048 csubstr buf1 = detail::_parser_flags_to_str(buf1_, off);
4049 csubstr buf2 = detail::_parser_flags_to_str(buf2_, s->flags);
4050 csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags&(~off));
4051 _c4dbgpf("state[{}]: rem {}: before={} after={}", s->level, buf1, buf2, buf3);
4052 s->flags &= ~off;
4053}
4054
4055inline C4_NO_INLINE csubstr detail::_parser_flags_to_str(substr buf, ParserFlag_t flags)
4056{
4057 size_t pos = 0;
4058 bool gotone = false;
4059
4060 #define _prflag(fl) \
4061 if((flags & fl) == (fl)) \
4062 { \
4063 if(gotone) \
4064 { \
4065 if(pos + 1 < buf.len) \
4066 buf[pos] = '|'; \
4067 ++pos; \
4068 } \
4069 csubstr fltxt = #fl; \
4070 if(pos + fltxt.len <= buf.len) \
4071 memcpy(buf.str + pos, fltxt.str, fltxt.len); \
4072 pos += fltxt.len; \
4073 gotone = true; \
4074 }
4075
4076 _prflag(RTOP);
4077 _prflag(RUNK);
4078 _prflag(RMAP);
4079 _prflag(RSEQ);
4080 _prflag(RFLOW);
4081 _prflag(RBLCK);
4082 _prflag(QMRK);
4083 _prflag(RKEY);
4084 _prflag(RVAL);
4085 _prflag(RKCL);
4086 _prflag(RNXT);
4087 _prflag(SSCL);
4088 _prflag(QSCL);
4089 _prflag(RSET);
4090 _prflag(RDOC);
4091 _prflag(NDOC);
4092 _prflag(USTY);
4093 _prflag(RSEQIMAP);
4094
4095 #undef _prflag
4096
4097 if(pos == 0)
4098 if(buf.len > 0)
4099 buf[pos++] = '0';
4100
4101 RYML_CHECK_BASIC_(pos <= buf.len);
4102
4103 return buf.first(pos);
4104}
4105
4106#endif // RYML_DBG !!! <----------------------------------
4107
4108
4109//-----------------------------------------------------------------------------
4110//-----------------------------------------------------------------------------
4111//-----------------------------------------------------------------------------
4112
// Returns the part of the parsed buffer that starts at loc.offset and runs
// to the end of the buffer. loc.offset is asserted to be inside the buffer.
// NOTE(review): the declarator line (return type, name, parameter list) is
// absent from this listing -- confirm it against the real file.
4113template<class EventHandler>
4115{
4116 RYML_ASSERT_BASIC_CB_(m_evt_handler->m_stack.m_callbacks, loc.offset < _buf().len);
4117 return _buf().sub(loc.offset);
4118}
4119
// Computes the location (name, offset, line, column) of the character that
// `val` points to, using the stored offsets of the newlines of the parsed
// buffer. A null `val` yields a location with zero offset, line and column.
// NOTE(review): the declarator line (return type, name, parameter list) is
// absent from this listing -- confirm it against the real file.
4120template<class EventHandler>
4122{
4123 if C4_UNLIKELY(val == nullptr)
4124 return {m_evt_handler->m_curr->pos.name, 0, 0, 0};
4125 RYML_CHECK_BASIC_CB_(m_evt_handler->m_stack.m_callbacks, m_options.locations());
4126 // NOTE: if any of these checks fails, the parser needs to be
4127 // instantiated with locations enabled.
4128 RYML_ASSERT_BASIC_CB_(m_evt_handler->m_stack.m_callbacks, m_options.locations());
4129 RYML_ASSERT_BASIC_CB_(m_evt_handler->m_stack.m_callbacks, !_locations_dirty());
4130 RYML_ASSERT_BASIC_CB_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets != nullptr);
4131 RYML_ASSERT_BASIC_CB_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_size > 0);
4132 // NOTE: the pointer needs to belong to the buffer that was used to parse.
4133 csubstr src = _buf();
// val cannot be null here because of the early return above, so the
// null-related alternatives in the next two checks are never the deciding ones
4134 RYML_CHECK_BASIC_CB_(m_evt_handler->m_stack.m_callbacks, val != nullptr || src.str == nullptr);
4135 RYML_CHECK_BASIC_CB_(m_evt_handler->m_stack.m_callbacks, (val >= src.begin() && val <= src.end()) || (src.str == nullptr && val == nullptr));
4136 // ok. search the first stored newline after the given ptr
4137 using lineptr_type = size_t const* C4_RESTRICT;
4138 lineptr_type lineptr = nullptr;
4139 size_t offset = (size_t)(val - src.begin());
4140 if(m_newline_offsets_size < RYML_LOCATIONS_SMALL_THRESHOLD)
4141 {
4142 // just do a linear search if the size is small.
// NOTE(review): lineptr stays null when no stored offset is greater than
// `offset`. This looks reachable with val == src.end(), which the check
// above accepts -- confirm.
4143 for(lineptr_type curr = m_newline_offsets, last = m_newline_offsets + m_newline_offsets_size; curr < last; ++curr)
4144 {
4145 if(*curr > offset)
4146 {
4147 lineptr = curr;
4148 break;
4149 }
4150 }
4151 }
4152 else
4153 {
4154 // do a bisection search if the size is not small.
4155 //
4156 // We could use std::lower_bound but this is simple enough and
4157 // spares the costly include of <algorithm>.
// NOTE(review): this finds the first stored offset that is not less than
// `offset` (ie >=), whereas the linear search above requires strictly
// greater. The two paths differ when `offset` equals a stored offset --
// confirm which one is intended.
4158 size_t count = m_newline_offsets_size;
4159 lineptr = m_newline_offsets;
4160 while(count)
4161 {
4162 size_t step = count >> 1;
4163 lineptr_type it = lineptr + step;
4164 if(*it < offset)
4165 {
4166 lineptr = ++it;
4167 count -= step + 1;
4168 }
4169 else
4170 {
4171 count = step;
4172 }
4173 }
4174 }
4175 RYML_ASSERT_BASIC_CB_(m_evt_handler->m_stack.m_callbacks, lineptr);
4176 RYML_ASSERT_BASIC_CB_(m_evt_handler->m_stack.m_callbacks, lineptr >= m_newline_offsets);
4177 RYML_ASSERT_BASIC_CB_(m_evt_handler->m_stack.m_callbacks, lineptr <= m_newline_offsets + m_newline_offsets_size);
4178 RYML_ASSERT_BASIC_CB_(m_evt_handler->m_stack.m_callbacks, lineptr && (*lineptr > offset));
// the line is the index of the selected entry in the array of offsets
4179 Location loc;
4180 loc.name = m_evt_handler->m_curr->pos.name;
4181 loc.offset = offset;
4182 loc.line = (size_t)(lineptr - m_newline_offsets);
// the column is counted from the character after the previous stored
// newline, or from the start of the buffer when on the first entry
4183 if(lineptr > m_newline_offsets)
4184 loc.col = (offset - *(lineptr-1) - 1u);
4185 else
4186 loc.col = offset;
4187 return loc;
4188}
4189
4190template<class EventHandler>
4191void ParseEngine<EventHandler>::_prepare_locations()
4192{
4193 csubstr src = _buf();
4194 size_t numnewlines = 1u + src.count('\n');
4195 _resize_locations(numnewlines);
4196 m_newline_offsets_size = 0;
4197 for(size_t i = 0; i < src.len; i++)
4198 if(src.str[i] == '\n')
4199 m_newline_offsets[m_newline_offsets_size++] = i; // NOLINT
4200 m_newline_offsets[m_newline_offsets_size++] = src.len; // NOLINT
4201 RYML_ASSERT_BASIC_CB_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_size == numnewlines);
4202}
4203
4204template<class EventHandler>
4205void ParseEngine<EventHandler>::_resize_locations(size_t numnewlines)
4206{
4207 numnewlines = numnewlines >= 16 ? numnewlines : 16;
4208 if(numnewlines > m_newline_offsets_capacity)
4209 {
4210 if(m_newline_offsets)
4211 RYML_CB_FREE_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets, size_t, m_newline_offsets_capacity);
4212 m_newline_offsets = RYML_CB_ALLOC_HINT_(m_evt_handler->m_stack.m_callbacks, size_t, numnewlines, m_newline_offsets);
4213 m_newline_offsets_capacity = numnewlines;
4214 }
4215}
4216
4217template<class EventHandler>
4218bool ParseEngine<EventHandler>::_locations_dirty() const
4219{
4220 return !m_newline_offsets_size;
4221}
4222
4223
4224//-----------------------------------------------------------------------------
4225//-----------------------------------------------------------------------------
4226//-----------------------------------------------------------------------------
4227
// Skips leading spaces and tabs in the remainder of the current line, and
// then consumes the rest of the line if what follows starts with '#'.
// Does nothing when the remainder of the line is empty.
4228template<class EventHandler>
4229void ParseEngine<EventHandler>::_handle_flow_skip_whitespace()
4230{
4231 // don't assign to csubstr rem: otherwise, gcc12,13,14 -O3 -m32 misbuilds
// (this is why the remainder is spelled out in full on every access below)
4232 if(m_evt_handler->m_curr->line_contents.rem.len > 0)
4233 {
4234 if(m_evt_handler->m_curr->line_contents.rem.str[0] == ' ' || m_evt_handler->m_curr->line_contents.rem.str[0] == '\t')
4235 {
4236 _c4dbgpf("starts with whitespace: '{}'", _c4prc(m_evt_handler->m_curr->line_contents.rem.str[0]));
4237 _skipchars(" \t");
4238 }
4239 // comments
// the comment extends to the end of the line, so all of it is consumed
4240 if(m_evt_handler->m_curr->line_contents.rem.begins_with('#'))
4241 {
4242 _c4dbgpf("it's a comment: {}", m_evt_handler->m_curr->line_contents.rem);
4243 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
4244 }
4245 }
4246}
4247
4248
4249template<class EventHandler>
4250void ParseEngine<EventHandler>::_handle_flow_line_beginning()
4251{
4252 _c4dbgpf("flow: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
4253 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->at_line_beginning(), m_evt_handler->m_curr->pos);
4254 if C4_UNLIKELY(m_evt_handler->m_curr->indentation_lt())
4255 {
4256 csubstr trimmed = m_evt_handler->m_curr->line_contents.rem.sub(m_evt_handler->m_curr->line_contents.indentation);
4257 _c4dbgpf("flow: after indentation={}", prs_(trimmed));
4258 if(trimmed.len && trimmed.triml(" \t").len)
4259 {
4260 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
4261 _c4err("bad indentation");
4262 }
4263 }
4264}
4265
4266template<class EventHandler>
4267size_t ParseEngine<EventHandler>::_handle_block_skip_leading_whitespace()
4268{
4269 const size_t mark = m_evt_handler->m_curr->pos.offset;
4270 const size_t firstpos = m_evt_handler->m_curr->line_contents.rem.first_not_of(" \t");
4271 _c4dbgpf("block: mark={} firstpos={}", mark, firstpos);
4272 if(firstpos != npos)
4273 {
4274 _c4dbgp("block: non empty line");
4275 _line_progressed(firstpos);
4276 return mark;
4277 }
4278 else
4279 {
4280 _c4dbgp("block: rest of line is whitespace");
4281 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
4282 return npos;
4283 }
4284}
4285
4286template<class EventHandler>
4287void ParseEngine<EventHandler>::_handle_block_check_leading_tabs(size_t start_mark, size_t end_mark)
4288{
4289 _c4dbgpf("block: start_mark={} end_mark={}", start_mark, end_mark);
4290 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, end_mark >= start_mark, m_evt_handler->m_curr->pos);
4291 if(end_mark != start_mark)
4292 {
4293 csubstr leading = _buf().range(start_mark, end_mark);
4294 _c4dbgpf("block: leading[{}-{}]={}", start_mark, end_mark, prs_(leading, true));
4295 size_t pos = leading.find('\t');
4296 if(pos != npos)
4297 {
4298 size_t fno = leading.first_not_of(" \t");
4299 if(fno == npos || pos < fno)
4300 _c4err("invalid tab character to the left");
4301 }
4302 (void)leading;
4303 }
4304}
4305
4306
4307//-----------------------------------------------------------------------------
4308
4309
4310template<class EventHandler>
4311void ParseEngine<EventHandler>::_handle_colon()
4312{
4313 size_t curr = m_evt_handler->m_curr->pos.line;
4314 if C4_UNLIKELY(m_prev_colon != npos && curr == m_prev_colon)
4315 {
4316 _c4dbgpf("colon: prevline={} currline={}", m_prev_colon, curr);
4317 _c4err("two colons on same line");
4318 }
4319 _c4dbgpf("colon: set prevline={}->{}", m_prev_colon, curr);
4320 m_prev_colon = curr;
4321}
4322
4323template<class EventHandler>
4324void ParseEngine<EventHandler>::_add_annotation(Annotation *C4_RESTRICT dst, csubstr str)
4325{
4326 _c4dbgpf("store annotation[{}]: {}", dst->num_entries, prs_(str));
4327 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, dst->num_entries < C4_COUNTOF(dst->annotations), m_evt_handler->m_curr->pos); // NOLINT(bugprone-sizeof-expression)
4328 dst->annotations[dst->num_entries].str = str;
4329 dst->annotations[dst->num_entries].indentation = {};
4330 dst->annotations[dst->num_entries].line = {};
4331 dst->annotations[dst->num_entries].orig = {};
4332 ++dst->num_entries;
4333}
4334
4335template<class EventHandler>
4336void ParseEngine<EventHandler>::_add_annotation(Annotation *C4_RESTRICT dst, csubstr str, size_t indentation, size_t line)
4337{
4338 _c4dbgpf("store annotation[{}]: '{}' indentation={} line={}", dst->num_entries, maybe_null_str_(str), indentation, line);
4339 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, dst->num_entries < C4_COUNTOF(dst->annotations), m_evt_handler->m_curr->pos); // NOLINT(bugprone-sizeof-expression)
4340 if C4_UNLIKELY(dst->num_entries && dst->annotations[0].line == line)
4341 {
4342 _c4err("parse error");
4343 }
4344 dst->annotations[dst->num_entries].str = str;
4345 dst->annotations[dst->num_entries].indentation = indentation;
4346 dst->annotations[dst->num_entries].line = line;
4347 dst->annotations[dst->num_entries].orig = {};
4348 ++dst->num_entries;
4349}
4350
4351template<class EventHandler>
4352void ParseEngine<EventHandler>::_add_annotation(Annotation *C4_RESTRICT dst, csubstr str, size_t indentation, size_t line, csubstr orig)
4353{
4354 _c4dbgpf("store annotation[{}]: '{}'->'{}' indentation={} line={}", dst->num_entries, orig, maybe_null_str_(str), indentation, line);
4355 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, dst->num_entries < C4_COUNTOF(dst->annotations), m_evt_handler->m_curr->pos); // NOLINT(bugprone-sizeof-expression)
4356 if C4_UNLIKELY(dst->num_entries && dst->annotations[0].line == line)
4357 {
4358 _c4err("parse error");
4359 }
4360 dst->annotations[dst->num_entries].str = str;
4361 dst->annotations[dst->num_entries].indentation = indentation;
4362 dst->annotations[dst->num_entries].line = line;
4363 dst->annotations[dst->num_entries].orig = orig;
4364 ++dst->num_entries;
4365}
4366
4367template<class EventHandler>
4368bool ParseEngine<EventHandler>::_annotations_require_key_container() const
4369{
4370 return m_pending_tags.num_entries > 1 || m_pending_anchors.num_entries > 1;
4371}
4372
4373template<class EventHandler>
4374bool ParseEngine<EventHandler>::_handle_annotations_before_unexpected_flow_token_rkey()
4375{
4376 if(!(m_pending_tags.num_entries | m_pending_anchors.num_entries))
4377 return false;
4378 _c4dbgpf("handle_annotations_before_unexpected_flow_comma_rkey, node={}", m_evt_handler->m_curr->node_id);
4379 if(m_pending_tags.num_entries)
4380 {
4381 _c4dbgpf("handle_annotations_before_unexpected_flow_comma_rkey, #tags={}", m_pending_tags.num_entries);
4382 if C4_LIKELY(m_pending_tags.num_entries == 1)
4383 {
4384 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4385 _clear_annotations(&m_pending_tags);
4386 }
4387 else
4388 {
4389 _c4err("too many tags");
4390 }
4391 }
4392 if(m_pending_anchors.num_entries)
4393 {
4394 _c4dbgpf("handle_annotations_before_unexpected_flow_comma, #anchors={}", m_pending_tags.num_entries);
4395 if C4_LIKELY(m_pending_anchors.num_entries == 1)
4396 {
4397 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4398 _clear_annotations(&m_pending_anchors);
4399 }
4400 else
4401 {
4402 _c4err("too many anchors");
4403 }
4404 }
4405 m_evt_handler->set_key_scalar_plain_empty();
4406 m_evt_handler->set_val_scalar_plain_empty();
4407 return true;
4408}
4409
4410template<class EventHandler>
4411void ParseEngine<EventHandler>::_handle_annotations_before_blck_key_scalar()
4412{
4413 _c4dbgpf("annotations_before_blck_key_scalar, node={}", m_evt_handler->m_curr->node_id);
4414 if(m_pending_tags.num_entries)
4415 {
4416 _c4dbgpf("annotations_before_blck_key_scalar, #tags={}", m_pending_tags.num_entries);
4417 if C4_LIKELY(m_pending_tags.num_entries == 1)
4418 {
4419 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4420 _clear_annotations(&m_pending_tags);
4421 }
4422 else
4423 {
4424 _c4err("too many tags"); // LCOV_EXCL_LINE
4425 }
4426 }
4427 if(m_pending_anchors.num_entries)
4428 {
4429 _c4dbgpf("annotations_before_blck_key_scalar, #anchors={}", m_pending_anchors.num_entries);
4430 if C4_LIKELY(m_pending_anchors.num_entries == 1)
4431 {
4432 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4433 _clear_annotations(&m_pending_anchors);
4434 }
4435 else
4436 {
4437 _c4err("too many anchors"); // LCOV_EXCL_LINE
4438 }
4439 }
4440}
4441
4442template<class EventHandler>
4443void ParseEngine<EventHandler>::_handle_annotations_before_blck_val_scalar()
4444{
4445 _c4dbgpf("annotations_before_blck_val_scalar, node={}", m_evt_handler->m_curr->node_id);
4446 if(m_pending_tags.num_entries)
4447 {
4448 _c4dbgpf("annotations_before_blck_val_scalar, #tags={}", m_pending_tags.num_entries);
4449 if C4_LIKELY(m_pending_tags.num_entries == 1)
4450 {
4451 m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4452 _clear_annotations(&m_pending_tags);
4453 }
4454 else
4455 {
4456 _c4err("too many tags");
4457 }
4458 }
4459 if(m_pending_anchors.num_entries)
4460 {
4461 _c4dbgpf("annotations_before_blck_val_scalar, #anchors={}", m_pending_anchors.num_entries);
4462 if C4_LIKELY(m_pending_anchors.num_entries == 1)
4463 {
4464 m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4465 _clear_annotations(&m_pending_anchors);
4466 }
4467 else
4468 {
4469 _c4err("too many anchors");
4470 }
4471 }
4472}
4473
4474template<class EventHandler>
4475void ParseEngine<EventHandler>::_handle_annotations_before_start_mapblck(size_t current_line)
4476{
4477 _c4dbgpf("annotations_before_start_mapblck, current_line={}", current_line);
4478 if(m_pending_tags.num_entries == 2)
4479 {
4480 _c4dbgp("2 tags, setting entry 0");
4481 m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4482 }
4483 else if(m_pending_tags.num_entries == 1)
4484 {
4485 _c4dbgpf("1 tag. line={}, curr={}", m_pending_tags.annotations[0].line, current_line);
4486 if(m_pending_tags.annotations[0].line < current_line)
4487 {
4488 _c4dbgp("...tag is for the map. setting it.");
4489 m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4490 _clear_annotations(&m_pending_tags);
4491 }
4492 }
4493 //
4494 if(m_pending_anchors.num_entries == 2)
4495 {
4496 _c4dbgp("2 anchors, setting entry 0");
4497 m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4498 }
4499 else if(m_pending_anchors.num_entries == 1)
4500 {
4501 _c4dbgpf("1 anchor. line={}, curr={}", m_pending_anchors.annotations[0].line, current_line);
4502 if(m_pending_anchors.annotations[0].line < current_line)
4503 {
4504 _c4dbgp("...anchor is for the map. setting it.");
4505 m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4506 _clear_annotations(&m_pending_anchors);
4507 }
4508 }
4509}
4510
4511template<class EventHandler>
4512void ParseEngine<EventHandler>::_handle_annotations_before_start_mapblck_as_key()
4513{
4514 _c4dbgp("annotations_before_start_mapblck_as_key");
4515 switch(m_pending_tags.num_entries)
4516 {
4517 case 1u:
4518 _c4dbgpf("annotations_after_start_mapblck_as_key: 1 tag={} line={} currline=", prs_(m_pending_tags.annotations[0].str), m_pending_tags.annotations[0].line, m_evt_handler->m_curr->pos.line);
4519 if(m_pending_tags.annotations[0].line != m_evt_handler->m_curr->pos.line)
4520 {
4521 _c4dbgp("annotations_after_start_mapblck_as_key: is map tag");
4522 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4523 _clear_annotations(&m_pending_tags);
4524 }
4525 break;
4526 case 2u:
4527 _c4dbgpf("annotations_after_start_mapblck_as_key: 2 tags: {} -> {}", prs_(m_pending_tags.annotations[0].str), prs_(m_pending_tags.annotations[1].str));
4528 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4529 break;
4530 }
4531 switch(m_pending_anchors.num_entries)
4532 {
4533 case 1u:
4534 _c4dbgpf("annotations_after_start_mapblck_as_key: 1 anchor={} line={} currline=", m_pending_anchors.annotations[0].str, m_pending_anchors.annotations[0].line, m_evt_handler->m_curr->pos.line);
4535 if(m_pending_anchors.annotations[0].line != m_evt_handler->m_curr->pos.line)
4536 {
4537 _c4dbgp("annotations_after_start_mapblck_as_key: is map anchor");
4538 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4539 _clear_annotations(&m_pending_anchors);
4540 }
4541 break;
4542 case 2u:
4543 _c4dbgpf("annotations_after_start_mapblck_as_key: 2 anchors: {} -> {}", m_pending_anchors.annotations[0].str, m_pending_anchors.annotations[1].str);
4544 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4545 break;
4546 }
4547}
4548
4549template<class EventHandler>
4550void ParseEngine<EventHandler>::_handle_annotations_and_indentation_after_start_mapblck(size_t key_indentation, size_t key_line)
4551{
4552 _c4dbgp("annotations_after_start_mapblck");
4553 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries <= 2, m_evt_handler->m_curr->pos);
4554 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.num_entries <= 2, m_evt_handler->m_curr->pos);
4555 if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
4556 {
4557 key_indentation = _select_indentation_from_annotations(key_indentation, key_line);
4558 switch(m_pending_tags.num_entries)
4559 {
4560 case 1u:
4561 _c4dbgpf("annotations_after_start_mapblck: 1 tag: {}", prs_(m_pending_tags.annotations[0].str));
4562 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4563 _clear_annotations(&m_pending_tags);
4564 break;
4565 case 2u:
4566 _c4dbgpf("annotations_after_start_mapblck: 2 tags: {} -> {}", prs_(m_pending_tags.annotations[0].str), prs_(m_pending_tags.annotations[1].str));
4567 m_evt_handler->set_key_tag(m_pending_tags.annotations[1].str);
4568 _clear_annotations(&m_pending_tags);
4569 break;
4570 }
4571 switch(m_pending_anchors.num_entries)
4572 {
4573 case 1u:
4574 _c4dbgpf("annotations_after_start_mapblck: 1 anchors: {} -> {}", m_pending_anchors.annotations[0].str);
4575 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4576 _clear_annotations(&m_pending_anchors);
4577 break;
4578 case 2u:
4579 _c4dbgpf("annotations_after_start_mapblck: 2 anchors: {} -> {}", m_pending_anchors.annotations[0].str, m_pending_anchors.annotations[1].str);
4580 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[1].str);
4581 _clear_annotations(&m_pending_anchors);
4582 break;
4583 }
4584 }
4585 _set_indentation(key_indentation);
4586}
4587
4588template<class EventHandler>
4589size_t ParseEngine<EventHandler>::_select_indentation_from_annotations(size_t val_indentation, size_t val_line)
4590{
4591 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries | m_pending_anchors.num_entries, m_evt_handler->m_curr->pos);
4592 // select the left-most annotation on the max line
4593 auto const *C4_RESTRICT curr = m_pending_anchors.num_entries ? &m_pending_anchors.annotations[0] : &m_pending_tags.annotations[0];
4594 for(size_t i = 0; i < m_pending_anchors.num_entries; ++i)
4595 {
4596 auto const& C4_RESTRICT ann = m_pending_anchors.annotations[i];
4597 if(ann.line > curr->line)
4598 curr = &ann;
4599 else if(ann.indentation < curr->indentation)
4600 curr = &ann;
4601 }
4602 for(size_t j = 0; j < m_pending_tags.num_entries; ++j)
4603 {
4604 auto const& C4_RESTRICT ann = m_pending_tags.annotations[j];
4605 if(ann.line > curr->line)
4606 curr = &ann;
4607 else if(ann.indentation < curr->indentation)
4608 curr = &ann;
4609 }
4610 return curr->line < val_line ? val_indentation : curr->indentation;
4611}
4612
4613template<class EventHandler>
4614void ParseEngine<EventHandler>::_handle_keyref(csubstr alias)
4615{
4616 if C4_LIKELY(!(m_pending_anchors.num_entries | m_pending_tags.num_entries))
4617 m_evt_handler->set_key_ref(alias);
4618 else
4619 _c4err("aliases cannot have anchors or tags");
4620}
4621
4622template<class EventHandler>
4623void ParseEngine<EventHandler>::_handle_valref(csubstr alias)
4624{
4625 if C4_LIKELY(!(m_pending_anchors.num_entries | m_pending_tags.num_entries))
4626 m_evt_handler->set_val_ref(alias);
4627 else
4628 _c4err("aliases cannot have anchors or tags");
4629}
4630
4631template<class EventHandler>
4632csubstr ParseEngine<EventHandler>::_resolve_tag(csubstr tag)
4633{
4634 _c4dbgpf("resolving tag: {} curr_doc={}", prs_(tag), m_evt_handler->m_curr_doc);
4635 _c4assert(tag.is_sub(_buf()));
4636 TagCache::LookupResult ret = m_evt_handler->tag_cache().find(tag, m_evt_handler->m_curr_doc);
4637 if(ret)
4638 {
4639 _c4dbgpf("resolving tag: found in cache[{}]: {}", ret.pos, prs_(ret.resolved));
4640 return ret.resolved;
4641 }
4642 _c4dbgpf("resolving tag: not in cache: {} curr_doc={}", prs_(tag), m_evt_handler->m_curr_doc);
4643 size_t bufsz = 0;
4644 substr buf = m_evt_handler->arena_rem();
4645 TagDirectives const& C4_RESTRICT tds = m_evt_handler->tag_directives();
4646 csubstr ttag = tds.resolve(buf, &bufsz, tag, m_evt_handler->m_curr_doc,
4647 m_evt_handler->m_curr->pos,
4648 m_evt_handler->m_stack.m_callbacks);
4649 _c4dbgpf("resolving tag: bufsz={} ttag.len={} !!ttag.str={}", bufsz, ttag.len, !!ttag.str);
4650 _c4assert((bufsz > buf.len) == (!ttag.str));
4651 _c4assert(!!bufsz == (ttag.len == bufsz));
4652 // try again if the arena size was not enough
4653 if(!ttag.str)
4654 {
4655 _c4dbgpf("tag requires arena, but it was small. arena.len={} arena.slack={} tag.required={}", m_evt_handler->arena_rem().len, m_evt_handler->arena().len, ttag.len);
4656 _c4assert(ttag.len == bufsz);
4657 buf = _alloc_arena(bufsz, &tag);
4658 if(buf.str) // the alloc may fail eg with the ints handler
4659 {
4660 ttag = tds.resolve(buf, &bufsz, tag, m_evt_handler->m_curr_doc,
4661 m_evt_handler->m_curr->pos,
4662 m_evt_handler->m_stack.m_callbacks);
4663 }
4664 _c4assert(ttag.len == bufsz);
4665 _c4assert(!ttag.str || ttag.is_sub(m_evt_handler->arena()));
4666 }
4667 else if(bufsz) // if we succeeded writing into the arena, grow it as needed
4668 {
4669 _c4dbgp("tag required arena. update size");
4670 _c4assert(ttag.len == bufsz);
4671 _c4assert(ttag.is_sub(buf));
4672 (void)_alloc_arena(bufsz);
4673 }
4674 C4_SUPPRESS_WARNING_MSVC_WITH_PUSH(4127) // conditional expression is constant
4675 if C4_IF_CONSTEXPR (EventHandler::requires_strings_on_buffers) // NOLINT
4676 {
4677 _c4dbgpf("handler requires tags in buffers. !!ttag.str={} in_arena={} in_src={}", !!ttag.str, ttag.is_sub(m_evt_handler->arena()), ttag.is_sub(_buf()));
4678 // is the resolved tag not in any of those buffers?
4679 if(ttag.str && !ttag.is_sub(m_evt_handler->arena()) && !ttag.is_sub(_buf()))
4680 {
4681 _c4dbgpf("copying resolved tag to arena: slack={} required={}", m_evt_handler->arena_rem().len, ttag.len);
4682 buf = _alloc_arena(ttag.len, &tag);
4683 if(buf.str) // the alloc may fail eg with the ints handler
4684 memcpy(buf.str, ttag.str, ttag.len);
4685 ttag.str = buf.str; // keep the current len!
4686 _c4assert(!ttag.str || ttag.is_sub(m_evt_handler->arena()));
4687 }
4688 }
4689 C4_SUPPRESS_WARNING_MSVC_POP
4690 _c4dbgpf("resolved tag: {} --> [{}]~~~{}~~~", prs_(tag), ttag.len, maybe_null_str_(ttag));
4691 _c4assert(ttag.len > 0);
4692 // cache the hard-earned result!
4693 m_evt_handler->tag_cache().add(tag, ttag, m_evt_handler->m_curr_doc, ret.pos);
4694 return ttag;
4695}
4696
4697template<class EventHandler>
4698bool ParseEngine<EventHandler>::_validate_directive_yaml(csubstr *C4_RESTRICT directive, csubstr *C4_RESTRICT version) const
4699{
4700 _c4assert(directive->begins_with("%YAML"));
4701 size_t version_start = directive->first_not_of(" \t", 5);
4702 if(version_start != npos)
4703 {
4704 csubstr digits = "0123456789";
4705 size_t major_end = directive->first_not_of(digits, version_start);
4706 if(major_end != npos && directive->str[major_end] == '.') // single dot
4707 {
4708 size_t minor_end = directive->first_not_of(digits, major_end + 1);
4709 if(minor_end == npos)
4710 minor_end = directive->len;
4711 _set_first_strict(*directive, minor_end);
4712 *version = directive->range(version_start, minor_end);
4713 _c4dbgpf("%YAML: version={} full={}", *version, prs_(*directive, true));
4714 return true;
4715 }
4716 }
4717 return false;
4718}
4719
4720template<class EventHandler>
4721bool ParseEngine<EventHandler>::_validate_directive_tag(csubstr *C4_RESTRICT directive, csubstr *C4_RESTRICT handle, csubstr *C4_RESTRICT prefix) const
4722{
4723 _c4assert(directive->begins_with("%TAG"));
4724 csubstr whitespace = " \t";
4725 size_t handle_start = directive->first_not_of(whitespace, 4);
4726 if(handle_start != npos && directive->str[handle_start] == '!')
4727 {
4728 size_t handle_end = directive->first_of(whitespace, handle_start);
4729 if(handle_end != npos)
4730 {
4731 size_t prefix_start = directive->first_not_of(whitespace, handle_end);
4732 if(prefix_start != npos)
4733 {
4734 size_t prefix_end = directive->first_of(whitespace, prefix_start);
4735 if(prefix_end == npos)
4736 prefix_end = directive->len;
4737 _set_first_strict(*directive, prefix_end);
4738 *handle = directive->range(handle_start, handle_end);
4739 *prefix = directive->range(prefix_start, prefix_end);
4740 _c4dbgpf("%TAG: handle={} prefix={} full={}", *handle, *prefix, prs_(*directive, true));
4741 if(is_valid_tag_handle(*handle))
4742 return true;
4743 }
4744 }
4745 }
4746 return false;
4747}
4748
4749template<class EventHandler>
4750void ParseEngine<EventHandler>::_handle_directive(csubstr directive)
4751{
4752 _c4dbgpf("handle_directive: rem={}", prs_(directive, true));
4753 _c4assert(m_evt_handler->m_curr->line_contents.rem.begins_with('%'));
4754 _c4assert(directive.str == m_evt_handler->m_curr->line_contents.rem.str);
4755 const char *err = nullptr;
4756 csubstr rem;
4757 size_t pos;
4758 auto isdirective = [](csubstr str, csubstr dir) {
4759 if(str.begins_with(dir))
4760 {
4761 csubstr rest = str.sub(dir.len);
4762 return (!rest.len || rest.str[0] == ' ' || rest.str[0] == '\t');
4763 }
4764 return false;
4765 };
4766 if(isdirective(directive, "%TAG"))
4767 {
4768 csubstr handle;
4769 csubstr prefix;
4770 if C4_UNLIKELY(!_validate_directive_tag(&directive, &handle, &prefix))
4771 {
4772 err = "invalid %TAG directive";
4773 goto directive_error; // NOLINT
4774 }
4775 m_evt_handler->add_directive_tag(handle, prefix);
4776 }
4777 else if(isdirective(directive, "%YAML"))
4778 {
4780 if C4_UNLIKELY(!_validate_directive_yaml(&directive, &version))
4781 {
4782 err = "invalid %YAML directive";
4783 goto directive_error; // NOLINT
4784 }
4785 if C4_UNLIKELY(m_has_directives_yaml)
4786 {
4787 err = "multiple %YAML directives";
4788 goto directive_error; // NOLINT
4789 }
4790 m_has_directives_yaml = true;
4791 m_evt_handler->add_directive_yaml(version);
4792 }
4793 m_has_directives = true;
4794 rem = m_evt_handler->m_curr->line_contents.rem;
4795 pos = rem.first_not_of(" \t", directive.len);
4796 pos = pos != npos ? pos : rem.len;
4797 _line_progressed(pos);
4798 rem = rem.sub(pos);
4799 _c4dbgpf("handle_directive: rest={}", prs_(rem));
4800 if C4_UNLIKELY(rem.len && !rem.begins_with('#'))
4801 {
4802 err = "invalid tokens after directive";
4803 goto directive_error; // NOLINT
4804 }
4805directive_error:
4806 if C4_UNLIKELY(err != nullptr)
4807 _c4err(err);
4808}
4809
4810template<class EventHandler>
4811bool ParseEngine<EventHandler>::_handle_bom()
4812{
4813 const csubstr rem = m_evt_handler->m_curr->line_contents.rem;
4814 if(rem.len)
4815 {
4816 const csubstr rest = rem.sub(1);
4817 // https://yaml.org/spec/1.2.2/#52-character-encodings
4818 #define _rymlisascii(c) ((c) > '\0' && (c) <= '\x7f') // is the character ASCII?
4819 if(rem.begins_with(csubstr{"\x00\x00\xfe\xff", 4}) || (rem.begins_with(csubstr{"\x00\x00\x00", 3}) && rem.len >= 4u && _rymlisascii(rem.str[3])))
4820 {
4821 _c4dbgp("byte order mark: UTF32BE");
4822 _handle_bom(UTF32BE);
4823 _line_progressed(4);
4824 m_bom_len = 4;
4825 return true;
4826 }
4827 else if(rem.begins_with(csubstr{"\xff\xfe\x00\x00", 4}) || (rest.begins_with(csubstr{"\x00\x00\x00", 3}) && rem.len >= 4u && _rymlisascii(rem.str[0])))
4828 {
4829 _c4dbgp("byte order mark: UTF32LE");
4830 _handle_bom(UTF32LE);
4831 _line_progressed(4);
4832 m_bom_len = 4;
4833 return true;
4834 }
4835 else if(rem.begins_with("\xfe\xff") || (rem.begins_with('\x00') && rem.len >= 2u && _rymlisascii(rem.str[1])))
4836 {
4837 _c4dbgp("byte order mark: UTF16BE");
4838 _handle_bom(UTF16BE);
4839 _line_progressed(2);
4840 m_bom_len = 2;
4841 return true;
4842 }
4843 else if(rem.begins_with("\xff\xfe") || (rest.begins_with('\x00') && rem.len >= 2u && _rymlisascii(rem.str[0])))
4844 {
4845 _c4dbgp("byte order mark: UTF16LE");
4846 _handle_bom(UTF16LE);
4847 _line_progressed(2);
4848 m_bom_len = 2;
4849 return true;
4850 }
4851 else if(rem.begins_with("\xef\xbb\xbf"))
4852 {
4853 _c4dbgp("byte order mark: UTF8");
4854 _handle_bom(UTF8);
4855 _line_progressed(3);
4856 m_bom_len = 3;
4857 return true;
4858 }
4859 #undef _rymlisascii
4860 }
4861 return false;
4862}
4863
4864template<class EventHandler>
4865void ParseEngine<EventHandler>::_handle_bom(Encoding_e enc)
4866{
4867 if(m_encoding == NOBOM)
4868 {
4869 if(enc == UTF8 || /*beginning of file*/(m_evt_handler->m_curr->line_contents.rem.str == _buf().str))
4870 m_encoding = enc;
4871 else
4872 _c4err("non-UTF8 byte order mark can appear only at the beginning of the file");
4873 }
4874 else if(enc != m_encoding)
4875 {
4876 _c4err("byte order mark can only be set once");
4877 }
4878}
4879
4880
4881//-----------------------------------------------------------------------------
4882
4883template<class EventHandler>
4884void ParseEngine<EventHandler>::_handle_seq_json()
4885{
4886seqjson_start:
4887 _c4dbgpf("handle2_seq_json: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
4888
4889 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
4890 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_all(RSEQ), m_evt_handler->m_curr->pos);
4891 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_all(RFLOW), m_evt_handler->m_curr->pos);
4892 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT), m_evt_handler->m_curr->pos);
4893 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_all(RVAL) != has_all(RNXT), m_evt_handler->m_curr->pos);
4894
4895 _handle_flow_skip_whitespace();
4896 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
4897 if(!rem.len)
4898 goto seqjson_again;
4899
4900 if(has_any(RVAL))
4901 {
4902 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
4903 const char first = rem.str[0];
4904 _c4dbgpf("mapjson[RVAL]: '{}'", first);
4905 switch(first)
4906 {
4907 case '"':
4908 {
4909 _c4dbgp("seqjson[RVAL]: scanning double-quoted scalar");
4910 ScannedScalar sc = _scan_scalar_dquot();
4911 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
4912 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
4913 addrem_flags(RNXT, RVAL);
4914 break;
4915 }
4916 case '[':
4917 {
4918 _c4dbgp("seqjson[RVAL]: start child seqjson");
4919 addrem_flags(RNXT, RVAL);
4920 m_evt_handler->begin_seq_val_flow();
4921 addrem_flags(RVAL, RNXT);
4922 _line_progressed(1);
4923 break;
4924 }
4925 case '{':
4926 {
4927 _c4dbgp("seqjson[RVAL]: start child mapjson");
4928 addrem_flags(RNXT, RVAL);
4929 m_evt_handler->begin_map_val_flow();
4930 addrem_flags(RMAP|RKEY, RSEQ|RVAL|RNXT);
4931 _line_progressed(1);
4932 goto seqjson_finish;
4933 }
4934 case ']': // this happens on a trailing comma like ", ]"
4935 {
4936 _c4dbgp("seqjson[RVAL]: end!");
4937 rem_flags(RSEQ);
4938 _end_seq_flow();
4939 _line_progressed(1);
4940 if(!has_all(RSEQ|RFLOW))
4941 goto seqjson_finish;
4942 break;
4943 }
4944 default:
4945 {
4946 ScannedScalar sc;
4947 if(_scan_scalar_seq_json(&sc))
4948 {
4949 _c4dbgp("seqjson[RVAL]: it's a plain scalar.");
4950 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
4951 m_evt_handler->set_val_scalar_plain(maybe_filtered);
4952 addrem_flags(RNXT, RVAL);
4953 }
4954 else
4955 {
4956 _c4err("parse error");
4957 }
4958 }
4959 }
4960 }
4961 else // RNXT
4962 {
4963 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(RNXT), m_evt_handler->m_curr->pos);
4964 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
4965 const char first = rem.str[0];
4966 _c4dbgpf("mapjson[RNXT]: '{}'", first);
4967 switch(first)
4968 {
4969 case ',':
4970 {
4971 _c4dbgp("seqjson[RNXT]: expect next val");
4972 addrem_flags(RVAL, RNXT);
4973 m_evt_handler->add_sibling();
4974 _line_progressed(1);
4975 break;
4976 }
4977 case ']':
4978 {
4979 _c4dbgp("seqjson[RNXT]: end!");
4980 _end_seq_flow();
4981 _line_progressed(1);
4982 goto seqjson_finish;
4983 }
4984 default:
4985 _c4err("parse error");
4986 }
4987 }
4988
4989 seqjson_again:
4990 _c4dbgt("seqjson: go again", 0);
4991 if(_finished_line())
4992 {
4993 if C4_LIKELY(!_finished_file())
4994 {
4995 _line_ended();
4996 _scan_line();
4998 }
4999 else
5000 {
5001 _c4err("missing terminating ]");
5002 }
5003 }
5004 goto seqjson_start;
5005
5006 seqjson_finish:
5007 _c4dbgp("seqjson: finish");
5008}
5009
5010
5011//-----------------------------------------------------------------------------
5012
/** Parse the contents of a JSON flow map.
 *
 * Runs as a loop over four states, of which exactly one is set:
 *  - RKEY: expect a double-quoted key, or a closing '}' (which
 *    happens on a trailing comma). Anything else is a parse error.
 *  - RKCL: a key was read; expect the ':' separating it from the value.
 *  - RVAL: expect a value: a double-quoted scalar, a nested '['
 *    sequence, a nested '{' map, or anything accepted by
 *    _scan_scalar_map_json().
 *  - RNXT: a value was read; expect ',' (next pair) or '}' (end).
 *
 * The function returns when this map ends or when a child sequence
 * starts. A child map is handled by this same loop. Lines are
 * consumed as needed; running out of input is the error
 * "missing terminating }". */
5013template<class EventHandler>
5014void ParseEngine<EventHandler>::_handle_map_json()
5015{
5016mapjson_start:
5017 _c4dbgpf("handle2_map_json: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5018
    // invariants: reading a flow map, and in exactly one of the four states
5019 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_all(RMAP), m_evt_handler->m_curr->pos);
5020 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_all(RFLOW), m_evt_handler->m_curr->pos);
5021 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5022 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RKCL|RVAL|RNXT), m_evt_handler->m_curr->pos);
5023 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RKEY) + has_any(RKCL) + has_any(RVAL) + has_any(RNXT)), m_evt_handler->m_curr->pos);
5024
5025 _handle_flow_skip_whitespace();
5026 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
    // nothing left in this line: move on to the next one
5027 if(!rem.len)
5028 goto mapjson_again;
5029
    // RKEY: read the key
5030 if(has_any(RKEY))
5031 {
5032 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
5033 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5034 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5035 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5036 const char first = rem.str[0];
5037 _c4dbgpf("mapjson[RKEY]: '{}'", first);
5038 switch(first)
5039 {
5040 case '"':
5041 {
5042 _c4dbgp("mapjson[RKEY]: scanning double-quoted scalar");
5043 ScannedScalar sc = _scan_scalar_dquot();
5044 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5045 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5046 addrem_flags(RKCL, RKEY);
5047 break;
5048 }
5049 case '}': // this happens on a trailing comma like ", }"
5050 {
5051 _c4dbgp("mapjson[RKEY]: end!");
5052 _end_map_flow();
5053 _line_progressed(1);
5054 goto mapjson_finish;
5055 }
5056 default:
5057 _c4err("parse error");
5058 }
5059 }
    // RVAL: read the value
5060 else if(has_any(RVAL))
5061 {
5062 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
5063 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
5064 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5065 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5066 const char first = rem.str[0];
5067 _c4dbgpf("mapjson[RVAL]: '{}'", first);
5068 switch(first)
5069 {
5070 case '"':
5071 {
5072 _c4dbgp("mapjson[RVAL]: scanning double-quoted scalar");
5073 ScannedScalar sc = _scan_scalar_dquot();
5074 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5075 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5076 addrem_flags(RNXT, RVAL);
5077 break;
5078 }
5079 case '[':
5080 {
5081 _c4dbgp("mapjson[RVAL]: start val seqjson");
        // the flags are set to RNXT before the child sequence begins;
        // the child then takes the parent's indentation and starts in RVAL
5082 addrem_flags(RNXT, RVAL);
5083 m_evt_handler->begin_seq_val_flow();
5084 _set_indentation(m_evt_handler->m_parent->indref);
5085 addrem_flags(RSEQ|RVAL, RMAP|RNXT);
5086 _line_progressed(1);
        // the state is now a sequence: leave this function
5087 goto mapjson_finish;
5088 }
5089 case '{':
5090 {
5091 _c4dbgp("mapjson[RVAL]: start val mapjson");
5092 addrem_flags(RNXT, RVAL);
5093 m_evt_handler->begin_map_val_flow();
5094 _set_indentation(m_evt_handler->m_parent->indref);
5095 addrem_flags(RKEY, RNXT);
5096 _line_progressed(1);
5097 // keep going in this function
5098 break;
5099 }
5100 default:
5101 {
5102 ScannedScalar sc;
5103 if(_scan_scalar_map_json(&sc))
5104 {
5105 _c4dbgp("mapjson[RVAL]: plain scalar.");
5106 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5107 m_evt_handler->set_val_scalar_plain(maybe_filtered);
5108 addrem_flags(RNXT, RVAL);
5109 }
5110 else
5111 {
5112 _c4err("parse error");
5113 }
5114 break;
5115 }
5116 }
5117 }
5118 else if(has_any(RKCL)) // read the key colon
5119 {
5120 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
5121 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5122 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5123 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5124 const char first = rem.str[0];
5125 _c4dbgpf("mapjson[RKCL]: '{}'", first);
5126 if(first == ':')
5127 {
5128 _c4dbgp("mapjson[RKCL]: found the colon");
5129 addrem_flags(RVAL, RKCL);
5130 _line_progressed(1);
5131 }
5132 else
5133 {
5134 _c4err("parse error");
5135 }
5136 }
    // RNXT: after a value, expect ',' or '}'
5137 else if(has_any(RNXT))
5138 {
5139 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
5140 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
5141 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5142 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5143 _c4dbgpf("mapjson[RNXT]: '{}'", rem.str[0]);
5144 if(rem.begins_with(','))
5145 {
5146 _c4dbgp("mapjson[RNXT]: expect next keyval");
5147 m_evt_handler->add_sibling();
5148 addrem_flags(RKEY, RNXT);
5149 _line_progressed(1);
5150 }
5151 else if(rem.begins_with('}'))
5152 {
5153 _c4dbgp("mapjson[RNXT]: end!");
5154 _end_map_flow();
5155 _line_progressed(1);
5156 goto mapjson_finish;
5157 }
5158 else
5159 {
5160 _c4err("parse error"); // LCOV_EXCL_LINE
5161 }
5162 }
5163
    // loop: fetch the next line when the current one is exhausted;
    // it is an error for the file to end before the closing '}'
5164 mapjson_again:
5165 _c4dbgt("mapjson: go again", 0);
5166 if(_finished_line())
5167 {
5168 if C4_LIKELY(!_finished_file())
5169 {
5170 _line_ended();
5171 _scan_line();
5173 }
5174 else
5175 {
5176 _c4err("missing terminating }");
5177 }
5178 }
5179 goto mapjson_start;
5180
5181 mapjson_finish:
5182 _c4dbgp("mapjson: finish");
5183}
5184
5185
5186//-----------------------------------------------------------------------------
5187
/** Parse a "seqimap": a map that was begun from inside a flow sequence
 * (state flag RSEQIMAP).
 * NOTE(review): presumably this is the single-pair implicit map of
 * YAML flow sequences, eg `[a: b]` or `[? a : b]` - confirm.
 *
 * Runs as a loop over four states, of which exactly one is set:
 *  - QMRK: expect the key: a quoted or plain scalar, a '[' or '{'
 *    container key, an anchor, or an alias. On ',' or ']' both the key
 *    and the val are set to empty plain scalars and the map ends.
 *  - RKCL: the key was read; expect ':'. On ',' or ']' the val is set
 *    to an empty plain scalar and the map ends.
 *  - RVAL: expect the val: a quoted or plain scalar, a '[' or '{'
 *    container, an alias, or anchors/tags (which are kept as pending
 *    annotations). On ',' or ']' the val is set to an empty plain
 *    scalar and the map ends.
 *  - RNXT: the val was read; only ',' or ']' may follow, and the map
 *    ends.
 *
 * The ',' or ']' ending the map is not consumed here. The function
 * returns when the map ends or when a child container starts. Lines
 * are consumed as needed; running out of input is a parse error. */
5188template<class EventHandler>
5189void ParseEngine<EventHandler>::_handle_seq_imap()
5190{
5191seqimap_start:
5192 _c4dbgpf("handle2_seq_imap: node_id={} level={} indref={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5193
    // invariants: in a seqimap, in exactly one of the four states,
    // and with at least three entries in the state stack
5194 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_all(RSEQIMAP), m_evt_handler->m_curr->pos);
5195 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
5196 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT|QMRK|RKCL), m_evt_handler->m_curr->pos);
5197 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, 1 == has_all(RVAL) + has_all(RNXT) + has_all(QMRK) + has_all(RKCL), m_evt_handler->m_curr->pos);
5198 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 3, m_evt_handler->m_curr->pos);
5199
5200 _handle_flow_skip_whitespace();
5201 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
    // nothing left in this line: move on to the next one
5202 if(!rem.len)
5203 goto seqimap_again;
5204
    // RVAL: read the val
5205 if(has_any(RVAL))
5206 {
5207 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(RVAL), m_evt_handler->m_curr->pos);
5208 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5209 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5210 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
5211 const char first = rem.str[0];
5212 _c4dbgpf("seqimap[RVAL]: '{}'", _c4prc(first));
5213 ScannedScalar sc;
5214 if(first == '\'')
5215 {
5216 _c4dbgp("seqimap[RVAL]: scanning single-quoted scalar");
5217 sc = _scan_scalar_squot();
5218 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5219 _handle_annotations_before_blck_val_scalar();
5220 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
        // a scalar val completes the map
5221 _end_map_flow();
5222 goto seqimap_finish;
5223 }
5224 else if(first == '"')
5225 {
5226 _c4dbgp("seqimap[RVAL]: scanning double-quoted scalar");
5227 sc = _scan_scalar_dquot();
5228 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5229 _handle_annotations_before_blck_val_scalar();
5230 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5231 _end_map_flow();
5232 goto seqimap_finish;
5233 }
5234 // block scalars (ie | and >) cannot appear in flow containers
5235 else if(_scan_scalar_plain_map_flow(&sc))
5236 {
5237 _c4dbgp("seqimap[RVAL]: it's a scalar.");
5238 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5239 _handle_annotations_before_blck_val_scalar();
5240 m_evt_handler->set_val_scalar_plain(maybe_filtered);
5241 _end_map_flow();
5242 goto seqimap_finish;
5243 }
5244 else if(first == '[')
5245 {
5246 _c4dbgp("seqimap[RVAL]: start child seqflow");
        // this state is left in RNXT; the child sequence starts in RVAL
        // without RSEQIMAP, and takes the parent's indentation
5247 addrem_flags(RNXT, RVAL);
5248 _handle_annotations_before_blck_val_scalar();
5249 m_evt_handler->begin_seq_val_flow();
5250 addrem_flags(RVAL, RNXT|RSEQIMAP);
5251 _set_indentation(m_evt_handler->m_parent->indref);
5252 _line_progressed(1);
5253 goto seqimap_finish;
5254 }
5255 else if(first == '{')
5256 {
5257 _c4dbgp("seqimap[RVAL]: start child mapflow");
5258 addrem_flags(RNXT, RVAL);
5259 _handle_annotations_before_blck_val_scalar();
5260 m_evt_handler->begin_map_val_flow();
5261 addrem_flags(RMAP|RKEY, RSEQ|RVAL|RSEQIMAP|RNXT);
5262 _set_indentation(m_evt_handler->m_parent->indref);
5263 _line_progressed(1);
5264 goto seqimap_finish;
5265 }
5266 else if(first == ',' || first == ']')
5267 {
5268 _c4dbgp("seqimap[RVAL]: finish without val.");
5269 _handle_annotations_before_blck_val_scalar();
5270 m_evt_handler->set_val_scalar_plain_empty();
5271 _end_map_flow();
5272 goto seqimap_finish;
5273 }
5274 else if(first == '*')
5275 {
5276 csubstr ref = _scan_ref_seq();
5277 _c4dbgpf("seqimap[RVAL]: ref! {}", prs_(ref));
5278 _handle_valref(ref);
5279 addrem_flags(RNXT, RVAL);
5280 }
5281 else if(first == '&')
5282 {
        // the anchor is kept pending; the state stays in RVAL
5283 csubstr anchor = _scan_anchor();
5284 _c4dbgpf("seqimap[RVAL]: anchor! {}", prs_(anchor));
5285 _add_annotation(&m_pending_anchors, anchor);
5286 }
5287 else if(first == '!')
5288 {
        // the tag is kept pending; the state stays in RVAL
5289 csubstr tag = _scan_tag();
5290 _c4dbgpf("seqimap[RVAL]: tag! {}", prs_(tag));
5291 _add_annotation(&m_pending_tags, tag);
5292 }
5293 else
5294 {
5295 _c4err("parse error"); // LCOV_EXCL_LINE
5296 }
5297 }
    // RNXT: the val was read; the map ends on ',' or ']'
5298 else if(has_any(RNXT))
5299 {
5300 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(RNXT), m_evt_handler->m_curr->pos);
5301 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5302 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5303 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
5304 const char first = rem.str[0];
5305 _c4dbgpf("seqimap[RNXT]: '{}'", _c4prc(first));
5306 if(first == ',' || first == ']')
5307 {
5308 // we may get here because a map or a seq started and we
5309 // return later
5310 _c4dbgp("seqimap: done");
5311 _end_map_flow();
5312 goto seqimap_finish;
5313 }
5314 else
5315 {
5316 _c4err("parse error"); // LCOV_EXCL_LINE
5317 }
5318 }
    // QMRK: read the key
5319 else if(has_any(QMRK))
5320 {
5321 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(QMRK), m_evt_handler->m_curr->pos);
5322 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5323 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5324 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
5325 const char first = rem.str[0];
5326 _c4dbgpf("seqimap[QMRK]: '{}'", _c4prc(first));
5327 ScannedScalar sc;
5328 if(first == '\'')
5329 {
5330 _c4dbgp("seqimap[QMRK]: scanning single-quoted scalar");
5331 sc = _scan_scalar_squot();
5332 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5333 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
        // key done: now expect the colon
5334 addrem_flags(RKCL, QMRK);
5335 goto seqimap_again;
5336 }
5337 else if(first == '"')
5338 {
5339 _c4dbgp("seqimap[QMRK]: scanning double-quoted scalar");
5340 sc = _scan_scalar_dquot();
5341 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5342 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5343 addrem_flags(RKCL, QMRK);
5344 goto seqimap_again;
5345 }
5346 // block scalars (ie | and >) cannot appear in flow containers
5347 else if(_scan_scalar_plain_map_flow(&sc))
5348 {
5349 _c4dbgp("seqimap[QMRK]: it's a scalar.");
5350 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5351 m_evt_handler->set_key_scalar_plain(maybe_filtered);
5352 addrem_flags(RKCL, QMRK);
5353 goto seqimap_again;
5354 }
5355 else if(first == '[')
5356 {
5357 _c4dbgp("seqimap[QMRK]: start child seqflow");
        // this state is left in RKCL; the child sequence is a key
5358 addrem_flags(RKCL, QMRK);
5359 m_evt_handler->begin_seq_key_flow();
5360 addrem_flags(RSEQ|RVAL, RKCL|RSEQIMAP);
5361 _set_indentation(m_evt_handler->m_parent->indref);
5362 _line_progressed(1);
5363 goto seqimap_finish;
5364 }
5365 else if(first == '{')
5366 {
5367 _c4dbgp("seqimap[QMRK]: start child mapflow");
5368 addrem_flags(RKCL, QMRK);
5369 m_evt_handler->begin_map_key_flow();
5370 addrem_flags(RMAP|RKEY, RSEQ|RKCL|RSEQIMAP);
5371 _set_indentation(m_evt_handler->m_parent->indref);
5372 _line_progressed(1);
5373 goto seqimap_finish;
5374 }
5375 else if(first == ',' || first == ']')
5376 {
5377 _c4dbgp("seqimap[QMRK]: finish without key.");
5378 m_evt_handler->set_key_scalar_plain_empty();
5379 m_evt_handler->set_val_scalar_plain_empty();
5380 _end_map_flow();
5381 goto seqimap_finish;
5382 }
5383 else if(first == '&')
5384 {
        // the anchor is set on the key right away; the state stays in QMRK
5385 csubstr anchor = _scan_anchor();
5386 _c4dbgp("seqimap[QMRK]: anchor!");
5387 m_evt_handler->set_key_anchor(anchor);
5388 }
5389 else if(first == '*')
5390 {
5391 csubstr ref = _scan_ref_seq();
5392 _c4dbgp("seqimap[QMRK]: ref!");
5393 _handle_keyref(ref);
5394 addrem_flags(RKCL, QMRK);
5395 }
5396 else
5397 {
5398 _c4err("parse error"); // LCOV_EXCL_LINE
5399 }
5400 }
    // RKCL: the key was read; expect the colon
5401 else if(has_any(RKCL))
5402 {
5403 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5404 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5405 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5406 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(RKCL), m_evt_handler->m_curr->pos);
5407 const char first = rem.str[0];
5408 _c4dbgpf("seqimap[RKCL]: '{}'", _c4prc(first));
5409 if(first == ':')
5410 {
5411 _c4dbgp("seqimap[RKCL]: found ':'");
5412 addrem_flags(RVAL, RKCL);
5413 _line_progressed(1);
5414 goto seqimap_again;
5415 }
5416 else if(first == ',' || first == ']')
5417 {
5418 _c4dbgp("seqimap[RKCL]: found ','. finish without val");
5419 m_evt_handler->set_val_scalar_plain_empty();
5420 _end_map_flow();
5421 goto seqimap_finish;
5422 }
5423 else
5424 {
5425 _c4err("parse error"); // LCOV_EXCL_LINE
5426 }
5427 }
5428
    // loop: fetch the next line when the current one is exhausted;
    // it is an error for the file to end before the map is finished
5429 seqimap_again:
5430 _c4dbgt("seqimap: go again", 0);
5431 if(_finished_line())
5432 {
5433 if C4_LIKELY(!_finished_file())
5434 {
5435 _line_ended();
5436 _scan_line();
5438 }
5439 else
5440 {
5441 _c4err("parse error");
5442 }
5443 }
5444 goto seqimap_start;
5445
5446 seqimap_finish:
5447 _c4dbgp("seqimap: finish");
5448}
5449
5450
5451//-----------------------------------------------------------------------------
5452
5453template<class EventHandler>
5454void ParseEngine<EventHandler>::_handle_seq_flow()
5455{
5456seqflow_start:
5457 _c4dbgpf("handle_seq_flow: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5458
5459 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
5460 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_all(RSEQ), m_evt_handler->m_curr->pos);
5461 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_all(RFLOW), m_evt_handler->m_curr->pos);
5462 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT), m_evt_handler->m_curr->pos);
5463 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_all(RVAL) != has_all(RNXT), m_evt_handler->m_curr->pos);
5464 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref != npos, m_evt_handler->m_curr->pos);
5465
5466 if(m_evt_handler->m_curr->at_line_beginning())
5467 {
5468 _handle_flow_line_beginning();
5469 }
5470
5471 _handle_flow_skip_whitespace();
5472 if(!m_evt_handler->m_curr->line_contents.rem.len)
5473 goto seqflow_again;
5474
5475 if(has_any(RVAL))
5476 {
5477 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5478 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5479 ScannedScalar sc;
5480 if(first == '\'')
5481 {
5482 _c4dbgp("seqflow[RVAL]: scanning single-quoted scalar");
5483 sc = _scan_scalar_squot();
5484 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5485 _handle_annotations_before_blck_val_scalar();
5486 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
5487 addrem_flags(RNXT, RVAL);
5488 _mark_seqflow_val_end();
5489 }
5490 else if(first == '"')
5491 {
5492 _c4dbgp("seqflow[RVAL]: scanning double-quoted scalar");
5493 sc = _scan_scalar_dquot();
5494 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5495 _handle_annotations_before_blck_val_scalar();
5496 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5497 addrem_flags(RNXT, RVAL);
5498 _mark_seqflow_val_end();
5499 }
5500 // block scalars (ie | and >) cannot appear in flow containers
5501 else if(_scan_scalar_plain_seq_flow(&sc))
5502 {
5503 _c4dbgp("seqflow[RVAL]: it's a scalar.");
5504 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5505 _handle_annotations_before_blck_val_scalar();
5506 m_evt_handler->set_val_scalar_plain(maybe_filtered);
5507 addrem_flags(RNXT, RVAL);
5508 _mark_seqflow_val_end();
5509 }
5510 else if(first == '[')
5511 {
5512 _c4dbgp("seqflow[RVAL]: start child seqflow");
5513 addrem_flags(RNXT, RVAL);
5514 _handle_annotations_before_blck_val_scalar();
5515 m_evt_handler->begin_seq_val_flow();
5516 _set_indentation(m_evt_handler->m_parent->indref);
5517 addrem_flags(RVAL, RNXT);
5518 _line_progressed(1);
5519 }
5520 else if(first == '{')
5521 {
5522 _c4dbgp("seqflow[RVAL]: start child mapflow");
5523 addrem_flags(RNXT, RVAL);
5524 _handle_annotations_before_blck_val_scalar();
5525 m_evt_handler->begin_map_val_flow();
5526 _set_indentation(m_evt_handler->m_parent->indref);
5527 addrem_flags(RMAP|RKEY, RSEQ|RVAL|RNXT);
5528 _line_progressed(1);
5529 goto seqflow_finish;
5530 }
5531 else if(first == ']') // this happens on cases such as [] or [.., ]
5532 {
5533 _c4dbgp("seqflow[RVAL]: end!");
5534 if(m_pending_anchors.num_entries | m_pending_tags.num_entries)
5535 {
5536 _c4dbgp("seqflow[RVAL]: add pending annotations");
5537 _handle_annotations_before_blck_val_scalar();
5538 m_evt_handler->set_val_scalar_plain_empty();
5539 }
5540 _line_progressed(1);
5541 _end_seq_flow();
5542 goto seqflow_finish;
5543 }
5544 else if(first == '*')
5545 {
5546 csubstr ref = _scan_ref_seq();
5547 _c4dbgpf("seqflow[RVAL]: ref! {}", prs_(ref));
5548 _handle_valref(ref);
5549 addrem_flags(RNXT, RVAL);
5550 }
5551 else if(first == '&')
5552 {
5553 csubstr anchor = _scan_anchor();
5554 _c4dbgpf("seqflow[RVAL]: anchor! {}", prs_(anchor));
5555 _add_annotation(&m_pending_anchors, anchor);
5556 }
5557 else if(first == '!')
5558 {
5559 csubstr tag = _scan_tag();
5560 _c4dbgpf("seqflow[RVAL]: tag! {}", prs_(tag));
5561 _add_annotation(&m_pending_tags, tag);
5562 }
5563 else if(first == ':')
5564 {
5565 _c4dbgpf("seqflow[RVAL]: actually seqimap at node[{}], with empty key", m_evt_handler->m_curr->node_id);
5566 addrem_flags(RNXT, RVAL);
5567 m_evt_handler->begin_map_val_flow();
5568 _set_indentation(m_evt_handler->m_parent->indref);
5569 _handle_annotations_before_blck_key_scalar();
5570 m_evt_handler->set_key_scalar_plain_empty();
5571 addrem_flags(RSEQIMAP|RVAL, RSEQ|RNXT);
5572 _line_progressed(1);
5573 goto seqflow_finish;
5574 }
5575 else if(first == '?')
5576 {
5577 _c4dbgp("seqflow[RVAL]: start child mapflow, explicit key");
5578 addrem_flags(RNXT, RVAL);
5579 m_evt_handler->begin_map_val_flow();
5580 _set_indentation(m_evt_handler->m_parent->indref);
5581 addrem_flags(RSEQIMAP|QMRK, RSEQ|RNXT);
5582 _line_progressed(1);
5583 _maybe_skip_whitespace_tokens();
5584 goto seqflow_finish;
5585 }
5586 else if(first == ',')
5587 {
5588 if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
5589 {
5590 _c4dbgp("seqflow[RVAL]: add pending annotations");
5591 _handle_annotations_before_blck_val_scalar();
5592 m_evt_handler->set_val_scalar_plain_empty();
5593 addrem_flags(RNXT, RVAL);
5594 _mark_seqflow_val_end();
5595 }
5596 else
5597 {
5598 _c4err("parse error");
5599 }
5600 }
5601 else
5602 {
5603 _c4err("parse error");
5604 }
5605 }
5606 else // RNXT
5607 {
5608 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(RNXT), m_evt_handler->m_curr->pos);
5609 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5610 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5611 if(first == ',')
5612 {
5613 _c4dbgp("seqflow[RNXT]: expect next val");
5614 addrem_flags(RVAL, RNXT);
5615 m_evt_handler->add_sibling();
5616 _line_progressed(1);
5617 if(m_evt_handler->m_curr->line_contents.rem.begins_with('#'))
5618 {
5619 _c4err("parse error: invalid comment after comma");
5620 }
5621 _mark_seqflow_val_end();
5622 }
5623 else if(first == ']')
5624 {
5625 _c4dbgp("seqflow[RNXT]: end!");
5626 _line_progressed(1);
5627 _end_seq_flow();
5628 goto seqflow_finish;
5629 }
5630 else if(first == ':')
5631 {
5632 _c4dbgpf("seqflow[RNXT]: line@valend={} line@now={}", m_prev_val_end, m_evt_handler->m_curr->pos.line);
5633 if(m_prev_val_end != NONE && m_evt_handler->m_curr->pos.line == m_prev_val_end)
5634 {
5635 _c4dbgpf("seqflow[RNXT]: actually seqimap at node[{}]", m_evt_handler->m_curr->node_id);
5636 m_evt_handler->actually_val_is_first_key_of_new_map_flow();
5637 _set_indentation(m_evt_handler->m_parent->indref);
5638 _line_progressed(1);
5639 addrem_flags(RSEQIMAP|RVAL, RNXT);
5640 goto seqflow_finish;
5641 }
5642 else
5643 {
5644 _c4err("parse error");
5645 }
5646 }
5647 else
5648 {
5649 _c4err("parse error");
5650 }
5651 }
5652
5653 seqflow_again:
5654 _c4dbgt("seqflow: go again", 0);
5655 if(_finished_line())
5656 {
5657 if C4_LIKELY(!_finished_file())
5658 {
5659 _line_ended();
5660 _scan_line();
5662 }
5663 else
5664 {
5665 _c4err("missing terminating ]");
5666 }
5667 }
5668 goto seqflow_start;
5669
5670 seqflow_finish:
5671 _c4dbgp("seqflow: finish");
5672}
5673
5674
5675//-----------------------------------------------------------------------------
5676
/** Parse the contents of a flow-style map (the `{ ... }` form).
 *
 * This is a state machine over the remaining text of the current line.
 * Each pass through `mapflow_start` looks at the first remaining
 * character and dispatches on which one of the state flags is set.
 * The entry assertions require RMAP and RFLOW, plus exactly one of:
 *
 *   - RKEY: a key is expected
 *   - RKCL: a key was read; the ':' after it is expected
 *   - RVAL: a value is expected
 *   - RNXT: a value was read; ',' or '}' is expected
 *   - QMRK: an explicit-key '?' was read; its key is expected
 *
 * Parsed scalars, anchors/tags, refs and container starts are forwarded
 * to `m_evt_handler`. The function leaves through `mapflow_finish` when
 * the map is closed with '}' or when a nested flow sequence is opened
 * with '[' (the flags are switched to RSEQ first; presumably the caller
 * then dispatches to the sequence handler -- confirm against the caller).
 * A nested flow map opened with '{' is handled by continuing the loop in
 * this same function.
 *
 * Errors are reported through `_c4err`: on a token that is not valid in
 * the current state, on a '#' directly after a comma, and when the file
 * ends before the closing '}'.
 */
5677template<class EventHandler>
5678void ParseEngine<EventHandler>::_handle_map_flow()
5679{
5680mapflow_start:
     // every pass handles one token of the current line and then loops
     // back here through mapflow_again
5681 _c4dbgpf("handle_map_flow: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5682
     // state invariants: reading a flow map, with exactly one of the
     // five sub-states active
5683 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_all(RMAP), m_evt_handler->m_curr->pos);
5684 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_all(RFLOW), m_evt_handler->m_curr->pos);
5685 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RKCL|RVAL|RNXT|QMRK), m_evt_handler->m_curr->pos);
5686 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RKEY) + has_any(RKCL) + has_any(RVAL) + has_any(RNXT) + has_any(QMRK)), m_evt_handler->m_curr->pos);
5687
5688 if(m_evt_handler->m_curr->at_line_beginning())
5689 {
5690 _handle_flow_line_beginning();
5691 }
5692
5693 _handle_flow_skip_whitespace();
     // nothing left on this line: go fetch the next one (or fail at
     // the end of the file)
5694 if(!m_evt_handler->m_curr->line_contents.rem.len)
5695 goto mapflow_again;
5696
     //
     // RKEY: a key is expected
     //
5697 if(has_any(RKEY))
5698 {
5699 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
5700 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5701 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5702 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5703 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5704 _c4dbgpf("mapflow[RKEY]: '{}'", first);
5705 ScannedScalar sc;
     // scalar keys (single-quoted, double-quoted, plain): set the key on
     // the handler, then move to RKCL to read the ':'
5706 if(first == '\'')
5707 {
5708 _c4dbgp("mapflow[RKEY]: scanning single-quoted scalar");
5709 sc = _scan_scalar_squot();
5710 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5711 _handle_annotations_before_blck_key_scalar();
5712 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
5713 addrem_flags(RKCL, RKEY|QMRK);
5714 }
5715 else if(first == '"')
5716 {
5717 _c4dbgp("mapflow[RKEY]: scanning double-quoted scalar");
5718 sc = _scan_scalar_dquot();
5719 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5720 _handle_annotations_before_blck_key_scalar();
5721 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5722 addrem_flags(RKCL, RKEY|QMRK);
5723 }
5724 // block scalars (ie | and >) cannot appear in flow containers
5725 else if(_scan_scalar_plain_map_flow(&sc))
5726 {
5727 _c4dbgp("mapflow[RKEY]: plain scalar");
5728 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5729 _handle_annotations_before_blck_key_scalar();
5730 m_evt_handler->set_key_scalar_plain(maybe_filtered);
5731 addrem_flags(RKCL, RKEY|QMRK);
5732 }
     // the remaining branches are tried only when the plain-scalar scan
     // above returned false
5733 else if(first == '?')
5734 {
     // explicit-key marker: consume it and switch to the QMRK state
5735 _c4dbgp("mapflow[RKEY]: explicit key");
5736 _handle_annotations_before_blck_key_scalar();
5737 addrem_flags(QMRK, RKEY);
5738 _line_progressed(1);
5739 _maybe_skip_whitespace_tokens();
5740 }
5741 else if(first == ':')
5742 {
     // ':' with no key before it: the key is the empty plain scalar,
     // and the value comes next
5743 _c4dbgp("mapflow[RKEY]: setting empty key");
5744 _handle_annotations_before_blck_key_scalar();
5745 m_evt_handler->set_key_scalar_plain_empty();
5746 addrem_flags(RVAL, RKEY|QMRK);
5747 _line_progressed(1);
5748 _maybe_skip_whitespace_tokens();
5749 }
5750 else if(first == ',')
5751 {
     // a comma where a key was expected is an error unless the helper
     // below returns true. This branch does not advance past the comma
     // itself; the switch to RNXT lets the next pass consume it.
5752 _c4dbgp("mapflow[RKEY]: comma!");
5753 if(!_handle_annotations_before_unexpected_flow_token_rkey())
5754 _c4err("unexpected comma");
5755 addrem_flags(RNXT, RKEY|QMRK);
5756 // keep going in this function
5757 }
5758 else if(first == '}') // this happens on a trailing comma like ", }"
5759 {
5760 _c4dbgp("mapflow[RKEY]: end!");
5761 (void)_handle_annotations_before_unexpected_flow_token_rkey();
5762 _line_progressed(1);
5763 _end_map_flow();
5764 goto mapflow_finish;
5765 }
     // anchors and tags are buffered in m_pending_anchors/m_pending_tags;
     // the state stays RKEY
5766 else if(first == '&')
5767 {
5768 csubstr anchor = _scan_anchor();
5769 _c4dbgpf("mapflow[RKEY]: key anchor! {}", prs_(anchor));
5770 _add_annotation(&m_pending_anchors, anchor);
5771 }
5772 else if(first == '!')
5773 {
5774 csubstr tag = _scan_tag();
5775 _c4dbgpf("mapflow[RKEY]: tag! {}", prs_(tag));
5776 _add_annotation(&m_pending_tags, tag);
5777 }
5778 else if(first == '*')
5779 {
     // a reference used as key; the ':' is expected next
5780 csubstr ref = _scan_ref_map();
5781 _c4dbgpf("mapflow[RKEY]: key ref! {}", prs_(ref));
5782 _handle_keyref(ref);
5783 addrem_flags(RKCL, RKEY);
5784 }
5785 else if(first == '[')
5786 {
5787 // RYML's tree cannot store container keys, but that's
5788 // handled inside the tree event handler. Other handler
5789 // types may be able to handle it.
5790 _c4dbgp("mapflow[RKEY]: start child seqflow (!)");
5791 _handle_annotations_before_blck_key_scalar();
     // RKCL is set before the child container begins -- presumably so
     // that this map resumes at the ':' once the child ends (confirm
     // against the handler's state push/pop)
5792 addrem_flags(RKCL, RKEY);
5793 m_evt_handler->begin_seq_key_flow();
     // flags for the child: a flow seq expecting its first value
5794 addrem_flags(RSEQ|RVAL, RMAP|RKCL);
5795 _set_indentation(m_evt_handler->m_parent->indref);
5796 _line_progressed(1);
     // no longer RMAP: leave this function
5797 goto mapflow_finish;
5798 }
5799 else if(first == '{')
5800 {
5801 // RYML's tree cannot store container keys, but that's
5802 // handled inside the tree event handler. Other handler
5803 // types may be able to handle it.
5804 _c4dbgp("mapflow[RKEY]: start child mapflow (!)");
5805 _handle_annotations_before_blck_key_scalar();
5806 addrem_flags(RKCL, RKEY);
5807 m_evt_handler->begin_map_key_flow();
     // flags for the child: a flow map expecting its first key
5808 addrem_flags(RKEY, RVAL|RKCL);
5809 _set_indentation(m_evt_handler->m_parent->indref);
5810 _line_progressed(1);
5811 // keep going in this function
5812 }
5813 else
5814 {
5815 _c4err("parse error"); // LCOV_EXCL_LINE
5816 }
5817 }
     //
     // RKCL: a key was read; the ':' is expected
     //
5818 else if(has_any(RKCL)) // read the key colon
5819 {
5820 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
5821 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5822 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5823 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5824 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5825 _c4dbgpf("mapflow[RKCL]: '{}'", first);
5826 if(first == ':')
5827 {
5828 _c4dbgp("mapflow[RKCL]: found the colon");
5829 addrem_flags(RVAL, RKCL);
5830 _line_progressed(1);
5831 }
5832 else if(first == '}')
5833 {
     // the map closes right after a key: the value is set to the empty
     // plain scalar before ending the map
5834 _c4dbgp("mapflow[RKCL]: end with missing val!");
5835 addrem_flags(RVAL, RKCL);
5836 m_evt_handler->set_val_scalar_plain_empty();
5837 _line_progressed(1);
5838 _end_map_flow();
5839 goto mapflow_finish;
5840 }
5841 else if(first == ',')
5842 {
     // a comma right after a key: the value is set to the empty plain
     // scalar, and a sibling is added for the next key
5843 _c4dbgp("mapflow[RKCL]: got comma. val is missing");
5844 m_evt_handler->set_val_scalar_plain_empty();
5845 m_evt_handler->add_sibling();
5846 addrem_flags(RKEY, RKCL);
5847 _line_progressed(1);
     // a '#' immediately after the comma is rejected
5848 if(m_evt_handler->m_curr->line_contents.rem.begins_with('#'))
5849 {
5850 _c4err("parse error: invalid comment after comma");
5851 }
5852 }
5853 else
5854 {
5855 _c4err("parse error");
5856 }
5857 }
     //
     // RVAL: a value is expected
     //
5858 else if(has_any(RVAL))
5859 {
5860 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
5861 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
5862 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5863 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5864 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5865 _c4dbgpf("mapflow[RVAL]: '{}'", first);
5866 ScannedScalar sc;
     // scalar values (single-quoted, double-quoted, plain): set the value
     // on the handler, then move to RNXT to read ',' or '}'
5867 if(first == '\'')
5868 {
5869 _c4dbgp("mapflow[RVAL]: scanning single-quoted scalar");
5870 sc = _scan_scalar_squot();
5871 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5872 _handle_annotations_before_blck_val_scalar();
5873 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
5874 addrem_flags(RNXT, RVAL);
5875 }
5876 else if(first == '"')
5877 {
5878 _c4dbgp("mapflow[RVAL]: scanning double-quoted scalar");
5879 sc = _scan_scalar_dquot();
5880 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5881 _handle_annotations_before_blck_val_scalar();
5882 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5883 addrem_flags(RNXT, RVAL);
5884 }
5885 // block scalars (ie | and >) cannot appear in flow containers
5886 else if(_scan_scalar_plain_map_flow(&sc))
5887 {
5888 _c4dbgp("mapflow[RVAL]: plain scalar.");
5889 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5890 _handle_annotations_before_blck_val_scalar();
5891 m_evt_handler->set_val_scalar_plain(maybe_filtered);
5892 addrem_flags(RNXT, RVAL);
5893 }
5894 else if(first == '[')
5895 {
5896 _c4dbgp("mapflow[RVAL]: start val seqflow");
     // RNXT is set before the child container begins -- presumably so
     // that this map resumes at ',' or '}' once the child ends (confirm
     // against the handler's state push/pop)
5897 addrem_flags(RNXT, RVAL);
5898 _handle_annotations_before_blck_val_scalar();
5899 m_evt_handler->begin_seq_val_flow();
5900 _set_indentation(m_evt_handler->m_parent->indref);
     // flags for the child: a flow seq expecting its first value
5901 addrem_flags(RSEQ|RVAL, RMAP|RNXT);
5902 _line_progressed(1);
     // no longer RMAP: leave this function
5903 goto mapflow_finish;
5904 }
5905 else if(first == '{')
5906 {
5907 _c4dbgp("mapflow[RVAL]: start val mapflow");
5908 addrem_flags(RNXT, RVAL);
5909 _handle_annotations_before_blck_val_scalar();
5910 m_evt_handler->begin_map_val_flow();
5911 _set_indentation(m_evt_handler->m_parent->indref);
     // flags for the child: a flow map expecting its first key
5912 addrem_flags(RKEY, RNXT);
5913 _line_progressed(1);
5914 // keep going in this function
5915 }
5916 else if(first == '}')
5917 {
     // the map closes where a value was expected: the value is set to
     // the empty plain scalar before ending the map
5918 _c4dbgp("mapflow[RVAL]: end!");
5919 _handle_annotations_before_blck_val_scalar();
5920 m_evt_handler->set_val_scalar_plain_empty();
5921 _line_progressed(1);
5922 _end_map_flow();
5923 goto mapflow_finish;
5924 }
5925 else if(first == ',')
5926 {
     // a comma where a value was expected: the value is set to the empty
     // plain scalar. The comma is not consumed here; the RNXT state
     // consumes it on the next pass.
5927 _c4dbgp("mapflow[RVAL]: empty val!");
5928 _handle_annotations_before_blck_val_scalar();
5929 m_evt_handler->set_val_scalar_plain_empty();
5930 addrem_flags(RNXT, RVAL);
5931 // keep going in this function
5932 }
5933 else if(first == '*')
5934 {
     // NOTE(review): the debug text says "key ref", but this is the RVAL
     // state and the ref is applied as a value (_handle_valref)
5935 csubstr ref = _scan_ref_map();
5936 _c4dbgpf("mapflow[RVAL]: key ref! {}", prs_(ref));
5937 _handle_valref(ref);
5938 addrem_flags(RNXT, RVAL);
5939 }
     // anchors and tags are buffered in m_pending_anchors/m_pending_tags;
     // the state stays RVAL
5940 else if(first == '&')
5941 {
     // NOTE(review): the debug text says "key anchor", but this branch
     // is reached in the RVAL state
5942 csubstr anchor = _scan_anchor();
5943 _c4dbgpf("mapflow[RVAL]: key anchor! {}", prs_(anchor));
5944 _add_annotation(&m_pending_anchors, anchor);
5945 }
5946 else if(first == '!')
5947 {
5948 csubstr tag = _scan_tag();
5949 _c4dbgpf("mapflow[RVAL]: tag! {}", prs_(tag));
5950 _add_annotation(&m_pending_tags, tag);
5951 }
5952 else
5953 {
5954 _c4err("parse error");
5955 }
5956 }
     //
     // RNXT: a value was read; only ',' or '}' is accepted
     //
5957 else if(has_any(RNXT))
5958 {
5959 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
5960 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
5961 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5962 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5963 _c4dbgpf("mapflow[RNXT]: '{}'", m_evt_handler->m_curr->line_contents.rem.str[0]);
5964 if(m_evt_handler->m_curr->line_contents.rem.begins_with(','))
5965 {
     // add a sibling for the next key/value pair and go back to RKEY
5966 _c4dbgp("mapflow[RNXT]: expect next keyval");
5967 m_evt_handler->add_sibling();
5968 addrem_flags(RKEY, RNXT);
5969 _line_progressed(1);
     // a '#' immediately after the comma is rejected
5970 if(m_evt_handler->m_curr->line_contents.rem.begins_with('#'))
5971 {
5972 _c4err("parse error: invalid comment after comma");
5973 }
5974 }
5975 else if(m_evt_handler->m_curr->line_contents.rem.begins_with('}'))
5976 {
5977 _c4dbgp("mapflow[RNXT]: end!");
5978 _line_progressed(1);
5979 _end_map_flow();
5980 goto mapflow_finish;
5981 }
5982 else
5983 {
5984 _c4err("parse error");
5985 }
5986 }
     //
     // QMRK: an explicit-key '?' was read; its key is expected
     //
5987 else if(has_any(QMRK))
5988 {
5989 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
5990 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
5991 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5992 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5993 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5994 _c4dbgpf("mapflow[QMRK]: '{}'", first);
5995 ScannedScalar sc;
     // scalar keys (single-quoted, double-quoted, plain): set the key on
     // the handler, then move to RKCL to read the ':'
5996 if(first == '\'')
5997 {
5998 _c4dbgp("mapflow[QMRK]: scanning single-quoted scalar");
5999 sc = _scan_scalar_squot();
6000 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
6001 _handle_annotations_before_blck_key_scalar();
6002 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6003 addrem_flags(RKCL, QMRK);
6004 }
6005 else if(first == '"')
6006 {
6007 _c4dbgp("mapflow[QMRK]: scanning double-quoted scalar");
6008 sc = _scan_scalar_dquot();
6009 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
6010 _handle_annotations_before_blck_key_scalar();
6011 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6012 addrem_flags(RKCL, QMRK);
6013 }
6014 // block scalars (ie | and >) cannot appear in flow containers
6015 else if(_scan_scalar_plain_map_flow(&sc))
6016 {
6017 _c4dbgp("mapflow[QMRK]: plain scalar");
6018 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
6019 _handle_annotations_before_blck_key_scalar();
6020 m_evt_handler->set_key_scalar_plain(maybe_filtered);
6021 addrem_flags(RKCL, QMRK);
6022 }
6023 else if(first == ':')
6024 {
     // ':' directly after the '?': the key is the empty plain scalar,
     // and the value comes next
6025 _c4dbgp("mapflow[QMRK]: setting empty key");
6026 _handle_annotations_before_blck_key_scalar();
6027 m_evt_handler->set_key_scalar_plain_empty();
6028 addrem_flags(RVAL, QMRK);
6029 _line_progressed(1);
6030 _maybe_skip_whitespace_tokens();
6031 }
6032 else if(first == '}') // reached when '}' follows the explicit-key '?' with no key scalar, eg "{ ? }"
6033 {
     // both key and value are set to the empty plain scalar.
     // NOTE(review): unlike the other '}' branches in this function, the
     // map is ended before the '}' is consumed.
6034 _c4dbgp("mapflow[QMRK]: end!");
6035 _handle_annotations_before_blck_key_scalar();
6036 m_evt_handler->set_key_scalar_plain_empty();
6037 m_evt_handler->set_val_scalar_plain_empty();
6038 _end_map_flow();
6039 _line_progressed(1);
6040 goto mapflow_finish;
6041 }
6042 else if(first == ',')
6043 {
     // a comma directly after the '?': both key and value are set to the
     // empty plain scalar. The comma is not consumed here; the RNXT
     // state consumes it on the next pass.
6044 _c4dbgp("mapflow[QMRK]: empty key+val!");
6045 _handle_annotations_before_blck_key_scalar();
6046 m_evt_handler->set_key_scalar_plain_empty();
6047 m_evt_handler->set_val_scalar_plain_empty();
6048 addrem_flags(RNXT, QMRK);
6049 }
     // anchors (and tags, further below) are buffered; the state stays QMRK
6050 else if(first == '&')
6051 {
6052 csubstr anchor = _scan_anchor();
6053 _c4dbgpf("mapflow[QMRK]: key anchor! {}", prs_(anchor));
6054 _add_annotation(&m_pending_anchors, anchor);
6055 }
6056 else if(first == '*')
6057 {
     // a reference used as key; the ':' is expected next
6058 csubstr ref = _scan_ref_map();
6059 _c4dbgpf("mapflow[QMRK]: key ref! {}", prs_(ref));
6060 _handle_keyref(ref);
6061 addrem_flags(RKCL, QMRK);
6062 }
6063 else if(first == '[')
6064 {
6065 // RYML's tree cannot store container keys, but that's
6066 // handled inside the tree sink. Other sink types may be
6067 // able to handle it.
6068 _c4dbgp("mapflow[QMRK]: start child seqflow (!)");
     // RKCL is set before the child container begins -- presumably so
     // that this map resumes at the ':' once the child ends (confirm
     // against the handler's state push/pop)
6069 addrem_flags(RKCL, QMRK);
6070 _handle_annotations_before_blck_key_scalar();
6071 m_evt_handler->begin_seq_key_flow();
     // flags for the child: a flow seq expecting its first value
6072 addrem_flags(RSEQ|RVAL, RMAP|RKCL);
6073 _set_indentation(m_evt_handler->m_parent->indref);
6074 _line_progressed(1);
     // no longer RMAP: leave this function
6075 goto mapflow_finish;
6076 }
6077 else if(first == '{')
6078 {
6079 // RYML's tree cannot store container keys, but that's
6080 // handled inside the tree sink. Other sink types may be
6081 // able to handle it.
6082 _c4dbgp("mapflow[QMRK]: start child mapflow (!)");
6083 addrem_flags(RKCL, QMRK);
6084 _handle_annotations_before_blck_key_scalar();
6085 m_evt_handler->begin_map_key_flow();
6086 _set_indentation(m_evt_handler->m_parent->indref);
     // flags for the child: a flow map expecting its first key
6087 addrem_flags(RKEY, RKCL);
6088 _line_progressed(1);
6089 // keep going in this function
6090 }
6091 else if(first == '!')
6092 {
6093 csubstr tag = _scan_tag();
6094 _c4dbgpf("mapflow[QMRK]: tag! {}", prs_(tag));
6095 _add_annotation(&m_pending_tags, tag);
6096 }
6097 else
6098 {
6099 _c4err("parse error"); // LCOV_EXCL_LINE
6100 }
6101 }
6102
6103 mapflow_again:
     // loop tail: when the current line is used up, load the next one;
     // running out of input while the map is still open is an error
6104 _c4dbgt("mapflow: go again", 0);
6105 if(_finished_line())
6106 {
6107 if C4_LIKELY(!_finished_file())
6108 {
6109 _line_ended();
6110 _scan_line();
6112 }
6113 else
6114 {
6115 _c4err("missing terminating }");
6116 }
6117 }
6118 goto mapflow_start;
6119
6120 mapflow_finish:
     // reached when the map was closed with '}' or a child flow seq was
     // opened with '['
6121 _c4dbgp("mapflow: finish");
6122}
6123
6124
6125//-----------------------------------------------------------------------------
6126
6127template<class EventHandler>
6128void ParseEngine<EventHandler>::_handle_seq_block()
6129{
6130seqblck_start:
6131 _c4dbgpf("handle_seq_block: seq_id={} node_id={} level={} indent={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
6132
6133 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_all(RSEQ), m_evt_handler->m_curr->pos);
6134 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_all(RBLCK), m_evt_handler->m_curr->pos);
6135 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT), m_evt_handler->m_curr->pos);
6136 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RVAL) + has_any(RNXT)), m_evt_handler->m_curr->pos);
6137
6138 _maybe_skip_comment_strict();
6139 if(!m_evt_handler->m_curr->line_contents.rem.len)
6140 goto seqblck_again;
6141
6142 if(has_any(RVAL))
6143 {
6144 _c4dbgpf("seqblck[RVAL]: col={}", m_evt_handler->m_curr->pos.col);
6145 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
6146 if(m_evt_handler->m_curr->at_line_beginning())
6147 {
6148 _c4dbgpf("seqblck[RVAL]: indref={} indentation={}", m_evt_handler->m_curr->indref+1, m_evt_handler->m_curr->line_contents.indentation);
6149 if(m_evt_handler->m_curr->indentation_ge_extra())
6150 {
6151 _c4dbgpf("seqblck[RVAL]: skip {} from indentation", m_evt_handler->m_curr->line_contents.indentation);
6152 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6153 if(!m_evt_handler->m_curr->line_contents.rem.len)
6154 goto seqblck_again;
6155 }
6156 else if(m_evt_handler->m_curr->indentation_lt_extra())
6157 {
6158 _c4dbgp("seqblck[RVAL]: smaller indentation than RVAL!");
6159 if(m_evt_handler->m_curr->indentation_eq())
6160 {
6161 _c4dbgp("seqblck[RVAL]: smaller indentation than RVAL!");
6162 _handle_annotations_before_blck_val_scalar();
6163 m_evt_handler->set_val_scalar_plain_empty();
6164 addrem_flags(RNXT, RVAL);
6165 goto seqblck_again;
6166 }
6167 else
6168 {
6169 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_lt(), m_evt_handler->m_curr->pos);
6170 _c4dbgp("seqblck[RVAL]: smaller indentation!");
6171 _handle_indentation_pop_from_block_seq();
6172 goto seqblck_finish;
6173 }
6174 }
6175 else if(m_evt_handler->m_curr->line_contents.indentation == npos)
6176 {
6177 _c4dbgp("seqblck[RVAL]: empty line!");
6178 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
6179 goto seqblck_again;
6180 }
6181 }
6182 RYML_ASSERT_PARSE_CB_(callbacks(), m_evt_handler->m_curr->line_contents.rem.len, m_evt_handler->m_curr->pos);
6183 const size_t startmark = _handle_block_skip_leading_whitespace();
6184 _c4dbgpf("seqblck[RVAL]: startmark={}", startmark);
6185 if(startmark == npos)
6186 {
6187 _c4dbgp("seqblck[RVAL]: whitespace only");
6188 goto seqblck_again;
6189 }
6190 const size_t tabmark = _handle_block_get_whitespace_mark();
6191 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
6192 _c4dbgpf("seqblck[RVAL]: first='{}' currcol={}", first, m_evt_handler->m_curr->pos.col - 1);
6193 const size_t startline = m_evt_handler->m_curr->pos.line;
6194 _c4assert(m_evt_handler->m_curr->line_contents.current_col() >= m_bom_len);
6195 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col() - m_bom_len;
6196 ScannedScalar sc;
6197 if(first == '\'')
6198 {
6199 _c4dbgp("seqblck[RVAL]: single-quoted scalar");
6200 sc = _scan_scalar_squot();
6201 if(!_maybe_scan_following_colon())
6202 {
6203 _c4dbgp("seqblck[RVAL]: set as val");
6204 _handle_annotations_before_blck_val_scalar();
6205 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc); // VAL!
6206 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
6207 addrem_flags(RNXT, RVAL);
6208 }
6209 else
6210 {
6211 _c4dbgp("seqblck[RVAL]: start mapblck, set scalar as key");
6212 _handle_block_check_leading_tabs(startmark);
6213 addrem_flags(RNXT, RVAL);
6214 _handle_annotations_before_start_mapblck(startline);
6215 _handle_colon();
6216 m_evt_handler->begin_map_val_block();
6217 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6218 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY!
6219 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6220 addrem_flags(RMAP|RVAL, RSEQ|RNXT);
6221 _maybe_skip_whitespace_tokens();
6222 goto seqblck_finish;
6223 }
6224 }
6225 else if(first == '"')
6226 {
6227 _c4dbgp("seqblck[RVAL]: double-quoted scalar");
6228 sc = _scan_scalar_dquot();
6229 if(!_maybe_scan_following_colon())
6230 {
6231 _c4dbgp("seqblck[RVAL]: set as val");
6232 _handle_annotations_before_blck_val_scalar();
6233 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc); // VAL!
6234 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
6235 addrem_flags(RNXT, RVAL);
6236 }
6237 else
6238 {
6239 _c4dbgp("seqblck[RVAL]: start mapblck, set scalar as key");
6240 addrem_flags(RNXT, RVAL);
6241 _handle_block_check_leading_tabs(startmark);
6242 _handle_annotations_before_start_mapblck(startline);
6243 _handle_colon();
6244 m_evt_handler->begin_map_val_block();
6245 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6246 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY!
6247 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6248 addrem_flags(RMAP|RVAL, RSEQ|RNXT);
6249 _maybe_skip_whitespace_tokens();
6250 goto seqblck_finish;
6251 }
6252 }
6253 // block scalars can only appear as keys when in QMRK scope
6254 // (ie, after ? tokens), so no need to scan following colon in
6255 // here.
6256 else if(first == '|')
6257 {
6258 _c4dbgp("seqblck[RVAL]: block-literal scalar");
6259 ScannedBlock sb;
6260 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6261 _handle_annotations_before_blck_val_scalar();
6262 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
6263 m_evt_handler->set_val_scalar_literal(maybe_filtered);
6264 addrem_flags(RNXT, RVAL);
6265 }
6266 else if(first == '>')
6267 {
6268 _c4dbgp("seqblck[RVAL]: block-folded scalar");
6269 ScannedBlock sb;
6270 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6271 _handle_annotations_before_blck_val_scalar();
6272 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
6273 m_evt_handler->set_val_scalar_folded(maybe_filtered);
6274 addrem_flags(RNXT, RVAL);
6275 }
6276 else if(_scan_scalar_plain_seq_blck(&sc))
6277 {
6278 _c4dbgp("seqblck[RVAL]: plain scalar.");
6279 if(!_maybe_scan_following_colon())
6280 {
6281 _c4dbgp("seqblck[RVAL]: set as val");
6282 _handle_annotations_before_blck_val_scalar();
6283 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref); // VAL!
6284 m_evt_handler->set_val_scalar_plain(maybe_filtered);
6285 addrem_flags(RNXT, RVAL);
6286 }
6287 else
6288 {
6289 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref != npos, m_evt_handler->m_curr->pos);
6290 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, startindent > m_evt_handler->m_curr->indref, m_evt_handler->m_curr->pos);
6291 _c4dbgp("seqblck[RVAL]: start mapblck, set scalar as key");
6292 _handle_block_check_leading_tabs(startmark, tabmark);
6293 addrem_flags(RNXT, RVAL);
6294 _handle_annotations_before_start_mapblck(startline);
6295 _handle_colon();
6296 m_evt_handler->begin_map_val_block();
6297 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6298 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY!
6299 m_evt_handler->set_key_scalar_plain(maybe_filtered);
6300 addrem_flags(RMAP|RVAL, RSEQ|RNXT);
6301 _maybe_skip_whitespace_tokens();
6302 goto seqblck_finish;
6303 }
6304 }
6305 else if(first == '[')
6306 {
6307 _c4dbgp("seqblck[RVAL]: start child seqflow");
6308 addrem_flags(RNXT, RVAL);
6309 _handle_annotations_before_blck_val_scalar();
6310 m_evt_handler->begin_seq_val_flow();
6311 addrem_flags(RFLOW|RVAL, RBLCK|RNXT);
6312 _line_progressed(1);
6313 _set_indentation(m_evt_handler->m_parent->indref + 1u);
6314 goto seqblck_finish;
6315 }
6316 else if(first == '{')
6317 {
6318 _c4dbgp("seqblck[RVAL]: start child mapflow");
6319 addrem_flags(RNXT, RVAL);
6320 _handle_annotations_before_blck_val_scalar();
6321 m_evt_handler->begin_map_val_flow();
6322 addrem_flags(RMAP|RKEY|RFLOW, RBLCK|RSEQ|RVAL|RNXT);
6323 _line_progressed(1);
6324 _set_indentation(m_evt_handler->m_parent->indref + 1u);
6325 goto seqblck_finish;
6326 }
6327 else if(first == '-')
6328 {
6329 _c4dbgp("seqblck[RVAL]: dash");
6330 _handle_block_check_leading_tabs(startmark);
6331 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref != npos, m_evt_handler->m_curr->pos);
6332 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, startindent > m_evt_handler->m_curr->indref, m_evt_handler->m_curr->pos);
6333 _c4dbgp("seqblck[RVAL]: start child seqblck");
6334 RYML_ASSERT_PARSE_CB_(this->callbacks(), startindent > m_evt_handler->m_curr->indref, m_evt_handler->m_curr->pos);
6335 addrem_flags(RNXT, RVAL);
6336 _handle_annotations_before_blck_val_scalar();
6337 m_evt_handler->begin_seq_val_block();
6338 addrem_flags(RVAL, RNXT);
6339 _set_indentation(startindent);
6340 // keep going on inside this function
6341 _line_progressed(1);
6342 }
6343 else if(first == ':')
6344 {
6345 _c4dbgp("seqblck[RVAL]: start child mapblck with empty key");
6346 addrem_flags(RNXT, RVAL);
6347 _handle_annotations_before_start_mapblck(startline);
6348 _handle_colon();
6349 m_evt_handler->begin_map_val_block();
6350 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6351 m_evt_handler->set_key_scalar_plain_empty();
6352 addrem_flags(RMAP|RVAL, RSEQ|RNXT);
6353 _line_progressed(1);
6354 _maybe_skip_whitespace_tokens();
6355 goto seqblck_finish;
6356 }
6357 else if(first == '&')
6358 {
6359 const csubstr anchor = _scan_anchor();
6360 _c4dbgpf("seqblck[RVAL]: anchor! {}", prs_(anchor));
6361 // we need to buffer the anchors, as there may be two
6362 // consecutive anchors in here
6363 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
6364 }
6365 else if(first == '*')
6366 {
6367 csubstr ref = _scan_ref_seq();
6368 _c4dbgpf("seqblck[RVAL]: ref! {}", prs_(ref));
6369 if(!_maybe_scan_following_colon())
6370 {
6371 _c4dbgp("seqblck[RVAL]: set ref as val!");
6372 _handle_valref(ref);
6373 addrem_flags(RNXT, RVAL);
6374 }
6375 else
6376 {
6377 _c4dbgp("seqblck[RVAL]: ref is key of map");
6378 addrem_flags(RNXT, RVAL);
6379 _handle_annotations_before_start_mapblck(startline);
6380 m_evt_handler->begin_map_val_block();
6381 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6382 _handle_keyref(ref);
6383 addrem_flags(RMAP|RVAL, RSEQ|RNXT);
6384 _set_indentation(startindent);
6385 _maybe_skip_whitespace_tokens();
6386 goto seqblck_finish;
6387 }
6388 }
6389 else if(first == '!')
6390 {
6391 csubstr tag = _scan_tag();
6392 _c4dbgpf("seqblck[RVAL]: val tag! {}", prs_(tag));
6393 // we need to buffer the tags, as there may be two
6394 // consecutive tags in here
6395 _add_annotation(&m_pending_tags, tag, startindent, startline);
6396 }
6397 else if(first == '?')
6398 {
6399 _c4dbgp("seqblck[RVAL]: start child mapblck, explicit key");
6400 addrem_flags(RNXT, RVAL);
6401 m_evt_handler->begin_map_val_block();
6402 addrem_flags(RMAP|QMRK, RSEQ|RNXT);
6403 _set_indentation(startindent);
6404 _line_progressed(1);
6405 _maybe_skipchars(' ');
6406 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
6407 {
6408 _c4dbgp("seqblck[RVAL]: seqblck starts after ?");
6409 addrem_flags(RKCL, QMRK);
6410 m_evt_handler->begin_seq_key_block();
6411 addrem_flags(RSEQ|RVAL, RMAP|RKCL);
6412 _save_indentation();
6413 _line_progressed(1);
6414 _maybe_skipchars(' ');
6415 }
6416 goto seqblck_finish;
6417 }
6418 else
6419 {
6420 _c4err("parse error");
6421 }
6422 }
6423 else // RNXT
6424 {
6425 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(RNXT), m_evt_handler->m_curr->pos);
6426 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
6427 //
6428 // handle indentation
6429 //
6430 _c4dbgpf("seqblck[RNXT]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
6431 if C4_LIKELY(m_evt_handler->m_curr->at_line_beginning())
6432 {
6433 _c4dbgp("seqblck[RNXT]: at line begin");
6434 if(m_evt_handler->m_curr->indentation_ge())
6435 {
6436 _c4dbgpf("seqblck[RNXT]: skip {} from indref", m_evt_handler->m_curr->indref);
6437 _line_progressed(m_evt_handler->m_curr->indref);
6438 if(!m_evt_handler->m_curr->line_contents.rem.len)
6439 goto seqblck_again;
6440 }
6441 else if(m_evt_handler->m_curr->indentation_lt())
6442 {
6443 _c4dbgp("seqblck[RNXT]: smaller indentation!");
6444 _handle_indentation_pop_from_block_seq();
6445 if(has_all(RSEQ|RBLCK))
6446 {
6447 _c4dbgp("seqblck[RNXT]: still seqblck!");
6448 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(RNXT), m_evt_handler->m_curr->pos);
6449 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6450 if(!m_evt_handler->m_curr->line_contents.rem.len)
6451 goto seqblck_again; // LCOV_EXCL_LINE
6452 }
6453 else
6454 {
6455 _c4dbgp("seqblck[RNXT]: no longer seqblck!");
6456 goto seqblck_finish;
6457 }
6458 }
6459 else if(m_evt_handler->m_curr->line_contents.indentation == npos)
6460 {
6461 _c4dbgpf("seqblck[RNXT]: blank line, len={}", m_evt_handler->m_curr->line_contents.rem);
6462 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
6463 if(!m_evt_handler->m_curr->line_contents.rem.len)
6464 goto seqblck_again; // LCOV_EXCL_LINE
6465 }
6466 }
6467 else
6468 {
6469 _c4dbgp("seqblck[RNXT]: NOT at line begin");
6470 if(!m_evt_handler->m_curr->line_contents.rem.begins_with_any(" \t"))
6471 {
6472 _c4err("parse error");
6473 }
6474 else
6475 {
6476 _skipchars(" \t");
6477 if(!m_evt_handler->m_curr->line_contents.rem.len)
6478 {
6479 _c4dbgp("seqblck[RNXT]: again");
6480 goto seqblck_again; // LCOV_EXCL_LINE
6481 }
6482 }
6483 }
6484 //
6485 // now handle the tokens
6486 //
6487 _c4assert(m_evt_handler->m_curr->line_contents.rem.len > 0);
6488 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
6489 _c4dbgpf("seqblck[RNXT]: '{}' node_id={}", _c4prc(first), m_evt_handler->m_curr->node_id);
6490 if(first == '-')
6491 {
6492 if(m_evt_handler->m_curr->indref > 0
6493 || m_evt_handler->m_curr->line_contents.indentation > 0
6494 || !_is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem))
6495 {
6496 if C4_LIKELY(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
6497 {
6498 _c4dbgp("seqblck[RNXT]: expect next val");
6499 addrem_flags(RVAL, RNXT);
6500 m_evt_handler->add_sibling();
6501 _line_progressed(1);
6502 }
6503 else
6504 {
6505 _c4err("parse error");
6506 }
6507 }
6508 else
6509 {
6510 _c4dbgp("seqblck[RNXT]: start doc");
6511 _start_doc_suddenly();
6512 _line_progressed(3);
6513 _maybe_skip_whitespace_tokens();
6514 goto seqblck_finish;
6515 }
6516 }
6517 else if(first == ':')
6518 {
6519 // This happens for example in `- [a: b]: c` (after
6520 // terminating the seq, ie, after `]`). All other cases
6521 // (ie colon after scalars) are caught elsewhere (ie, in
6522 // RVAL state).
6523 if C4_LIKELY(m_evt_handler->m_parent && (m_evt_handler->m_parent->flags & RMAP))
6524 {
6525 _c4dbgp("seqblck[RNXT]: actually this seq was '?' key of parent map");
6526 m_evt_handler->end_seq_block();
6527 goto seqblck_finish;
6528 }
6529 else
6530 {
6531 _c4err("parse error");
6532 }
6533 }
6534 else if(first == '.')
6535 {
6536 _c4dbgp("seqblck[RNXT]: maybe doc?");
6537 if(_is_doc_end_token(m_evt_handler->m_curr->line_contents.rem))
6538 {
6539 _c4dbgp("seqblck[RNXT]: end doc");
6540 _end_doc_suddenly();
6541 _line_progressed(3);
6542 _maybe_skip_whitespace_tokens();
6543 _check_doc_end_tokens();
6544 goto seqblck_finish;
6545 }
6546 else
6547 {
6548 _c4err("parse error");
6549 }
6550 }
6551 else
6552 {
6553 // may be an indentless sequence nested in a map...
6554 #ifdef RYML_DBG
6555 _print_state_stack();
6556 #endif
6557 if(m_evt_handler->m_parent
6558 && has_all(RMAP|RBLCK, m_evt_handler->m_parent)
6559 && m_evt_handler->m_curr->indref == m_evt_handler->m_parent->indref)
6560 {
6561 _c4dbgpf("seqblck[RNXT]: end indentless seq, go to parent={}. node={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id);
6562 RYML_ASSERT_PARSE_CB_(this->callbacks(), m_evt_handler->m_curr != m_evt_handler->m_parent, m_evt_handler->m_curr->pos);
6563 _handle_indentation_pop(m_evt_handler->m_parent);
6564 RYML_ASSERT_PARSE_CB_(this->callbacks(), has_all(RMAP|RBLCK), m_evt_handler->m_curr->pos);
6565 m_evt_handler->add_sibling();
6566 addrem_flags(RKEY, RNXT);
6567 goto seqblck_finish;
6568 }
6569 else if(first == '\t')
6570 {
6571 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of('\t');
6572 if(pos == npos)
6573 {
6574 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
6575 goto seqblck_again;
6576 }
6577 }
6578 _c4err("parse error");
6579 }
6580 }
6581
6582 seqblck_again:
6583 _c4dbgt("seqblck: go again", 0);
6584 if(_finished_line())
6585 {
6586 m_bom_len = 0;
6587 _line_ended();
6588 _scan_line();
6589 if(_finished_file())
6590 {
6591 _c4dbgp("seqblck: finish!");
6592 _end_seq_blck();
6593 goto seqblck_finish;
6594 }
6596 }
6597 goto seqblck_start;
6598
6599 seqblck_finish:
6600 _c4dbgp("seqblck: finish");
6601}
6602
6603
6604//-----------------------------------------------------------------------------
6605
6606template<class EventHandler>
6607void ParseEngine<EventHandler>::_handle_map_block()
6608{
6609mapblck_start:
6610 _c4dbgpf("handle_map_block: map_id={} node_id={} level={} indref={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
6611
6612 // states: RKEY -> RVAL -> RNXT
6613 // states: QMRK -> RKCL -> RVAL -> RNXT
6614 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_all(RMAP), m_evt_handler->m_curr->pos);
6615 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_all(RBLCK), m_evt_handler->m_curr->pos);
6616 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RKCL|RVAL|RNXT|QMRK), m_evt_handler->m_curr->pos);
6617 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RKEY) + has_any(RKCL) + has_any(RVAL) + has_any(RNXT) + has_any(QMRK)), m_evt_handler->m_curr->pos);
6618
6619 _maybe_skip_comment();
6620 if(!m_evt_handler->m_curr->line_contents.rem.len)
6621 goto mapblck_again;
6622
6623 if(has_any(RKEY))
6624 {
6625 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
6626 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
6627 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
6628 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
6629 //
6630 // handle indentation
6631 //
6632 if(m_evt_handler->m_curr->at_line_beginning())
6633 {
6634 if(m_evt_handler->m_curr->indentation_eq())
6635 {
6636 _c4dbgpf("mapblck[RKEY]: skip {} from indref", m_evt_handler->m_curr->indref);
6637 _line_progressed(m_evt_handler->m_curr->indref);
6638 if(!m_evt_handler->m_curr->line_contents.rem.len)
6639 goto mapblck_again;
6640 }
6641 else if(m_evt_handler->m_curr->indentation_lt())
6642 {
6643 _c4dbgp("mapblck[RKEY]: smaller indentation!");
6644 _handle_indentation_pop_from_block_map();
6645 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6646 if(has_all(RMAP|RBLCK))
6647 {
6648 _c4dbgp("mapblck[RKEY]: still mapblck!");
6649 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_any(RKEY), m_evt_handler->m_curr->pos);
6650 if(!m_evt_handler->m_curr->line_contents.rem.len)
6651 goto mapblck_again;
6652 }
6653 else
6654 {
6655 _c4dbgp("mapblck[RKEY]: no longer mapblck!");
6656 goto mapblck_finish;
6657 }
6658 }
6659 else
6660 {
6661 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_gt(), m_evt_handler->m_curr->pos);
6662 _c4err("invalid indentation");
6663 }
6664 }
6665 //
6666 // now handle the tokens
6667 //
6668 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
6669 const size_t startline = m_evt_handler->m_curr->pos.line;
6670 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
6671 _c4dbgpf("mapblck[RKEY]: '{}'", _c4prc(first));
6672 ScannedScalar sc;
6673 if(first == '\'')
6674 {
6675 _c4dbgp("mapblck[RKEY]: scanning single-quoted scalar");
6676 sc = _scan_scalar_squot();
6677 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
6678 _handle_annotations_before_blck_key_scalar();
6679 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6680 addrem_flags(RVAL, RKEY);
6681 if(!_maybe_scan_following_colon())
6682 _c4err("could not find ':' colon after key");
6683 _handle_colon();
6684 _maybe_skip_whitespace_tokens();
6685 }
6686 else if(first == '"')
6687 {
6688 _c4dbgp("mapblck[RKEY]: scanning double-quoted scalar");
6689 sc = _scan_scalar_dquot();
6690 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
6691 _handle_annotations_before_blck_key_scalar();
6692 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6693 addrem_flags(RVAL, RKEY);
6694 if(!_maybe_scan_following_colon())
6695 _c4err("could not find ':' colon after key");
6696 _handle_colon();
6697 _maybe_skip_whitespace_tokens();
6698 }
6699 // block scalars (| and >) can not be used as keys unless they
6700 // appear in an explicit QMRK scope (ie, after the ? token),
6701 else if C4_UNLIKELY(first == '|')
6702 {
6703 _c4err("block map: literal keys must be enclosed in '?'");
6704 }
6705 else if C4_UNLIKELY(first == '>')
6706 {
6707 _c4err("block map: folded keys must be enclosed in '?'");
6708 }
6709 else if(_scan_scalar_plain_map_blck(&sc))
6710 {
6711 _c4dbgp("mapblck[RKEY]: plain scalar");
6712 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
6713 _handle_annotations_before_blck_key_scalar();
6714 m_evt_handler->set_key_scalar_plain(maybe_filtered);
6715 addrem_flags(RVAL, RKEY);
6716 if(!_maybe_scan_following_colon())
6717 _c4err("could not find ':' colon after key");
6718 _handle_colon();
6719 _maybe_skip_whitespace_tokens();
6720 }
6721 else if(first == '?')
6722 {
6723 _c4dbgp("mapblck[RKEY]: key token!");
6724 addrem_flags(QMRK, RKEY);
6725 _line_progressed(1);
6726 _maybe_skipchars(' ');
6727 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
6728 {
6729 _c4dbgp("mapblck[RKEY]: seqblck starts after ?");
6730 addrem_flags(RKCL, QMRK);
6731 m_evt_handler->begin_seq_key_block();
6732 addrem_flags(RSEQ|RVAL, RMAP|RKCL);
6733 _save_indentation();
6734 _line_progressed(1);
6735 _maybe_skipchars(' ');
6736 goto mapblck_finish;
6737 }
6738 goto mapblck_again;
6739 }
6740 else if(first == ':')
6741 {
6742 _c4dbgp("mapblck[RKEY]: setting empty key");
6743 _handle_annotations_before_blck_key_scalar();
6744 m_evt_handler->set_key_scalar_plain_empty();
6745 addrem_flags(RVAL, RKEY);
6746 _line_progressed(1);
6747 _handle_colon();
6748 _maybe_skip_whitespace_tokens();
6749 }
6750 else if(first == '*')
6751 {
6752 csubstr ref = _scan_ref_map();
6753 _c4dbgpf("mapblck[RKEY]: key ref! {}", prs_(ref));
6754 _handle_keyref(ref);
6755 addrem_flags(RVAL, RKEY);
6756 if(!_maybe_scan_following_colon())
6757 _c4err("could not find ':' colon after key");
6758 _handle_colon();
6759 _maybe_skip_whitespace_tokens();
6760 }
6761 else if(first == '&')
6762 {
6763 csubstr anchor = _scan_anchor();
6764 _c4dbgpf("mapblck[RKEY]: key anchor! {}", prs_(anchor));
6765 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
6766 }
6767 else if(first == '!')
6768 {
6769 csubstr tag = _scan_tag();
6770 _c4dbgpf("mapblck[RKEY]: key tag! {}", prs_(tag));
6771 _add_annotation(&m_pending_tags, tag, startindent, startline);
6772 }
6773 else if(first == '[')
6774 {
6775 // RYML's tree cannot store container keys, but that's
6776 // handled inside the tree handler. Other handlers may be
6777 // able to handle it.
6778 _c4dbgp("mapblck[RKEY]: start child seqflow (!)");
6779 _handle_annotations_before_blck_key_scalar();
6780 m_evt_handler->begin_seq_key_flow();
6781 addrem_flags(RSEQ|RFLOW|RVAL, RKEY|RMAP|RBLCK);
6782 _line_progressed(1);
6783 _set_indentation(startindent);
6784 goto mapblck_finish;
6785 }
6786 else if(first == '{')
6787 {
6788 // RYML's tree cannot store container keys, but that's
6789 // handled inside the tree handler. Other handlers may be
6790 // able to handle it.
6791 _c4dbgp("mapblck[RKEY]: start child mapflow (!)");
6792 _handle_annotations_before_blck_key_scalar();
6793 m_evt_handler->begin_map_key_flow();
6794 addrem_flags(RFLOW|RKEY, RBLCK);
6795 _line_progressed(1);
6796 _set_indentation(startindent);
6797 goto mapblck_finish;
6798 }
6799 else if(first == '-')
6800 {
6801 _c4dbgp("mapblck[RKEY]: maybe doc?");
6802 if(m_evt_handler->m_curr->line_contents.indentation == 0 && _is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem))
6803 {
6804 _c4dbgp("mapblck[RKEY]: end+start doc");
6805 _start_doc_suddenly();
6806 _line_progressed(3);
6807 _maybe_skip_whitespace_tokens();
6808 goto mapblck_finish;
6809 }
6810 else
6811 {
6812 _c4err("parse error");
6813 }
6814 }
6815 else if(first == '.')
6816 {
6817 _c4dbgp("mapblck[RKEY]: maybe end doc?");
6818 if(m_evt_handler->m_curr->line_contents.indentation == 0 && _is_doc_end_token(m_evt_handler->m_curr->line_contents.rem))
6819 {
6820 _c4dbgp("mapblck[RKEY]: end doc");
6821 _end_doc_suddenly();
6822 _line_progressed(3);
6823 _maybe_skip_whitespace_tokens();
6824 _check_doc_end_tokens();
6825 goto mapblck_finish;
6826 }
6827 else
6828 {
6829 _c4err("parse error"); // LCOV_EXCL_LINE
6830 }
6831 }
6832 else
6833 {
6834 _c4err("parse error");
6835 }
6836 }
6837 else if(has_any(RVAL))
6838 {
6839 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
6840 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
6841 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
6842 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
6843 //
6844 // handle indentation
6845 //
6846 if(m_evt_handler->m_curr->at_line_beginning())
6847 {
6848 _c4dbgpf("mapblck[RVAL]: indref={} indentation={}", m_evt_handler->m_curr->indref+1, m_evt_handler->m_curr->line_contents.indentation);
6849 m_evt_handler->m_curr->more_indented = false;
6850 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref != npos, m_evt_handler->m_curr->pos);
6851 if(m_evt_handler->m_curr->indentation_eq_extra())
6852 {
6853 _c4dbgp("mapblck[RVAL]: skip indentation!");
6854 _line_progressed(m_evt_handler->m_curr->indref + 1);
6855 if(!m_evt_handler->m_curr->line_contents.rem.len)
6856 goto mapblck_again;
6857 }
6858 else if(m_evt_handler->m_curr->indentation_gt_extra())
6859 {
6860 _c4dbgp("mapblck[RVAL]: more indented!");
6861 m_evt_handler->m_curr->more_indented = true;
6862 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6863 if(!m_evt_handler->m_curr->line_contents.rem.len)
6864 goto mapblck_again; // LCOV_EXCL_LINE
6865 }
6866 else if(m_evt_handler->m_curr->indentation_lt_extra())
6867 {
6868 if(m_evt_handler->m_curr->indentation_eq())
6869 {
6870 _c4dbgp("mapblck[RVAL]: smaller indentation than RVAL!");
6871 // watchout for indentless seqs
6872 if(!_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem.sub(m_evt_handler->m_curr->line_contents.indentation)))
6873 {
6874 _c4dbgp("mapblck[RVAL]: smaller indentation than RVAL!");
6875 _handle_annotations_before_blck_val_scalar();
6876 m_evt_handler->set_val_scalar_plain_empty();
6877 addrem_flags(RNXT, RVAL);
6878 goto mapblck_again;
6879 }
6880 }
6881 else
6882 {
6883 _c4dbgp("mapblck[RVAL]: smaller indentation than RKEY!");
6884 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_lt(), m_evt_handler->m_curr->pos);
6885 _handle_indentation_pop_from_block_map();
6886 if(has_all(RMAP|RBLCK))
6887 {
6888 _c4dbgp("mapblck[RVAL]: still mapblck!");
6889 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6890 if(has_any(RNXT))
6891 {
6892 _c4dbgp("mapblck[RVAL]: speculatively expect next keyval");
6893 m_evt_handler->add_sibling();
6894 addrem_flags(RKEY, RNXT);
6895 }
6896 goto mapblck_again;
6897 }
6898 else
6899 {
6900 _c4dbgp("mapblck[RVAL]: no longer mapblck!");
6901 goto mapblck_finish;
6902 }
6903 }
6904 }
6905 }
6906 const size_t startcol = _handle_block_skip_leading_whitespace();
6907 if(startcol == npos)
6908 {
6909 _c4dbgp("mapblck[RVAL]: whitespace only");
6910 goto mapblck_again; // LCOV_EXCL_LINE
6911 }
6912 const size_t tabmark = _handle_block_get_whitespace_mark();
6913 //
6914 // now handle the tokens
6915 //
6916 _c4assert(m_evt_handler->m_curr->line_contents.rem.len);
6917 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
6918 const size_t startline = m_evt_handler->m_curr->pos.line;
6919 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
6920 _c4dbgpf("mapblck[RVAL]: '{}'", _c4prc(first));
6921 ScannedScalar sc;
6922 if(first == '\'')
6923 {
6924 _c4dbgp("mapblck[RVAL]: scanning single-quoted scalar");
6925 sc = _scan_scalar_squot();
6926 if(!_maybe_scan_following_colon())
6927 {
6928 _c4dbgp("mapblck[RVAL]: set as val");
6929 _handle_annotations_before_blck_val_scalar();
6930 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc); // VAL!
6931 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
6932 addrem_flags(RNXT, RVAL);
6933 }
6934 else
6935 {
6936 _c4assert(m_evt_handler->m_curr->indref != npos);
6937 _c4assert(startindent > m_evt_handler->m_curr->indref);
6938 _c4dbgp("mapblck[RVAL]: start new block map, set scalar as key");
6939 _handle_block_check_leading_tabs(startcol);
6940 _handle_annotations_before_start_mapblck(startline);
6941 addrem_flags(RNXT, RVAL);
6942 _handle_colon();
6943 m_evt_handler->begin_map_val_block();
6944 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6945 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY!
6946 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6947 _maybe_skip_whitespace_tokens();
6948 // keep the child state on RVAL
6949 addrem_flags(RVAL, RNXT);
6950 }
6951 }
6952 else if(first == '"')
6953 {
6954 _c4dbgp("mapblck[RVAL]: scanning double-quoted scalar");
6955 sc = _scan_scalar_dquot();
6956 if(!_maybe_scan_following_colon())
6957 {
6958 _c4dbgp("mapblck[RVAL]: set as val");
6959 _handle_annotations_before_blck_val_scalar();
6960 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc); // VAL!
6961 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
6962 addrem_flags(RNXT, RVAL);
6963 }
6964 else
6965 {
6966 _c4assert(m_evt_handler->m_curr->indref != npos);
6967 _c4assert(startindent > m_evt_handler->m_curr->indref);
6968 _c4dbgp("mapblck[RVAL]: start new block map, set scalar as key");
6969 _handle_block_check_leading_tabs(startcol);
6970 _handle_annotations_before_start_mapblck(startline);
6971 addrem_flags(RNXT, RVAL);
6972 _handle_colon();
6973 m_evt_handler->begin_map_val_block();
6974 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6975 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY!
6976 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6977 _maybe_skip_whitespace_tokens();
6978 // keep the child state on RVAL
6979 addrem_flags(RVAL, RNXT);
6980 }
6981 }
6982 // block scalars can only appear as keys when in QMRK scope
6983 // (ie, after ? tokens), so no need to scan following colon
6984 else if(first == '|')
6985 {
6986 _c4dbgp("mapblck[RVAL]: scanning block-literal scalar");
6987 ScannedBlock sb;
6988 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6989 _handle_annotations_before_blck_val_scalar();
6990 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
6991 m_evt_handler->set_val_scalar_literal(maybe_filtered);
6992 addrem_flags(RNXT, RVAL);
6993 }
6994 else if(first == '>')
6995 {
6996 _c4dbgp("mapblck[RVAL]: scanning block-folded scalar");
6997 ScannedBlock sb;
6998 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6999 _handle_annotations_before_blck_val_scalar();
7000 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
7001 m_evt_handler->set_val_scalar_folded(maybe_filtered);
7002 addrem_flags(RNXT, RVAL);
7003 }
7004 else if(_scan_scalar_plain_map_blck(&sc))
7005 {
7006 _c4dbgp("mapblck[RVAL]: plain scalar.");
7007 if(!_maybe_scan_following_colon())
7008 {
7009 _c4dbgp("mapblck[RVAL]: set as val");
7010 _handle_annotations_before_blck_val_scalar();
7011 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref); // VAL!
7012 m_evt_handler->set_val_scalar_plain(maybe_filtered);
7013 addrem_flags(RNXT, RVAL);
7014 }
7015 else
7016 {
7017 _c4assert(m_evt_handler->m_curr->indref != npos);
7018 _c4assert(startindent > m_evt_handler->m_curr->indref);
7019 _c4dbgpf("mapblck[RVAL]: start new block map, set scalar as key {}", m_evt_handler->m_curr->indref);
7020 _handle_block_check_leading_tabs(startcol, tabmark);
7021 addrem_flags(RNXT, RVAL);
7022 _handle_annotations_before_start_mapblck(startline);
7023 _handle_colon();
7024 m_evt_handler->begin_map_val_block();
7025 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7026 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY!
7027 m_evt_handler->set_key_scalar_plain(maybe_filtered);
7028 _maybe_skip_whitespace_tokens();
7029 // keep the child state on RVAL
7030 addrem_flags(RVAL, RNXT);
7031 }
7032 }
7033 else if(first == '-' && _is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7034 {
7035 if C4_UNLIKELY(!m_evt_handler->m_curr->at_first_token())
7036 _c4err("parse error");
7037 _c4dbgp("mapblck[RVAL]: start val seqblck");
7038 _handle_block_check_leading_tabs(startcol);
7039 addrem_flags(RNXT, RVAL);
7040 _handle_annotations_before_blck_val_scalar();
7041 m_evt_handler->begin_seq_val_block();
7042 addrem_flags(RSEQ|RVAL, RMAP|RNXT);
7043 _set_indentation(startindent);
7044 _line_progressed(1);
7045 _maybe_skip_whitespace_tokens();
7046 goto mapblck_finish;
7047 }
7048 else if(first == '[')
7049 {
7050 _c4dbgp("mapblck[RVAL]: start val seqflow");
7051 addrem_flags(RNXT, RVAL);
7052 _handle_annotations_before_blck_val_scalar();
7053 m_evt_handler->begin_seq_val_flow();
7054 addrem_flags(RSEQ|RFLOW|RVAL, RMAP|RBLCK|RNXT);
7055 _set_indentation(m_evt_handler->m_parent->indref + 1u);
7056 _line_progressed(1);
7057 goto mapblck_finish;
7058 }
7059 else if(first == '{')
7060 {
7061 _c4dbgp("mapblck[RVAL]: start val mapflow");
7062 addrem_flags(RNXT, RVAL);
7063 _handle_annotations_before_blck_val_scalar();
7064 m_evt_handler->begin_map_val_flow();
7065 addrem_flags(RKEY|RFLOW, RBLCK|RVAL|RNXT);
7066 m_evt_handler->m_curr->scalar_col = m_evt_handler->m_curr->line_contents.indentation;
7067 _set_indentation(m_evt_handler->m_parent->indref + 1u);
7068 _line_progressed(1);
7069 goto mapblck_finish;
7070 }
7071 else if(first == '*')
7072 {
7073 csubstr ref = _scan_ref_map();
7074 _c4dbgpf("mapblck[RVAL]: ref! {}", prs_(ref));
7075 if(_maybe_scan_following_colon())
7076 {
7077 _c4dbgp("mapblck[RVAL]: start child map, block");
7078 addrem_flags(RNXT, RVAL);
7079 _handle_annotations_before_blck_val_scalar();
7080 m_evt_handler->begin_map_val_block();
7081 _handle_keyref(ref);
7082 _set_indentation(startindent);
7083 // keep going in RVAL
7084 addrem_flags(RVAL, RNXT);
7085 }
7086 else
7087 {
7088 _c4dbgp("mapblck[RVAL]: was val ref");
7089 _handle_valref(ref);
7090 addrem_flags(RNXT, RVAL);
7091 }
7092 _maybe_skip_whitespace_tokens();
7093 }
7094 else if(first == '&')
7095 {
7096 csubstr anchor = _scan_anchor();
7097 _c4dbgpf("mapblck[RVAL]: anchor! {}", prs_(anchor));
7098 // we need to buffer the anchors, as there may be two
7099 // consecutive anchors in here
7100 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
7101 }
7102 else if(first == '!')
7103 {
7104 csubstr tag = _scan_tag();
7105 _c4dbgpf("mapblck[RVAL]: tag! {}", prs_(tag));
7106 // we need to buffer the tags, as there may be two
7107 // consecutive tags in here
7108 _add_annotation(&m_pending_tags, tag, startindent, startline);
7109 }
7110 else if(first == '?')
7111 {
7112 if C4_UNLIKELY(!m_evt_handler->m_curr->at_first_token())
7113 _c4err("parse error");
7114 _c4dbgp("mapblck[RVAL]: start val mapblck");
7115 addrem_flags(RNXT, RVAL);
7116 _handle_annotations_before_blck_val_scalar();
7117 m_evt_handler->begin_map_val_block();
7118 addrem_flags(QMRK, RNXT);
7119 _set_indentation(startindent);
7120 _line_progressed(1);
7121 _maybe_skipchars(' ');
7122 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7123 {
7124 _c4dbgp("mapblck[RVAL]: seqblck starts after ?");
7125 addrem_flags(RKCL, QMRK);
7126 m_evt_handler->begin_seq_key_block();
7127 addrem_flags(RSEQ|RVAL, RMAP|RKCL);
7128 _save_indentation();
7129 _line_progressed(1);
7130 _maybe_skipchars(' ');
7131 goto mapblck_finish;
7132 }
7133 goto mapblck_again;
7134 }
7135 else if(first == ':')
7136 {
7137 _c4dbgp("mapblck[RVAL]: start val mapblck");
7138 addrem_flags(RNXT, RVAL);
7139 _handle_annotations_before_start_mapblck(startline);
7140 _handle_colon();
7141 m_evt_handler->begin_map_val_block();
7142 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7143 m_evt_handler->set_key_scalar_plain_empty();
7144 // keep the child state on RVAL
7145 addrem_flags(RVAL, RNXT);
7146 _line_progressed(1);
7147 _maybe_skip_whitespace_tokens();
7148 goto mapblck_again;
7149 }
7150 else
7151 {
7152 _c4err("parse error"); // LCOV_EXCL_LINE
7153 }
7154 }
7155 else if(has_any(RNXT))
7156 {
7157 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
7158 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
7159 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
7160 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
7161 //
7162 // handle indentation
7163 //
7164 if(m_evt_handler->m_curr->at_line_beginning())
7165 {
7166 _c4dbgpf("mapblck[RNXT]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
7167 if(m_evt_handler->m_curr->indentation_eq())
7168 {
7169 _c4dbgpf("mapblck[RNXT]: skip {} from indref", m_evt_handler->m_curr->indref);
7170 _line_progressed(m_evt_handler->m_curr->indref);
7171 _c4dbgp("mapblck[RNXT]: speculatively expect next keyval");
7172 m_evt_handler->add_sibling();
7173 addrem_flags(RKEY, RNXT);
7174 goto mapblck_again;
7175 }
7176 else if(m_evt_handler->m_curr->indentation_lt())
7177 {
7178 _c4dbgp("mapblck[RNXT]: smaller indentation!");
7179 _handle_indentation_pop_from_block_map();
7180 if(has_all(RMAP|RBLCK))
7181 {
7182 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
7183 if(!has_any(RKCL))
7184 {
7185 _c4dbgp("mapblck[RNXT]: speculatively expect next keyval");
7186 m_evt_handler->add_sibling();
7187 addrem_flags(RKEY, RNXT);
7188 }
7189 goto mapblck_again;
7190 }
7191 else
7192 {
7193 goto mapblck_finish;
7194 }
7195 }
7196 }
7197 else
7198 {
7199 _c4dbgp("mapblck[RNXT]: NOT at line begin");
7200 if(!m_evt_handler->m_curr->line_contents.rem.begins_with_any(" \t"))
7201 {
7202 _c4err("parse error");
7203 }
7204 else
7205 {
7206 _skipchars(" \t");
7207 if(!m_evt_handler->m_curr->line_contents.rem.len)
7208 {
7209 _c4dbgp("seqblck[RNXT]: again");
7210 goto mapblck_again; // LCOV_EXCL_LINE
7211 }
7212 }
7213 }
7214 //
7215 // handle tokens
7216 //
7217 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.len > 0, m_evt_handler->m_curr->pos);
7218 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
7219 _c4dbgpf("mapblck[RNXT]: '{}'", _c4prc(first));
7220 if(first == ' ')
7221 {
7222 _c4dbgp("mapblck[RNXT]: skip spaces");
7223 _maybe_skip_whitespace_tokens();
7224 }
7225 else
7226 {
7227 _c4err("parse error");
7228 }
7229 }
7230 else if(has_any(QMRK))
7231 {
7232 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
7233 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
7234 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
7235 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
7236 if(_handle_map_block_qmrk())
7237 goto mapblck_again;
7238 else
7239 goto mapblck_finish;
7240 }
7241 else if(has_any(RKCL)) // read the key colon (after QMRK)
7242 {
7243 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
7244 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
7245 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
7246 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
7247 if(_handle_map_block_rkcl())
7248 goto mapblck_again;
7249 else
7250 goto mapblck_finish;
7251 }
7252
7253 mapblck_again:
7254 _c4dbgt("mapblck: again", 0);
7255 if(_finished_line())
7256 {
7257 _line_ended();
7258 _scan_line();
7259 if(_finished_file())
7260 {
7261 _c4dbgp("mapblck: file finished!");
7262 _end_map_blck();
7263 goto mapblck_finish;
7264 }
7266 }
7267 goto mapblck_start;
7268
7269 mapblck_finish:
7270 _c4dbgp("mapblck: finish");
7271}
7272
7273
7274//-----------------------------------------------------------------------------
7275
7276// return true if we should remain in map_block
7277template<class EventHandler>
7278bool ParseEngine<EventHandler>::_handle_map_block_qmrk()
7279{
7280 //
7281 // handle indentation
7282 //
7283 if(m_evt_handler->m_curr->at_line_beginning())
7284 {
7285 _c4dbgpf("mapblck[QMRK]: at line beginning. ind={} indref={}", m_evt_handler->m_curr->line_contents.indentation, m_evt_handler->m_curr->indref);
7286 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.indentation != npos, m_evt_handler->m_curr->pos);
7287 if(m_evt_handler->m_curr->indentation_eq_extra())
7288 {
7289 _c4dbgpf("mapblck[QMRK]: skip {} from indref", m_evt_handler->m_curr->indref + 1);
7290 _line_progressed(m_evt_handler->m_curr->indref + 1);
7291 if(!m_evt_handler->m_curr->line_contents.rem.len)
7292 return true; // go again
7293 }
7294 // indentation can be larger in QMRK state
7295 else if(m_evt_handler->m_curr->indentation_gt_extra())
7296 {
7297 _c4dbgp("mapblck[QMRK]: larger indentation !");
7298 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
7299 if(!m_evt_handler->m_curr->line_contents.rem.len)
7300 return true; // go again
7301 }
7302 else
7303 {
7304 _c4dbgp("mapblck[QMRK]: smaller indentation!");
7305 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_lt_extra(), m_evt_handler->m_curr->pos);
7306 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.len > 0, m_evt_handler->m_curr->pos);
7307 if(m_evt_handler->m_curr->indentation_eq()
7308 // defend against docs or indentless seqs
7309 && m_evt_handler->m_curr->line_contents.rem.str[0] != '-')
7310 {
7311 _c4dbgp("mapblck[QMRK]: QMRK finished!");
7312 _handle_annotations_before_blck_key_scalar();
7313 m_evt_handler->set_key_scalar_plain_empty();
7314 addrem_flags(RKCL, QMRK);
7315 return true; // go again
7316 }
7317 else if(m_evt_handler->m_curr->indentation_lt())
7318 {
7319 _c4dbgp("mapblck[QMRK]: indentation pop!");
7320 _handle_indentation_pop_from_block_map();
7321 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
7322 if(has_all(RMAP|RBLCK))
7323 {
7324 _c4dbgp("mapblck[QMRK]: still mapblck!");
7325 return true; // go again
7326 }
7327 else
7328 {
7329 _c4dbgp("mapblck[QMRK]: no longer mapblck!");
7330 return false; // finish mapblck
7331 }
7332 }
7333 }
7334 }
7335 //
7336 // now handle the tokens
7337 //
7338 _c4assert(m_evt_handler->m_curr->line_contents.rem.len);
7339 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
7340 const size_t startline = m_evt_handler->m_curr->pos.line;
7341 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
7342 _c4dbgpf("mapblck[QMRK]: '{}'", first);
7343 ScannedScalar sc;
7344 if(first == '\'')
7345 {
7346 _c4dbgp("mapblck[QMRK]: scanning single-quoted scalar");
7347 sc = _scan_scalar_squot();
7348 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY!
7349 addrem_flags(RKCL, QMRK);
7350 if(!_maybe_scan_following_colon())
7351 {
7352 _c4dbgp("mapblck[QMRK]: set as key");
7353 _handle_annotations_before_blck_key_scalar();
7354 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7355 }
7356 else
7357 {
7358 _c4dbgp("mapblck[QMRK]: start new block map as key (!), set scalar as key");
7359 _handle_annotations_before_start_mapblck_as_key();
7360 m_evt_handler->begin_map_key_block();
7361 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7362 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7363 _maybe_skip_whitespace_tokens();
7364 _set_indentation(startindent);
7365 // keep the child state on RVAL
7366 addrem_flags(RVAL, RKCL);
7367 }
7368 }
7369 else if(first == '"')
7370 {
7371 _c4dbgp("mapblck[QMRK]: scanning double-quoted scalar");
7372 sc = _scan_scalar_dquot();
7373 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY!
7374 addrem_flags(RKCL, QMRK);
7375 if(!_maybe_scan_following_colon())
7376 {
7377 _c4dbgp("mapblck[QMRK]: set as key");
7378 _handle_annotations_before_blck_key_scalar();
7379 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7380 }
7381 else
7382 {
7383 _c4dbgp("mapblck[QMRK]: start new block map as key (!), set scalar as key");
7384 _handle_annotations_before_start_mapblck_as_key();
7385 m_evt_handler->begin_map_key_block();
7386 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7387 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7388 _maybe_skip_whitespace_tokens();
7389 _set_indentation(startindent);
7390 // keep the child state on RVAL
7391 addrem_flags(RVAL, RKCL);
7392 }
7393 }
7394 else if(first == '|')
7395 {
7396 _c4dbgp("mapblck[QMRK]: scanning block-literal scalar");
7397 ScannedBlock sb;
7398 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
7399 csubstr maybe_filtered = _maybe_filter_key_scalar_literal(sb); // KEY!
7400 _handle_annotations_before_blck_key_scalar();
7401 m_evt_handler->set_key_scalar_literal(maybe_filtered);
7402 addrem_flags(RKCL, QMRK);
7403 }
7404 else if(first == '>')
7405 {
7406 _c4dbgp("mapblck[QMRK]: scanning block-literal scalar");
7407 ScannedBlock sb;
7408 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
7409 csubstr maybe_filtered = _maybe_filter_key_scalar_folded(sb); // KEY!
7410 _handle_annotations_before_blck_key_scalar();
7411 m_evt_handler->set_key_scalar_folded(maybe_filtered);
7412 addrem_flags(RKCL, QMRK);
7413 }
7414 else if(_scan_scalar_plain_map_blck(&sc))
7415 {
7416 _c4dbgp("mapblck[QMRK]: plain scalar");
7417 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY!
7418 addrem_flags(RKCL, QMRK);
7419 if(!_maybe_scan_following_colon())
7420 {
7421 _c4dbgp("mapblck[QMRK]: set as key");
7422 _handle_annotations_before_blck_key_scalar();
7423 m_evt_handler->set_key_scalar_plain(maybe_filtered);
7424 }
7425 else
7426 {
7427 _c4dbgp("mapblck[QMRK]: start new block map as key (!), set scalar as key");
7428 _handle_annotations_before_start_mapblck_as_key();
7429 m_evt_handler->begin_map_key_block();
7430 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7431 m_evt_handler->set_key_scalar_plain(maybe_filtered);
7432 _maybe_skip_whitespace_tokens();
7433 _set_indentation(startindent);
7434 // keep the child state on RVAL
7435 addrem_flags(RVAL, RKCL);
7436 }
7437 }
7438 else if(first == ':')
7439 {
7440 _c4dbgp("mapblck[QMRK]: start new block map as key (!), empty key");
7441 addrem_flags(RKCL, QMRK);
7442 _handle_annotations_before_start_mapblck_as_key();
7443 m_evt_handler->begin_map_key_block();
7444 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7445 m_evt_handler->set_key_scalar_plain_empty();
7446 _line_progressed(1);
7447 _maybe_skip_whitespace_tokens();
7448 _set_indentation(startindent);
7449 // keep the child state on RVAL
7450 addrem_flags(RVAL, RKCL);
7451 }
7452 else if(first == '*')
7453 {
7454 csubstr ref = _scan_ref_map();
7455 _c4dbgpf("mapblck[QMRK]: key ref! {}", prs_(ref));
7456 addrem_flags(RKCL, QMRK);
7457 if(!_maybe_scan_following_colon())
7458 {
7459 _c4dbgp("mapblck[QMRK]: set ref as key");
7460 _handle_keyref(ref);
7461 }
7462 else
7463 {
7464 _c4dbgp("mapblck[QMRK]: start new block map as key (!), set ref as key");
7465 _handle_annotations_before_start_mapblck_as_key();
7466 m_evt_handler->begin_map_key_block();
7467 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7468 _handle_keyref(ref);
7469 _set_indentation(startindent);
7470 // keep the child state on RVAL
7471 addrem_flags(RVAL, RKCL|QMRK);
7472 }
7473 _maybe_skip_whitespace_tokens();
7474 }
7475 else if(first == '&')
7476 {
7477 csubstr anchor = _scan_anchor();
7478 _c4dbgpf("mapblck[QMRK]: key anchor! {}", prs_(anchor));
7479 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
7480 }
7481 else if(first == '!')
7482 {
7483 csubstr tag = _scan_tag();
7484 _c4dbgpf("mapblck[QMRK]: key tag! {}", prs_(tag));
7485 _add_annotation(&m_pending_tags, tag, startindent, startline);
7486 }
7487 else if(first == '-')
7488 {
7489 _c4dbgp("mapblck[QMRK]: maybe seq or doc?");
7490 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7491 {
7492 _c4dbgp("mapblck[QMRK]: start child seqblck (!)");
7493 addrem_flags(RKCL, QMRK);
7494 _handle_annotations_before_blck_key_scalar();
7495 m_evt_handler->begin_seq_key_block();
7496 addrem_flags(RVAL|RSEQ, RMAP|RKCL);
7497 _set_indentation(startindent);
7498 _line_progressed(1);
7499 }
7500 else
7501 {
7502 _c4dbgp("mapblck[QMRK]: end+start doc");
7503 _c4assert(_is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem));
7504 _start_doc_suddenly();
7505 _line_progressed(3);
7506 }
7507 _maybe_skip_whitespace_tokens();
7508 return false; // finish mapblck
7509 }
7510 else if(first == '[')
7511 {
7512 _c4dbgp("mapblck[QMRK]: start child seqflow (!)");
7513 addrem_flags(RKCL, QMRK);
7514 _handle_annotations_before_blck_key_scalar();
7515 m_evt_handler->begin_seq_key_flow();
7516 addrem_flags(RVAL|RSEQ|RFLOW, RMAP|RKCL|RBLCK);
7517 _set_indentation(m_evt_handler->m_parent->indref + 1);
7518 _line_progressed(1);
7519 return false; // finish mapblck
7520 }
7521 else if(first == '{')
7522 {
7523 _c4dbgp("mapblck[QMRK]: start child mapflow (!)");
7524 addrem_flags(RKCL, QMRK);
7525 _handle_annotations_before_blck_key_scalar();
7526 m_evt_handler->begin_map_key_flow();
7527 addrem_flags(RKEY|RFLOW, RVAL|RKCL|RBLCK);
7528 _set_indentation(m_evt_handler->m_parent->indref + 1);
7529 _line_progressed(1);
7530 return false; // finish mapblck
7531 }
7532 else if(first == '?')
7533 {
7534 _c4dbgpf("mapblck[QMRK]: another QMRK '?'. ind={} indref={}", startindent, m_evt_handler->m_curr->indref);
7535 RYML_ASSERT_PARSE_CB_(callbacks(), startindent > m_evt_handler->m_curr->indref, m_evt_handler->m_curr->pos);
7536 _c4dbgp("mapblck[QMRK]: ? indent gt - start child mapblck (!)");
7537 addrem_flags(RKCL, QMRK);
7538 _handle_annotations_before_blck_key_scalar();
7539 m_evt_handler->begin_map_key_block();
7540 addrem_flags(QMRK, RKCL);
7541 _set_indentation(startindent);
7542 // indentation_lt() should be handled elsewhere
7543 _line_progressed(1);
7544 _maybe_skipchars(' ');
7545 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7546 {
7547 _c4dbgp("mapblck[RVAL]: seqblck starts after ?");
7548 addrem_flags(RKCL, QMRK);
7549 m_evt_handler->begin_seq_key_block();
7550 addrem_flags(RSEQ|RVAL, RMAP|RKCL);
7551 _save_indentation();
7552 _line_progressed(1);
7553 _maybe_skipchars(' ');
7554 return false;
7555 }
7556 }
7557 else
7558 {
7559 _c4err("parse error");
7560 }
7561 return true; // continue in mapblck
7562}
7563
7564
7565//-----------------------------------------------------------------------------
7566
7567// return true if we should remain in map_block
7568template<class EventHandler>
7569bool ParseEngine<EventHandler>::_handle_map_block_rkcl()
7570{
7571 //
7572 // handle indentation
7573 //
7574 if(m_evt_handler->m_curr->at_line_beginning())
7575 {
7576 if(m_evt_handler->m_curr->indentation_eq())
7577 {
7578 _c4dbgpf("mapblck[RKCL]: skip {} from indref", m_evt_handler->m_curr->indref);
7579 _line_progressed(m_evt_handler->m_curr->indref);
7580 if(!m_evt_handler->m_curr->line_contents.rem.len)
7581 return true; // continue in mapblck
7582 }
7583 else if C4_UNLIKELY(m_evt_handler->m_curr->indentation_lt())
7584 {
7585 _c4err("invalid indentation");
7586 }
7587 }
7588 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
7589 _c4dbgpf("mapblck[RKCL]: '{}'", first);
7590 if(first == ':')
7591 {
7592 _c4dbgp("mapblck[RKCL]: found the colon");
7593 _line_progressed(1);
7594 _maybe_skipchars(' ');
7595 #if defined(__GNUC__) && ( \
7596 ((__GNUC__ >= 12) && ((C4_WORDSIZE == 4) || defined(C4_CPU_S390_X) || defined(C4_CPU_PPC64))) \
7597 || \
7598 (__GNUC__ == 16 && defined(C4_CPU_X86_64)))
7599 C4_DONT_OPTIMIZE(m_evt_handler->m_curr->line_contents.rem);
7600 #endif
7601 // sequence is valid after the RKCL ':'
7602 if(!_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7603 {
7604 addrem_flags(RVAL, RKCL);
7605 return true; // continue in mapblck
7606 }
7607 else
7608 {
7609 _c4dbgp("mapblck[RKCL]: start val seqblck");
7610 addrem_flags(RNXT, RKCL);
7611 m_evt_handler->begin_seq_val_block();
7612 addrem_flags(RSEQ|RVAL, RMAP|RNXT);
7613 _save_indentation();
7614 _line_progressed(1);
7615 _maybe_skipchars(' ');
7616 return false; // finish mapblck
7617 }
7618 }
7619 else if(first == '?')
7620 {
7621 _c4dbgp("mapblck[RKCL]: got '?'. val was empty");
7622 m_evt_handler->set_val_scalar_plain_empty();
7623 m_evt_handler->add_sibling();
7624 addrem_flags(QMRK, RKCL);
7625 _line_progressed(1);
7626 _maybe_skipchars(' ');
7627 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7628 {
7629 _c4dbgp("mapblck[RKCL]: seqblck starts after ?");
7630 addrem_flags(RKCL, QMRK);
7631 m_evt_handler->begin_seq_key_block();
7632 addrem_flags(RSEQ|RVAL, RMAP|QMRK);
7633 _save_indentation();
7634 _line_progressed(1);
7635 _maybe_skipchars(' ');
7636 return false;
7637 }
7638 }
7639 else if(first == '-')
7640 {
7641 if(m_evt_handler->m_curr->indref == 0 || m_evt_handler->m_curr->line_contents.indentation == 0 || _is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem))
7642 {
7643 _c4dbgp("mapblck[RKCL]: end+start doc");
7644 RYML_CHECK_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, _is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem), m_evt_handler->m_curr->pos);
7645 _start_doc_suddenly();
7646 _line_progressed(3);
7647 _maybe_skip_whitespace_tokens();
7648 return false; // finish mapblck
7649 }
7650 else
7651 {
7652 _c4err("parse error"); // LCOV_EXCL_LINE
7653 }
7654 }
7655 else if(first == '.')
7656 {
7657 _c4dbgp("mapblck[RKCL]: maybe end doc?");
7658 csubstr rs = m_evt_handler->m_curr->line_contents.rem.sub(1);
7659 if(rs == ".." || rs.begins_with(".. "))
7660 {
7661 _c4dbgp("mapblck[RKCL]: end+start doc");
7662 _end_doc_suddenly();
7663 _line_progressed(3);
7664 _maybe_skip_whitespace_tokens();
7665 _check_doc_end_tokens();
7666 return false; // finish mapblck
7667 }
7668 else
7669 {
7670 _c4err("parse error"); // LCOV_EXCL_LINE
7671 }
7672 }
7673 else/* if(m_was_inside_qmrk) */
7674 {
7675 _c4dbgp("mapblck[RKCL]: missing :");
7676 if C4_UNLIKELY(!m_evt_handler->m_curr->indentation_eq())
7677 _c4err("parse error"); // LCOV_EXCL_LINE
7678 m_evt_handler->set_val_scalar_plain_empty();
7679 m_evt_handler->add_sibling();
7680 addrem_flags(RKEY, RKCL);
7681 }
7682 return true;
7683}
7684
7685
7686//-----------------------------------------------------------------------------
7687
7688template<class EventHandler>
7689void ParseEngine<EventHandler>::_handle_unk_json()
7690{
7691 _c4dbgpf("handle_unk_json indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
7692
7693 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT|RSEQ|RMAP), m_evt_handler->m_curr->pos);
7694 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_all(RTOP), m_evt_handler->m_curr->pos);
7695
7696 _maybe_skip_comment();
7697 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
7698 if(!rem.len)
7699 return;
7700
7701 size_t pos = rem.first_not_of(" \t");
7702 if(pos)
7703 {
7704 pos = pos != npos ? pos : rem.len;
7705 _c4dbgpf("skipping indentation of {}", pos);
7706 _line_progressed(pos);
7707 rem = m_evt_handler->m_curr->line_contents.rem;
7708 if(!rem.len)
7709 return;
7710 _c4dbgpf("rem is now {}", prs_(rem));
7711 }
7712
7713 if(rem.begins_with('['))
7714 {
7715 _c4dbgp("it's a seq");
7716 _check_trailing_doc_token();
7717 _maybe_begin_doc();
7718 m_evt_handler->begin_seq_val_flow();
7719 addrem_flags(RSEQ|RFLOW|RVAL, RUNK|RTOP|RDOC);
7720 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7721 m_doc_empty = false;
7722 _line_progressed(1);
7723 }
7724 else if(rem.begins_with('{'))
7725 {
7726 _c4dbgp("it's a map");
7727 _check_trailing_doc_token();
7728 _maybe_begin_doc();
7729 m_evt_handler->begin_map_val_flow();
7730 addrem_flags(RMAP|RFLOW|RKEY, RVAL|RTOP|RUNK|RDOC);
7731 m_doc_empty = false;
7732 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7733 _line_progressed(1);
7734 }
7735 else if(_handle_bom())
7736 {
7737 _c4dbgp("byte order mark");
7738 }
7739 else
7740 {
7741 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL), m_evt_handler->m_curr->pos);
7742 _maybe_skip_whitespace_tokens();
7743 csubstr s = m_evt_handler->m_curr->line_contents.rem;
7744 if(!s.len)
7745 return;
7746 const size_t startindent = m_evt_handler->m_curr->line_contents.indentation; // save
7747 const char first = s.str[0];
7748 ScannedScalar sc;
7749 if(first == '"')
7750 {
7751 _c4dbgp("runk_json: scanning double-quoted scalar");
7752 _check_trailing_doc_token();
7753 _maybe_begin_doc();
7754 add_flags(RDOC);
7755 m_doc_empty = false;
7756 sc = _scan_scalar_dquot();
7757 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
7758 if(!_maybe_scan_following_colon())
7759 {
7760 _c4dbgp("runk_json: set as val");
7761 _handle_annotations_before_blck_val_scalar();
7762 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
7763 }
7764 else
7765 {
7766 _c4err("parse error");
7767 }
7768 }
7769 else if(_scan_scalar_plain_unk(&sc))
7770 {
7771 _c4dbgp("runk_json: got a plain scalar");
7772 _check_trailing_doc_token();
7773 _maybe_begin_doc();
7774 add_flags(RDOC);
7775 m_doc_empty = false;
7776 if(!_maybe_scan_following_colon())
7777 {
7778 _c4dbgp("runk_json: set as val");
7779 _handle_annotations_before_blck_val_scalar();
7780 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
7781 m_evt_handler->set_val_scalar_plain(maybe_filtered);
7782 }
7783 else
7784 {
7785 _c4err("parse error"); // LCOV_EXCL_LINE
7786 }
7787 }
7788 else
7789 {
7790 _c4err("parse error"); // LCOV_EXCL_LINE
7791 }
7792 }
7793}
7794
7795
7796//-----------------------------------------------------------------------------
7797
7798template<class EventHandler>
7799void ParseEngine<EventHandler>::_handle_unk()
7800{
7801 _c4dbgpf("handle_unk indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
7802
7803 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT|RSEQ|RMAP), m_evt_handler->m_curr->pos);
7804 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_all(RTOP), m_evt_handler->m_curr->pos);
7805
7806 _maybe_skipchars(' ');
7807 _maybe_skip_comment();
7808
7809 if(!m_evt_handler->m_curr->line_contents.rem.len)
7810 return;
7811
7812 _c4dbgpf("runk: rem is now {}", prs_(m_evt_handler->m_curr->line_contents.rem));
7813
7814 if(m_evt_handler->m_curr->line_contents.indentation == 0u && (m_evt_handler->m_curr->at_line_beginning() || (m_bom_len && (m_evt_handler->m_curr->pos.line == m_bom_line))))
7815 {
7816 _c4dbgpf("runk: rtop: zero indent + at line begin. offset={}", m_evt_handler->m_curr->pos.offset);
7817 _c4dbgp("runk: check BOM");
7818 if(_handle_bom())
7819 {
7820 m_bom_line = m_evt_handler->m_curr->pos.line;
7821 _c4dbgpf("runk: byte order mark! line={} offset={}", m_bom_line, m_evt_handler->m_curr->pos.offset);
7822 return;
7823 }
7824 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
7825 _c4dbgpf("runk: rtop: first={}", _c4prc(first));
7826 if(first == '-')
7827 {
7828 _c4dbgp("runk: rtop: suspecting doc");
7829 if(_is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem))
7830 {
7831 _c4dbgp("runk: rtop: begin doc");
7832 _maybe_end_doc();
7833 _begin2_doc_expl();
7834 _set_indentation(0);
7835 addrem_flags(RDOC|RUNK, NDOC);
7836 _line_progressed(3u);
7837 _maybe_skip_whitespace_tokens();
7838 return;
7839 }
7840 }
7841 else if(first == '.')
7842 {
7843 _c4dbgp("runk: rtop: suspecting doc end");
7844 if(_is_doc_end_token(m_evt_handler->m_curr->line_contents.rem))
7845 {
7846 _c4dbgp("runk: rtop: end doc");
7847 if(has_any(RDOC))
7848 {
7849 _end2_doc_expl();
7850 }
7851 else
7852 {
7853 _c4dbgp("runk: rtop: ignore end doc");
7854 }
7855 addrem_flags(NDOC|RUNK, RDOC);
7856 _line_progressed(3u);
7857 _maybe_skip_whitespace_tokens();
7858 _check_doc_end_tokens();
7859 return;
7860 }
7861 }
7862 else if(first == '%')
7863 {
7864 _c4dbgpf("directive: {}", m_evt_handler->m_curr->line_contents.rem);
7865 if C4_UNLIKELY(has_any(RDOC) || (!m_doc_empty && has_none(NDOC)))
7866 _c4err("need document footer before directives");
7867 _handle_directive(m_evt_handler->m_curr->line_contents.rem);
7868 return;
7869 }
7870 }
7871
7872 /* no else-if! */
7873
7874 size_t startindent = m_evt_handler->m_curr->line_contents.indentation;
7875 size_t remindent = m_evt_handler->m_curr->line_contents.current_col(m_evt_handler->m_curr->line_contents.rem);
7876 if(m_bom_len)
7877 {
7878 _c4dbgpf("runk: prev BOMlen={}", m_bom_len);
7879 if(m_evt_handler->m_curr->pos.line == m_bom_line)
7880 {
7881 _c4dbgpf("runk: BOM remindent={} offset={}", remindent, m_evt_handler->m_curr->pos.offset);
7882 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, remindent >= m_bom_len, m_evt_handler->m_curr->pos);
7883 remindent -= m_bom_len;
7884 }
7885 else
7886 {
7887 m_bom_len = 0;
7888 }
7889 }
7890
7891 size_t startcol = _handle_block_skip_leading_whitespace();
7892 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
7893
7894 if(first == '[')
7895 {
7896 _c4dbgp("runk: flow seq?");
7897 _handle_unk_begin_doc();
7898 if C4_LIKELY( ! _annotations_require_key_container())
7899 {
7900 _c4dbgp("runk: it's a seq, flow");
7901 _handle_annotations_before_blck_val_scalar();
7902 m_evt_handler->begin_seq_val_flow();
7903 addrem_flags(RSEQ|RFLOW|RVAL, RUNK|RTOP|RDOC);
7904 _set_indentation(0);
7905 }
7906 else
7907 {
7908 _c4dbgp("runk: start new block map, set flow seq as key (!)");
7909 _handle_annotations_before_start_mapblck(m_evt_handler->m_curr->pos.line);
7910 m_evt_handler->begin_map_val_block();
7911 addrem_flags(RMAP|RBLCK|RKEY, RUNK|RTOP|RDOC);
7912 _handle_annotations_and_indentation_after_start_mapblck(remindent, m_evt_handler->m_curr->pos.line);
7913 m_evt_handler->begin_seq_key_flow();
7914 addrem_flags(RSEQ|RFLOW|RVAL, RMAP|RBLCK|RKEY);
7915 _set_indentation(0);
7916 }
7917 _line_progressed(1);
7918 }
7919 else if(first == '{')
7920 {
7921 _c4dbgp("runk: flow map?");
7922 _handle_unk_begin_doc();
7923 if C4_LIKELY( ! _annotations_require_key_container())
7924 {
7925 _c4dbgp("runk: it's a map, flow");
7926 _handle_annotations_before_blck_val_scalar();
7927 m_evt_handler->begin_map_val_flow();
7928 addrem_flags(RMAP|RFLOW|RKEY, RVAL|RTOP|RUNK|RDOC);
7929 _set_indentation(0);
7930 }
7931 else
7932 {
7933 _c4dbgp("runk: start new block map, set flow map as key (!)");
7934 _handle_annotations_before_start_mapblck(m_evt_handler->m_curr->pos.line);
7935 m_evt_handler->begin_map_val_block();
7936 addrem_flags(RMAP|RBLCK|RKEY, RUNK|RTOP|RDOC);
7937 _handle_annotations_and_indentation_after_start_mapblck(remindent, m_evt_handler->m_curr->pos.line);
7938 m_evt_handler->begin_map_key_flow();
7939 addrem_flags(RMAP|RFLOW, RBLCK);
7940 _set_indentation(0);
7941 }
7942 _line_progressed(1);
7943 }
7944 else if(first == '-' && _is_blck_token(m_evt_handler->m_curr->line_contents.rem))
7945 {
7946 _c4dbgp("runk: it's a seq, block");
7947 if C4_UNLIKELY(!m_evt_handler->m_curr->at_first_token())
7948 startindent = _handle_unk_check_left_tokens(startindent, m_evt_handler->m_curr->pos.col, /*skip_annotations*/false);
7949 _handle_unk_begin_doc();
7950 _handle_annotations_before_blck_val_scalar();
7951 m_evt_handler->begin_seq_val_block();
7952 addrem_flags(RSEQ|RBLCK|RVAL, RNXT|RTOP|RUNK|RDOC);
7953 _set_indentation(startindent);
7954 _line_progressed(1);
7955 _maybe_skipchars(' ');
7956 }
7957 else if(first == '?' && _is_blck_token(m_evt_handler->m_curr->line_contents.rem))
7958 {
7959 _c4dbgp("runk: it's a map + this key is complex");
7960 if C4_UNLIKELY(!m_evt_handler->m_curr->at_first_token())
7961 startindent = _handle_unk_check_left_tokens(startindent, m_evt_handler->m_curr->pos.col, /*skip_annotations*/false);
7962 _handle_block_check_leading_tabs(startcol);
7963 _handle_unk_begin_doc();
7964 _handle_annotations_before_blck_val_scalar();
7965 m_evt_handler->begin_map_val_block();
7966 addrem_flags(RMAP|RBLCK|QMRK, RKEY|RVAL|RTOP|RUNK|RDOC);
7967 _set_indentation(startindent);
7968 _line_progressed(1);
7969 _maybe_skipchars(' ');
7970 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7971 {
7972 _c4dbgp("runk: seqblck key starts after ?");
7973 addrem_flags(RKCL, QMRK);
7974 m_evt_handler->begin_seq_key_block();
7975 addrem_flags(RSEQ|RVAL, RMAP|RKCL);
7976 _save_indentation();
7977 _line_progressed(1);
7978 _maybe_skipchars(' ');
7979 }
7980 }
7981 else if(first == ':' && _is_blck_token(m_evt_handler->m_curr->line_contents.rem))
7982 {
7983 if(m_doc_empty || (m_pending_anchors.num_entries | m_pending_tags.num_entries))
7984 {
7985 _c4dbgp("runk: it's a map with an empty key");
7986 if C4_UNLIKELY(!m_evt_handler->m_curr->at_first_token())
7987 startindent = _handle_unk_check_left_tokens(startindent, m_evt_handler->m_curr->pos.col);
7988 _handle_block_check_leading_tabs(startcol);
7989 const size_t startline = m_evt_handler->m_curr->pos.line; // save
7990 _handle_unk_begin_doc();
7991 _handle_annotations_before_start_mapblck(startline);
7992 _handle_colon();
7993 m_evt_handler->begin_map_val_block();
7994 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7995 m_evt_handler->set_key_scalar_plain_empty();
7996 _set_indentation(startindent);
7997 }
7998 else
7999 {
8000 _c4err("block colon cannot occur on a new line unless ? is used");
8001 }
8002 addrem_flags(RMAP|RBLCK|RVAL, RTOP|RUNK|RDOC);
8003 _line_progressed(1);
8004 _maybe_skip_whitespace_tokens();
8005 }
8006 else if(first == '&')
8007 {
8008 csubstr anchor = _scan_anchor();
8009 _c4dbgpf("anchor! {}", prs_(anchor));
8010 const size_t line = m_evt_handler->m_curr->pos.line;
8011 _handle_unk_begin_doc();
8012 _add_annotation(&m_pending_anchors, anchor, remindent, line);
8013 _set_indentation(0);
8014 }
8015 else if(first == '*')
8016 {
8017 csubstr ref = _scan_ref_map();
8018 _c4dbgpf("runk: ref! {}", prs_(ref));
8019 _handle_unk_begin_doc();
8020 if(!_maybe_scan_following_colon())
8021 {
8022 _c4dbgp("runk: set val ref");
8023 _handle_valref(ref);
8024 }
8025 else
8026 {
8027 _c4dbgp("runk: start new block map, set ref as key");
8028 _handle_block_check_leading_tabs(startcol);
8029 const size_t startline = m_evt_handler->m_curr->pos.line; // save
8030 _handle_annotations_before_start_mapblck(startline);
8031 m_evt_handler->begin_map_val_block();
8032 _handle_keyref(ref);
8033 _maybe_skip_whitespace_tokens();
8034 _set_indentation(0);
8035 addrem_flags(RMAP|RBLCK|RVAL, RTOP|RUNK|RDOC);
8036 }
8037 }
8038 else if(first == '!')
8039 {
8040 csubstr tag_orig;
8041 csubstr tag = _scan_tag(&tag_orig);
8042 _c4dbgpf("runk: val tag! {}", prs_(tag));
8043 // we need to buffer the tags, as there may be two
8044 // consecutive tags in here
8045 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(m_evt_handler->m_curr->line_contents.rem);
8046 const size_t line = m_evt_handler->m_curr->pos.line;
8047 _add_annotation(&m_pending_tags, tag, indentation, line, tag_orig);
8048 }
8049 else
8050 {
8051 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL), m_evt_handler->m_curr->pos);
8052 const size_t startscalar = _handle_block_get_whitespace_mark();
8053 const size_t startline = m_evt_handler->m_curr->pos.line; // save
8054 auto beginmap = [&](size_t startindent_){
8055 if C4_UNLIKELY(m_evt_handler->m_curr->pos.line > startline)
8056 _c4err("multiline scalars cannot be used as implicit keys");
8057 _handle_block_check_leading_tabs(startcol, startscalar);
8058 _handle_annotations_before_start_mapblck(startline);
8059 _handle_colon();
8060 m_evt_handler->begin_map_val_block();
8061 _handle_annotations_and_indentation_after_start_mapblck(startindent_, startline);
8062 };
8063 auto after_beginmap = [&](size_t startindent_){
8064 _maybe_skip_whitespace_tokens();
8065 _set_indentation(startindent_);
8066 addrem_flags(RMAP|RBLCK|RVAL, RTOP|RUNK|RDOC);
8067 };
8068 if(first == '|')
8069 {
8070 _c4dbgp("runk: block-literal scalar");
8071 _handle_unk_begin_doc();
8072 ScannedBlock sb;
8073 _scan_block(&sb, startindent);
8074 _handle_annotations_before_blck_val_scalar();
8075 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
8076 m_evt_handler->set_val_scalar_literal(maybe_filtered);
8077 }
8078 else if(first == '>')
8079 {
8080 _c4dbgp("runk: block-folded scalar");
8081 _handle_unk_begin_doc();
8082 ScannedBlock sb;
8083 _scan_block(&sb, startindent);
8084 _handle_annotations_before_blck_val_scalar();
8085 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
8086 m_evt_handler->set_val_scalar_folded(maybe_filtered);
8087 }
8088 else if(first == '\'')
8089 {
8090 _c4dbgp("runk: single-quoted scalar");
8091 _handle_unk_begin_doc();
8092 bool firsttoken = m_evt_handler->m_curr->at_first_token();
8093 size_t col = m_evt_handler->m_curr->pos.col;
8094 ScannedScalar sc = _scan_scalar_squot();
8095 if(!_maybe_scan_following_colon())
8096 {
8097 _c4dbgp("runk: set as val");
8098 _handle_annotations_before_blck_val_scalar();
8099 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
8100 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
8101 }
8102 else
8103 {
8104 _c4dbgp("runk: start new block map, set single-quoted scalar as key");
8105 if C4_UNLIKELY(m_evt_handler->m_curr->pos.line > startline)
8106 _c4err("multiline key");
8107 if(!firsttoken)
8108 startindent = _handle_unk_check_left_tokens(startindent, col);
8109 beginmap(startindent);
8110 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
8111 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
8112 after_beginmap(startindent);
8113 }
8114 }
8115 else if(first == '"')
8116 {
8117 _c4dbgp("runk: double-quoted scalar");
8118 _handle_unk_begin_doc();
8119 bool firsttoken = m_evt_handler->m_curr->at_first_token();
8120 size_t col = m_evt_handler->m_curr->pos.col;
8121 ScannedScalar sc = _scan_scalar_dquot();
8122 if(!_maybe_scan_following_colon())
8123 {
8124 _c4dbgp("runk: set as val");
8125 _handle_annotations_before_blck_val_scalar();
8126 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
8127 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
8128 }
8129 else
8130 {
8131 _c4dbgp("runk: start new block map, set double-quoted scalar as key");
8132 if C4_UNLIKELY(m_evt_handler->m_curr->pos.line > startline)
8133 _c4err("multiline key");
8134 if(!firsttoken)
8135 startindent = _handle_unk_check_left_tokens(startindent, col);
8136 beginmap(startindent);
8137 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
8138 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
8139 after_beginmap(startindent);
8140 }
8141 }
8142 else
8143 {
8144 bool firsttoken = m_evt_handler->m_curr->at_first_token();
8145 size_t col = m_evt_handler->m_curr->pos.col;
8146 ScannedScalar sc;
8147 if(_scan_scalar_plain_unk(&sc))
8148 {
8149 _c4dbgp("runk: plain scalar");
8150 _handle_unk_begin_doc();
8151 if(!_maybe_scan_following_colon())
8152 {
8153 _c4dbgp("runk: set as val");
8154 _handle_annotations_before_blck_val_scalar();
8155 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
8156 m_evt_handler->set_val_scalar_plain(maybe_filtered);
8157 }
8158 else
8159 {
8160 _c4dbgp("runk: start new block map, set plain scalar as key");
8161 // there is already a check to multiline inside
8162 // _scan_scalar_plain_unk(), so we don't need to
8163 // throw an error here. but let's be safe by
8164 // asserting the assumption:
8165 _c4assert(m_evt_handler->m_curr->pos.line == startline);
8166 if(!firsttoken)
8167 startindent = _handle_unk_check_left_tokens(startindent, col);
8168 beginmap(startindent);
8169 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
8170 m_evt_handler->set_key_scalar_plain(maybe_filtered);
8171 after_beginmap(startindent);
8172 }
8173 }
8174 else
8175 {
8176 _c4err("parse error"); // LCOV_EXCL_LINE
8177 }
8178 }
8179 }
8180
8181 if(m_bom_len && has_none(RUNK))
8182 {
8183 _c4dbgpf("runk: BOMlen={} BOMline={} now={} at_end={}", m_bom_len, m_bom_line, m_evt_handler->m_curr->pos.line, !m_evt_handler->m_curr->line_contents.rem.len);
8184 if(m_evt_handler->m_curr->pos.line != m_bom_line || !m_evt_handler->m_curr->line_contents.rem.len)
8185 {
8186 _c4dbgp("runk: clear BOMlen");
8187 m_bom_len = 0;
8188 }
8189 }
8190}
8191
8192template<class EventHandler>
8193void ParseEngine<EventHandler>::_handle_unk_begin_doc()
8194{
8195 _c4dbgp("runk: begin doc");
8196 _check_trailing_doc_token();
8197 _maybe_begin_doc();
8198 add_flags(RDOC);
8199 m_doc_empty = false;
8200}
8201
8202template<class EventHandler>
8203size_t ParseEngine<EventHandler>::_handle_unk_check_left_tokens(size_t realindent, size_t col, bool skip_annotations)
8204{
8205 _c4assert(col >= 1);
8206 col -= 1;
8207 _c4assert(col >= m_bom_len);
8208 csubstr s = m_evt_handler->m_curr->line_contents.full.range(m_bom_len, col);
8209 size_t pos = 0;
8210 _c4dbgpf("runk: check left tokens: s={}", prs_(s, /*escape*/true));
8211 if(skip_annotations)
8212 {
8213 _handle_unk_get_first_non_pending_token_pos(s, &realindent, &pos);
8214 _c4dbgpf("runk: skip annotations: realindent={} pos={}", realindent, pos);
8215 }
8216 size_t firstns = s.first_not_of(' ', pos);
8217 if(firstns == npos)
8218 firstns = s.len;
8219 _c4dbgpf("runk: check left tokens:\n"
8220 " tokens={} skipped={}\n"
8221 " bomlen={} first={} col={}\n"
8222 " (bomlen+first)={} vs {}=col\n"
8223 " startindent={} lineindent={}"
8224 , prs_(s, /*escape*/true), prs_(s.sub(firstns), /*escape*/true)
8225 , m_bom_len, firstns, col
8226 , m_bom_len+firstns, col,
8227 realindent, m_evt_handler->m_curr->line_contents.indentation);
8228 if(m_bom_len + firstns != col)
8229 _c4err("parse error");
8230 if(!skip_annotations)
8231 realindent = firstns;
8232 _c4dbgpf("runk: pos={} firstns={} -> realindent={}", pos, firstns, realindent);
8233 return realindent;
8234}
8235
8236
8237/** skip annotations which are pending on the same line */
8238template<class EventHandler>
8239void ParseEngine<EventHandler>::_handle_unk_get_first_non_pending_token_pos(csubstr s, size_t *indent, size_t *first_non_token_pos)
8240{
8241 csubstr first, second;
8242 uint32_t total = _get_annotations_same_line(s, &first, &second);
8243 _c4dbgpf("runk: before skip: {}", prs_(s, true));
8244 size_t pos = s.first_not_of(" \t");
8245 if(pos == npos)
8246 pos = s.len;
8247 if(!total)
8248 {
8249 *indent = *first_non_token_pos = pos;
8250 return;
8251 }
8252 _c4assert(!s.sub(pos).begins_with_any(" \t"));
8253 _c4dbgpf("runk: after skip leading {} whitespace: {}", pos, prs_(s.sub(pos), true));
8254 _c4dbgpf("runk: first annotation: {}", first);
8255 _c4assert(first.len);
8256 _c4assert(first.is_sub(s));
8257 _c4assert(first.is_sub(s.sub(pos)));
8258 _c4assert(s.sub(pos).begins_with(first));
8259 *indent = pos;
8260 pos += first.len;
8261 _c4dbgpf("runk: after skip first annotation: pos={} {}", pos, prs_(s.sub(pos), true));
8262 if(total > 1)
8263 {
8264 _c4dbgpf("runk: second annotation: {}", second);
8265 _c4assert(total == 2);
8266 _c4assert(second.len);
8267 _c4assert(second.is_sub(s));
8268 _c4assert(second.is_sub(s.sub(pos)));
8269 csubstr spos = s.sub(pos);
8270 size_t more = spos.first_not_of(" \t");
8271 _c4assert(more != npos); // because the annotations are on the same line
8272 _c4dbgpf("runk: next nonspace: {}", pos + more);
8273 pos += more;
8274 _c4dbgpf("runk: after skip annotation whitespace: pos={} {}", pos, prs_(s.sub(pos), true));
8275 _c4assert(s.sub(pos).begins_with(second));
8276 pos += second.len;
8277 _c4dbgpf("runk: after skip annotation 2: pos={} {}", pos, prs_(s.sub(pos), true));
8278 }
8279 *first_non_token_pos = pos;
8280}
8281
8282
8283template<class EventHandler>
8284uint32_t ParseEngine<EventHandler>::_get_annotations_same_line(csubstr token_soup, csubstr *first_, csubstr *second_) const
8285{
8286 _c4assert(!m_evt_handler->m_curr->at_first_token());
8287 (void)token_soup;
8288 using EntryPtr = typename Annotation::Entry const* C4_RESTRICT;
8289 EntryPtr first = nullptr;
8290 EntryPtr second = nullptr;
8291 uint32_t total = (uint32_t)(m_pending_anchors.num_entries + m_pending_tags.num_entries);
8292 if(total)
8293 {
8294 _c4dbgpf("there are {} pending annotations: {} anchors + {} tags", total, m_pending_anchors.num_entries, m_pending_tags.num_entries);
8295 auto valid_if_same_line = [this](EntryPtr entry){
8296 _c4dbgpf("pending: {} indent={} line={} vs currline={}", maybe_null_str_(entry->str), entry->indentation, entry->line, m_evt_handler->m_curr->pos.line);
8297 return (entry->line == m_evt_handler->m_curr->pos.line) ? entry : nullptr;
8298 };
8299 // now select annotations only on the same line
8300 total = 0;
8301 for(size_t i = 0; i < m_pending_anchors.num_entries; ++i)
8302 total += !!valid_if_same_line(&m_pending_anchors.annotations[i]);
8303 for(size_t i = 0; i < m_pending_tags.num_entries; ++i)
8304 total += !!valid_if_same_line(&m_pending_tags.annotations[i]);
8305 _c4dbgpf("{} annotations on same line", total);
8306 _c4assert(total > 0); // because this function is only called
8307 // while not at the first token. That
8308 // means we must have same-line
8309 // annotations.
8310 auto get_first_on_same_line = [this](EntryPtr not_this_one){
8311 for(size_t i = 0; i < m_pending_anchors.num_entries; ++i)
8312 if(&m_pending_anchors.annotations[i] != not_this_one
8313 && m_pending_anchors.annotations[i].line == m_evt_handler->m_curr->pos.line)
8314 return &m_pending_anchors.annotations[i];
8315 for(size_t i = 0; i < m_pending_tags.num_entries; ++i)
8316 if(&m_pending_tags.annotations[i] != not_this_one
8317 && m_pending_tags.annotations[i].line == m_evt_handler->m_curr->pos.line)
8318 return &m_pending_tags.annotations[i];
8319 C4_UNREACHABLE(); // LCOV_EXCL_LINE
8320 return (EntryPtr)nullptr; // LCOV_EXCL_LINE
8321 };
8322 _c4assert(total >= 1);
8323 // assign to first
8324 first = get_first_on_same_line(nullptr);
8325 _c4assert(first);
8326 _c4dbgpf("first annotation: {} indent={} line={}", maybe_null_str_(first->str), first->indentation, first->line);
8327 if(total > 1)
8328 {
8329 _c4assert(total == 2);
8330 // assign to second
8331 second = get_first_on_same_line(first);
8332 _c4assert(second);
8333 _c4dbgpf("second annotation: {} indent={} line={}", maybe_null_str_(second->str), second->indentation, second->line);
8334 }
8335 auto extract_string = [&](EntryPtr e){
8336 // tags can be null when the arena ran out of space
8337 if(!e->str.str || e->str.begins_with_any("!<"))
8338 {
8339 csubstr tag = e->orig;
8340 _c4assert(tag.str);
8341 _c4assert(tag.len);
8342 _c4assert(tag.is_sub(token_soup));
8343 _c4dbgpf("tag: {} -> {}", maybe_null_str_(e->str), tag);
8344 return tag;
8345 }
8346 csubstr anchor = e->str;
8347 _c4assert(anchor.len);
8348 _c4assert(anchor.str);
8349 _c4assert(anchor.is_sub(token_soup));
8350 _c4assert(!anchor.begins_with('&'));
8351 _c4assert(anchor.str - token_soup.str > 0);
8352 // add back the anchor's &
8353 --anchor.str;
8354 ++anchor.len;
8355 _c4assert(anchor.begins_with('&'));
8356 _c4dbgpf("anchor: {} -> {}", e->str, anchor);
8357 return anchor;
8358 };
8359 *first_ = first ? extract_string(first) : nullptr;
8360 *second_ = second ? extract_string(second) : nullptr;
8361 if(total > 1 && (first_->str > second_->str))
8362 {
8363 csubstr tmp = *first_;
8364 *first_ = *second_;
8365 *second_ = tmp;
8366 _c4dbgpf("swap first and second: {} -> {}", *first_, *second_);
8367 }
8368 }
8369 return total;
8370}
8371
8372
8373//-----------------------------------------------------------------------------
8374
8375template<class EventHandler>
8376C4_COLD void ParseEngine<EventHandler>::_handle_usty()
8377{
8378 _c4dbgpf("handle_usty target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
8379
8380 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_none(RBLCK|RFLOW), m_evt_handler->m_curr->pos);
8381
8382 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
8383 if(has_any(RNXT))
8384 {
8385 _c4dbgp("usty[RNXT]: finishing!");
8386 _end_stream();
8387 }
8388 #endif
8389
8390 _maybe_skip_comment();
8391 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
8392 if(!rem.len)
8393 return;
8394
8395 size_t pos = rem.first_not_of(" \t");
8396 if(pos)
8397 {
8398 pos = pos != npos ? pos : rem.len;
8399 _c4dbgpf("skipping indentation of {}", pos);
8400 _line_progressed(pos);
8401 rem = m_evt_handler->m_curr->line_contents.rem;
8402 if(!rem.len)
8403 return;
8404 _c4dbgpf("rem is now {}", prs_(rem));
8405 }
8406
8407 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, rem.len > 0, m_evt_handler->m_curr->pos);
8408 size_t startindent = m_evt_handler->m_curr->line_contents.indentation; // save
8409 char first = rem.str[0];
8410 if(has_any(RSEQ)) // destination is a sequence
8411 {
8412 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, ! has_any(RMAP), m_evt_handler->m_curr->pos);
8413 _c4dbgpf("usty[RSEQ]: first='{}'", _c4prc(first));
8414 if(first == '[')
8415 {
8416 _c4dbgp("usty[RSEQ]: it's a flow seq. merging it");
8417 add_flags(RNXT);
8418 m_evt_handler->_push();
8419 addrem_flags(RFLOW|RVAL, RNXT|USTY);
8420 _set_indentation(startindent);
8421 _line_progressed(1);
8422 _maybe_skip_whitespace_tokens();
8423 }
8424 else if(first == '-' && _is_blck_token(rem))
8425 {
8426 _c4dbgp("usty[RSEQ]: it's a block seq. merging it");
8427 add_flags(RNXT);
8428 m_evt_handler->_push();
8429 addrem_flags(RBLCK|RVAL, RNXT|USTY);
8430 _set_indentation(startindent);
8431 _line_progressed(1);
8432 _maybe_skip_whitespace_tokens();
8433 }
8434 else
8435 {
8436 _c4err("can only parse a seq into an existing seq");
8437 }
8438 }
8439 else if(has_any(RMAP)) // destination is a map
8440 {
8441 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, ! has_any(RSEQ), m_evt_handler->m_curr->pos);
8442 _c4dbgpf("usty[RMAP]: first='{}'", _c4prc(first));
8443 if(first == '{')
8444 {
8445 _c4dbgp("usty[RMAP]: it's a flow map. merging it");
8446 add_flags(RNXT);
8447 _handle_annotations_before_blck_val_scalar();
8448 m_evt_handler->_push();
8449 addrem_flags(RMAP|RFLOW|RKEY, RNXT|USTY);
8450 _set_indentation(startindent);
8451 _line_progressed(1);
8452 _maybe_skip_whitespace_tokens();
8453 }
8454 else if(first == '?' && _is_blck_token(rem))
8455 {
8456 _c4dbgp("usty[RMAP]: it's a block map + this key is complex");
8457 add_flags(RNXT);
8458 _handle_annotations_before_blck_val_scalar();
8459 m_evt_handler->_push();
8460 addrem_flags(RMAP|RBLCK|QMRK, RNXT|USTY);
8461 _save_indentation();
8462 _line_progressed(1);
8463 _maybe_skip_whitespace_tokens();
8464 }
8465 else if(first == ':' && _is_blck_token(rem))
8466 {
8467 _c4dbgp("usty[RMAP]: it's a map with an empty key");
8468 add_flags(RNXT);
8469 _handle_annotations_before_blck_val_scalar();
8470 m_evt_handler->_push();
8471 m_evt_handler->set_key_scalar_plain_empty();
8472 addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8473 _save_indentation();
8474 _line_progressed(1);
8475 _maybe_skip_whitespace_tokens();
8476 }
8477 else if(rem.begins_with('&'))
8478 {
8479 csubstr anchor = _scan_anchor();
8480 _c4dbgpf("usty[RMAP]: anchor! {}", prs_(anchor));
8481 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8482 const size_t line = m_evt_handler->m_curr->pos.line;
8483 _add_annotation(&m_pending_anchors, anchor, indentation, line);
8484 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
8485 }
8486 else if(first == '*')
8487 {
8488 csubstr ref = _scan_ref_map();
8489 _c4dbgpf("usty[RMAP]: ref! {}", prs_(ref));
8490 if(!_maybe_scan_following_colon())
8491 {
8492 _c4err("cannot read a VAL to a map");
8493 }
8494 else
8495 {
8496 _c4dbgp("usty[RMAP]: start new block map, set ref as key");
8497 const size_t startline = m_evt_handler->m_curr->pos.line; // save
8498 add_flags(RNXT);
8499 _handle_annotations_before_start_mapblck(startline);
8500 m_evt_handler->_push();
8501 _handle_keyref(ref);
8502 _maybe_skip_whitespace_tokens();
8503 _set_indentation(startindent);
8504 addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8505 }
8506 }
8507 else if(first == '!')
8508 {
8509 csubstr tag = _scan_tag();
8510 _c4dbgpf("usty[RMAP]: val tag! {}", prs_(tag));
8511 // we need to buffer the tags, as there may be two
8512 // consecutive tags in here
8513 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8514 const size_t line = m_evt_handler->m_curr->pos.line;
8515 _add_annotation(&m_pending_tags, tag, indentation, line);
8516 }
8517 else if(first == '[' || (first == '-' && _is_blck_token(rem)))
8518 {
8519 _c4err("cannot parse a seq into an existing map");
8520 }
8521 else
8522 {
8523 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL), m_evt_handler->m_curr->pos);
8524 startindent = m_evt_handler->m_curr->line_contents.indentation; // save
8525 const size_t startline = m_evt_handler->m_curr->pos.line; // save
8526 ScannedScalar sc;
8527 _c4dbgpf("usty[RMAP]: maybe scalar. first='{}'", _c4prc(first));
8528 if(first == '\'')
8529 {
8530 _c4dbgp("usty[RMAP]: scanning single-quoted scalar");
8531 sc = _scan_scalar_squot();
8532 if(!_maybe_scan_following_colon())
8533 {
8534 _c4err("cannot read a VAL to a map");
8535 }
8536 else
8537 {
8538 _c4dbgp("usty[RMAP]: start new block map, set scalar as key");
8539 add_flags(RNXT);
8540 _handle_annotations_before_start_mapblck(startline);
8541 m_evt_handler->_push();
8542 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8543 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
8544 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
8545 _set_indentation(startindent);
8546 addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8547 _maybe_skip_whitespace_tokens();
8548 }
8549 }
8550 else if(first == '"')
8551 {
8552 _c4dbgp("usty[RMAP]: scanning double-quoted scalar");
8553 sc = _scan_scalar_dquot();
8554 if(!_maybe_scan_following_colon())
8555 {
8556 _c4err("cannot read a VAL to a map");
8557 }
8558 else
8559 {
8560 _c4dbgp("usty[RMAP]: start new block map, set double-quoted scalar as key");
8561 add_flags(RNXT);
8562 _handle_annotations_before_start_mapblck(startline);
8563 m_evt_handler->_push();
8564 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8565 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
8566 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
8567 _set_indentation(startindent);
8568 addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8569 _maybe_skip_whitespace_tokens();
8570 }
8571 }
8572 else if(first == '|')
8573 {
8574 _c4err("block literal keys must be enclosed in '?'");
8575 }
8576 else if(first == '>')
8577 {
8578 _c4err("block literal keys must be enclosed in '?'");
8579 }
8580 else if(_scan_scalar_plain_unk(&sc))
8581 {
8582 _c4dbgp("usty[RMAP]: got a plain scalar");
8583 if(!_maybe_scan_following_colon())
8584 {
8585 _c4err("cannot read a VAL to a map");
8586 }
8587 else
8588 {
8589 _c4dbgp("usty[RMAP]: start new block map, set scalar as key");
8590 add_flags(RNXT);
8591 _handle_annotations_before_start_mapblck(startline);
8592 m_evt_handler->_push();
8593 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8594 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
8595 m_evt_handler->set_key_scalar_plain(maybe_filtered);
8596 _set_indentation(startindent);
8597 addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8598 _maybe_skip_whitespace_tokens();
8599 }
8600 }
8601 else
8602 {
8603 _c4err("parse error"); // LCOV_EXCL_LINE
8604 }
8605 }
8606 }
8607 else // destination is unknown
8608 {
8609 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, ! has_any(RSEQ), m_evt_handler->m_curr->pos);
8610 _c4dbgpf("usty[UNK]: first='{}'", _c4prc(first));
8611 if(first == '[')
8612 {
8613 _c4dbgp("usty[UNK]: it's a flow seq");
8614 add_flags(RNXT);
8615 _handle_annotations_before_blck_val_scalar();
8616 m_evt_handler->begin_seq_val_flow();
8617 addrem_flags(RSEQ|RFLOW|RVAL, RNXT|USTY);
8618 _set_indentation(startindent);
8619 _line_progressed(1);
8620 _maybe_skip_whitespace_tokens();
8621 }
8622 else if(first == '-' && _is_blck_token(rem))
8623 {
8624 _c4dbgp("usty[UNK]: it's a block seq");
8625 add_flags(RNXT);
8626 _handle_annotations_before_blck_val_scalar();
8627 m_evt_handler->begin_seq_val_block();
8628 addrem_flags(RSEQ|RBLCK|RVAL, RNXT|USTY);
8629 _set_indentation(startindent);
8630 _line_progressed(1);
8631 _maybe_skip_whitespace_tokens();
8632 }
8633 else if(first == '{')
8634 {
8635 _c4dbgp("usty[UNK]: it's a flow map");
8636 add_flags(RNXT);
8637 _handle_annotations_before_blck_val_scalar();
8638 m_evt_handler->begin_map_val_flow();
8639 addrem_flags(RMAP|RFLOW|RKEY, RNXT|USTY);
8640 _set_indentation(startindent);
8641 _line_progressed(1);
8642 _maybe_skip_whitespace_tokens();
8643 }
8644 else if(first == '?' && _is_blck_token(rem))
8645 {
8646 _c4dbgp("usty[UNK]: it's a map + this key is complex");
8647 add_flags(RNXT);
8648 _handle_annotations_before_blck_val_scalar();
8649 m_evt_handler->begin_map_val_block();
8650 addrem_flags(RMAP|RBLCK|QMRK, RNXT|USTY);
8651 _save_indentation();
8652 _line_progressed(1);
8653 _maybe_skip_whitespace_tokens();
8654 }
8655 else if(first == ':' && _is_blck_token(rem))
8656 {
8657 _c4dbgp("usty[UNK]: it's a map with an empty key");
8658 add_flags(RNXT);
8659 _handle_annotations_before_blck_val_scalar();
8660 m_evt_handler->begin_map_val_block();
8661 m_evt_handler->set_key_scalar_plain_empty();
8662 addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8663 _save_indentation();
8664 _line_progressed(1);
8665 _maybe_skip_whitespace_tokens();
8666 }
8667 else if(first == '&')
8668 {
8669 csubstr anchor = _scan_anchor();
8670 _c4dbgpf("usty[UNK]: anchor! {}", prs_(anchor));
8671 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8672 const size_t line = m_evt_handler->m_curr->pos.line;
8673 _add_annotation(&m_pending_anchors, anchor, indentation, line);
8674 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
8675 }
8676 else if(first == '*')
8677 {
8678 csubstr ref = _scan_ref_map();
8679 _c4dbgpf("usty[UNK]: ref! {}", prs_(ref));
8680 if(!_maybe_scan_following_colon())
8681 {
8682 _c4dbgp("usty[UNK]: set val ref");
8683 _handle_valref(ref);
8684 }
8685 else
8686 {
8687 _c4dbgp("usty[UNK]: start new block map, set ref as key");
8688 const size_t startline = m_evt_handler->m_curr->pos.line; // save
8689 add_flags(RNXT);
8690 _handle_annotations_before_start_mapblck(startline);
8691 m_evt_handler->begin_map_val_block();
8692 _handle_keyref(ref);
8693 _maybe_skip_whitespace_tokens();
8694 _set_indentation(startindent);
8695 addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8696 }
8697 }
8698 else if(first == '!')
8699 {
8700 csubstr tag = _scan_tag();
8701 _c4dbgpf("usty[UNK]: val tag! {}", prs_(tag));
8702 // we need to buffer the tags, as there may be two
8703 // consecutive tags in here
8704 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8705 const size_t line = m_evt_handler->m_curr->pos.line;
8706 _add_annotation(&m_pending_tags, tag, indentation, line);
8707 }
8708 else
8709 {
8710 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL), m_evt_handler->m_curr->pos);
8711 startindent = m_evt_handler->m_curr->line_contents.indentation; // save
8712 const size_t startline = m_evt_handler->m_curr->pos.line; // save
8713 first = rem.str[0];
8714 ScannedScalar sc;
8715 _c4dbgpf("usty[UNK]: maybe scalar. first='{}'", _c4prc(first));
8716 if(first == '\'')
8717 {
8718 _c4dbgp("usty[UNK]: scanning single-quoted scalar");
8719 sc = _scan_scalar_squot();
8720 if(!_maybe_scan_following_colon())
8721 {
8722 _c4dbgp("usty[UNK]: set as val");
8723 _handle_annotations_before_blck_val_scalar();
8724 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
8725 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
8726 _end_stream();
8727 }
8728 else
8729 {
8730 _c4dbgp("usty[UNK]: start new block map, set scalar as key");
8731 add_flags(RNXT);
8732 _handle_annotations_before_start_mapblck(startline);
8733 m_evt_handler->begin_map_val_block();
8734 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8735 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
8736 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
8737 _set_indentation(startindent);
8738 addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8739 _maybe_skip_whitespace_tokens();
8740 }
8741 }
8742 else if(first == '"')
8743 {
8744 _c4dbgp("usty[UNK]: scanning double-quoted scalar");
8745 sc = _scan_scalar_dquot();
8746 if(!_maybe_scan_following_colon())
8747 {
8748 _c4dbgp("usty[UNK]: set as val");
8749 _handle_annotations_before_blck_val_scalar();
8750 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
8751 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
8752 _end_stream();
8753 }
8754 else
8755 {
8756 _c4dbgp("usty[UNK]: start new block map, set double-quoted scalar as key");
8757 add_flags(RNXT);
8758 _handle_annotations_before_start_mapblck(startline);
8759 m_evt_handler->begin_map_val_block();
8760 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8761 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
8762 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
8763 _set_indentation(startindent);
8764 addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8765 _maybe_skip_whitespace_tokens();
8766 }
8767 }
8768 else if(first == '|')
8769 {
8770 _c4dbgp("usty[UNK]: scanning block-literal scalar");
8771 ScannedBlock sb;
8772 _scan_block(&sb, startindent);
8773 _c4dbgp("usty[UNK]: set as val");
8774 _handle_annotations_before_blck_val_scalar();
8775 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
8776 m_evt_handler->set_val_scalar_literal(maybe_filtered);
8777 _end_stream();
8778 }
8779 else if(first == '>')
8780 {
8781 _c4dbgp("usty[UNK]: scanning block-folded scalar");
8782 ScannedBlock sb;
8783 _scan_block(&sb, startindent);
8784 _c4dbgp("usty[UNK]: set as val");
8785 _handle_annotations_before_blck_val_scalar();
8786 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
8787 m_evt_handler->set_val_scalar_folded(maybe_filtered);
8788 _end_stream();
8789 }
8790 else if(_scan_scalar_plain_unk(&sc))
8791 {
8792 _c4dbgp("usty[UNK]: got a plain scalar");
8793 if(!_maybe_scan_following_colon())
8794 {
8795 _c4dbgp("usty[UNK]: set as val");
8796 _handle_annotations_before_blck_val_scalar();
8797 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
8798 m_evt_handler->set_val_scalar_plain(maybe_filtered);
8799 _end_stream();
8800 }
8801 else
8802 {
8803 _c4dbgp("usty[UNK]: start new block map, set scalar as key");
8804 add_flags(RNXT);
8805 _handle_annotations_before_start_mapblck(startline);
8806 m_evt_handler->begin_map_val_block();
8807 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8808 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
8809 m_evt_handler->set_key_scalar_plain(maybe_filtered);
8810 _set_indentation(startindent);
8811 addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8812 _maybe_skip_whitespace_tokens();
8813 }
8814 }
8815 else
8816 {
8817 _c4err("parse error"); // LCOV_EXCL_LINE
8818 }
8819 }
8820 }
8821}
8822
8823
8824//-----------------------------------------------------------------------------
8825
8826template<class EventHandler>
8828{
8829 RYML_ASSERT_BASIC_CB_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
8830 RYML_SAVE_TEST_JSON_(filename, src);
8831 m_evt_handler->start_parse(filename.str, src);
8832 m_evt_handler->begin_stream();
8833 _reset();
8834 while( ! _finished_file())
8835 {
8836 _scan_line();
8837 while( ! _finished_line())
8838 {
8840 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, ! m_evt_handler->m_curr->line_contents.rem.empty(), m_evt_handler->m_curr->pos);
8841 if(has_any(RSEQ))
8842 {
8843 _handle_seq_json();
8844 }
8845 else if(has_any(RMAP))
8846 {
8847 _handle_map_json();
8848 }
8849 else if(has_any(RUNK))
8850 {
8851 _handle_unk_json();
8852 }
8853 else
8854 {
8855 _c4err("internal error"); // LCOV_EXCL_LINE
8856 }
8857 }
8858 if(_finished_file())
8859 break; // it may have finished because of multiline blocks
8860 _line_ended();
8861 }
8862 _end_stream();
8863 m_evt_handler->finish_parse();
8864}
8865
8866
8867//-----------------------------------------------------------------------------
8868
8869template<class EventHandler>
8871{
8872 RYML_ASSERT_BASIC_CB_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
8873 RYML_SAVE_TEST_YAML_(filename, src);
8874 m_evt_handler->start_parse(filename.str, src);
8875 m_evt_handler->begin_stream();
8876 _reset();
8877 while( ! _finished_file())
8878 {
8879 _scan_line();
8880 while( ! _finished_line())
8881 {
8883 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, ! m_evt_handler->m_curr->line_contents.rem.empty(), m_evt_handler->m_curr->pos);
8884 if(has_any(RFLOW))
8885 {
8886 if(has_none(RSEQIMAP))
8887 {
8888 if(has_any(RSEQ))
8889 {
8890 _handle_seq_flow();
8891 }
8892 else
8893 {
8894 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_all(RMAP), m_evt_handler->m_curr->pos);
8895 _handle_map_flow();
8896 }
8897 }
8898 else
8899 {
8900 _handle_seq_imap();
8901 }
8902 }
8903 else if(has_any(RBLCK))
8904 {
8905 if(has_any(RSEQ))
8906 {
8907 _handle_seq_block();
8908 }
8909 else
8910 {
8911 RYML_ASSERT_PARSE_CB_(m_evt_handler->m_stack.m_callbacks, has_all(RMAP), m_evt_handler->m_curr->pos);
8912 _handle_map_block();
8913 }
8914 }
8915 else if(has_any(RUNK))
8916 {
8917 _handle_unk();
8918 }
8919 else if(has_any(USTY))
8920 {
8921 _handle_usty();
8922 }
8923 else
8924 {
8925 _c4err("internal error"); // LCOV_EXCL_LINE
8926 }
8927 }
8928 if(_finished_file())
8929 break; // it may have finished because of multiline blocks
8930 _line_ended();
8931 }
8932 _end_stream();
8933 m_evt_handler->finish_parse();
8934}
8935/** @endcond */
8936
8937} // namespace yml
8938} // namespace c4
8939
8940// NOLINTEND(hicpp-signed-bitwise,cppcoreguidelines-avoid-goto,hicpp-avoid-goto,hicpp-multiway-paths-covered,modernize-avoid-c-style-cast)
8941
8942#undef _c4dbgnextline
8943#undef _c4assert
8944#undef _c4err
8945
8946C4_SUPPRESS_WARNING_MSVC_POP
8947C4_SUPPRESS_WARNING_GCC_CLANG_POP
8948
8949#endif // C4_YML_PARSE_ENGINE_DEF_HPP_
Lightweight generic type-safe wrappers for converting individual values to/from strings.
This is the main driver of parsing logic: it scans the YAML or JSON source for tokens,...
FilterResult filter_scalar_plain(csubstr scalar, substr dst, size_t indentation)
filter a plain scalar
csubstr location_contents(Location const &loc) const
Get the string starting at a particular location, to the end of the parsed source buffer.
FilterResult filter_scalar_squoted(csubstr scalar, substr dst)
filter a single-quoted scalar
FilterResult filter_scalar_dquoted(csubstr scalar, substr dst)
filter a double-quoted scalar
void parse_json_in_place_ev(csubstr filename, substr src)
parse JSON in place, emitting events to the current handler
Location val_location(const char *val) const
Given a pointer to a buffer position, get the location.
FilterResult filter_scalar_plain_in_place(substr scalar, size_t cap, size_t indentation)
filter a plain scalar in place
FilterResult filter_scalar_squoted_in_place(substr scalar, size_t cap)
filter a single-quoted scalar in place
FilterResultExtending filter_scalar_dquoted_in_place(substr scalar, size_t cap)
filter a double-quoted scalar in place
void parse_in_place_ev(csubstr filename, substr src)
parse YAML in place, emitting events to the current handler
FilterResult filter_scalar_block_literal_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
filter a block-literal scalar in place
ParseEngine(EventHandler *evt_handler, ParserOptions const &opts={})
FilterResult filter_scalar_block_literal(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
filter a block-literal scalar
FilterResult filter_scalar_block_folded_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
filter a block-folded scalar in place
ParseEngine & operator=(ParseEngine &&) noexcept
FilterResult filter_scalar_block_folded(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
filter a block-folded scalar
#define RYML_LOCATIONS_SMALL_THRESHOLD
threshold at which a location search will revert from linear to binary search.
Definition common.hpp:39
#define RYML_NOEXCEPT
Conditionally expands to noexcept when RYML_USE_ASSERT is 0 and is empty otherwise.
bool atou(csubstr str, T *v) noexcept
Convert a trimmed string to an unsigned integral value.
void err_parse(ErrorDataParse const &errdata, const char *msg)
trigger a parse error to its respective handler, with a non-formatted error message.
Definition common.cpp:210
bool read_hex(csubstr s, I *v) noexcept
read a hexadecimal integer from a string.
Definition charconv.hpp:902
basic_substring< char > substr
a mutable string view
Definition substr.hpp:2355
basic_substring< const char > csubstr
an immutable string view
Definition substr.hpp:2356
bool is_valid_tag_handle(csubstr handle)
Definition tag.cpp:210
bool is_custom_tag(csubstr tag)
is a tag of the form !handle!tag?
Definition tag.cpp:9
substr decode_code_point(substr out, csubstr code_point)
decode the given code_point, writing into the output string in out.
Definition utf.cpp:42
@ npos
a null string position
Definition common.hpp:138
int ParserFlag_t
data type for ParserState_e
@ RTOP
reading at top level
@ RSET
the (implicit) map being read is a !!set.
@ RSEQ
reading a seq
@ RNXT
read next sibling
@ RUNK
reading unknown state (when starting): must determine whether scalar, map or seq
@ RKEY
reading a key
@ RKCL
reading the key colon (ie the : after the key in the map)
@ NDOC
no document mode. a document has ended and another has not started yet.
@ RDOC
reading a document
@ QSCL
stored scalar was quoted
@ RBLCK
reading in block mode
@ RMAP
reading a map
@ USTY
reading in unknown style mode - must determine FLOW or BLCK reading an implicit map nested in an expl...
@ QMRK
reading an explicit key (? key)
@ SSCL
there's a stored scalar
@ RVAL
reading a val
@ RFLOW
reading is inside explicit flow chars: [] or {}
size_t adjust_pos_with_escapes(csubstr scalar, size_t pos, bool keep_newlines=false)
Adjust a position in a scalar, increasing it to account for any escaped characters.
size_t to_chars(substr buf, escaped_scalar e)
formatting implementation to escape a scalar with escape_scalar()
RYML_ID_TYPE id_type
The type of a node id in the YAML tree; to override the default type, define the macro RYML_ID_TYPE t...
Definition common.hpp:124
@ UTF16BE
UTF16, Big-Endian.
Definition common.hpp:146
@ UTF8
UTF8.
Definition common.hpp:144
@ UTF16LE
UTF16, Little-Endian.
Definition common.hpp:145
@ NOBOM
No Byte Order Mark was found.
Definition common.hpp:143
@ UTF32BE
UTF32, Big-Endian.
Definition common.hpp:148
@ UTF32LE
UTF32, Little-Endian.
Definition common.hpp:147
enum c4::yml::Encoding_ Encoding_e
csubstr version()
Definition version.cpp:6
@ NONE
an index to none
Definition common.hpp:131
#define _c4dbgnextline()
#define RYML_SAVE_TEST_YAML_(filename, src)
#define RYML_SAVE_TEST_JSON_(filename, src)
#define _ryml_relocate(s)
#define _c4err(...)
#define _c4assert(...)
#define RYML_WITH_TAB_TOKENS_(...)
#define RYML_WITH_OR_WITHOUT_TAB_TOKENS_(with, without)
basic_substring range(size_t first, size_t last=npos) const noexcept
return [first,last[.
Definition substr.hpp:519
size_t first_not_of(const C c) const
Definition substr.hpp:993
basic_substring triml(const C c) const
trim left
Definition substr.hpp:629
size_t first_of(const C c, size_t start=0) const
Definition substr.hpp:934
basic_substring first(size_t num) const noexcept
return the first num elements: [0,num[
Definition substr.hpp:529
basic_substring sub(size_t first) const noexcept
return [first,len[
Definition substr.hpp:502
basic_substring trimr(const C c) const
trim the character c from the right
Definition substr.hpp:653
C * str
a restricted pointer to the first character of the substring
Definition substr.hpp:216
Data for a parse error.
Definition common.hpp:269
Filters an input string into a different output string.
Result of filtering a scalar, which may not fit in the intended memory.
Result of filtering a scalar, which may not fit in the intended memory.
Helper to control the line contents while parsing a buffer.
void reset_with_next_line(substr buf, size_t start) RYML_NOEXCEPT
holds a source or yaml file position, for example when an error is detected; See also location_format...
Definition common.hpp:229
csubstr name
name of the file
Definition common.hpp:233
Options to give to the ParseEngine to control its behavior.
Accelerator structure to reduce memory requirements by enabling reuse of resolved tags.
Definition tag.hpp:71
formatting helper to escape a scalar with escape_scalar_fn()
utilities for UTF and Byte Order Mark