rapidyaml 0.14.0
parse and emit YAML, and do it fast
Loading...
Searching...
No Matches
parse_engine.def.hpp
Go to the documentation of this file.
1#ifndef _C4_YML_PARSE_ENGINE_DEF_HPP_
2#define _C4_YML_PARSE_ENGINE_DEF_HPP_
3
4#ifndef _C4_YML_PARSE_ENGINE_HPP_
6#endif
7#ifndef _C4_CHARCONV_HPP_
8#include "c4/charconv.hpp"
9#endif
10#ifndef C4_UTF_HPP_
11#include "c4/utf.hpp"
12#endif
13#ifndef _C4_YML_FILTER_PROCESSOR_HPP_
15#endif
16#ifndef _C4_YML_TAG_HPP_
17#include "c4/yml/tag.hpp"
18#endif
19#ifndef _C4_YML_NODE_TYPE_HPP_
20#include "c4/yml/node_type.hpp"
21#endif
22
23#ifndef _C4_YML_DETAIL_DBGPRINT_HPP_
24#include "c4/yml/detail/dbgprint.hpp"
25#endif
26
27#ifdef RYML_DBG
28#ifndef C4_DUMP_HPP_
29#include <c4/dump.hpp>
30#endif
31#define _c4err(...) \
32 do { RYML_DEBUG_BREAK(); this->_err(RYML_LOC_HERE(), __VA_ARGS__); } while(0)
33#else
34#define _c4err(...) \
35 this->_err(RYML_LOC_HERE(), __VA_ARGS__)
36#endif
37#define _c4assert(...) \
38 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, __VA_ARGS__, m_evt_handler->m_curr->pos)
39
40
41#if defined(RYML_WITH_TAB_TOKENS)
42#define _RYML_WITH_TAB_TOKENS(...) __VA_ARGS__
43#define _RYML_WITHOUT_TAB_TOKENS(...)
44#define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) with
45#else
46#define _RYML_WITH_TAB_TOKENS(...)
47#define _RYML_WITHOUT_TAB_TOKENS(...) __VA_ARGS__
48#define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) without
49#endif
50
51// helper to export cases to the YAML test suite
52#ifndef RYML_SAVE_TEST_YAML
53#define _RYML_SAVE_TEST_YAML(filename, src)
54#define _RYML_SAVE_TEST_JSON(filename, src)
55#else
56#define _RYML_SAVE_TEST_YAML(filename, src) c4::yml::ryml_save_test_yaml(filename, src)
57#define _RYML_SAVE_TEST_JSON(filename, src) c4::yml::ryml_save_test_json(filename, src)
58namespace c4 {
59namespace yml {
60void ryml_save_test_yaml(csubstr filename, csubstr src);
61void ryml_save_test_json(csubstr filename, csubstr src);
62} // namespace yml
63} // namespace c4
64#endif
65
66
67// scaffold:
68#define _c4dbgnextline() \
69 do { \
70 _c4dbgq("\n-----------"); \
71 _c4dbgt("handling line={}, offset={}B", \
72 m_evt_handler->m_curr->pos.line, \
73 m_evt_handler->m_curr->pos.offset); \
74 } while(0)
75
76
77C4_SUPPRESS_WARNING_MSVC_PUSH
78C4_SUPPRESS_WARNING_MSVC(4296) // expression is always 'boolean_value'
79C4_SUPPRESS_WARNING_MSVC(4702) // unreachable code
80C4_SUPPRESS_WARNING_GCC_CLANG_PUSH
81C4_SUPPRESS_WARNING_GCC_CLANG("-Wtype-limits") // to remove a warning on an assertion that a size_t >= 0. Later on, this size_t will turn into a template argument, and then it can become < 0.
82C4_SUPPRESS_WARNING_GCC_CLANG("-Wformat-nonliteral")
83C4_SUPPRESS_WARNING_GCC_CLANG("-Wold-style-cast")
84#if defined(__GNUC__) && (__GNUC__ >= 6)
85C4_SUPPRESS_WARNING_GCC("-Wnull-dereference")
86#endif
87#if defined(__GNUC__) && (__GNUC__ >= 7)
88C4_SUPPRESS_WARNING_GCC("-Wduplicated-branches")
89#endif
90
91// NOLINTBEGIN(hicpp-signed-bitwise,cppcoreguidelines-avoid-goto,hicpp-avoid-goto,hicpp-multiway-paths-covered,modernize-avoid-c-style-cast)
92
93namespace c4 {
94namespace yml {
95
96namespace { // NOLINT
97
98C4_HOT C4_ALWAYS_INLINE void _set_first(substr &C4_RESTRICT subject, size_t pos) noexcept
99{
100 // avoids reassigning the ptr in substr
101 subject.len = pos != npos ? pos : subject.len;
102}
103C4_HOT C4_ALWAYS_INLINE void _set_first(csubstr &C4_RESTRICT subject, size_t pos) noexcept
104{
105 // avoids reassigning the ptr in substr
106 subject.len = pos != npos ? pos : subject.len;
107}
108C4_HOT C4_ALWAYS_INLINE void _set_first_strict(substr &C4_RESTRICT subject, size_t pos) RYML_NOEXCEPT
109{
110 // avoids reassigning the ptr in substr
111 _RYML_ASSERT_BASIC(pos != npos); // LCOV_EXCL_LINE
112 subject.len = pos;
113}
114C4_HOT C4_ALWAYS_INLINE void _set_first_strict(csubstr &C4_RESTRICT subject, size_t pos) RYML_NOEXCEPT
115{
116 // avoids reassigning the ptr in substr
117 _RYML_ASSERT_BASIC(pos != npos); // LCOV_EXCL_LINE
118 subject.len = pos;
119}
120
121C4_HOT C4_ALWAYS_INLINE bool _is_blck_token(csubstr s) RYML_NOEXCEPT
122{
123 _RYML_ASSERT_BASIC(s.len > 0);
124 _RYML_ASSERT_BASIC(s.str[0] == '-' || s.str[0] == ':' || s.str[0] == '?');
125 return ((s.len == 1) || ((s.str[1] == ' ') _RYML_WITH_TAB_TOKENS( || (s.str[1] == '\t'))));
126}
127
128C4_HOT C4_ALWAYS_INLINE bool _is_blck_seq_token_maybe(csubstr const& C4_RESTRICT s) noexcept
129{
130 return ((s.len >= 1) && (s.str[0] == '-') && ((s.len == 1) || ((s.str[1] == ' ') _RYML_WITH_TAB_TOKENS( || (s.str[1] == '\t')))));
131}
132
133inline bool _is_doc_begin_token(csubstr s) RYML_NOEXCEPT
134{
135 _RYML_ASSERT_BASIC(s.begins_with('-'));
136 _RYML_ASSERT_BASIC(!s.ends_with("\n"));
137 _RYML_ASSERT_BASIC(!s.ends_with("\r"));
138 return (s.len >= 3 && s.str[1] == '-' && s.str[2] == '-')
139 && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
140}
141
142inline bool _is_doc_end_token(csubstr s) RYML_NOEXCEPT
143{
144 _RYML_ASSERT_BASIC(s.begins_with('.'));
145 _RYML_ASSERT_BASIC(!s.ends_with("\n"));
146 _RYML_ASSERT_BASIC(!s.ends_with("\r"));
147 return (s.len >= 3 && s.str[1] == '.' && s.str[2] == '.')
148 && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
149}
150
151inline bool _is_doc_token(csubstr s) noexcept
152{
153 if(s.len >= 3)
154 {
155 switch(s.str[0])
156 {
157 case '-':
158 //return _is_doc_begin_token(s); // this was failing with gcc -O2
159 return (s.str[1] == '-' && s.str[2] == '-')
160 && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
161 case '.':
162 //return _is_doc_end_token(s); // this was failing with gcc -O2
163 return (s.str[1] == '.' && s.str[2] == '.')
164 && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
165 }
166 }
167 return false;
168}
169
170inline size_t _begins_with_special_json_scalar(csubstr s) RYML_NOEXCEPT
171{
172 _RYML_ASSERT_BASIC(s.len);
173 switch(s.str[0])
174 {
175 case 'f':
176 return s.begins_with("false") ? 5u : 0u;
177 case 't':
178 return s.begins_with("true") ? 4u : 0u;
179 case 'n':
180 return s.begins_with("null") ? 4u : 0u;
181 }
182 return 0u;
183}
184
185
186//-----------------------------------------------------------------------------
187
188C4_ALWAYS_INLINE size_t _extend_from_combined_newline(char nl, char following)
189{
190 return (nl == '\n' && following == '\r') || (nl == '\r' && following == '\n');
191}
192
193//! look for the next newline chars, and jump to the right of those
194inline substr _from_next_line(substr rem)
195{
196 size_t nlpos = rem.first_of("\r\n");
197 if(nlpos == csubstr::npos)
198 return {};
199 const char nl = rem[nlpos];
200 rem = rem.right_of(nlpos);
201 if(rem.empty())
202 return {};
203 if(_extend_from_combined_newline(nl, rem.front()))
204 rem = rem.sub(1);
205 return rem;
206}
207
208
209//-----------------------------------------------------------------------------
210
211inline size_t _count_following_newlines(csubstr r, size_t *C4_RESTRICT i)
212{
213 _RYML_ASSERT_BASIC(r[*i] == '\n');
214 size_t numnl_following = 0;
215 ++(*i);
216 for( ; *i < r.len; ++(*i))
217 {
218 if(r.str[*i] == '\n')
219 ++numnl_following;
220 // skip leading whitespace
221 else if(r.str[*i] == ' ' || r.str[*i] == '\t' || r.str[*i] == '\r')
222 ;
223 else
224 break;
225 }
226 return numnl_following;
227}
228
229/** @p i is set to the first non whitespace character after the line
230 * @return the number of empty lines after the initial position */
231inline size_t _count_following_newlines(csubstr r, size_t *C4_RESTRICT i, size_t indentation)
232{
233 _RYML_ASSERT_BASIC(r[*i] == '\n');
234 size_t numnl_following = 0;
235 ++(*i);
236 if(indentation == 0)
237 {
238 for( ; *i < r.len; ++(*i))
239 {
240 const char c = r.str[*i];
241 if(c == '\n')
242 ++numnl_following;
243 // skip leading whitespace
244 else if(c != ' ' && c != '\t' && c != '\r')
245 break;
246 }
247 }
248 else
249 {
250 for( ; *i < r.len; ++(*i))
251 {
252 char c = r.str[*i];
253 if(c == '\n')
254 {
255 ++numnl_following;
256 // skip the indentation after the newline
257 size_t stop = *i + indentation;
258 for( ; *i < r.len; ++(*i))
259 {
260 c = r.str[*i];
261 if(c != ' ' && c != '\r')
262 break;
263 _RYML_ASSERT_BASIC(*i < stop); // LCOV_EXCL_LINE
264 }
265 C4_UNUSED(stop);
266 }
267 // skip leading whitespace
268 else if(c != ' ' && c != '\t' && c != '\r')
269 {
270 break;
271 }
272 }
273 }
274 return numnl_following;
275}
276
277} // anon namespace
278
279
280//-----------------------------------------------------------------------------
281//-----------------------------------------------------------------------------
282//-----------------------------------------------------------------------------
283
// NOTE(review): the declarator line of this definition (Doxygen line 285)
// is missing from this extract. Judging by the body this is presumably the
// ParseEngine destructor -- confirm against the original file.
284template<class EventHandler>
286{
// release the newline-offsets buffer, if one is held
287 _free();
// then put every member back to its cleared value
288 _clr();
289}
290
// NOTE(review): the declarator line of this definition (Doxygen line 292)
// is missing from this extract. The initializer list shows a constructor
// that receives `evt_handler` and `opts`; presumably the main ParseEngine
// constructor -- confirm against the original file.
// Stores the options and the event handler, and starts every other member
// at its empty/default value. No newline-offsets buffer is held initially.
291template<class EventHandler>
293 : m_options(opts)
294 , m_evt_handler(evt_handler)
295 , m_pending_anchors()
296 , m_pending_tags()
297 , m_has_directives_yaml(false)
298 , m_has_directives(false)
299 , m_doc_empty(true)
300 , m_prev_colon(npos)
301 , m_prev_val_end(npos)
302 , m_encoding(NOBOM)
303 , m_newline_offsets()
304 , m_newline_offsets_size(0)
305 , m_newline_offsets_capacity(0)
306{
// the event handler is checked (not merely asserted) to be non-null
307 _RYML_CHECK_BASIC(evt_handler);
308}
309
// NOTE(review): the declarator line of this definition (Doxygen line 311)
// is missing from this extract. The body takes over the members of `that`
// and then clears `that`; presumably the move constructor -- confirm.
// The newline-offsets pointer, size and capacity are taken as they are
// (no new buffer is requested here). m_prev_colon, m_prev_val_end and
// m_encoding are not taken from `that`: they start at npos/npos/NOBOM.
310template<class EventHandler>
312 : m_options(that.m_options)
313 , m_evt_handler(that.m_evt_handler)
314 , m_pending_anchors(that.m_pending_anchors)
315 , m_pending_tags(that.m_pending_tags)
316 , m_has_directives_yaml(that.m_has_directives_yaml)
317 , m_has_directives(that.m_has_directives)
318 , m_doc_empty(that.m_doc_empty)
319 , m_prev_colon(npos)
320 , m_prev_val_end(npos)
321 , m_encoding(NOBOM)
322 , m_newline_offsets(that.m_newline_offsets)
323 , m_newline_offsets_size(that.m_newline_offsets_size)
324 , m_newline_offsets_capacity(that.m_newline_offsets_capacity)
325{
// reset `that` so that it no longer refers to the buffer now held here
326 that._clr();
327}
328
// NOTE(review): the declarator line of this definition (Doxygen line 330)
// is missing from this extract. The body duplicates the state of `that`
// without modifying it; presumably the copy constructor -- confirm.
// Scalar members are copied (except m_prev_colon, m_prev_val_end and
// m_encoding, which start at npos/npos/NOBOM); the newline-offsets buffer
// starts empty and is duplicated in the body.
329template<class EventHandler>
331 : m_options(that.m_options)
332 , m_evt_handler(that.m_evt_handler)
333 , m_pending_anchors(that.m_pending_anchors)
334 , m_pending_tags(that.m_pending_tags)
335 , m_has_directives_yaml(that.m_has_directives_yaml)
336 , m_has_directives(that.m_has_directives)
337 , m_doc_empty(that.m_doc_empty)
338 , m_prev_colon(npos)
339 , m_prev_val_end(npos)
340 , m_encoding(NOBOM)
341 , m_newline_offsets()
342 , m_newline_offsets_size()
343 , m_newline_offsets_capacity()
344{
// only when `that` has a buffer: get one of the same capacity and copy
// the used entries over
345 if(that.m_newline_offsets_capacity)
346 {
347 _resize_locations(that.m_newline_offsets_capacity);
// the resize is expected to result in exactly the requested capacity
348 _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity == that.m_newline_offsets_capacity);
// copy only the entries in use (size), not the whole capacity
349 memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size * sizeof(size_t));
350 m_newline_offsets_size = that.m_newline_offsets_size;
351 }
352}
353
// NOTE(review): the declarator line of this definition (Doxygen line 355)
// is missing from this extract. The body takes over the members of `that`,
// clears `that` and returns *this; presumably the move-assignment
// operator -- confirm against the original file.
// NOTE(review): unlike the assignment operator defined right after this
// one, there is no `&that != this` guard here; a self-assignment would
// free the buffer and then clear this very object -- confirm whether
// that case needs handling.
354template<class EventHandler>
356{
// release the buffer currently held, before taking over the one of `that`
357 _free();
358 m_options = (that.m_options);
359 m_evt_handler = that.m_evt_handler;
360 m_pending_anchors = that.m_pending_anchors;
361 m_pending_tags = that.m_pending_tags;
362 m_has_directives_yaml = that.m_has_directives_yaml;
363 m_has_directives = that.m_has_directives;
364 m_doc_empty = that.m_doc_empty;
365 m_prev_colon = that.m_prev_colon;
366 m_prev_val_end = that.m_prev_val_end;
367 m_encoding = that.m_encoding;
// the buffer pointer, size and capacity are taken as they are (no copy)
368 m_newline_offsets = (that.m_newline_offsets);
369 m_newline_offsets_size = (that.m_newline_offsets_size);
370 m_newline_offsets_capacity = (that.m_newline_offsets_capacity);
// reset `that` so that it no longer refers to the buffer now held here
371 that._clr();
372 return *this;
373}
374
// NOTE(review): the declarator line of this definition (Doxygen line 376)
// is missing from this extract. The body duplicates the state of `that`
// without modifying it and returns *this; presumably the copy-assignment
// operator -- confirm against the original file.
375template<class EventHandler>
377{
// self-assignment is a no-op
378 if(&that != this)
379 {
// release the current buffer first; note this happens while
// m_evt_handler still has its previous value
380 _free();
381 m_options = (that.m_options);
382 m_evt_handler = that.m_evt_handler;
383 m_pending_anchors = that.m_pending_anchors;
384 m_pending_tags = that.m_pending_tags;
385 m_has_directives_yaml = that.m_has_directives_yaml;
386 m_has_directives = that.m_has_directives;
387 m_doc_empty = that.m_doc_empty;
388 m_prev_colon = that.m_prev_colon;
389 m_prev_val_end = that.m_prev_val_end;
390 m_encoding = that.m_encoding;
// get a buffer able to hold the entries of `that`, when one is needed
391 if(that.m_newline_offsets_capacity > m_newline_offsets_capacity)
392 _resize_locations(that.m_newline_offsets_capacity);
393 _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_capacity);
394 _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_size);
// copy only the entries in use (size), not the whole capacity
395 memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size * sizeof(size_t));
396 m_newline_offsets_size = that.m_newline_offsets_size;
397 }
398 return *this;
399}
400
401template<class EventHandler>
402void ParseEngine<EventHandler>::_clr()
403{
404 m_options = {};
405 m_evt_handler = {};
406 m_pending_anchors = {};
407 m_pending_tags = {};
408 m_has_directives_yaml = false;
409 m_has_directives = false;
410 m_doc_empty = true;
411 m_prev_colon = npos;
412 m_prev_val_end = npos;
413 m_encoding = NOBOM;
414 m_newline_offsets = {};
415 m_newline_offsets_size = {};
416 m_newline_offsets_capacity = {};
417}
418
419template<class EventHandler>
420void ParseEngine<EventHandler>::_free()
421{
422 if(m_newline_offsets)
423 {
424 _RYML_CB_FREE(m_evt_handler->m_stack.m_callbacks, m_newline_offsets, size_t, m_newline_offsets_capacity);
425 m_newline_offsets = nullptr;
426 m_newline_offsets_size = 0u;
427 m_newline_offsets_capacity = 0u;
428 }
429}
430
431
432//-----------------------------------------------------------------------------
433
434template<class EventHandler>
435void ParseEngine<EventHandler>::_reset()
436{
437 m_pending_anchors = {};
438 m_pending_tags = {};
439 m_has_directives_yaml = false;
440 m_has_directives = false;
441 m_doc_empty = true;
442 m_prev_colon = npos;
443 m_prev_val_end = npos;
444 m_bom_len = 0;
445 m_encoding = NOBOM;
446 m_bom_line = 0;
447 if(m_options.locations())
448 {
449 _prepare_locations();
450 }
451}
452
453
454//-----------------------------------------------------------------------------
455
456template<class EventHandler>
457void ParseEngine<EventHandler>::_relocate_arena(csubstr prev_arena, substr next_arena, substr *other)
458{
459 _c4dbgp("relocate to new arena");
460 const char *pb = prev_arena.str;
461 const char *pe = prev_arena.str + prev_arena.len;
462 #define _ryml_relocate(s) \
463 if((s).str >= pb && (s).str <= pe) \
464 { \
465 (s).str = next_arena.str + ((s).str - pb); \
466 } \
467 ((void)0)
468 for(ParserState &st : m_evt_handler->m_stack)
469 {
470 _ryml_relocate(st.line_contents.rem);
471 _ryml_relocate(st.line_contents.full);
472 }
473 _ryml_relocate(m_evt_handler->m_src);
474 for(size_t i = 0; i < m_pending_tags.num_entries; ++i)
475 {
476 _ryml_relocate(m_pending_tags.annotations[i].str); // LCOV_EXCL_LINE
477 _ryml_relocate(m_pending_tags.annotations[i].orig); // LCOV_EXCL_LINE
478 }
479 for(size_t i = 0; i < m_pending_anchors.num_entries; ++i)
480 {
481 _ryml_relocate(m_pending_anchors.annotations[i].str);
482 _ryml_relocate(m_pending_anchors.annotations[i].orig);
483 }
484 {
485 TagDirectives &tds = m_evt_handler->tag_directives();
486 for(size_t i = 0, sz = tds.size(); i < sz; ++i)
487 {
488 _ryml_relocate(tds.m_directives[i].handle);
489 _ryml_relocate(tds.m_directives[i].prefix);
490 }
491 }
492 {
493 TagCache &tch = m_evt_handler->tag_cache();
494 for(id_type i = 0, sz = tch.m_entries.size(); i < sz; ++i)
495 {
496 _ryml_relocate(tch.m_entries[i].tag);
497 _ryml_relocate(tch.m_entries[i].resolved);
498 }
499 }
500 if(other)
501 {
502 _ryml_relocate(*other);
503 }
504 #undef _ryml_relocate
505}
506
507/** @cond dev */
// NOTE(review): the declarator line of this definition (Doxygen line 509)
// is missing from this extract. The body uses `len` and `other`, which are
// presumably its parameters -- confirm name and signature against the
// original file.
// Requests `len` characters from the arena of the event handler and
// returns the result of that request. When the request made the arena
// start at a different address, the views pointing into the previous
// arena (and `other`) are rebased via _relocate_arena().
508template<class EventHandler>
510{
// remember where the arena was before the allocation
511 csubstr prev = m_evt_handler->arena();
512 substr out = m_evt_handler->alloc_arena(len);
513 substr curr = m_evt_handler->arena();
// a different start address means the arena was moved
514 if(curr.str != prev.str)
515 _relocate_arena(prev, curr, other);
516 return out;
517}
518/** @endcond */
519
520
521//-----------------------------------------------------------------------------
522
523#ifdef RYML_DBG
524template<class EventHandler>
525template<class DumpFn>
526C4_NO_INLINE void ParseEngine<EventHandler>::_fmt_msg(DumpFn &&dumpfn) const
527{
528 ParserState const *const C4_RESTRICT st = m_evt_handler->m_curr;
529 LineContents const& C4_RESTRICT lc = st->line_contents;
530 csubstr contents = lc.full.first(lc.num_cols);
531 if(contents.len)
532 {
533 // print the yaml src line
534 size_t offs = 3u + to_chars(substr{}, st->pos.line) + to_chars(substr{}, st->pos.col);
535 csubstr m_file = m_evt_handler->m_curr->pos.name;
536 if(m_file.len)
537 {
538 _dbg_dump(std::forward<DumpFn>(dumpfn), "{}:", m_file);
539 offs += m_file.len + 1;
540 }
541 _dbg_dump(std::forward<DumpFn>(dumpfn), "{}:{}: ", st->pos.line, st->pos.col);
542 csubstr maybe_full_content = (contents.len < 80u ? contents : contents.first(80u));
543 csubstr maybe_ellipsis = (contents.len < 80u ? csubstr{} : csubstr("..."));
544 _dbg_dump(std::forward<DumpFn>(dumpfn), "{}{} (size={})\n", escaped_scalar(maybe_full_content, /*escape*/true), maybe_ellipsis, contents.len);
545 // highlight the remaining portion of the previous line
546 size_t firstcol = (size_t)(lc.rem.str - lc.full.str);
547 size_t lastcol = firstcol + lc.rem.len;
548 size_t firstcol_adj = adjust_pos_with_escapes(lc.full, firstcol);
549 size_t len = adjust_pos_with_escapes(lc.rem, lc.rem.len);
550 for(size_t i = 0; i < offs + firstcol_adj; ++i)
551 std::forward<DumpFn>(dumpfn)(" ");
552 std::forward<DumpFn>(dumpfn)("^");
553 for(size_t i = 1, e = (len < 80u ? len : 80u); i < e; ++i)
554 std::forward<DumpFn>(dumpfn)("~");
555 _dbg_dump(std::forward<DumpFn>(dumpfn), "{} (cols {}-{})\n", maybe_ellipsis, firstcol+1, lastcol+1);
556 }
557 else
558 {
559 std::forward<DumpFn>(dumpfn)("\n");
560 }
561 // next line: print the state flags
562 {
563 char flagbuf_[128];
564 _dbg_dump(std::forward<DumpFn>(dumpfn), "top state: {}\n", detail::_parser_flags_to_str(flagbuf_, m_evt_handler->m_curr->flags));
565 }
566}
567
// NOTE(review): the declarator line of this definition (Doxygen line 569)
// is missing from this extract. The body uses `buf`, presumably a
// parameter providing scratch space for the flags string -- confirm name
// and signature against the original file.
// When debug output is enabled, prints one line per state in the stack of
// the event handler: level, indref, node id and flags.
568template<class EventHandler>
570{
571 if(_dbg_enabled())
572 {
573 for(ParserState const& s : m_evt_handler->m_stack)
574 _dbg_printf("state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(buf, s.flags));
575 }
576}
577
// NOTE(review): the declarator line of this definition (Doxygen line 579)
// is missing from this extract; presumably a convenience overload taking
// no buffer -- confirm against the original file.
// Provides a local 128-character buffer and forwards to
// _print_state_stack(buf).
578template<class EventHandler>
580{
581 char buf[128];
582 _print_state_stack(buf);
583}
584#endif
585
586
587//-----------------------------------------------------------------------------
588
589template<class EventHandler>
590template<class ...Args>
591C4_NORETURN C4_NO_INLINE void ParseEngine<EventHandler>::_err(Location const& cpploc, Location const& ymlloc, const char* fmt, Args const& ...args) const
592{
593 m_evt_handler->cancel_parse();
594 err_parse(m_evt_handler->m_stack.m_callbacks, ErrorDataParse{cpploc, ymlloc}, fmt, args...);
595}
596
597template<class EventHandler>
598template<class ...Args>
599C4_NORETURN C4_NO_INLINE void ParseEngine<EventHandler>::_err(Location const& cpploc, const char *fmt, Args const& ...args) const
600{
601 m_evt_handler->cancel_parse();
602 err_parse(m_evt_handler->m_stack.m_callbacks, ErrorDataParse{cpploc, m_evt_handler->m_curr->pos}, fmt, args...);
603}
604
605
606//-----------------------------------------------------------------------------
607#ifdef RYML_DBG
608template<class EventHandler>
609template<class ...Args>
610void ParseEngine<EventHandler>::_dbg(csubstr fmt, Args const& ...args) const
611{
612 if(_dbg_enabled())
613 {
614 _dbg_printf(fmt, args...);
615 _dbg_dumper("\n");
616 _fmt_msg(_dbg_dumper);
617 }
618}
619#endif
620
621
622//-----------------------------------------------------------------------------
623template<class EventHandler>
624bool ParseEngine<EventHandler>::_finished_file() const
625{
626 bool ret = m_evt_handler->m_curr->pos.offset >= _buf().len;
627 if(ret)
628 {
629 _c4dbgp("finished file!!!");
630 }
631 return ret;
632}
633
634template<class EventHandler>
635C4_HOT C4_ALWAYS_INLINE bool ParseEngine<EventHandler>::_finished_line() const // LCOV_EXCL_LINE
636{
637 return m_evt_handler->m_curr->line_contents.rem.empty();
638}
639
640
641//-----------------------------------------------------------------------------
642
643template<class EventHandler>
644void ParseEngine<EventHandler>::_maybe_skip_whitespace_tokens()
645{
646 if(m_evt_handler->m_curr->line_contents.rem.len && (m_evt_handler->m_curr->line_contents.rem.str[0] == ' ' _RYML_WITH_TAB_TOKENS(|| m_evt_handler->m_curr->line_contents.rem.str[0] == '\t')))
647 {
648 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' '));
649 if(pos == npos)
650 pos = m_evt_handler->m_curr->line_contents.rem.len; // maybe the line is just all whitespace
651 _c4dbgpf("skip {} whitespace characters", pos);
652 _line_progressed(pos);
653 }
654}
655
656template<class EventHandler>
657void ParseEngine<EventHandler>::_maybe_skipchars(char c)
658{
659 if(m_evt_handler->m_curr->line_contents.rem.len && m_evt_handler->m_curr->line_contents.rem.str[0] == c)
660 {
661 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(c);
662 if(pos == npos)
663 pos = m_evt_handler->m_curr->line_contents.rem.len; // maybe the line is just all c
664 _c4dbgpf("skip {}x'{}'", pos, _c4prc(c));
665 _line_progressed(pos);
666 }
667}
668
669template<class EventHandler>
670template<size_t N>
671void ParseEngine<EventHandler>::_skipchars(const char (&chars)[N])
672{
673 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.begins_with_any(chars), m_evt_handler->m_curr->pos);
674 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(chars);
675 if(pos == npos)
676 pos = m_evt_handler->m_curr->line_contents.rem.len; // maybe the line is just whitespace
677 _c4dbgpf("skip {} characters", pos);
678 _line_progressed(pos);
679}
680
681template<class EventHandler>
682void ParseEngine<EventHandler>::_skip_comment()
683{
684 LineContents const& C4_RESTRICT lc = m_evt_handler->m_curr->line_contents;
685 const size_t col = m_evt_handler->m_curr->pos.col - 1u;
686 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, lc.rem.begins_with('#'), m_evt_handler->m_curr->pos);
687 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, lc.rem.is_sub(lc.full), m_evt_handler->m_curr->pos);
688 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col >= 1, m_evt_handler->m_curr->pos); // 1-based
689 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, col == ((size_t)(lc.rem.str - lc.full.str)), m_evt_handler->m_curr->pos);
690 // raise an error if the comment is not preceded by whitespace
691 if(lc.rem.str != lc.full.str) // not at line beginning
692 {
693 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, col > 0, m_evt_handler->m_curr->pos);
694 const char prev = lc.full.str[col - 1u];
695 if(C4_UNLIKELY(prev != ' ' && prev != '\t'))
696 _c4err("comment not preceded by whitespace");
697 }
698 _c4dbgpf("comment was '{}'", m_evt_handler->m_curr->line_contents.rem);
699 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
700}
701
702template<class EventHandler>
703void ParseEngine<EventHandler>::_maybe_skip_comment_strict()
704{
705 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(" \t");
706 if(pos != npos)
707 {
708 if('#' == m_evt_handler->m_curr->line_contents.rem[pos])
709 {
710 _line_progressed(pos);
711 _skip_comment();
712 }
713 }
714}
715
716template<class EventHandler>
717void ParseEngine<EventHandler>::_maybe_skip_comment()
718{
719 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(" \t");
720 if(pos != npos)
721 {
722 if('#' == m_evt_handler->m_curr->line_contents.rem[pos])
723 {
724 _line_progressed(pos);
725 _skip_comment();
726 }
727 }
728 else
729 {
730 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
731 }
732}
733
734template<class EventHandler>
735bool ParseEngine<EventHandler>::_maybe_scan_following_colon() noexcept
736{
737 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(" \t");
738 if(pos != npos)
739 {
740 if(':' == m_evt_handler->m_curr->line_contents.rem[pos])
741 {
742 // bump pos to skip the colon as well, and check the colon
743 // is followed by space or tab
744 if(++pos < m_evt_handler->m_curr->line_contents.rem.len)
745 {
746 const char next = m_evt_handler->m_curr->line_contents.rem.str[pos];
747 if(next == ' ' _RYML_WITH_TAB_TOKENS(|| next == '\t'))
748 ++pos;
749 else
750 return false;
751 }
752 _line_progressed(pos);
753 return true;
754 }
755 }
756 else
757 {
758 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
759 }
760 return false;
761}
762
763
764//-----------------------------------------------------------------------------
765
766template<class EventHandler>
767csubstr ParseEngine<EventHandler>::_scan_anchor()
768{
769 csubstr s = m_evt_handler->m_curr->line_contents.rem;
770 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with('&'), m_evt_handler->m_curr->pos);
771 csubstr anchor = s.range(1, s.first_of(" ,]}\t"));
772 _line_progressed(1u + anchor.len);
773 _maybe_skipchars(' ');
774 return anchor;
775}
776
777template<class EventHandler>
778csubstr ParseEngine<EventHandler>::_scan_ref_seq()
779{
780 csubstr s = m_evt_handler->m_curr->line_contents.rem;
781 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with('*'), m_evt_handler->m_curr->pos);
782 _set_first(s, s.first_of(" ,]\t"));
783 _line_progressed(s.len);
784 return s;
785}
786
787template<class EventHandler>
788csubstr ParseEngine<EventHandler>::_scan_ref_map()
789{
790 csubstr s = m_evt_handler->m_curr->line_contents.rem;
791 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with('*'), m_evt_handler->m_curr->pos);
792 _set_first(s, s.first_of(" ,}\t"));
793 _line_progressed(s.len);
794 return s;
795}
796
797template<class EventHandler>
798csubstr ParseEngine<EventHandler>::_scan_tag()
799{
800 csubstr t = m_evt_handler->m_curr->line_contents.rem;
801 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, t.begins_with('!'), m_evt_handler->m_curr->pos);
802 if(!t.begins_with("!<"))
803 {
804 _c4dbgp("begins with '!'");
805 _set_first(t, t.first_of(" ,]}\t"));
806 if(C4_UNLIKELY(t.first_of("[{") != npos))
807 _c4err("invalid tag");
808 _line_progressed(t.len);
809 if(m_options.resolve_tags_all() || (m_options.resolve_tags() && is_custom_tag(t)))
810 t = _resolve_tag(t);
811 }
812 else
813 {
814 _c4dbgp("begins with '!<'");
815 size_t pos = t.find('>');
816 if(C4_UNLIKELY(pos == npos))
817 _c4err("invalid tag");
818 _set_first_strict(t, pos+1);
819 _line_progressed(t.len);
820 t = t.sub(1);
821 }
822 _maybe_skip_whitespace_tokens();
823 return t;
824}
825
826template<class EventHandler>
827csubstr ParseEngine<EventHandler>::_scan_tag(csubstr *orig)
828{
829 csubstr t = m_evt_handler->m_curr->line_contents.rem;
830 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, t.begins_with('!'), m_evt_handler->m_curr->pos);
831 if(!t.begins_with("!<"))
832 {
833 _c4dbgp("begins with '!'");
834 _set_first(t, t.first_of(" ,\t"));
835 if(C4_UNLIKELY(t.first_of("[{") != npos))
836 _c4err("invalid tag");
837 _line_progressed(t.len);
838 *orig = t;
839 if(m_options.resolve_tags_all() || (m_options.resolve_tags() && is_custom_tag(t)))
840 t = _resolve_tag(t);
841 }
842 else
843 {
844 _c4dbgp("begins with '!<'");
845 size_t pos = t.find('>');
846 if(C4_UNLIKELY(pos == npos))
847 _c4err("invalid tag");
848 _set_first_strict(t, pos+1);
849 _line_progressed(t.len);
850 *orig = t;
851 t = t.sub(1);
852 }
853 _maybe_skip_whitespace_tokens();
854 return t;
855}
856
857
858//-----------------------------------------------------------------------------
859
860template<class EventHandler>
861bool ParseEngine<EventHandler>::_is_valid_start_scalar_plain_flow_check_block_token(csubstr s)
862{
863 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.len > 0, m_evt_handler->m_curr->pos);
864 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with_any(":-"), m_evt_handler->m_curr->pos);
865 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.count('\n') == 0, m_evt_handler->m_curr->pos);
866 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.count('\r') == 0, m_evt_handler->m_curr->pos);
867 if(s.len > 1)
868 {
869 switch(s.str[1])
870 {
871 case ' ':
872 case ',':
873 case '}':
874 case ']':
875 case '\t':
876 if(s.str[0] == ':')
877 {
878 _c4dbgpf("not a scalar: found non-scalar token '{}{}'", s.str[0], s.str[1]);
879 return false;
880 }
881 else
882 {
883 _c4err("invalid scalar");
884 }
885 break;
886 case '{':
887 case '[':
888 _c4err("invalid token \":{}\"", _c4prc(s.str[1]));
889 break;
890 default:
891 break;
892 }
893 }
894 else
895 {
896 if(s.str[0] == '-')
897 _c4err("invalid scalar");
898 return false;
899 }
900 return true;
901}
902
903template<class EventHandler>
904bool ParseEngine<EventHandler>::_is_valid_start_scalar_plain_flow_check_qmrk(csubstr s)
905{
906 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.len > 0, m_evt_handler->m_curr->pos);
907 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s[0] == '?', m_evt_handler->m_curr->pos);
908 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.count('\n') == 0, m_evt_handler->m_curr->pos);
909 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.count('\r') == 0, m_evt_handler->m_curr->pos);
910 if(s.len > 1)
911 {
912 switch(s.str[1])
913 {
914 case ' ':
915 case '\t':
916 _c4dbgpf("not a scalar: found non-scalar token '?{}'", _c4prc(s.str[1]));
917 return false;
918 case '{':
919 case '}':
920 case '[':
921 case ']':
922 _c4err("invalid token \"?{}\"", _c4prc(s.str[1]));
923 break;
924 default:
925 break;
926 }
927 }
928 else
929 {
930 return false;
931 }
932 return true;
933}
934
935
936template<class EventHandler>
937bool ParseEngine<EventHandler>::_is_valid_start_scalar_plain_flow(csubstr s)
938{
939 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !s.empty(), m_evt_handler->m_curr->pos);
940 // it's not a scalar if it starts with any of these characters:
941 switch(s.str[0])
942 {
943 // these are all legal tokens which mean no scalar is starting:
944 case '[':
945 case ']':
946 case '{':
947 case '}':
948 case '&':
949 case '*':
950 case '!':
951 case '|':
952 case '>':
953 case '#':
954 case ',':
955 _c4dbgpf("not a scalar: found non-scalar token '{}'", _c4prc(s.str[0]));
956 return false;
957 // '-' and ':' are illegal at the beginning if not followed by a scalar character
958 case '-':
959 case ':':
960 _c4dbgpf("suspicious token='{}' len={}", _c4prc(s.str[0]), s.len);
961 return _is_valid_start_scalar_plain_flow_check_block_token(s);
962 case '?':
963 _c4dbgpf("qmrk='{}' len={}", _c4prc(s.str[0]), s.len);
964 return _is_valid_start_scalar_plain_flow_check_qmrk(s);
965 // everything else is a legal starting character
966 default:
967 return true;
968 }
969}
970
971
972template<class EventHandler>
973bool ParseEngine<EventHandler>::_scan_scalar_plain_handle_newline(csubstr s, size_t offs)
974{
975 _c4dbgpf("newl[PLAIN]: found '\\n'. offs={} line={} sofar={}", offs, m_evt_handler->m_curr->pos.line, _prs(s.first(offs), true));
976 if(s.len > offs + 1)
977 {
978 _c4dbgp("newl[PLAIN]: buffer continues");
979 csubstr next_line = s.sub(offs + 1);
980 size_t next_line_indentation = next_line.first_not_of(' ');
981 if(next_line_indentation != npos)
982 {
983 _c4dbgpf("newl[PLAIN]: line={} indentation={} indref={}", m_evt_handler->m_curr->pos.line + 1, next_line_indentation, m_evt_handler->m_curr->indref);
984 next_line = next_line.first(next_line.first_of("\n\r"));
985 _c4dbgpf("newl[PLAIN]: has indentation. next_line={}", _prs(next_line));
986 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, next_line_indentation <= next_line.len, m_evt_handler->m_curr->pos);
987 if(C4_LIKELY(next_line_indentation >= m_evt_handler->m_curr->indref))
988 {
989 _c4dbgp("newl[PLAIN]: larger indentation");
990 next_line = next_line.sub(next_line_indentation);
991 }
992 else if(C4_UNLIKELY(next_line.len && next_line.triml(' ').len))
993 {
994 _c4dbgp("newl[PLAIN]: err, smaller indentation");
995 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
996 _line_ended();
997 _scan_line();
998 if(m_evt_handler->m_curr->line_contents.indentation != npos)
999 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
1000 _c4err("parse error"); // cannot reduce indentation here
1001 }
1002 _c4dbgpf("newl[PLAIN]: next_line.len={}", next_line.len);
1003 if(next_line.len)
1004 {
1005 size_t fno = next_line.first_not_of(" \t");
1006 if(fno != csubstr::npos)
1007 {
1008 _c4assert(fno < next_line.len);
1009 switch(next_line.str[fno])
1010 {
1011 case ',': case ']': case '#':
1012 _c4dbgpf("newl[PLAIN]: found terminating character beginning next line: '{}'", next_line.str[fno]);
1013 return false;
1014 case ':': // cannot be succeeded by whitespace
1015 _c4dbgp("newl[PLAIN]: found :");
1016 if(fno + 1 == next_line.len || _is_blck_token(next_line.sub(fno)))
1017 {
1018 _c4dbgpf("newl[PLAIN]: found terminating character beginning next line: '{}'", next_line.str[fno]);
1019 return false;
1020 }
1021 break;
1022 }
1023 }
1024 }
1025 }
1026 }
1027 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
1028 _line_ended();
1029 _scan_line();
1030 return true;
1031}
1032
1033template<class EventHandler>
1034bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_flow(ScannedScalar *C4_RESTRICT sc)
1035{
1036 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RMAP), m_evt_handler->m_curr->pos);
1037 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RBLCK), m_evt_handler->m_curr->pos);
1038 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ|RSEQIMAP), m_evt_handler->m_curr->pos);
1039 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RFLOW), m_evt_handler->m_curr->pos);
1040 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RVAL), m_evt_handler->m_curr->pos);
1041
1042 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->line_contents.rem.begins_with(' '), m_evt_handler->m_curr->pos);
1043 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->line_contents.rem.begins_with('\n'), m_evt_handler->m_curr->pos);
1044
1045 if(!m_evt_handler->m_curr->line_contents.rem.len || !_is_valid_start_scalar_plain_flow(m_evt_handler->m_curr->line_contents.rem))
1046 return false;
1047
1048 substr s = _buf().sub(m_evt_handler->m_curr->pos.offset);
1049 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with(m_evt_handler->m_curr->line_contents.rem), m_evt_handler->m_curr->pos);
1050
1051 _c4dbgp("scanning seqflow scalar...");
1052
1053 bool needs_filter = false;
1054 size_t col = 0; // zero-based column
1055 size_t offs = 0; // offset
1056 for( ; offs < s.len; ++offs, ++col)
1057 {
1058 const char c = s.str[offs];
1059 switch(c)
1060 {
1061 case ',':
1062 case ']':
1063 _c4dbgpf("found terminating character at {}: '{}'", offs, c);
1064 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, offs > 0, m_evt_handler->m_curr->pos);
1065 goto ended_scalar;
1066 case '\n':
1067 _c4dbgpf("found '\\n' at col={}", col);
1068 if(!_scan_scalar_plain_handle_newline(s, offs))
1069 goto ended_scalar;
1070 col = (size_t)-1; // so that col is 0 in the next loop iteration
1071 needs_filter = true;
1072 break;
1073 case '\r':
1074 --col; // don't count \r when calling _line_progressed()
1075 needs_filter = true;
1076 break;
1077 case ':':
1078 _c4dbgp("found suspicious ':'");
1079 if(s.len > offs + 1)
1080 {
1081 char next = s.str[offs + 1];
1082 _c4dbgpf("next char is '{}'", _c4prc(next));
1083 if(next == '\r')
1084 {
1085 csubstr after = s.sub(offs + 1).triml('\r');
1086 if(after.len)
1087 {
1088 next = after.str[0];
1089 _c4dbgpf("skip \\r to '{}'", _c4prc(next));
1090 }
1091 }
1092 // no else here.
1093 if(next == ' ' _RYML_WITH_TAB_TOKENS(|| next == '\t') || next == ',' || next == '\n' || next == ']')
1094 {
1095 _c4dbgp("map starting!");
1096 goto ended_scalar;
1097 }
1098 else
1099 {
1100 _c4dbgp("':' nothing to see here");
1101 }
1102 }
1103 else
1104 {
1105 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.len == offs + 1, m_evt_handler->m_curr->pos);
1106 _line_progressed(col);
1107 _c4err("missing termination: '{}'", c); // noreturn
1108 }
1109 break;
1110 case '#':
1111 {
1112 _c4dbgp("found suspicious '#'");
1113 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, offs > 0, m_evt_handler->m_curr->pos);
1114 char prev = s.str[offs - 1];
1115 if(prev == ' ' _RYML_WITH_TAB_TOKENS(|| prev == '\t'))
1116 {
1117 _c4dbgpf("found terminating character at {}: '{}'", offs, c);
1118 goto ended_scalar;
1119 }
1120 }
1121 break;
1122 case '[':
1123 case '{':
1124 case '}':
1125 _line_progressed(col); // advance to report the proper position in the error
1126 _c4err("invalid character: '{}'", c); // noreturn
1127 case '-':
1128 case '.':
1129 _c4dbgpf("doc token character: '{}', offs={}", c, offs);
1130 if(offs == 0 && m_evt_handler->m_curr->at_line_beginning())
1131 {
1132 _c4dbgp("at line beginning");
1133 if(s.len >= 3 && s.str[1] == c && s.str[2] == c)
1134 {
1135 _c4err("parse error"); // no return
1136 }
1137 }
1138 break;
1139 default:
1140 ;
1141 }
1142 }
1143
1144ended_scalar:
1145
1146 _line_progressed(col);
1147 _set_first(s, offs);
1148 sc->scalar = s.trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' '));
1149 sc->needs_filter = needs_filter;
1150
1151 _c4prscalar("scanned plain scalar", sc->scalar, /*keep_newlines*/true);
1152
1153 return true;
1154}
1155
1156template<class EventHandler>
1157bool ParseEngine<EventHandler>::_scan_scalar_plain_map_flow(ScannedScalar *C4_RESTRICT sc)
1158{
1159 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RSEQ) || has_any(RSEQIMAP), m_evt_handler->m_curr->pos);
1160 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RBLCK), m_evt_handler->m_curr->pos);
1161 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RMAP|RSEQIMAP), m_evt_handler->m_curr->pos);
1162 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RFLOW), m_evt_handler->m_curr->pos);
1163 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RVAL|QMRK), m_evt_handler->m_curr->pos);
1164
1165 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->line_contents.rem.begins_with(' '), m_evt_handler->m_curr->pos);
1166 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->line_contents.rem.begins_with('\n'), m_evt_handler->m_curr->pos);
1167
1168 if(!m_evt_handler->m_curr->line_contents.rem.len || !_is_valid_start_scalar_plain_flow(m_evt_handler->m_curr->line_contents.rem))
1169 return false;
1170
1171 substr s = _buf().sub(m_evt_handler->m_curr->pos.offset);
1172 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with(m_evt_handler->m_curr->line_contents.rem), m_evt_handler->m_curr->pos);
1173
1174 _c4dbgp("scanning mapflow scalar...");
1175
1176 bool needs_filter = false;
1177 size_t col = 0; // zero-based column
1178 size_t offs = 0; // offset
1179 for( ; offs < s.len; ++offs, ++col)
1180 {
1181 const char c = s.str[offs];
1182 switch(c)
1183 {
1184 case ',':
1185 case '}':
1186 _c4dbgpf("found terminating character at {}: '{}'", offs, c);
1187 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, offs > 0, m_evt_handler->m_curr->pos);
1188 goto ended_scalar;
1189 case '\n':
1190 _c4dbgpf("found '\\n' at col={}", col);
1191 if(!_scan_scalar_plain_handle_newline(s, offs))
1192 goto ended_scalar;
1193 col = (size_t)-1; // so that col is 0 in the next loop iteration
1194 needs_filter = true;
1195 break;
1196 case '\r':
1197 --col; // don't count \r when calling _line_progressed()
1198 needs_filter = true;
1199 break;
1200 case ':':
1201 _c4dbgpf("found ':'", c);
1202 if(s.len == offs+1)
1203 break;
1204 {
1205 const char next = s.str[offs+1];
1206 _c4dbgpf("next='{}'", c);
1207 if(next == ' ' || next == ',' || next == '}' || next == '\n' || next == '\r' _RYML_WITH_TAB_TOKENS(|| next == '\t'))
1208 {
1209 _c4dbgpf("found terminating character: '{}'", c);
1210 goto ended_scalar;
1211 }
1212 }
1213 break;
1214 case '{':
1215 case '[':
1216 _line_progressed(col);
1217 _c4err("invalid character: '{}'", c); // noreturn
1218 break;
1219 case ']':
1220 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RSEQIMAP), m_evt_handler->m_curr->pos);
1221 goto ended_scalar;
1222 default:
1223 ;
1224 }
1225 }
1226
1227ended_scalar:
1228
1229 _line_progressed(col);
1230 s = s.first(offs);
1231 sc->scalar = s.trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' '));
1232 sc->needs_filter = needs_filter;
1233
1234 _c4prscalar("scanned plain scalar", sc->scalar, /*keep_newlines*/true);
1235
1236 return sc->scalar.len > 0u;
1237}
1238
1239template<class EventHandler>
1240bool ParseEngine<EventHandler>::_scan_scalar_seq_json(ScannedScalar *C4_RESTRICT sc)
1241{
1242 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RMAP), m_evt_handler->m_curr->pos);
1243 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RBLCK), m_evt_handler->m_curr->pos);
1244 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ), m_evt_handler->m_curr->pos);
1245 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RFLOW), m_evt_handler->m_curr->pos);
1246
1247 substr s = m_evt_handler->m_curr->line_contents.rem;
1248 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '), m_evt_handler->m_curr->pos);
1249 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.len > 0, m_evt_handler->m_curr->pos);
1250
1251 _c4dbgp("seq_json: scanning scalar...");
1252
1253 switch(s.str[0])
1254 {
1255 case ']':
1256 case '{':
1257 case ',':
1258 _c4dbgp("seq_json: not a scalar.");
1259 return false;
1260 }
1261
1262 {
1263 const size_t len = _begins_with_special_json_scalar(s);
1264 if(len)
1265 {
1266 char c = s.len > len ? s.str[len] : ',';
1267 if(c == ',' || c == ']' || c == ' ' || c == '\n' || c == '\t' || c == '\r')
1268 {
1269 sc->scalar = s.first(len);
1270 sc->needs_filter = false;
1271 _c4dbgpf("seq_json: special scalar: '{}'", sc->scalar);
1272 _line_progressed(len);
1273 return true;
1274 }
1275 else
1276 {
1277 return false;
1278 }
1279 }
1280 }
1281
1282 // must be a number or special scalar
1283 size_t i = 0;
1284 for( ; i < s.len; ++i)
1285 {
1286 const char c = s.str[i];
1287 switch(c)
1288 {
1289 case ',':
1290 case ']':
1291 case ' ':
1292 case '\t':
1293 _c4dbgpf("seq_json: found terminating character: '{}'", c);
1294 goto ended_scalar;
1295 default:
1296 ;
1297 }
1298 }
1299
1300ended_scalar:
1301
1302 if(C4_LIKELY(i > 0))
1303 {
1304 _line_progressed(i);
1305 sc->scalar = s.first(i);
1306 sc->needs_filter = false;
1307 _c4dbgpf("seq_json: scalar was {}", _prs(sc->scalar, /*escape*/true));
1308 }
1309
1310 return true;
1311}
1312
1313template<class EventHandler>
1314bool ParseEngine<EventHandler>::_scan_scalar_map_json(ScannedScalar *C4_RESTRICT sc)
1315{
1316 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RSEQ), m_evt_handler->m_curr->pos);
1317 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RBLCK), m_evt_handler->m_curr->pos);
1318 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RMAP), m_evt_handler->m_curr->pos);
1319 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RFLOW), m_evt_handler->m_curr->pos);
1320 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RVAL), m_evt_handler->m_curr->pos);
1321
1322 substr s = m_evt_handler->m_curr->line_contents.rem;
1323 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '), m_evt_handler->m_curr->pos);
1324 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.len > 0, m_evt_handler->m_curr->pos);
1325
1326 _c4dbgp("scanning scalar...");
1327
1328 {
1329 const size_t len = _begins_with_special_json_scalar(s);
1330 if(len)
1331 {
1332 char c = s.len > len ? s.str[len] : ',';
1333 _c4dbgpf("begins with special scalar: {} next='{}'", s.first(len), _c4prc(c));
1334 if(c == ',' || c == '}' || c == ' ' || c == '\n' || c == '\t' || c == '\r')
1335 {
1336 sc->scalar = s.first(len);
1337 sc->needs_filter = false;
1338 _c4dbgpf("special json scalar: '{}'", _prs(sc->scalar));
1339 _line_progressed(len);
1340 return true;
1341 }
1342 else
1343 {
1344 return false;
1345 }
1346 }
1347 }
1348
1349 // must be a number
1350 size_t i = 0;
1351 for( ; i < s.len; ++i)
1352 {
1353 const char c = s.str[i];
1354 switch(c)
1355 {
1356 case ',':
1357 case '}':
1358 case ' ':
1359 case '\t':
1360 _c4dbgpf("found terminating character: '{}'", c);
1361 goto ended_scalar;
1362 default:
1363 ;
1364 }
1365 }
1366
1367ended_scalar:
1368
1369 if(C4_LIKELY(i > 0))
1370 {
1371 _line_progressed(i);
1372 sc->scalar = s.first(i);
1373 sc->needs_filter = false;
1374 _c4dbgpf("scalar was {}", _prs(sc->scalar));
1375 return true;
1376 }
1377
1378 return false;
1379}
1380
1381template<class EventHandler>
1382bool ParseEngine<EventHandler>::_is_doc_begin(csubstr s)
1383{
1384 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s[0] == '-', m_evt_handler->m_curr->pos);
1385 return (m_evt_handler->m_curr->line_contents.indentation == 0u && m_evt_handler->m_curr->at_line_beginning() && _is_doc_begin_token(s));
1386}
1387
1388template<class EventHandler>
1389bool ParseEngine<EventHandler>::_is_doc_end(csubstr s)
1390{
1391 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s[0] == '.', m_evt_handler->m_curr->pos);
1392 return (m_evt_handler->m_curr->line_contents.indentation == 0u && m_evt_handler->m_curr->at_line_beginning() && _is_doc_end_token(s));
1393}
1394
1395template<class EventHandler>
1396bool ParseEngine<EventHandler>::_scan_scalar_plain_blck(ScannedScalar *C4_RESTRICT sc, size_t indentation)
1397{
1398 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RFLOW), m_evt_handler->m_curr->pos);
1399 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RSEQIMAP), m_evt_handler->m_curr->pos);
1400 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RBLCK|RUNK|USTY), m_evt_handler->m_curr->pos);
1401
1402 substr s = m_evt_handler->m_curr->line_contents.rem;
1403 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '), m_evt_handler->m_curr->pos);
1404 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.len > 0, m_evt_handler->m_curr->pos);
1405
1406 switch(s.str[0])
1407 {
1408 case '-':
1409 if(_is_blck_token(s))
1410 {
1411 return false;
1412 }
1413 else if(_is_doc_begin(s))
1414 {
1415 _c4dbgp("token is doc start");
1416 return false;
1417 }
1418 break;
1419 case ':':
1420 case '?':
1421 if(_is_blck_token(s))
1422 return false;
1423 break;
1424 case '[':
1425 case '{':
1426 case '&':
1427 case '*':
1428 case '!':
1429 case '\t':
1430 case ',':
1431 case '%':
1432 return false;
1433 case '.':
1434 if(_is_doc_end(s))
1435 {
1436 _c4dbgp("token is doc end");
1437 return false;
1438 }
1439 break;
1440 }
1441
1442 _c4dbgpf("plain scalar! indentation={}", indentation);
1443
1444 const size_t start_offset = m_evt_handler->m_curr->pos.offset;
1445 const size_t start_line = m_evt_handler->m_curr->pos.line;
1446
1447 bool needs_filter = false;
1448 while(true)
1449 {
1450 _c4dbgpf("plain scalar line: {}", _prs(s));
1451 for(size_t i = 0; i < s.len; ++i)
1452 {
1453 const char curr = s.str[i];
1454 //_c4dbgpf("[{}]='{}'", i, _c4prc(curr));
1455 switch(curr)
1456 {
1457 case ':':
1458 _c4dbgpf("[{}]: got suspicious ':'", i);
1459 // are there more characters?
1460 if((i + 1 == s.len) || ((s.str[i+1] == ' ') _RYML_WITH_TAB_TOKENS( || (s.str[i+1] == '\t'))))
1461 {
1462 _c4dbgpf("followed by '{}'", i+1 == s.len ? csubstr("\\n") : _c4prc(s.str[i+1]));
1463 _line_progressed(i);
1464 // ': ' is accepted only on the first line
1465 if(C4_LIKELY(m_evt_handler->m_curr->pos.line == start_line))
1466 {
1467 _c4dbgp("start line. scalar ends here");
1468 goto ended_scalar;
1469 }
1470 else
1471 {
1472 _c4err("multiline scalars cannot be used as implicit keys");
1473 }
1474 }
1475 else
1476 {
1477 size_t j = i;
1478 while(j + 1 < s.len && s.str[j+1] == ':')
1479 {
1480 _c4dbgp("skip colon");
1481 ++j;
1482 }
1483 i = j > i ? j-1 : i;
1484 _c4dbgp("nothing to see here");
1485 }
1486 break;
1487 case '#':
1488 _c4dbgp("got suspicious '#'");
1489 if(!i || (s.str[i-1] == ' ' || s.str[i-1] == '\t'))
1490 {
1491 _c4dbgp("comment! scalar ends here");
1492 _line_progressed(i);
1493 goto ended_scalar;
1494 }
1495 else
1496 {
1497 _c4dbgp("nothing to see here");
1498 }
1499 break;
1500 }
1501 }
1502 _line_progressed(s.len);
1503 csubstr next_peeked = _peek_next_line(m_evt_handler->m_curr->pos.offset);
1504 next_peeked = next_peeked.trimr("\n\r");
1505 const size_t next_indentation = next_peeked.first_not_of(' ');
1506 _c4dbgpf("indentation curr={} next={}", indentation, next_indentation);
1507 if(next_indentation < indentation)
1508 {
1509 _c4dbgp("smaller indentation! scalar ended");
1510 goto ended_scalar;
1511 }
1512 else if(next_indentation == 0 && next_peeked.len > 0)
1513 {
1514 const char first = next_peeked.str[0];
1515 switch(first)
1516 {
1517 case '-':
1518 _c4dbgpf("doc begin? peeked={}", _prs(next_peeked, size_t(3)));
1519 if(_is_doc_begin_token(next_peeked))
1520 {
1521 _c4dbgp("doc begin! scalar ended");
1522 goto ended_scalar;
1523 }
1524 break;
1525 case '.':
1526 _c4dbgpf("doc end? peeked={}", _prs(next_peeked, size_t(3)));
1527 if(_is_doc_end_token(next_peeked))
1528 {
1529 _c4dbgp("doc end! scalar ended");
1530 goto ended_scalar;
1531 }
1532 break;
1533 }
1534 }
1535 // load with next line
1536 _c4dbgp("next line!");
1537 if(!_finished_file())
1538 {
1539 _c4dbgp("next line!");
1540 _line_ended();
1541 _scan_line();
1542 }
1543 else
1544 {
1545 _c4dbgp("file finished!");
1546 goto ended_scalar;
1547 }
1548 s = m_evt_handler->m_curr->line_contents.rem;
1549 needs_filter = true;
1550 }
1551
1552ended_scalar:
1553
1554 sc->scalar = _buf().range(start_offset, m_evt_handler->m_curr->pos.offset).trimr(" \n\r\t");
1555 sc->needs_filter = needs_filter;
1556
1557 _c4dbgpf("scalar was {}", _prs(sc->scalar));
1558
1559 return true;
1560}
1561
1562template<class EventHandler>
1563C4_ALWAYS_INLINE bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_blck(ScannedScalar *C4_RESTRICT sc) // LCOV_EXCL_LINE
1564{
1565 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RMAP), m_evt_handler->m_curr->pos);
1566 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RFLOW), m_evt_handler->m_curr->pos);
1567 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RSEQIMAP), m_evt_handler->m_curr->pos);
1568 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ), m_evt_handler->m_curr->pos);
1569 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RBLCK), m_evt_handler->m_curr->pos);
1570 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RVAL), m_evt_handler->m_curr->pos);
1571 return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref + 1u);
1572}
1573
1574template<class EventHandler>
1575C4_ALWAYS_INLINE bool ParseEngine<EventHandler>::_scan_scalar_plain_map_blck(ScannedScalar *C4_RESTRICT sc) // LCOV_EXCL_LINE
1576{
1577 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RSEQ), m_evt_handler->m_curr->pos);
1578 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RFLOW), m_evt_handler->m_curr->pos);
1579 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RMAP), m_evt_handler->m_curr->pos);
1580 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RBLCK), m_evt_handler->m_curr->pos);
1581 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RVAL|QMRK), m_evt_handler->m_curr->pos);
1582 return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref + 1u);
1583}
1584
1585template<class EventHandler>
1586C4_ALWAYS_INLINE bool ParseEngine<EventHandler>::_scan_scalar_plain_unk(ScannedScalar *C4_RESTRICT sc) // LCOV_EXCL_LINE
1587{
1588 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RUNK|USTY), m_evt_handler->m_curr->pos);
1589 return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref);
1590}
1591
1592
1593//-----------------------------------------------------------------------------
1594
1595template<class EventHandler>
1596substr ParseEngine<EventHandler>::_peek_next_line(size_t pos) const
1597{
1598 substr rem{}; // declare here because of the goto
1599 size_t nlpos{}; // declare here because of the goto
1600 pos = pos == npos ? m_evt_handler->m_curr->pos.offset : pos;
1601 if(pos >= _buf().len)
1602 goto next_is_empty;
1603
1604 // look for the next newline chars, and jump to the right of those
1605 rem = _from_next_line(_buf().sub(pos));
1606 if(rem.empty())
1607 goto next_is_empty;
1608
1609 // now get everything up to and including the following newline chars
1610 nlpos = rem.first_of("\r\n");
1611 if((nlpos != csubstr::npos) && (nlpos + 1 < rem.len))
1612 nlpos += _extend_from_combined_newline(rem[nlpos], rem[nlpos+1]);
1613 rem = rem.left_of(nlpos, /*include_pos*/true);
1614
1615 _c4dbgpf("peek next line @ {}: (len={})'{}'", pos, rem.len, rem.trimr("\r\n"));
1616 return rem;
1617
1618next_is_empty:
1619 _c4dbgpf("peek next line @ {}: (len=0)''", pos);
1620 return rem;
1621}
1622
1623//-----------------------------------------------------------------------------
1624
1625template<class EventHandler>
1626void ParseEngine<EventHandler>::_scan_line()
1627{
1628 if(C4_LIKELY(m_evt_handler->m_curr->pos.offset < _buf().len))
1629 m_evt_handler->m_curr->line_contents.reset_with_next_line(_buf(), m_evt_handler->m_curr->pos.offset);
1630 else
1631 m_evt_handler->m_curr->line_contents.reset_with_next_line(_buf().last(0), 0);
1632}
1633
1634template<class EventHandler>
1635void ParseEngine<EventHandler>::_line_progressed(size_t ahead)
1636{
1637 _c4dbgpf("line[{}] ({} cols) progressed by {}: col {}-->{} offset {}-->{}",
1638 m_evt_handler->m_curr->pos.line,
1639 m_evt_handler->m_curr->line_contents.full.len,
1640 ahead, m_evt_handler->m_curr->pos.col,
1641 m_evt_handler->m_curr->pos.col+ahead,
1642 m_evt_handler->m_curr->pos.offset,
1643 m_evt_handler->m_curr->pos.offset+ahead);
1644 m_evt_handler->m_curr->pos.offset += ahead;
1645 m_evt_handler->m_curr->pos.col += ahead;
1646 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col <= m_evt_handler->m_curr->line_contents.num_cols+1, m_evt_handler->m_curr->pos);
1647 m_evt_handler->m_curr->line_contents.rem = m_evt_handler->m_curr->line_contents.rem.sub(ahead);
1648}
1649
1650template<class EventHandler>
1651void ParseEngine<EventHandler>::_line_ended()
1652{
1653 _c4dbgpf("line[{}] ({} cols) ended! offset {}-->{} / col {}-->{}",
1654 m_evt_handler->m_curr->pos.line,
1655 m_evt_handler->m_curr->line_contents.full.len,
1656 m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset + m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols,
1657 m_evt_handler->m_curr->pos.col, 1);
1658 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col == m_evt_handler->m_curr->line_contents.num_cols + 1, m_evt_handler->m_curr->pos);
1659 m_evt_handler->m_curr->pos.offset += m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols;
1660 ++m_evt_handler->m_curr->pos.line;
1661 m_evt_handler->m_curr->pos.col = 1;
1662}
1663
1664template<class EventHandler>
1665void ParseEngine<EventHandler>::_line_ended_undo()
1666{
1667 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col == 1u, m_evt_handler->m_curr->pos);
1668 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.line > 0u, m_evt_handler->m_curr->pos);
1669 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.offset >= m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols, m_evt_handler->m_curr->pos);
1670 const size_t delta = m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols;
1671 _c4dbgpf("line[{}] undo ended! line {}-->{}, offset {}-->{}", m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->pos.line - 1, m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset - delta);
1672 m_evt_handler->m_curr->pos.offset -= delta;
1673 --m_evt_handler->m_curr->pos.line;
1674 m_evt_handler->m_curr->pos.col = m_evt_handler->m_curr->line_contents.num_cols + 1u;
1675 // don't forget to undo also the changes to the remainder of the line
1676 //_RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.offset >= _buf().len || _buf()[m_evt_handler->m_curr->pos.offset] == '\n' || _buf()[m_evt_handler->m_curr->pos.offset] == '\r', m_evt_handler->m_curr->pos);
1677 m_evt_handler->m_curr->line_contents.rem = _buf().sub(m_evt_handler->m_curr->pos.offset, 0);
1678}
1679
1680
1681//-----------------------------------------------------------------------------
1682template<class EventHandler>
1683void ParseEngine<EventHandler>::_set_indentation(size_t indentation) noexcept
1684{
1685 m_evt_handler->m_curr->indref = indentation;
1686 _c4dbgpf("state[{}]: saving indentation: {}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
1687}
1688
1689template<class EventHandler>
1690void ParseEngine<EventHandler>::_save_indentation()
1691{
1692 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.is_sub(m_evt_handler->m_curr->line_contents.full), m_evt_handler->m_curr->pos);
1693 m_evt_handler->m_curr->indref = m_evt_handler->m_curr->line_contents.current_col();
1694 _c4dbgpf("state[{}]: saving indentation: {}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
1695}
1696
1697template<class EventHandler>
1698void ParseEngine<EventHandler>::_mark_seqflow_val_end() noexcept
1699{
1700 _c4dbgpf("SEQFLOW. mark val end at line={}", m_evt_handler->m_curr->pos.line);
1701 m_prev_val_end = m_evt_handler->m_curr->pos.line;
1702}
1703
1704
1705//-----------------------------------------------------------------------------
1706
1707template<class EventHandler>
1708void ParseEngine<EventHandler>::_flow_container_was_a_key(size_t orig_indent)
1709{
1710 _c4dbgpf("flow container is followed by colon! orig_indent={}", orig_indent);
1711 m_evt_handler->actually_val_is_first_key_of_new_map_block();
1712 addrem_flags(RMAP|RVAL|RBLCK, RKCL|RSEQ|RUNK);
1713 _set_indentation(orig_indent);
1714 _maybe_skip_whitespace_tokens();
1715}
1716
1717template<class EventHandler>
1718void ParseEngine<EventHandler>::_end_flow_container(size_t orig_indent, bool multiline)
1719{
1720 // this is called AFTER ending the flow container,
1721 // so now we're at the parent container's scope
1722 if(has_all(RMAP|RBLCK) && has_none(RKCL|RVAL|RNXT))
1723 {
1724 _c4dbgp("flow container: end as vanilla block map key!");
1725 if(C4_UNLIKELY(multiline))
1726 _c4err("multiline key is invalid");
1727 if(C4_UNLIKELY(!_maybe_scan_following_colon()))
1728 _c4err("could not find ':' colon after key");
1729 _maybe_skip_whitespace_tokens();
1730 addrem_flags(RVAL, RKEY|RKCL|RNXT);
1731 }
1732 else if(has_none(RFLOW))
1733 {
1734 _c4dbgp("end_flow_container: now not in flow!");
1735 if(has_any(RUNK|RSEQ|RKCL) && _maybe_scan_following_colon())
1736 {
1737 if(C4_UNLIKELY(multiline))
1738 _c4err("multiline key is invalid");
1739 _flow_container_was_a_key(orig_indent);
1740 }
1741 else
1742 {
1743 _c4dbgp("end_flow_container: end map as key!");
1744 }
1745 }
1746 else if(has_any(RSEQ))
1747 {
1748 _c4dbgp("end_flow_container: now in a flow seq");
1749 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RFLOW), m_evt_handler->m_curr->pos);
1750 _mark_seqflow_val_end();
1751 }
1752}
1753
1754template<class EventHandler>
1755void ParseEngine<EventHandler>::_end_map_flow()
1756{
1757 bool multiline = m_evt_handler->m_parent->pos.line < m_evt_handler->m_curr->pos.line;
1758 size_t orig_indent = m_evt_handler->m_curr->indref;
1759 _c4dbgpf("mapflow: end, multiline={}", multiline);
1760 m_evt_handler->end_map_flow(multiline && m_options.detect_flow_ml());
1761 _end_flow_container(orig_indent, multiline);
1762}
1763
1764template<class EventHandler>
1765void ParseEngine<EventHandler>::_end_seq_flow()
1766{
1767 bool multiline = m_evt_handler->m_parent->pos.line < m_evt_handler->m_curr->pos.line;
1768 size_t orig_indent = m_evt_handler->m_curr->indref;
1769 _c4dbgpf("seqflow: end, multiline={}", multiline);
1770 m_evt_handler->end_seq_flow(multiline && m_options.detect_flow_ml());
1771 _end_flow_container(orig_indent, multiline);
1772}
1773
1774template<class EventHandler>
1775void ParseEngine<EventHandler>::_end_map_blck()
1776{
1777 _c4dbgp("mapblck: end");
1778 if(has_any(RKCL|RVAL))
1779 {
1780 _c4dbgp("mapblck: set missing val");
1781 _handle_annotations_before_blck_val_scalar();
1782 m_evt_handler->set_val_scalar_plain_empty();
1783 }
1784 else if(has_any(QMRK))
1785 {
1786 _c4dbgp("mapblck: set missing keyval");
1787 _handle_annotations_before_blck_key_scalar();
1788 m_evt_handler->set_key_scalar_plain_empty();
1789 _handle_annotations_before_blck_val_scalar();
1790 m_evt_handler->set_val_scalar_plain_empty();
1791 }
1792 m_evt_handler->end_map_block();
1793}
1794
1795template<class EventHandler>
1796void ParseEngine<EventHandler>::_end_seq_blck()
1797{
1798 if(has_any(RVAL))
1799 {
1800 _c4dbgp("seqblck: set missing val");
1801 _handle_annotations_before_blck_val_scalar();
1802 m_evt_handler->set_val_scalar_plain_empty();
1803 }
1804 m_evt_handler->end_seq_block();
1805}
1806
1807template<class EventHandler>
1808void ParseEngine<EventHandler>::_end2_map()
1809{
1810 _c4dbgp("map: end");
1811 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RMAP), m_evt_handler->m_curr->pos);
1812 if(has_any(RBLCK))
1813 {
1814 _end_map_blck();
1815 }
1816 else
1817 {
1818 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RFLOW), m_evt_handler->m_curr->pos);
1819 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(USTY), m_evt_handler->m_curr->pos);
1820 m_evt_handler->_pop();
1821 }
1822}
1823
1824template<class EventHandler>
1825void ParseEngine<EventHandler>::_end2_seq()
1826{
1827 _c4dbgp("seq: end");
1828 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ), m_evt_handler->m_curr->pos);
1829 if(has_any(RBLCK))
1830 {
1831 _end_seq_blck();
1832 }
1833 else
1834 {
1835 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RFLOW), m_evt_handler->m_curr->pos);
1836 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(USTY), m_evt_handler->m_curr->pos);
1837 m_evt_handler->_pop();
1838 }
1839}
1840
1841template<class EventHandler>
1842void ParseEngine<EventHandler>::_begin2_doc()
1843{
1844 _c4dbgp("begin_doc");
1845 m_has_directives_yaml = false;
1846 m_has_directives = false;
1847 m_doc_empty = true;
1848 add_flags(RDOC);
1849 m_evt_handler->begin_doc();
1850 m_evt_handler->m_curr->indref = 0; // ?
1851}
1852
1853template<class EventHandler>
1854void ParseEngine<EventHandler>::_begin2_doc_expl()
1855{
1856 _c4dbgp("begin_doc_expl");
1857 m_has_directives_yaml = false;
1858 m_has_directives = false;
1859 m_doc_empty = true;
1860 add_flags(RDOC);
1861 m_evt_handler->begin_doc_expl();
1862 m_evt_handler->m_curr->indref = 0; // ?
1863}
1864
1865template<class EventHandler>
1866void ParseEngine<EventHandler>::_end2_doc()
1867{
1868 _c4dbgp("doc: end");
1869 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RDOC), m_evt_handler->m_curr->pos);
1870 if(m_doc_empty || (m_pending_tags.num_entries || m_pending_anchors.num_entries))
1871 {
1872 _c4dbgp("doc was empty; add empty val");
1873 _handle_annotations_before_blck_val_scalar();
1874 m_evt_handler->set_val_scalar_plain_empty();
1875 }
1876 m_evt_handler->end_doc();
1877 m_bom_len = 0;
1878}
1879
1880template<class EventHandler>
1881void ParseEngine<EventHandler>::_end2_doc_expl()
1882{
1883 _c4dbgp("doc: end");
1884 if(m_doc_empty || (m_pending_tags.num_entries || m_pending_anchors.num_entries))
1885 {
1886 _c4dbgp("doc: no children; add empty val");
1887 _handle_annotations_before_blck_val_scalar();
1888 m_evt_handler->set_val_scalar_plain_empty();
1889 }
1890 m_evt_handler->end_doc_expl();
1891 m_bom_len = 0;
1892}
1893
1894template<class EventHandler>
1895void ParseEngine<EventHandler>::_maybe_begin_doc()
1896{
1897 if(has_none(RDOC))
1898 {
1899 _c4dbgp("doc must be started");
1900 _begin2_doc();
1901 }
1902}
1903template<class EventHandler>
1904void ParseEngine<EventHandler>::_maybe_end_doc()
1905{
1906 if(has_any(RDOC))
1907 {
1908 _c4dbgp("doc must be finished");
1909 _end2_doc();
1910 }
1911 else if(m_doc_empty && (m_pending_tags.num_entries || m_pending_anchors.num_entries))
1912 {
1913 _c4dbgp("no doc to finish, but pending annotations");
1914 m_evt_handler->begin_doc();
1915 _handle_annotations_before_blck_val_scalar();
1916 m_evt_handler->set_val_scalar_plain_empty();
1917 m_evt_handler->end_doc();
1918 }
1919}
1920
1921template<class EventHandler>
1922void ParseEngine<EventHandler>::_end_doc_suddenly__pop()
1923{
1924 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1, m_evt_handler->m_curr->pos);
1925 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack[0].flags & RDOC, m_evt_handler->m_curr->pos);
1926 _c4dbgp("root is RDOC");
1927 if(m_evt_handler->m_curr->level != 0)
1928 _handle_indentation_pop(&m_evt_handler->m_stack[0]);
1929 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RDOC), m_evt_handler->m_curr->pos);
1930}
1931
1932/** Check whether the current parse tokens are trailing on the
1933 * previous doc, and raise an error if they are */
1934template<class EventHandler>
1935void ParseEngine<EventHandler>::_check_trailing_doc_token()
1936{
1937 const bool is_root = (m_evt_handler->m_stack.size() == 1u);
1938 const bool isndoc = (m_evt_handler->m_curr->flags & NDOC) != 0;
1939 const bool suspicious = m_evt_handler->template _has_any__<MAP|SEQ|VAL>();
1940 _c4dbgpf("target={} isroot={} suspicious={} ndoc={}", m_evt_handler->m_curr->node_id, is_root, suspicious, isndoc);
1941 if((is_root || m_evt_handler->template _has_any__<DOC>()) && suspicious && !isndoc)
1942 _c4err("parse error");
1943}
1944
1945template<class EventHandler>
1946void ParseEngine<EventHandler>::_end_doc_suddenly()
1947{
1948 _c4dbgp("end doc suddenly");
1949 _end_doc_suddenly__pop();
1950 _end2_doc_expl();
1951 addrem_flags(RUNK|RTOP|NDOC, RMAP|RSEQ|RDOC);
1952}
1953
1954template<class EventHandler>
1955void ParseEngine<EventHandler>::_check_doc_end_tokens() const
1956{
1957 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
1958 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !rem.begins_with_any(". \t"), m_evt_handler->m_curr->pos);
1959 if(C4_UNLIKELY(rem.len && !rem.begins_with('#')))
1960 {
1961 _c4err("parse error");
1962 }
1963}
1964
1965template<class EventHandler>
1966void ParseEngine<EventHandler>::_start_doc_suddenly()
1967{
1968 _c4dbgp("start doc suddenly");
1969 _end_doc_suddenly__pop();
1970 _end2_doc();
1971 _begin2_doc_expl();
1972}
1973
1974template<class EventHandler>
1975void ParseEngine<EventHandler>::_end_stream()
1976{
1977 _c4dbgpf("end_stream, level={} node_id={}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->node_id);
1978 if(C4_UNLIKELY(has_all(RSEQ|RFLOW)))
1979 _c4err("missing terminating ]");
1980 else if(C4_UNLIKELY(has_all(RMAP|RFLOW)))
1981 _c4err("missing terminating }");
1982 if(m_evt_handler->m_stack.size() > 1)
1983 _handle_indentation_pop(m_evt_handler->m_stack.begin());
1984 if(has_all(RDOC))
1985 {
1986 _end2_doc();
1987 }
1988 else if(has_all(RTOP|RUNK))
1989 {
1990 if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
1991 {
1992 if(m_doc_empty)
1993 {
1994 m_evt_handler->begin_doc();
1995 _handle_annotations_before_blck_val_scalar();
1996 m_evt_handler->set_val_scalar_plain_empty();
1997 m_evt_handler->end_doc();
1998 }
1999 }
2000 }
2001 m_evt_handler->end_stream();
2002 if(C4_UNLIKELY(m_has_directives))
2003 _c4err("directives cannot be used without a document");
2004}
2005
2006
2007template<class EventHandler>
2008void ParseEngine<EventHandler>::_handle_indentation_pop(ParserState const* popto)
2009{
2010 _c4dbgpf("popping {} level{}: from level {}(@ind={}) to level {}(@ind={})", m_evt_handler->m_curr->level - popto->level, (((m_evt_handler->m_curr->level - popto->level) > 1) ? "s" : ""), m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, popto->level, popto->indref);
2011 while(m_evt_handler->m_curr != popto)
2012 {
2013 if(has_any(RSEQ))
2014 {
2015 _c4dbgpf("popping seq at level {} (indentation={},addr={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, m_evt_handler->m_curr);
2016 _end2_seq();
2017 }
2018 else if(has_any(RMAP))
2019 {
2020 _c4dbgpf("popping map at level {} (indentation={},addr={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, m_evt_handler->m_curr);
2021 _end2_map();
2022 }
2023 else
2024 {
2025 break;
2026 }
2027 }
2028 _c4dbgpf("current level is {} (indentation={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
2029}
2030
2031template<class EventHandler>
2032void ParseEngine<EventHandler>::_handle_indentation_pop_from_block_seq()
2033{
2034 // search the stack frame to jump to based on its indentation
2035 using state_type = typename EventHandler::state;
2036 state_type const* popto = nullptr;
2037 auto &stack = m_evt_handler->m_stack;
2038 _RYML_ASSERT_PARSE_(stack.m_callbacks, stack.is_contiguous(), m_evt_handler->m_curr->pos); // this search relies on the stack being contiguous
2039 _RYML_ASSERT_PARSE_(stack.m_callbacks, m_evt_handler->m_curr >= stack.begin() && m_evt_handler->m_curr < stack.end(), m_evt_handler->m_curr->pos);
2040 const size_t ind = m_evt_handler->m_curr->line_contents.indentation;
2041 #ifdef RYML_DBG
2042 _print_state_stack();
2043 #endif
2044 for(state_type const* s = m_evt_handler->m_curr-1; s >= stack.begin(); --s)
2045 {
2046 _c4dbgpf("searching for state with indentation {}. curr={} (level={},node={})", ind, s->indref, s->level, s->node_id);
2047 if(s->indref == ind)
2048 {
2049 _c4dbgpf("gotit!!! level={} node={}", s->level, s->node_id);
2050 popto = s;
2051 break;
2052 }
2053 }
2054 if(!popto || popto >= m_evt_handler->m_curr || popto->level >= m_evt_handler->m_curr->level)
2055 {
2056 _c4err("parse error: incorrect indentation?");
2057 }
2058 _handle_indentation_pop(popto);
2059}
2060
2061template<class EventHandler>
2062void ParseEngine<EventHandler>::_handle_indentation_pop_from_block_map()
2063{
2064 // search the stack frame to jump to based on its indentation
2065 using state_type = typename EventHandler::state;
2066 auto &stack = m_evt_handler->m_stack;
2067 _RYML_ASSERT_PARSE_(stack.m_callbacks, stack.is_contiguous(), m_evt_handler->m_curr->pos); // this search relies on the stack being contiguous
2068 _RYML_ASSERT_PARSE_(stack.m_callbacks, m_evt_handler->m_curr >= stack.begin() && m_evt_handler->m_curr < stack.end(), m_evt_handler->m_curr->pos);
2069 const size_t ind = m_evt_handler->m_curr->line_contents.indentation;
2070 state_type const* popto = nullptr;
2071 #ifdef RYML_DBG
2072 char flagbuf_[128];
2073 _print_state_stack(flagbuf_);
2074 #endif
2075 for(state_type const* s = m_evt_handler->m_curr-1; s > stack.begin(); --s) // never go to the stack bottom. that's the root
2076 {
2077 _c4dbgpf("searching for state with indentation {}. current: ind={},level={},node={},flags={}", ind, s->indref, s->level, s->node_id, detail::_parser_flags_to_str(flagbuf_, s->flags));
2078 if(s->indref < ind)
2079 {
2080 break;
2081 }
2082 else if(s->indref == ind)
2083 {
2084 _c4dbgpf("same indentation!!! level={} node={}", s->level, s->node_id);
2085 if(popto && has_any(RTOP, s) && has_none(RMAP|RSEQ, s))
2086 {
2087 break;
2088 }
2089 popto = s;
2090 if(has_all(RSEQ|RBLCK, s))
2091 {
2092 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
2093 const size_t first = rem.first_not_of(' ');
2094 _RYML_ASSERT_PARSE_(stack.m_callbacks, first == ind || first == npos, m_evt_handler->m_curr->pos);
2095 rem = rem.right_of(first, true);
2096 _c4dbgpf("indentless? rem='{}' first={}", rem, first);
2097 if(rem.begins_with('-') && _is_blck_token(rem))
2098 {
2099 _c4dbgp("parent was indentless seq");
2100 break;
2101 }
2102 }
2103 }
2104 }
2105 if(!popto || popto >= m_evt_handler->m_curr || popto->level >= m_evt_handler->m_curr->level)
2106 {
2107 _c4err("parse error: incorrect indentation?");
2108 }
2109 _handle_indentation_pop(popto);
2110}
2111
2112
2113//-----------------------------------------------------------------------------
2114template<class EventHandler>
2115void ParseEngine<EventHandler>::_check_valid_newline_in_quoted_scalar()
2116{
2117 if(C4_UNLIKELY(has_all(RMAP|RBLCK|RKEY)))
2118 {
2119 _c4err("multiline quoted keys are invalid");
2120 }
2121 else // check contextual indentation
2122 {
2123 const size_t minindent = m_evt_handler->m_curr->indref + ((has_any(RMAP|RSEQ) && has_any(RBLCK)));
2124 _c4dbgpf("indent={} vs minindent={} indref={}", m_evt_handler->m_curr->line_contents.indentation, minindent, m_evt_handler->m_curr->indref);
2125 if(m_evt_handler->m_curr->line_contents.indentation < minindent)
2126 {
2127 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks,
2128 m_evt_handler->m_curr->line_contents.indentation == m_evt_handler->m_curr->line_contents.rem.first_not_of(' '),
2129 m_evt_handler->m_curr->pos);
2130 csubstr trimmed = m_evt_handler->m_curr->line_contents.rem.sub(m_evt_handler->m_curr->line_contents.indentation);
2131 _c4dbgpf("trimmed.len={} line={}", trimmed.len, _prs(m_evt_handler->m_curr->line_contents.rem, true));
2132 if(C4_UNLIKELY(!!trimmed.len))
2133 {
2134 _c4err("bad indentation");
2135 }
2136 }
2137 }
2138}
2139
2140
2141//-----------------------------------------------------------------------------
2142template<class EventHandler>
2143typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_scan_scalar_squot()
2144{
2145 // quoted scalars can spread over multiple lines!
2146 // nice explanation here: http://yaml-multiline.info/
2147
2148 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, _buf().sub(m_evt_handler->m_curr->pos.offset).begins_with('\''), m_evt_handler->m_curr->pos);
2149
2150 // a span to the end of the file, skipping the opening quote
2151 substr s = _buf().sub(m_evt_handler->m_curr->pos.offset + 1);
2152 _line_progressed(1); // advance over the opening quote
2153 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->at_line_beginning(), m_evt_handler->m_curr->pos);
2154
2155 bool needs_filter = false;
2156 size_t pos = npos; // find the pos of the matching quote
2157 while( ! _finished_file())
2158 {
2159 const csubstr line = m_evt_handler->m_curr->line_contents.rem;
2160 _c4dbgpf("scanning single quoted scalar @ line[{}]: {}", m_evt_handler->m_curr->pos.line, _prs(line));
2161 if(C4_UNLIKELY(m_evt_handler->m_curr->at_line_beginning() && _is_doc_token(line)))
2162 _c4err("token can not appear at line begin");
2163 for(size_t i = 0; i < line.len; ++i)
2164 {
2165 const char curr = line.str[i];
2166 if(curr == '\'') // single quotes are escaped with two single quotes
2167 {
2168 const char next = i+1 < line.len ? line.str[i+1] : '~';
2169 if(next != '\'') // so just look for the first quote
2170 { // without another after it
2171 _line_progressed(i + 1); // progress beyond the quote
2172 pos = i + (size_t)(line.str - s.str); // set pos to before the quote
2173 goto found_close;
2174 }
2175 else
2176 {
2177 needs_filter = true; // needs filter to remove escaped quotes
2178 ++i; // skip the escaped quote
2179 }
2180 }
2181 }
2182
2183 needs_filter = true;
2184 _line_progressed(line.len);
2185 _line_ended();
2186 _scan_line();
2187 _check_valid_newline_in_quoted_scalar();
2188 }
2189
2190 _c4err("reached end of file while looking for closing quote");
2191
2192found_close:
2193
2194 _c4dbgpf("found closing quote at: {}", pos);
2195 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, pos != npos, m_evt_handler->m_curr->pos);
2196 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, pos >= 0, m_evt_handler->m_curr->pos);
2197 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.end() >= _buf().begin() && s.end() <= _buf().end(), m_evt_handler->m_curr->pos);
2198 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.end() == _buf().end() || *s.end() == '\'', m_evt_handler->m_curr->pos);
2199 _set_first_strict(s, pos);
2200
2201 _c4prscalar("scanned squoted scalar", s, /*keep_newlines*/true);
2202
2203 return ScannedScalar { s, needs_filter };
2204}
2205
2206
2207//-----------------------------------------------------------------------------
2208template<class EventHandler>
2209typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_scan_scalar_dquot()
2210{
2211 // quoted scalars can spread over multiple lines!
2212 // nice explanation here: http://yaml-multiline.info/
2213
2214 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, _buf().sub(m_evt_handler->m_curr->pos.offset).begins_with('"'), m_evt_handler->m_curr->pos);
2215
2216 // a span to the end of the file, skipping the opening quote
2217 substr s = _buf().sub(m_evt_handler->m_curr->pos.offset + 1);
2218 _line_progressed(1); // advance over the opening quote
2219 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->at_line_beginning(), m_evt_handler->m_curr->pos);
2220
2221 bool needs_filter = false;
2222 size_t pos = npos; // find the pos of the matching quote
2223 while( ! _finished_file())
2224 {
2225 #if defined(__GNUC__) && (/*__GNUC__ == 12 || */__GNUC__ == 13)
2226 C4_DONT_OPTIMIZE(m_evt_handler->m_curr->line_contents.rem); // prevent hoisting
2227 #endif
2228 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
2229 _c4dbgpf("scanning double quoted scalar @ line[{}]: line='{}'", m_evt_handler->m_curr->pos.line, rem);
2230 if(C4_UNLIKELY(m_evt_handler->m_curr->at_line_beginning() && _is_doc_token(rem)))
2231 _c4err("token can not appear at line begin");
2232 for(size_t i = 0; i < rem.len; ++i)
2233 {
2234 const char curr = rem.str[i];
2235 // every \ is an escape
2236 if(curr == '\\')
2237 {
2238 const char next = i+1 < rem.len ? rem.str[i+1] : '~';
2239 needs_filter = true;
2240 if(next == '"' || next == '\\')
2241 ++i;
2242 }
2243 else if(curr == '"')
2244 {
2245 _line_progressed(i + 1); // progress beyond the quote
2246 pos = i + (size_t)(rem.str - s.str); // set pos to before the quote
2247 goto found_close;
2248 }
2249 }
2250
2251 // leading whitespace also needs filtering
2252 needs_filter = true;
2253 _line_progressed(rem.len);
2254 _line_ended();
2255 _scan_line();
2256 _check_valid_newline_in_quoted_scalar();
2257 }
2258
2259 _c4err("reached end of file while looking for closing quote");
2260
2261found_close:
2262
2263 _c4dbgpf("found closing quote at: {}", pos);
2264 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, pos != npos, m_evt_handler->m_curr->pos);
2265 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, pos >= 0, m_evt_handler->m_curr->pos);
2266 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.end() >= _buf().begin() && s.end() <= _buf().end(), m_evt_handler->m_curr->pos);
2267 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.end() == _buf().end() || *s.end() == '"', m_evt_handler->m_curr->pos);
2268 _set_first_strict(s, pos);
2269
2270 _c4prscalar("scanned dquoted scalar", s, /*keep_newlines*/true);
2271
2272 return ScannedScalar{s, needs_filter};
2273}
2274
2275
2276//-----------------------------------------------------------------------------
/** Scan a block scalar (header starting with `|` or `>`) whose
 * header is at the current line remainder.
 *
 * First the header is parsed: an optional chomp indicator (`-` for
 * strip, `+` for keep, clip otherwise) and an optional explicit
 * indentation, which must be a single nonzero digit. Then the
 * following lines are accumulated, whole, into a raw span until a
 * termination condition is met. When no explicit indentation was
 * given, it is picked from the first non-empty line.
 *
 * @param sb receives the raw scalar span, the indentation and the
 *        chomp mode
 * @param indref the reference indentation; lines indented less
 *        than this terminate a block with no explicit indentation */
2277 template<class EventHandler>
2278 void ParseEngine<EventHandler>::_scan_block(ScannedBlock *C4_RESTRICT sb, size_t indref)
2279 {
2280 _c4dbgpf("blck: indref={}", indref);
2281 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, indref != npos, m_evt_handler->m_curr->pos);
2282
2283 // nice explanation here: http://yaml-multiline.info/
2284 csubstr s = m_evt_handler->m_curr->line_contents.rem;
2285 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with('|') || s.begins_with('>'), m_evt_handler->m_curr->pos);
2286
2287 _c4dbgpf("blck: specs={}", _prs(s));
2288
2289 // parse the spec
2290 BlockChomp_e chomp = CHOMP_CLIP; // default to clip unless + or - are used
2291 size_t indentation = npos; // have to find out if no spec is given
2292 if(s.len > 1)
2293 {
2294 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with_any("|>"), m_evt_handler->m_curr->pos);
// t is the header without the leading `|` or `>`
2295 csubstr t = s.sub(1);
2296 _c4dbgpf("blck: spec is multichar: {}", _prs(t));
2297 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, t.len >= 1, m_evt_handler->m_curr->pos);
2298 size_t pos = t.first_of("-+");
2299 _c4dbgpf("blck: spec chomp char: pos={}", pos);
2300 if(pos != npos)
2301 {
2302 _c4dbgpf("blck: spec chomp char: {}", _c4prc(t[pos]));
2303 if(t[pos] == '-')
2304 {
2305 _c4dbgp("blck: chomp=STRIP");
2306 chomp = CHOMP_STRIP;
2307 }
2308 else if(t[pos] == '+')
2309 {
2310 _c4dbgp("blck: chomp=KEEP");
2311 chomp = CHOMP_KEEP;
2312 }
// drop the chomp char: keep what follows it when it comes first,
// otherwise keep what precedes it
2313 if(pos == 0)
2314 t = t.sub(1);
2315 else
2316 t = t.first(pos);
2317 _c4dbgpf("blck: spec is now: {}", _prs(t));
2318 }
2319 // from here to the end, only digits are considered
2320 pos = t.first_not_of("0123456789");
2321 csubstr rest = t.first(pos);
2322 if( ! rest.empty())
2323 {
2324 _c4dbgpf("blck: parse indentation digits: {}", _prs(rest));
2325 if(C4_UNLIKELY(rest.len > 1))
2326 _c4err("parse error: invalid indentation");
2327 if(C4_UNLIKELY( ! c4::atou(rest, &indentation)))
2328 _c4err("parse error: could not read indentation as decimal"); // LCOV_EXCL_LINE
2329 if(C4_UNLIKELY( ! indentation))
2330 _c4err("parse error: null indentation");
// NOTE(review): the debug message below computes indentation+indref
// (the parameter), while the code adds m_curr->indref - confirm
// that these are meant to be the same value
2331 _c4dbgpf("blck: indentation specified: {}. add {} from curr state -> {}", indentation, m_evt_handler->m_curr->indref, indentation+indref);
2332 indentation += m_evt_handler->m_curr->indref;
2333 }
2334 else
2335 {
// no digits: after optional whitespace only a comment may follow,
// and the comment must not come immediately at the start of t
2336 rest = t.triml(" \t");
2337 _c4dbgpf("blck: digits empty. t={} trimmed={} iscomm={} t.iscomm={}", _prs(t), _prs(rest), rest.begins_with('#'), t.begins_with('#'));
2338 if(C4_UNLIKELY(rest.len && (rest.str[0] != '#' || t.str[0] == '#')))
2339 _c4err("parse error: invalid token");
2340 }
2341 }
2342
2343 _c4dbgpf("blck: style={} chomp={} indentation={}", s.begins_with('>') ? "fold" : "literal", chomp==CHOMP_CLIP ? "clip" : (chomp==CHOMP_STRIP ? "strip" : "keep"), indentation);
2344
2345 // finish the current line
2346 _line_progressed(s.len);
2347 _line_ended();
2348 _scan_line();
2349
2350 // start with a zero-length block, already pointing at the right place
2351 substr raw_block(_buf().data() + m_evt_handler->m_curr->pos.offset, size_t(0));
2352 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, raw_block.begin() == m_evt_handler->m_curr->line_contents.full.str, m_evt_handler->m_curr->pos);
2353
2354 // read every full line into a raw block,
2355 // from which newlines are to be stripped as needed.
2356 //
2357 // If no explicit indentation was given, pick it from the first
2358 // non-empty line. See
2359 // https://yaml.org/spec/1.2.2/#8111-block-indentation-indicator
2360 size_t num_lines = 0;
2361 size_t first = m_evt_handler->m_curr->pos.line;
// candidate indentation gathered from leading empty lines; used
// while (and if) no non-empty line has fixed the indentation
2362 size_t provisional_indentation = npos;
2363 LineContents lc;
2364 while(( ! _finished_file()))
2365 {
2366 // peek next line, but do not advance immediately
2367 lc.reset_with_next_line(_buf(), m_evt_handler->m_curr->pos.offset);
2368 #if defined(__GNUC__) && (__GNUC__ == 12 || __GNUC__ == 13)
2369 C4_DONT_OPTIMIZE(lc.rem);
2370 #endif
2371 _c4dbgpf("blck: peeking at {}", _prs(lc.rem.trimr("\r\n"), true));
2372 // evaluate termination conditions
2373 if(indentation != npos)
2374 {
2375 _c4dbgpf("blck: indentation={}", indentation);
2376 // stop when the line is deindented and not empty
2377 if(lc.indentation < indentation && ( ! lc.rem.trim(" \t").empty()))
2378 {
2379 if(raw_block.len)
2380 {
2381 _c4dbgpf("blck: indentation decreased ref={} thisline={}", indentation, lc.indentation);
2382 }
2383 else
2384 {
2385 _c4err("indentation decreased without any scalar");
2386 }
2387 break;
2388 }
2389 else if(indentation == 0)
2390 {
// with zero indentation, only a document token ends the block
2391 _c4dbgpf("blck: noindent. lc.rem={}", _prs(lc.rem));
2392 if(_is_doc_token(lc.rem))
2393 {
2394 _c4dbgp("blck: stop. indentation=0 and doc ended");
2395 break;
2396 }
2397 }
2398 }
2399 else
2400 {
// the indentation is not known yet
2401 const size_t fns = lc.rem.first_not_of(' ');
2402 _c4dbgpf("blck: indentation ref not set. firstnonws={}", fns);
2403 if(fns != npos) // non-empty line
2404 {
2405 _c4dbgpf("blck: line not empty. indref={} indprov={} indentation={}", indref, provisional_indentation, lc.indentation);
2406 if(C4_UNLIKELY(lc.full.begins_with('\t')))
2407 _c4err("parse error");
2408 if(provisional_indentation == npos)
2409 {
// this is the very first line after the header
2410 if(lc.indentation < indref)
2411 {
2412 _c4dbgpf("blck: block terminated indentation={} < indref={}", lc.indentation, indref);
2413 if(raw_block.len == 0)
2414 {
2415 _c4dbgp("blck: was empty, undo next line");
2416 _line_ended_undo();
2417 }
2418 break;
2419 }
// NOTE(review): this compares against m_curr->indref, while the
// check above uses the indref parameter - confirm this is intended
2420 else if(lc.indentation == m_evt_handler->m_curr->indref)
2421 {
2422 if(has_any(RSEQ|RMAP))
2423 {
2424 _c4dbgpf("blck: block terminated. reading container and indentation={}==indref={}", lc.indentation, m_evt_handler->m_curr->indref);
2425 break;
2426 }
2427 }
2428 _c4dbgpf("blck: set indentation ref from this line: ref={}", lc.indentation);
2429 indentation = lc.indentation;
2430 }
2431 else
2432 {
// empty lines came first: this line must be at least as indented
// as the provisional indentation they established
2433 if(lc.indentation >= provisional_indentation)
2434 {
2435 _c4dbgpf("blck: set indentation ref from provisional indentation: provisional_ref={}, thisline={}", provisional_indentation, lc.indentation);
2436 //indentation = provisional_indentation ? provisional_indentation : lc.indentation;
2437 indentation = lc.indentation;
2438 }
2439 else
2440 {
2441 if(lc.indentation >= indref)
2442 _c4err("parse error: first non-empty block line should have at least the original indentation");
2443 _c4dbgp("blck: finished");
2444 break;
2445 }
2446 }
2447 }
2448 else // empty line
2449 {
2450 _c4dbgpf("blck: line empty or {} spaces. line_indentation={} prov_indentation={}", lc.rem.len, lc.indentation, provisional_indentation);
2451 if(provisional_indentation != npos)
2452 {
// the provisional indentation only ever grows
2453 if(lc.rem.len >= provisional_indentation)
2454 {
2455 _c4dbgpf("blck: increase provisional_ref {} -> {}", provisional_indentation, lc.rem.len);
2456 provisional_indentation = lc.rem.len;
2457 }
2458 }
2459 else
2460 {
// first empty line: seed the provisional indentation from the
// line, falling back to the 0/1 result of has_any(RSEQ|RVAL),
// and never below indref
2461 provisional_indentation = lc.indentation ? lc.indentation : has_any(RSEQ|RVAL);
2462 _c4dbgpf("blck: initialize provisional_ref={}", provisional_indentation);
2463 if(provisional_indentation == npos)
2464 {
2465 provisional_indentation = lc.rem.len ? lc.rem.len : has_any(RSEQ|RVAL);
2466 _c4dbgpf("blck: initialize provisional_ref={}", provisional_indentation);
2467 }
2468 if(provisional_indentation < indref)
2469 {
2470 provisional_indentation = indref;
2471 _c4dbgpf("blck: initialize provisional_ref={}", provisional_indentation);
2472 }
2473 }
2474 }
2475 }
2476 // advance now that we know the folded scalar continues
2477 m_evt_handler->m_curr->line_contents = lc;
2478 _c4dbgpf("blck: append '{}'", m_evt_handler->m_curr->line_contents.rem);
2479 raw_block.len += m_evt_handler->m_curr->line_contents.full.len;
2480 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
2481 _line_ended();
2482 ++num_lines;
2483 }
2484 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.line == (first + num_lines) || (raw_block.len == 0), m_evt_handler->m_curr->pos);
2485 C4_UNUSED(num_lines);
2486 C4_UNUSED(first);
2487
// no non-empty line fixed the indentation: use the provisional one
2488 if(indentation == npos)
2489 {
2490 _c4dbgpf("blck: set indentation from provisional: {}", provisional_indentation);
2491 indentation = provisional_indentation;
2492 }
2493
// undo the last _line_ended() performed by the loop above
2494 if(num_lines)
2495 _line_ended_undo();
2496
2497 _c4prscalar("scanned block", raw_block, /*keep_newlines*/true);
2498
2499 sb->scalar = raw_block;
2500 sb->indentation = indentation;
2501 sb->chomp = chomp;
2502 }
2503
2504
2505//-----------------------------------------------------------------------------
2506//-----------------------------------------------------------------------------
2507//-----------------------------------------------------------------------------
2508/** @cond dev */
2509
2510 // a debugging scaffold: change the `#if 0` below to `#if 1` to trace the
// whitespace filtering helpers through _c4dbgpf(), prefixed with the
// processor's read/write positions; otherwise _c4dbgfws() expands to nothing
2511 #if 0
2512 #define _c4dbgfws(fmt, ...) _c4dbgpf("filt_ws[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2513 #else
2514 #define _c4dbgfws(...)
2515 #endif
2516
// Handle a run of whitespace at the processor's read position.
//
// Looks for the first character after the run. When that character
// is a line ending, the run is trailing whitespace and is skipped;
// otherwise the current whitespace character is copied. Returns
// true when a character was found after the run, and false when
// whitespace is all that is left in the source (in which case
// nothing is consumed).
//
// NOTE(review): the line carrying this function's signature is
// missing from this listing; judging by the callers below, this is
// presumably _filter_ws_handle_to_first_non_space(proc) - confirm.
2517 template<class EventHandler>
2518 template<class FilterProcessor>
2520 {
2521 _c4dbgfws("found whitespace '{}'", _c4prc(proc.curr()));
2522 _RYML_ASSERT_PARSE_(this->callbacks(), proc.curr() == ' ' || proc.curr() == '\t', m_evt_handler->m_curr->pos);
2523
// at the very start of the source (rpos == 0) only spaces are
// treated as part of the run; elsewhere tabs are included too
2524 const size_t first_pos = proc.rpos > 0 ? proc.src.first_not_of(" \t", proc.rpos) : proc.src.first_not_of(' ', proc.rpos);
2525 if(first_pos != npos)
2526 {
2527 const char first_char = proc.src[first_pos];
2528 _c4dbgfws("firstnonws='{}'@{}", _c4prc(first_char), first_pos);
2529 if(first_char == '\n' || first_char == '\r') // skip trailing whitespace
2530 {
2531 _c4dbgfws("whitespace is trailing on line", "");
2532 proc.skip(first_pos - proc.rpos);
2533 }
2534 else // a legit whitespace
2535 {
2536 proc.copy();
2537 _c4dbgfws("legit whitespace. sofar={}", _prs(proc.sofar()));
2538 }
2539 return true;
2540 }
2541 _c4dbgfws("whitespace is trailing on line", "");
2542 return false;
2543 }
2544
// Handle whitespace at the read position; when only whitespace is
// left in the source, copy all of it to the output.
//
// NOTE(review): the line carrying this function's signature is
// missing from this listing; presumably this is
// _filter_ws_copy_trailing(proc), which is called from the
// single-quoted filter - confirm.
2545 template<class EventHandler>
2546 template<class FilterProcessor>
2548 {
2549 if(!_filter_ws_handle_to_first_non_space(proc))
2550 {
2551 _c4dbgfws("... everything else is trailing whitespace - copy {} chars", proc.src.len - proc.rpos);
2552 proc.copy(proc.src.len - proc.rpos);
2553 }
2554 }
2555
// Handle whitespace at the read position; when only whitespace is
// left in the source, skip all of it.
//
// NOTE(review): the line carrying this function's signature is
// missing from this listing; presumably this is
// _filter_ws_skip_trailing(proc), which is called from the plain
// scalar filter - confirm.
2556 template<class EventHandler>
2557 template<class FilterProcessor>
2559 {
2560 if(!_filter_ws_handle_to_first_non_space(proc))
2561 {
2562 _c4dbgfws("... everything else is trailing whitespace - skip {} chars", proc.src.len - proc.rpos);
2563 proc.skip(proc.src.len - proc.rpos);
2564 }
2565 }
2566
2567#undef _c4dbgfws
2568
2569
2570//-----------------------------------------------------------------------------
2571//-----------------------------------------------------------------------------
2572//-----------------------------------------------------------------------------
2573/* plain scalars */
2574
2575 // a debugging scaffold: change the `#if 0` below to `#if 1` to trace the
// plain scalar filter through _c4dbgpf(), prefixed with the processor's
// read/write positions; otherwise _c4dbgfps() expands to nothing
2576 #if 0
2577 #define _c4dbgfps(fmt, ...) _c4dbgpf("filt_plain[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2578 #else
2579 #define _c4dbgfps(fmt, ...)
2580 #endif
2581
2582template<class EventHandler>
2583template<class FilterProcessor>
2584void ParseEngine<EventHandler>::_filter_nl_plain(FilterProcessor &C4_RESTRICT proc, size_t indentation)
2585{
2586 _RYML_ASSERT_PARSE_(this->callbacks(), proc.curr() == '\n', m_evt_handler->m_curr->pos);
2587
2588 _c4dbgfps("found newline. sofar={}", _prs(proc.sofar()));
2589 size_t ii = proc.rpos;
2590 const size_t numnl_following = _count_following_newlines(proc.src, &ii, indentation);
2591 if(numnl_following)
2592 {
2593 proc.set('\n', numnl_following);
2594 _c4dbgfps("{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ? "in the middle" : "at the end", proc.rpos-ii);
2595 }
2596 else
2597 {
2598 const size_t ret = proc.src.first_not_of(" \t", proc.rpos+1);
2599 if(ret != npos)
2600 {
2601 proc.set(' ');
2602 _c4dbgfps("single newline. convert to space. ret={}/{}. sofar={}", ii, proc.src.len, _prs(proc.sofar()));
2603 }
2604 else
2605 {
2606 _c4dbgfps("last newline, everything else is whitespace. ii={}/{}", ii, proc.src.len);
2607 ii = proc.src.len;
2608 }
2609 }
2610 proc.rpos = ii;
2611}
2612
2613template<class EventHandler>
2614template<class FilterProcessor>
2615auto ParseEngine<EventHandler>::_filter_plain(FilterProcessor &C4_RESTRICT proc, size_t indentation) -> decltype(proc.result())
2616{
2617 _RYML_ASSERT_PARSE_(this->callbacks(), indentation != npos, m_evt_handler->m_curr->pos);
2618 _c4dbgfps("before={}", _prs(proc.src));
2619
2620 while(proc.has_more_chars())
2621 {
2622 const char curr = proc.curr();
2623 _c4dbgfps("'{}', sofar={}", _c4prc(curr), _prs(proc.sofar()));
2624 switch(curr)
2625 {
2626 case ' ':
2627 _RYML_WITH_TAB_TOKENS(case '\t':)
2628 _c4dbgfps("whitespace", curr);
2629 _filter_ws_skip_trailing(proc);
2630 break;
2631 case '\n':
2632 _c4dbgfps("newline", curr);
2633 _filter_nl_plain(proc, /*indentation*/indentation);
2634 break;
2635 case '\r': // skip \r --- https://stackoverflow.com/questions/1885900
2636 _c4dbgfps("carriage return, ignore", curr);
2637 proc.skip();
2638 break;
2639 default:
2640 proc.copy();
2641 break;
2642 }
2643 }
2644
2645 _c4dbgfps("after={}", _prs(proc.sofar()));
2646
2647 return proc.result();
2648}
2649
2650#undef _c4dbgfps
2651
2652
// Filter a plain scalar from `scalar` into `dst`, using a
// source/destination filter processor.
//
// NOTE(review): the line carrying this function's signature is
// missing from this listing; the parameters `scalar`, `dst` and
// `indentation` are inferred from the body - confirm against the
// declaration.
2653 template<class EventHandler>
2655 {
2656 FilterProcessorSrcDst proc(scalar, dst);
2657 return _filter_plain(proc, indentation);
2658 }
2659
// Filter a plain scalar through _filter_plain().
//
// NOTE(review): both the signature line and the line declaring
// `proc` are missing from this listing; presumably this is the
// in-place counterpart of the source/destination overload above -
// confirm against the original file.
2660 template<class EventHandler>
2662 {
2664 return _filter_plain(proc, indentation);
2665 }
2666
2667
2668//-----------------------------------------------------------------------------
2669//-----------------------------------------------------------------------------
2670//-----------------------------------------------------------------------------
2671/* single quoted */
2672
2673 // a debugging scaffold: change the `#if 0` below to `#if 1` to trace the
// single-quoted scalar filter through _c4dbgpf(), prefixed with the
// processor's read/write positions; otherwise _c4dbgfsq() expands to nothing
2674 #if 0
2675 #define _c4dbgfsq(fmt, ...) _c4dbgpf("filt_squo[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2676 #else
2677 #define _c4dbgfsq(fmt, ...)
2678 #endif
2679
2680template<class EventHandler>
2681template<class FilterProcessor>
2682void ParseEngine<EventHandler>::_filter_nl_squoted(FilterProcessor &C4_RESTRICT proc)
2683{
2684 _RYML_ASSERT_PARSE_(this->callbacks(), proc.curr() == '\n', m_evt_handler->m_curr->pos);
2685
2686 _c4dbgfsq("found newline. sofar={}", _prs(proc.sofar()));
2687 size_t ii = proc.rpos;
2688 const size_t numnl_following = _count_following_newlines(proc.src, &ii);
2689 if(numnl_following)
2690 {
2691 proc.set('\n', numnl_following);
2692 _c4dbgfsq("{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ? "in the middle" : "at the end", proc.rpos-ii);
2693 }
2694 else
2695 {
2696 const size_t ret = proc.src.first_not_of(" \t", proc.rpos+1);
2697 if(ret != npos)
2698 {
2699 proc.set(' ');
2700 _c4dbgfsq("single newline. convert to space. ret={}/{}. sofar={}", ii, proc.src.len, _prs(proc.sofar()));
2701 }
2702 else
2703 {
2704 proc.set(' ');
2705 _c4dbgfsq("single newline. convert to space. ii={}/{}. sofar={}", ii, proc.src.len, _prs(proc.sofar()));
2706 }
2707 }
2708 proc.rpos = ii;
2709}
2710
2711template<class EventHandler>
2712template<class FilterProcessor>
2713auto ParseEngine<EventHandler>::_filter_squoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result())
2714{
2715 _c4dbgfsq("before={}", _prs(proc.src));
2716
2717 // from the YAML spec for double-quoted scalars:
2718 // https://yaml.org/spec/1.2-old/spec.html#style/flow/single-quoted
2719 while(proc.has_more_chars())
2720 {
2721 const char curr = proc.curr();
2722 _c4dbgfsq("'{}', sofar={}", _c4prc(curr), _prs(proc.sofar()));
2723 switch(curr)
2724 {
2725 case ' ':
2726 case '\t':
2727 _c4dbgfsq("whitespace", curr);
2728 _filter_ws_copy_trailing(proc);
2729 break;
2730 case '\n':
2731 _c4dbgfsq("newline", curr);
2732 _filter_nl_squoted(proc);
2733 break;
2734 case '\r': // skip \r --- https://stackoverflow.com/questions/1885900
2735 _c4dbgfsq("skip cr", curr);
2736 proc.skip();
2737 break;
2738 case '\'':
2739 _c4dbgfsq("squote", curr);
2740 if(proc.next() == '\'')
2741 {
2742 _c4dbgfsq("two consecutive squotes", curr);
2743 proc.skip();
2744 proc.copy();
2745 }
2746 else
2747 {
2748 _c4err("filter error");
2749 }
2750 break;
2751 default:
2752 proc.copy();
2753 break;
2754 }
2755 }
2756
2757 _c4dbgfsq(": #filteredchars={} after={}", proc.src.len-proc.sofar().len, _prs(proc.sofar()));
2758
2759 return proc.result();
2760}
2761
2762#undef _c4dbgfsq
2763
2764template<class EventHandler>
2766{
2767 FilterProcessorSrcDst proc(scalar, dst);
2768 return _filter_squoted(proc);
2769}
2770
2771template<class EventHandler>
2773{
2775 return _filter_squoted(proc);
2776}
2777
2778
2779//-----------------------------------------------------------------------------
2780//-----------------------------------------------------------------------------
2781//-----------------------------------------------------------------------------
2782/* double quoted */
2783
2784// a debugging scaffold:
2785#if 0
2786#define _c4dbgfdq(fmt, ...) _c4dbgpf("filt_dquo[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2787#else
2788#define _c4dbgfdq(...)
2789#endif
2790
2791template<class EventHandler>
2792template<class FilterProcessor>
2793void ParseEngine<EventHandler>::_filter_nl_dquoted(FilterProcessor &C4_RESTRICT proc)
2794{
2795 _RYML_ASSERT_PARSE_(this->callbacks(), proc.curr() == '\n', m_evt_handler->m_curr->pos);
2796
2797 _c4dbgfdq("found newline. sofar={}", _prs(proc.sofar()));
2798 size_t ii = proc.rpos;
2799 const size_t numnl_following = _count_following_newlines(proc.src, &ii);
2800 if(numnl_following)
2801 {
2802 proc.set('\n', numnl_following);
2803 _c4dbgfdq("{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ? "in the middle" : "at the end", proc.rpos-ii);
2804 }
2805 else
2806 {
2807 const size_t ret = proc.src.first_not_of(" \t", proc.rpos+1);
2808 if(ret != npos)
2809 {
2810 proc.set(' ');
2811 _c4dbgfdq("single newline. convert to space. ret={}/{}. sofar={}", ii, proc.src.len, _prs(proc.sofar()));
2812 }
2813 else
2814 {
2815 proc.set(' ');
2816 _c4dbgfdq("single newline. convert to space. ii={}/{}. sofar={}", ii, proc.src.len, _prs(proc.sofar()));
2817 }
2818 if(ii < proc.src.len && proc.src.str[ii] == '\\')
2819 {
2820 _c4dbgfdq("backslash at [{}]", ii);
2821 const char next = ii+1 < proc.src.len ? proc.src.str[ii+1] : '\0';
2822 if(next == ' ' || next == '\t')
2823 {
2824 _c4dbgfdq("extend skip to backslash", "");
2825 ++ii;
2826 }
2827 }
2828 }
2829 proc.rpos = ii;
2830}
2831
2832template<class EventHandler>
2833template<class FilterProcessor>
2834void ParseEngine<EventHandler>::_filter_dquoted_backslash_decode(FilterProcessor &C4_RESTRICT proc, size_t sz)
2835{
2836 const size_t szp1 = sz + 1u;
2837 if(C4_UNLIKELY(proc.rpos + szp1 >= proc.src.len))
2838 _c4err("codepoint requires {} hex digits. scalar pos={}", sz, proc.rpos);
2839 char readbuf[8];
2840 csubstr codepoint = proc.src.sub(proc.rpos + 2u, sz);
2841 _c4dbgfdq("utf8 ~~~{}~~~ rpos={} rem=~~~{}~~~", codepoint, proc.rpos, proc.src.sub(proc.rpos));
2842 uint32_t codepoint_val = {};
2843 if(C4_UNLIKELY(!read_hex(codepoint, &codepoint_val)))
2844 _c4err("failed to parse codepoint. scalar pos={}", proc.rpos);
2845 const size_t numbytes = decode_code_point((uint8_t*)readbuf, sizeof(readbuf), codepoint_val);
2846 if(C4_UNLIKELY(numbytes == 0))
2847 _c4err("failed to decode code point={}", proc.rpos);
2848 _RYML_ASSERT_PARSE_(callbacks(), numbytes <= 4, m_evt_handler->m_curr->pos);
2849 proc.translate_esc_bulk(readbuf, numbytes, /*nread*/szp1);
2850 _c4dbgfdq("utf8 after rpos={} rem=~~~{}~~~", proc.rpos, proc.src.sub(proc.rpos));
2851}
2852
2853template<class EventHandler>
2854template<class FilterProcessor>
2855void ParseEngine<EventHandler>::_filter_dquoted_backslash(FilterProcessor &C4_RESTRICT proc)
2856{
2857 char next = proc.next();
2858 _c4dbgfdq("backslash, next='{}'", _c4prc(next));
2859 if(next == '\r')
2860 {
2861 if(proc.rpos+2 < proc.src.len && proc.src.str[proc.rpos+2] == '\n')
2862 {
2863 proc.skip(); // newline escaped with \ -- skip both (add only one as i is loop-incremented)
2864 next = '\n';
2865 _c4dbgfdq("[{}]: was \\r\\n, now next='\\n'", proc.rpos);
2866 }
2867 }
2868
2869 if(next == '\n')
2870 {
2871 size_t ii = proc.rpos + 2;
2872 for( ; ii < proc.src.len; ++ii)
2873 {
2874 // skip leading whitespace
2875 if(proc.src.str[ii] == ' ' || proc.src.str[ii] == '\t')
2876 ;
2877 else
2878 break;
2879 }
2880 proc.skip(ii - proc.rpos);
2881 }
2882 else if(next == '"' || next == '/' || next == ' ' || next == '\t')
2883 {
2884 // escapes for json compatibility
2885 proc.translate_esc(next);
2886 _c4dbgfdq("here, used '{}'", _c4prc(next));
2887 }
2888 else if(next == '\r')
2889 {
2890 proc.skip();
2891 }
2892 else if(next == 'n')
2893 {
2894 proc.translate_esc('\n');
2895 }
2896 else if(next == 'r')
2897 {
2898 proc.translate_esc('\r');
2899 }
2900 else if(next == 't')
2901 {
2902 proc.translate_esc('\t');
2903 }
2904 else if(next == '\\')
2905 {
2906 proc.translate_esc('\\');
2907 }
2908 else if(next == 'x') // 2-digit Unicode escape (\xXX), code point 0x00–0xFF
2909 {
2910 _filter_dquoted_backslash_decode(proc, 2u);
2911 }
2912 else if(next == 'u') // 4-digit Unicode escape (\uXXXX), code point 0x0000–0xFFFF
2913 {
2914 _filter_dquoted_backslash_decode(proc, 4u);
2915 }
2916 else if(next == 'U') // 8-digit Unicode escape (\UXXXXXXXX), full 32-bit code point
2917 {
2918 _filter_dquoted_backslash_decode(proc, 8u);
2919 }
2920 // https://yaml.org/spec/1.2.2/#rule-c-ns-esc-char
2921 else if(next == '0')
2922 {
2923 proc.translate_esc('\0');
2924 }
2925 else if(next == 'b') // backspace
2926 {
2927 proc.translate_esc('\b');
2928 }
2929 else if(next == 'f') // form feed
2930 {
2931 proc.translate_esc('\f');
2932 }
2933 else if(next == 'a') // bell character
2934 {
2935 proc.translate_esc('\a');
2936 }
2937 else if(next == 'v') // vertical tab
2938 {
2939 proc.translate_esc('\v');
2940 }
2941 else if(next == 'e') // escape character
2942 {
2943 proc.translate_esc('\x1b');
2944 }
2945 else if(next == '_') // unicode non breaking space \u00a0
2946 {
2947 // https://www.compart.com/en/unicode/U+00a0
2948 const char payload[] = {
2949 _RYML_CHCONST(-0x3e, 0xc2),
2950 _RYML_CHCONST(-0x60, 0xa0),
2951 };
2952 proc.translate_esc_bulk(payload, /*nwrite*/2, /*nread*/1);
2953 }
2954 else if(next == 'N') // unicode next line \u0085
2955 {
2956 // https://www.compart.com/en/unicode/U+0085
2957 const char payload[] = {
2958 _RYML_CHCONST(-0x3e, 0xc2),
2959 _RYML_CHCONST(-0x7b, 0x85),
2960 };
2961 proc.translate_esc_bulk(payload, /*nwrite*/2, /*nread*/1);
2962 }
2963 else if(next == 'L') // unicode line separator \u2028
2964 {
2965 // https://www.utf8-chartable.de/unicode-utf8-table.pl?start=8192&number=1024&names=-&utf8=0x&unicodeinhtml=hex
2966 const char payload[] = {
2967 _RYML_CHCONST(-0x1e, 0xe2),
2968 _RYML_CHCONST(-0x80, 0x80),
2969 _RYML_CHCONST(-0x58, 0xa8),
2970 };
2971 proc.translate_esc_extending(payload, /*nwrite*/3, /*nread*/1);
2972 }
2973 else if(next == 'P') // unicode paragraph separator \u2029
2974 {
2975 // https://www.utf8-chartable.de/unicode-utf8-table.pl?start=8192&number=1024&names=-&utf8=0x&unicodeinhtml=hex
2976 const char payload[] = {
2977 _RYML_CHCONST(-0x1e, 0xe2),
2978 _RYML_CHCONST(-0x80, 0x80),
2979 _RYML_CHCONST(-0x57, 0xa9),
2980 };
2981 proc.translate_esc_extending(payload, /*nwrite*/3, /*nread*/1);
2982 }
2983 else if(next == '\0')
2984 {
2985 proc.skip();
2986 }
2987 else
2988 {
2989 _c4err("unknown character '{}' after '\\' pos={}", _c4prc(next), proc.rpos);
2990 }
2991 _c4dbgfdq("backslash...sofar={}", _prs(proc.sofar()));
2992}
2993
2994
2995template<class EventHandler>
2996template<class FilterProcessor>
2997auto ParseEngine<EventHandler>::_filter_dquoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result())
2998{
2999 _c4dbgfdq("before={}", _prs(proc.src));
3000 // from the YAML spec for double-quoted scalars:
3001 // https://yaml.org/spec/1.2-old/spec.html#style/flow/double-quoted
3002 while(proc.has_more_chars())
3003 {
3004 const char curr = proc.curr();
3005 _c4dbgfdq("'{}' sofar={}", _c4prc(curr), _prs(proc.sofar()));
3006 switch(curr)
3007 {
3008 case ' ':
3009 case '\t':
3010 {
3011 _c4dbgfdq("whitespace", curr);
3012 _filter_ws_copy_trailing(proc);
3013 break;
3014 }
3015 case '\n':
3016 {
3017 _c4dbgfdq("newline", curr);
3018 _filter_nl_dquoted(proc);
3019 break;
3020 }
3021 case '\r': // skip \r --- https://stackoverflow.com/questions/1885900
3022 {
3023 _c4dbgfdq("carriage return, ignore", curr);
3024 proc.skip();
3025 break;
3026 }
3027 case '\\':
3028 {
3029 _filter_dquoted_backslash(proc);
3030 break;
3031 }
3032 default:
3033 {
3034 proc.copy();
3035 break;
3036 }
3037 }
3038 }
3039 _c4dbgfdq("after={}", _prs(proc.sofar()));
3040 return proc.result();
3041}
3042
3043#undef _c4dbgfdq
3044
3045
3046template<class EventHandler>
3048{
3049 FilterProcessorSrcDst proc(scalar, dst);
3050 return _filter_dquoted(proc);
3051}
3052
3053template<class EventHandler>
3055{
3057 return _filter_dquoted(proc);
3058}
3059
3060
3061//-----------------------------------------------------------------------------
3062//-----------------------------------------------------------------------------
3063//-----------------------------------------------------------------------------
3064// block filtering helpers
3065
3066C4_NO_INLINE inline size_t _find_last_newline_and_larger_indentation(csubstr s, size_t indentation) noexcept
3067{
3068 if(indentation + 1 > s.len)
3069 return npos;
3070 for(size_t i = s.len-indentation-1; i != size_t(-1); --i)
3071 {
3072 if(s.str[i] == '\n')
3073 {
3074 csubstr rem = s.sub(i + 1);
3075 size_t first = rem.first_not_of(' ');
3076 first = (first != npos) ? first : rem.len;
3077 if(first > indentation)
3078 return i;
3079 }
3080 }
3081 return npos;
3082}
3083
// Apply the chomp mode to the trailing part of a block scalar.
// On entry the remaining input (proc.rem()) must contain only spaces,
// '\n' and '\r' (asserted below), and chomp must be one of CHOMP_CLIP,
// CHOMP_KEEP or CHOMP_STRIP (also asserted).
//
// Step 1: if the trailing part has a line with more spaces than the
// indentation, copy everything up to that line, removing only the
// indentation spaces and any '\r'.
// Step 2: handle the remaining line ends according to chomp:
//  - CLIP: copy one newline (adding one if there was none)
//  - KEEP: copy every newline, skipping spaces and '\r'
//  - STRIP: copy nothing
3084template<class EventHandler>
3085template<class FilterProcessor>
3086void ParseEngine<EventHandler>::_filter_chomp(FilterProcessor &C4_RESTRICT proc, BlockChomp_e chomp, size_t indentation)
3087{
3088 _RYML_ASSERT_PARSE_(this->callbacks(), chomp == CHOMP_CLIP || chomp == CHOMP_KEEP || chomp == CHOMP_STRIP, m_evt_handler->m_curr->pos);
3089 _RYML_ASSERT_PARSE_(this->callbacks(), proc.rem().first_not_of(" \n\r") == npos, m_evt_handler->m_curr->pos);
3090
3091 // a debugging scaffold:
3092 #if 0
3093 #define _c4dbgchomp(fmt, ...) _c4dbgpf("chomp[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3094 #else
3095 #define _c4dbgchomp(...)
3096 #endif
3097
3098 // advance to the last line having spaces beyond the indentation
3099 {
3100 size_t last = _find_last_newline_and_larger_indentation(proc.rem(), indentation);
3101 if(last != npos)
3102 {
3103 _c4dbgchomp("found newline and larger indentation. last={}", last);
 // turn `last` (an offset into rem()) into an absolute read
 // position just past that line's indentation
3104 last = proc.rpos + last + size_t(1) + indentation; // last started at to-be-read.
3105 _RYML_ASSERT_PARSE_(this->callbacks(), last <= proc.src.len, m_evt_handler->m_curr->pos);
3106 // remove indentation spaces, copy the rest
3107 while((proc.rpos < last) && proc.has_more_chars())
3108 {
3109 const char curr = proc.curr();
3110 _c4dbgchomp("curr='{}'", _c4prc(curr));
 // only '\n' and '\r' are handled here: spaces are consumed
 // by the '\n' case right after the newline is copied
3111 switch(curr)
3112 {
3113 case '\n':
3114 {
3115 _c4dbgchomp("newline! remlen={}", proc.rem().len);
3116 proc.copy();
3117 // are there spaces after the newline?
3118 csubstr at_next_line = proc.rem();
3119 if(at_next_line.begins_with(' '))
3120 {
3121 _c4dbgchomp("next line begins with spaces. indentation={}", indentation);
3122 // there are spaces.
3123 size_t first_non_space = at_next_line.first_not_of(' ');
3124 _c4dbgchomp("first_non_space={}", first_non_space);
3125 if(first_non_space == npos)
3126 {
3127 _c4dbgchomp("{} spaces, to the end", at_next_line.len);
3128 first_non_space = at_next_line.len;
3129 }
3130 if(first_non_space <= indentation)
3131 {
3132 _c4dbgchomp("skip spaces={}<=indentation={}", first_non_space, indentation);
3133 proc.skip(first_non_space);
3134 }
3135 else
3136 {
3137 _c4dbgchomp("skip indentation={}<spaces={}", indentation, first_non_space);
3138 proc.skip(indentation);
3139 // copy the spaces after the indentation
3140 _c4dbgchomp("copy {}={}-{} spaces", first_non_space - indentation, first_non_space, indentation);
3141 proc.copy(first_non_space - indentation);
3142 }
3143 }
3144 break;
3145 }
3146 case '\r':
3147 proc.skip();
3148 break;
3149 }
3150 }
3151 }
3152 }
3153
3154 // from now on, we only have line ends (or indentation spaces)
3155 switch(chomp)
3156 {
3157 case CHOMP_CLIP:
3158 {
3159 bool had_one = false;
3160 while(proc.has_more_chars())
3161 {
3162 const char curr = proc.curr();
3163 _c4dbgchomp("CLIP: '{}'", _c4prc(curr));
3164 switch(curr)
3165 {
3166 case '\n':
3167 {
3168 _c4dbgchomp("copy newline!", curr);
 // copy the first newline found, then call set_at_end()
 // (presumably moves the read position to the end of the
 // input, ending this loop -- confirm in the processor)
3169 proc.copy();
3170 proc.set_at_end();
3171 had_one = true;
3172 break;
3173 }
3174 case ' ':
3175 case '\r':
3176 _c4dbgchomp("skip!", curr);
3177 proc.skip();
3178 break;
3179 }
3180 }
3181 if(!had_one) // there were no newline characters. add one.
3182 {
3183 _c4dbgchomp("chomp=CLIP: add missing newline @{}", proc.wpos);
3184 proc.set('\n');
3185 }
3186 break;
3187 }
3188 case CHOMP_KEEP:
3189 {
3190 _c4dbgchomp("chomp=KEEP: copy all remaining new lines of {} characters", proc.rem().len);
3191 while(proc.has_more_chars())
3192 {
3193 const char curr = proc.curr();
3194 _c4dbgchomp("KEEP: '{}'", _c4prc(curr));
3195 switch(curr)
3196 {
3197 case '\n':
3198 _c4dbgchomp("copy newline!", curr);
3199 proc.copy();
3200 break;
3201 case ' ':
3202 case '\r':
3203 _c4dbgchomp("skip!", curr);
3204 proc.skip();
3205 break;
3206 }
3207 }
3208 break;
3209 }
3210 case CHOMP_STRIP:
3211 {
3212 _c4dbgchomp("chomp=STRIP: strip {} characters", proc.rem().len);
3213 // nothing to do!
3214 break;
3215 }
3216 }
3217
3218 #undef _c4dbgchomp
3219}
3220
3221
3222// a debugging scaffold:
3223#if 0
3224#define _c4dbgfb(fmt, ...) _c4dbgpf("filt_block[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3225#else
3226#define _c4dbgfb(...)
3227#endif
3228
3229template<class EventHandler>
3230template<class FilterProcessor>
3231void ParseEngine<EventHandler>::_filter_block_indentation(FilterProcessor &C4_RESTRICT proc, size_t indentation)
3232{
3233 csubstr rem = proc.rem(); // remaining
3234 if(rem.len)
3235 {
3236 size_t first = rem.first_not_of(' ');
3237 if(first != npos)
3238 {
3239 _c4dbgfb("{} spaces follow before next nonws character", first);
3240 if(first < indentation)
3241 {
3242 _c4dbgfb("skip {}<{} spaces from indentation", first, indentation);
3243 proc.skip(first);
3244 }
3245 else
3246 {
3247 _c4dbgfb("skip {} spaces from indentation", indentation);
3248 proc.skip(indentation);
3249 }
3250 }
3251 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
3252 else
3253 {
3254 _c4dbgfb("all spaces to the end: {} spaces", first);
3255 first = rem.len;
3256 if(first)
3257 {
3258 if(first < indentation)
3259 {
3260 _c4dbgfb("skip everything", first);
3261 proc.skip(proc.src.len - proc.rpos);
3262 }
3263 else
3264 {
3265 _c4dbgfb("skip {} spaces from indentation", indentation);
3266 proc.skip(indentation);
3267 }
3268 }
3269 }
3270 #endif
3271 }
3272}
3273
3274template<class EventHandler>
3275template<class FilterProcessor>
3276size_t ParseEngine<EventHandler>::_handle_all_whitespace(FilterProcessor &C4_RESTRICT proc, BlockChomp_e chomp)
3277{
3278 csubstr contents = proc.src.trimr(" \n\r");
3279 _c4dbgfb("ws: contents_len={} wslen={}", contents.len, proc.src.len-contents.len);
3280 if(!contents.len)
3281 {
3282 _c4dbgfb("ws: all whitespace: len={}", proc.src.len);
3283 if(chomp == CHOMP_KEEP && proc.src.len)
3284 {
3285 _c4dbgfb("ws: chomp=KEEP all {} newlines", proc.src.count('\n'));
3286 while(proc.has_more_chars())
3287 {
3288 const char curr = proc.curr();
3289 if(curr == '\n')
3290 proc.copy();
3291 else
3292 proc.skip();
3293 }
3294 if(!proc.wpos)
3295 {
3296 proc.set('\n');
3297 }
3298 }
3299 }
3300 return contents.len;
3301}
3302
3303template<class EventHandler>
3304template<class FilterProcessor>
3305size_t ParseEngine<EventHandler>::_extend_to_chomp(FilterProcessor &C4_RESTRICT proc, size_t contents_len)
3306{
3307 _c4dbgfb("contents_len={}", contents_len);
3308
3309 _RYML_ASSERT_PARSE_(this->callbacks(), contents_len > 0u, m_evt_handler->m_curr->pos);
3310
3311 // extend contents to just before the first newline at the end,
3312 // in case it is preceded by spaces
3313 size_t firstnewl = proc.src.first_of('\n', contents_len);
3314 if(firstnewl != npos)
3315 {
3316 contents_len = firstnewl;
3317 _c4dbgfb("contents_len={} <--- firstnewl={}", contents_len, firstnewl);
3318 }
3319 else
3320 {
3321 contents_len = proc.src.len;
3322 _c4dbgfb("contents_len={} <--- src.len={}", contents_len, proc.src.len);
3323 }
3324
3325 return contents_len;
3326}
3327
3328#undef _c4dbgfb
3329
3330
3331//-----------------------------------------------------------------------------
3332//-----------------------------------------------------------------------------
3333//-----------------------------------------------------------------------------
3334
3335// a debugging scaffold:
3336#if 0
3337#define _c4dbgfbl(fmt, ...) _c4dbgpf("filt_block_lit[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3338#else
3339#define _c4dbgfbl(...)
3340#endif
3341
3342template<class EventHandler>
3343template<class FilterProcessor>
3344auto ParseEngine<EventHandler>::_filter_block_literal(FilterProcessor &C4_RESTRICT proc, size_t indentation, BlockChomp_e chomp) -> decltype(proc.result())
3345{
3346 _c4dbgfbl("indentation={} before={}", indentation, _prs(proc.src));
3347
3348 size_t contents_len = _handle_all_whitespace(proc, chomp);
3349 if(!contents_len)
3350 return proc.result();
3351
3352 contents_len = _extend_to_chomp(proc, contents_len);
3353
3354 _c4dbgfbl("to filter={}", _prs(proc.src.first(contents_len)));
3355
3356 _filter_block_indentation(proc, indentation);
3357
3358 // now filter the bulk
3359 while(proc.has_more_chars(/*maxpos*/contents_len))
3360 {
3361 const char curr = proc.curr();
3362 _c4dbgfbl("'{}' sofar={}", _c4prc(curr), _prs(proc.sofar()));
3363 switch(curr)
3364 {
3365 case '\n':
3366 {
3367 _c4dbgfbl("found newline. skip indentation on the next line", curr);
3368 proc.copy(); // copy the newline
3369 _filter_block_indentation(proc, indentation);
3370 break;
3371 }
3372 case '\r':
3373 proc.skip();
3374 break;
3375 default:
3376 proc.copy();
3377 break;
3378 }
3379 }
3380
3381 _c4dbgfbl("before chomp: #tochomp={} sofar={}", proc.rem().len, _prs(proc.sofar()));
3382
3383 _filter_chomp(proc, chomp, indentation);
3384
3385 _c4dbgfbl("final={}", _prs(proc.sofar()));
3386
3387 return proc.result();
3388}
3389
3390#undef _c4dbgfbl
3391
// Filter a block literal scalar using a FilterProcessorSrcDst built
// from (scalar, dst), with the given indentation and chomp mode.
// Returns the result of _filter_block_literal().
3392template<class EventHandler>
3393FilterResult ParseEngine<EventHandler>::filter_scalar_block_literal(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
3394{
3395 FilterProcessorSrcDst proc(scalar, dst);
3396 return _filter_block_literal(proc, indentation, chomp);
3397}
3398
// Filter a block literal scalar using a
// FilterProcessorInplaceEndExtending built from (scalar, cap), with
// the given indentation and chomp mode.
// Returns the result of _filter_block_literal().
3399template<class EventHandler>
3400FilterResult ParseEngine<EventHandler>::filter_scalar_block_literal_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
3401{
3402 FilterProcessorInplaceEndExtending proc(scalar, cap);
3403 return _filter_block_literal(proc, indentation, chomp);
3404}
3405
3406
3407//-----------------------------------------------------------------------------
3408//-----------------------------------------------------------------------------
3409//-----------------------------------------------------------------------------
3410
3411// a debugging scaffold:
3412#if 0
3413#define _c4dbgfbf(fmt, ...) _c4dbgpf("filt_block_folded[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3414#else
3415#define _c4dbgfbf(...)
3416#endif
3417
3418
3419template<class EventHandler>
3420template<class FilterProcessor>
3421void ParseEngine<EventHandler>::_filter_block_folded_newlines_leading(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len)
3422{
3423 _filter_block_indentation(proc, indentation);
3424 while(proc.has_more_chars(len))
3425 {
3426 const char curr = proc.curr();
3427 _c4dbgfbf("'{}' sofar={}", _c4prc(curr), _prs(proc.sofar()));
3428 switch(curr)
3429 {
3430 case '\n':
3431 _c4dbgfbf("newline.", curr);
3432 proc.copy();
3433 _filter_block_indentation(proc, indentation);
3434 break;
3435 case '\r':
3436 proc.skip();
3437 break;
3438 case ' ':
3439 case '\t':
3440 {
3441 size_t first = proc.rem().first_not_of(" \t");
3442 _c4dbgfbf("space. first={}", first);
3443 if(first == npos)
3444 first = proc.rem().len;
3445 _c4dbgfbf("... indentation increased to {}", first);
3446 _filter_block_folded_indented_block(proc, indentation, len, first);
3447 break;
3448 }
3449 default:
3450 _c4dbgfbf("newl leading: not space, not newline. stop.", 0);
3451 return;
3452 }
3453 }
3454}
3455
// Fold one newline of a run of consecutive newlines in a folded block
// scalar. num_newl is the 1-based index of this newline in the run:
//  - 1st: the newline is skipped and a space is written in its place;
//    the write position of that space is remembered
//  - 2nd: the newline is skipped and the space written for the first
//    newline is overwritten with '\n'
//  - any further newline is copied
// Returns the write position of the first newline's replacement, to
// be passed back in on the next call.
// NOTE: this is kept as a separate function on purpose, as a
// workaround for wrong results seen in some builds (see the note at
// the call site in _filter_block_folded_newlines()).
3456template<class EventHandler>
3457template<class FilterProcessor>
3458size_t ParseEngine<EventHandler>::_filter_block_folded_newlines_compress(FilterProcessor &C4_RESTRICT proc, size_t num_newl, size_t wpos_at_first_newl)
3459{
3460 switch(num_newl)
3461 {
3462 case 1u:
3463 _c4dbgfbf("... this is the first newline. turn into space. wpos={}", proc.wpos);
3464 wpos_at_first_newl = proc.wpos;
3465 proc.skip();
3466 proc.set(' ');
3467 break;
3468 case 2u:
3469 _c4dbgfbf("... this is the second newline. prev space (at wpos={}) must be newline", wpos_at_first_newl);
 // the space must be the last character written so far
3470 _RYML_ASSERT_PARSE_(this->callbacks(), wpos_at_first_newl != npos, m_evt_handler->m_curr->pos);
3471 _RYML_ASSERT_PARSE_(this->callbacks(), proc.sofar()[wpos_at_first_newl] == ' ', m_evt_handler->m_curr->pos);
3472 _RYML_ASSERT_PARSE_(this->callbacks(), wpos_at_first_newl + 1u == proc.wpos, m_evt_handler->m_curr->pos);
3473 proc.skip();
3474 proc.set_at(wpos_at_first_newl, '\n');
3475 _RYML_ASSERT_PARSE_(this->callbacks(), proc.sofar()[wpos_at_first_newl] == '\n', m_evt_handler->m_curr->pos);
3476 break;
3477 default:
3478 _c4dbgfbf("... subsequent newline (num_newl={}). copy", num_newl);
3479 proc.copy();
3480 break;
3481 }
3482 return wpos_at_first_newl;
3483}
3484
// Process a run of newlines in a folded block scalar, up to position
// len. The processor must be positioned at a '\n' (asserted).
// Newlines are folded one by one through
// _filter_block_folded_newlines_compress(); '\r' is dropped; a line
// starting with a space or tab is forwarded to
// _filter_block_folded_indented_block(), after restoring the newlines
// that had been folded before it. Returns at the first character
// which is none of the above.
3485template<class EventHandler>
3486template<class FilterProcessor>
3487void ParseEngine<EventHandler>::_filter_block_folded_newlines(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len)
3488{
3489 _RYML_ASSERT_PARSE_(this->callbacks(), proc.curr() == '\n', m_evt_handler->m_curr->pos);
3490 size_t num_newl = 0;
3491 size_t wpos_at_first_newl = npos;
3492 while(proc.has_more_chars(len))
3493 {
3494 const char curr = proc.curr();
3495 _c4dbgfbf("'{}' sofar={}", _c4prc(curr), _prs(proc.sofar()));
3496 switch(curr)
3497 {
3498 case '\n':
3499 {
3500 _c4dbgfbf("newline. sofar={}", num_newl);
3501 // NOTE: vs2022-32bit-release builds were giving wrong
3502 // results in this block, if it was written either as
3503 // a switch(num_newl) or as its equivalent if-form.
3504 //
3505 // For this reason, we're using a dedicated function
3506 // (**_compress), which seems to work around the issue.
3507 //
3508 // The manifested problem was that somewhere between the
3509 // assignment to curr and this point, proc.wpos (the
3510 // write-position of the processor) jumped to npos, which
3511 // made the write wrap-around! To make things worse,
3512 // enabling prints via _c4dbgpf() and _c4dbgfbf() made the
3513 // problem go away!
3514 //
3515 // The only way to make the problem appear with prints
3516 // enabled was by disabling all prints in this function
3517 // (including in the block which was moved to the compress
3518 // function) and then selectively enabling only some of
3519 // those prints.
3520 //
3521 // This may be due to some bug in the cl-x86 optimizer; or
3522 // it may be triggered by some UB which may be
3523 // inadvertently present in this function or in the filter
3524 // processor. This is despite our best efforts to weed out
3525 // any such UB problem: neither clang-tidy nor any of the
3526 // sanitizers, nor gcc's -fanalyzer pointed to any problems
3527 // in this code.
3528 //
3529 // In the end, moving this block to a separate function
3530 // was the only way to bury the problem. But it may
3531 // resurface again, as The Undead, rising from the
3532 // grave to haunt us with his terrible presence.
3533 //
3534 // We may have to revisit this. With a stake, and lots of
3535 // garlic.
3536 wpos_at_first_newl = _filter_block_folded_newlines_compress(proc, ++num_newl, wpos_at_first_newl);
3537 _filter_block_indentation(proc, indentation);
3538 break;
3539 }
3540 case ' ':
3541 case '\t':
3542 {
3543 size_t first = proc.rem().first_not_of(" \t");
3544 _c4dbgfbf("space. first={}", first);
3545 if(first == npos)
3546 first = proc.rem().len;
3547 _c4dbgfbf("... indentation increased to {}", first);
 // a more-indented line follows: the newlines before it
 // are not folded, so undo the folding done so far
3548 if(num_newl)
3549 {
3550 _c4dbgfbf("... prev space (at wpos={}) must be newline", wpos_at_first_newl);
3551 proc.set_at(wpos_at_first_newl, '\n');
3552 }
3553 if(num_newl > 1u)
3554 {
3555 _c4dbgfbf("... add missing newline", wpos_at_first_newl);
3556 proc.set('\n');
3557 }
3558 _filter_block_folded_indented_block(proc, indentation, len, first);
 // start counting a new run of newlines
3559 num_newl = 0;
3560 wpos_at_first_newl = npos;
3561 break;
3562 }
3563 case '\r':
3564 proc.skip();
3565 break;
3566 default:
3567 _c4dbgfbf("not space, not newline. stop.", 0);
3568 return;
3569 }
3570 }
3571}
3572
3573
3574template<class EventHandler>
3575template<class FilterProcessor>
3576void ParseEngine<EventHandler>::_filter_block_folded_indented_block(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len, size_t curr_indentation) noexcept
3577{
3578 _RYML_ASSERT_PARSE_(this->callbacks(), (proc.rem().first_not_of(" \t") == curr_indentation) || (proc.rem().first_not_of(" \t") == npos), m_evt_handler->m_curr->pos);
3579 if(curr_indentation)
3580 proc.copy(curr_indentation);
3581 while(proc.has_more_chars(len))
3582 {
3583 const char curr = proc.curr();
3584 _c4dbgfbf("'{}' sofar={}", _c4prc(curr), _prs(proc.sofar()));
3585 switch(curr)
3586 {
3587 case '\n':
3588 {
3589 proc.copy();
3590 _filter_block_indentation(proc, indentation);
3591 csubstr rem = proc.rem();
3592 const size_t first = rem.first_not_of(' ');
3593 _c4dbgfbf("newline. firstns={}", first);
3594 if(first == 0)
3595 {
3596 const char c = rem[first];
3597 _c4dbgfbf("firstns={}='{}'", first, _c4prc(c));
3598 if(c != '\n' && c != '\r')
3599 {
3600 _c4dbgfbf("done with indented block", first);
3601 goto endloop;
3602 }
3603 }
3604 else if(first != npos)
3605 {
3606 proc.copy(first);
3607 _c4dbgfbf("copy all {} spaces", first);
3608 }
3609 break;
3610 }
3611 break;
3612 case '\r':
3613 proc.skip();
3614 break;
3615 default:
3616 proc.copy();
3617 break;
3618 }
3619 }
3620 endloop:
3621 return;
3622}
3623
3624
3625template<class EventHandler>
3626template<class FilterProcessor>
3627auto ParseEngine<EventHandler>::_filter_block_folded(FilterProcessor &C4_RESTRICT proc, size_t indentation, BlockChomp_e chomp) -> decltype(proc.result())
3628{
3629 _c4dbgfbf("indentation={} before={}", indentation, _prs(proc.src));
3630
3631 size_t contents_len = _handle_all_whitespace(proc, chomp);
3632 if(!contents_len)
3633 return proc.result();
3634
3635 contents_len = _extend_to_chomp(proc, contents_len);
3636
3637 _c4dbgfbf("to filter={}", _prs(proc.src.first(contents_len)));
3638
3639 _filter_block_folded_newlines_leading(proc, indentation, contents_len);
3640
3641 // now filter the bulk
3642 while(proc.has_more_chars(/*maxpos*/contents_len))
3643 {
3644 const char curr = proc.curr();
3645 _c4dbgfbf("'{}' sofar={}", _c4prc(curr), _prs(proc.sofar()));
3646 switch(curr)
3647 {
3648 case '\n':
3649 {
3650 _c4dbgfbf("found newline", curr);
3651 _filter_block_folded_newlines(proc, indentation, contents_len);
3652 break;
3653 }
3654 case '\r':
3655 proc.skip();
3656 break;
3657 default:
3658 proc.copy();
3659 break;
3660 }
3661 }
3662
3663 _c4dbgfbf("before chomp: #tochomp={} sofar={}", proc.rem().len, _prs(proc.sofar()));
3664
3665 _filter_chomp(proc, chomp, indentation);
3666
3667 _c4dbgfbf("final={}", proc.sofar().len, _prs(proc.sofar()));
3668
3669 return proc.result();
3670}
3671
3672#undef _c4dbgfbf
3673
// Filter a folded block scalar using a FilterProcessorSrcDst built
// from (scalar, dst), with the given indentation and chomp mode.
// Returns the result of _filter_block_folded().
3674template<class EventHandler>
3675FilterResult ParseEngine<EventHandler>::filter_scalar_block_folded(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
3676{
3677 FilterProcessorSrcDst proc(scalar, dst);
3678 return _filter_block_folded(proc, indentation, chomp);
3679}
3680
// Filter a folded block scalar using a
// FilterProcessorInplaceEndExtending built from (scalar, cap), with
// the given indentation and chomp mode.
// Returns the result of _filter_block_folded().
3681template<class EventHandler>
3682FilterResult ParseEngine<EventHandler>::filter_scalar_block_folded_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
3683{
3684 FilterProcessorInplaceEndExtending proc(scalar, cap);
3685 return _filter_block_folded(proc, indentation, chomp);
3686}
3687
3688
3689//-----------------------------------------------------------------------------
3690//-----------------------------------------------------------------------------
3691//-----------------------------------------------------------------------------
3692
3693template<class EventHandler>
3695{
3696 _c4dbgpf("filtering plain scalar: s={}", _prs(s));
3697 FilterResult r = this->filter_scalar_plain_in_place(s, s.len, indentation);
3698 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, r.valid(), m_evt_handler->m_curr->pos);
3699 _c4dbgpf("filtering plain scalar: success! s={}", _prs(r.get()));
3700 return r.get();
3701}
3702
3703//-----------------------------------------------------------------------------
3704
3705template<class EventHandler>
3707{
3708 _c4dbgpf("filtering squo scalar: s={}", _prs(s));
3709 FilterResult r = this->filter_scalar_squoted_in_place(s, s.len);
3710 _RYML_ASSERT_PARSE_(this->callbacks(), r.valid(), m_evt_handler->m_curr->pos);
3711 _c4dbgpf("filtering squo scalar: success! s={}", _prs(r.get()));
3712 return r.get();
3713}
3714
3715
3716//-----------------------------------------------------------------------------
3717
3718template<class EventHandler>
3720{
3721 _c4dbgpf("filtering dquo scalar: s={}", _prs(s));
3722 FilterResultExtending r = this->filter_scalar_dquoted_in_place(s, s.len);
3723 if(C4_LIKELY(r.valid()))
3724 {
3725 _c4dbgpf("filtering dquo scalar: success! s={}", _prs(r.get()));
3726 return r.get();
3727 }
3728 else
3729 {
3730 const size_t len = r.required_len();
3731 _c4dbgpf("filtering dquo scalar: not enough space: needs {}, have {}", len, s.len);
3732 substr dst = _alloc_arena(len, &s);
3733 _c4dbgpf("filtering dquo scalar: dst.len={}", dst.len);
3734 if(dst.str)
3735 {
3736 _RYML_ASSERT_PARSE_(this->callbacks(), dst.len == len, m_evt_handler->m_curr->pos);
3737 FilterResult rsd = this->filter_scalar_dquoted(s, dst);
3738 _c4dbgpf("filtering dquo scalar: ... result now needs {} was {}", rsd.required_len(), len);
3739 _RYML_ASSERT_PARSE_(this->callbacks(), rsd.required_len() <= len, m_evt_handler->m_curr->pos); // may be smaller!
3740 _RYML_CHECK_PARSE_(m_evt_handler->m_stack.m_callbacks, rsd.valid(), m_evt_handler->m_curr->pos);
3741 _c4dbgpf("filtering dquo scalar: success! s={}", _prs(rsd.get()));
3742 return rsd.get();
3743 }
3744 return dst;
3745 }
3746}
3747
3748
3749//-----------------------------------------------------------------------------
3750
3751template<class EventHandler>
3753{
3754 if(s.is_sub(_buf()))
3755 {
3756 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.str > _buf().str, m_evt_handler->m_curr->pos);
3757 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.str-1 >= _buf().str, m_evt_handler->m_curr->pos);
3758 if(s.len)
3759 memmove(s.str - 1, s.str, s.len);
3760 --s.str;
3761 s.str[s.len] = '\n';
3762 ++s.len;
3763 return s;
3764 }
3765 else
3766 {
3767 substr dst = _alloc_arena(s.len + 1, &s);
3768 if(s.len)
3769 memcpy(dst.str, s.str, s.len);
3770 dst[s.len] = '\n';
3771 return dst;
3772 }
3773}
3774
3775template<class EventHandler>
3776csubstr ParseEngine<EventHandler>::_filter_scalar_literal(substr s, size_t indentation, BlockChomp_e chomp)
3777{
3778 _c4dbgpf("filtering block literal scalar: s={}", _prs(s));
3779 FilterResult r = this->filter_scalar_block_literal_in_place(s, s.len, indentation, chomp);
3780 csubstr result;
3781 if(C4_LIKELY(r.valid()))
3782 {
3783 result = r.get();
3784 }
3785 else
3786 {
3787 _c4dbgpf("filtering block literal scalar: not enough space: needs {}, have {}", r.required_len(), s.len);
3788 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, r.required_len() == s.len + 1, m_evt_handler->m_curr->pos);
3789 // this can only happen when adding a single newline in clip mode.
3790 // so we shift left the scalar by one place
3791 result = _move_scalar_left_and_add_newline(s);
3792 }
3793 _c4dbgpf("filtering block literal scalar: success! s={}", _prs(result));
3794 return result;
3795}
3796
3797
3798//-----------------------------------------------------------------------------
3799template<class EventHandler>
3800csubstr ParseEngine<EventHandler>::_filter_scalar_folded(substr s, size_t indentation, BlockChomp_e chomp)
3801{
3802 _c4dbgpf("filtering block folded scalar: s={}", _prs(s));
3803 FilterResult r = this->filter_scalar_block_folded_in_place(s, s.len, indentation, chomp);
3804 csubstr result;
3805 if(C4_LIKELY(r.valid()))
3806 {
3807 result = r.get();
3808 }
3809 else
3810 {
3811 _c4dbgpf("filtering block folded scalar: not enough space: needs {}, have {}", r.required_len(), s.len);
3812 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, r.required_len() == s.len + 1, m_evt_handler->m_curr->pos);
3813 // this can only happen when adding a single newline in clip mode.
3814 // so we shift left the scalar by one place
3815 result = _move_scalar_left_and_add_newline(s);
3816 }
3817 _c4dbgpf("filtering block folded scalar: success! s={}", _prs(result));
3818 return result;
3819}
3820
3821
3822//-----------------------------------------------------------------------------
3823
3824template<class EventHandler>
3825csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_plain(ScannedScalar const& C4_RESTRICT sc, size_t indentation)
3826{
3827 if(sc.needs_filter)
3828 {
3829 if(m_options.scalar_filtering())
3830 {
3831 return _filter_scalar_plain(sc.scalar, indentation);
3832 }
3833 else
3834 {
3835 _c4dbgp("plain scalar left unfiltered");
3836 m_evt_handler->mark_key_scalar_unfiltered();
3837 }
3838 }
3839 else
3840 {
3841 _c4dbgp("plain scalar doesn't need filtering");
3842 }
3843 return sc.scalar;
3844}
3845
3846template<class EventHandler>
3847csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_plain(ScannedScalar const& C4_RESTRICT sc, size_t indentation)
3848{
3849 if(sc.needs_filter)
3850 {
3851 if(m_options.scalar_filtering())
3852 {
3853 return _filter_scalar_plain(sc.scalar, indentation);
3854 }
3855 else
3856 {
3857 _c4dbgp("plain scalar left unfiltered");
3858 m_evt_handler->mark_val_scalar_unfiltered();
3859 }
3860 }
3861 else
3862 {
3863 _c4dbgp("plain scalar doesn't need filtering");
3864 }
3865 return sc.scalar;
3866}
3867
3868
3869//-----------------------------------------------------------------------------
3870
3871template<class EventHandler>
3872csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_squot(ScannedScalar const& C4_RESTRICT sc)
3873{
3874 if(sc.needs_filter)
3875 {
3876 if(m_options.scalar_filtering())
3877 {
3878 return _filter_scalar_squot(sc.scalar);
3879 }
3880 else
3881 {
3882 _c4dbgp("squo key scalar left unfiltered");
3883 m_evt_handler->mark_key_scalar_unfiltered();
3884 }
3885 }
3886 else
3887 {
3888 _c4dbgp("squo key scalar doesn't need filtering");
3889 }
3890 return sc.scalar;
3891}
3892
3893template<class EventHandler>
3894csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_squot(ScannedScalar const& C4_RESTRICT sc)
3895{
3896 if(sc.needs_filter)
3897 {
3898 if(m_options.scalar_filtering())
3899 {
3900 return _filter_scalar_squot(sc.scalar);
3901 }
3902 else
3903 {
3904 _c4dbgp("squo val scalar left unfiltered");
3905 m_evt_handler->mark_val_scalar_unfiltered();
3906 }
3907 }
3908 else
3909 {
3910 _c4dbgp("squo val scalar doesn't need filtering");
3911 }
3912 return sc.scalar;
3913}
3914
3915
3916//-----------------------------------------------------------------------------
3917
3918template<class EventHandler>
3919csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_dquot(ScannedScalar const& C4_RESTRICT sc)
3920{
3921 if(sc.needs_filter)
3922 {
3923 if(m_options.scalar_filtering())
3924 {
3925 return _filter_scalar_dquot(sc.scalar);
3926 }
3927 else
3928 {
3929 _c4dbgp("dquo scalar left unfiltered");
3930 m_evt_handler->mark_key_scalar_unfiltered();
3931 }
3932 }
3933 else
3934 {
3935 _c4dbgp("dquo scalar doesn't need filtering");
3936 }
3937 return sc.scalar;
3938}
3939
3940template<class EventHandler>
3941csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_dquot(ScannedScalar const& C4_RESTRICT sc)
3942{
3943 if(sc.needs_filter)
3944 {
3945 if(m_options.scalar_filtering())
3946 {
3947 return _filter_scalar_dquot(sc.scalar);
3948 }
3949 else
3950 {
3951 _c4dbgp("dquo scalar left unfiltered");
3952 m_evt_handler->mark_val_scalar_unfiltered();
3953 }
3954 }
3955 else
3956 {
3957 _c4dbgp("dquo scalar doesn't need filtering");
3958 }
3959 return sc.scalar;
3960}
3961
3962
3963//-----------------------------------------------------------------------------
3964
// Returns the block literal scalar described by `sb` for use as a key:
// filtered via _filter_scalar_literal() when scalar filtering is enabled in
// the options; otherwise the handler is told the key scalar is unfiltered
// and the scanned scalar is returned as is.
// NOTE(review): the signature line (listing line 3966) is missing from this
// listing. From the body (reads sb.scalar/sb.indentation/sb.chomp, marks the
// *key* scalar) this is presumably the key variant of the literal filter,
// returning csubstr -- confirm against the original file.
3965template<class EventHandler>
3967{
3968 if(m_options.scalar_filtering())
3969 {
3970 return _filter_scalar_literal(sb.scalar, sb.indentation, sb.chomp);
3971 }
3972 else
3973 {
3974 _c4dbgp("literal scalar left unfiltered");
3975 m_evt_handler->mark_key_scalar_unfiltered();
3976 }
// only reached when filtering is disabled
3977 return sb.scalar;
3978}
3979
// Returns the block literal scalar described by `sb` for use as a val:
// filtered via _filter_scalar_literal() when scalar filtering is enabled in
// the options; otherwise the handler is told the val scalar is unfiltered
// and the scanned scalar is returned as is.
// NOTE(review): the signature line (listing line 3981) is missing from this
// listing. From the body (reads sb.scalar/sb.indentation/sb.chomp, marks the
// *val* scalar) this is presumably the val variant of the literal filter,
// returning csubstr -- confirm against the original file.
3980template<class EventHandler>
3982{
3983 if(m_options.scalar_filtering())
3984 {
3985 return _filter_scalar_literal(sb.scalar, sb.indentation, sb.chomp);
3986 }
3987 else
3988 {
3989 _c4dbgp("literal scalar left unfiltered");
3990 m_evt_handler->mark_val_scalar_unfiltered();
3991 }
// only reached when filtering is disabled
3992 return sb.scalar;
3993}
3994
3995
3996//-----------------------------------------------------------------------------
3997
// Returns the block folded scalar described by `sb` for use as a key:
// filtered via _filter_scalar_folded() when scalar filtering is enabled in
// the options; otherwise the handler is told the key scalar is unfiltered
// and the scanned scalar is returned as is.
// NOTE(review): the signature line (listing line 3999) is missing from this
// listing. From the body (reads sb.scalar/sb.indentation/sb.chomp, marks the
// *key* scalar) this is presumably the key variant of the folded filter,
// returning csubstr -- confirm against the original file.
3998template<class EventHandler>
4000{
4001 if(m_options.scalar_filtering())
4002 {
4003 return _filter_scalar_folded(sb.scalar, sb.indentation, sb.chomp);
4004 }
4005 else
4006 {
4007 _c4dbgp("folded scalar left unfiltered");
4008 m_evt_handler->mark_key_scalar_unfiltered();
4009 }
// only reached when filtering is disabled
4010 return sb.scalar;
4011}
4012
// Returns the block folded scalar described by `sb` for use as a val:
// filtered via _filter_scalar_folded() when scalar filtering is enabled in
// the options; otherwise the handler is told the val scalar is unfiltered
// and the scanned scalar is returned as is.
// NOTE(review): the signature line (listing line 4014) is missing from this
// listing. From the body (reads sb.scalar/sb.indentation/sb.chomp, marks the
// *val* scalar) this is presumably the val variant of the folded filter,
// returning csubstr -- confirm against the original file.
4013template<class EventHandler>
4015{
4016 if(m_options.scalar_filtering())
4017 {
4018 return _filter_scalar_folded(sb.scalar, sb.indentation, sb.chomp);
4019 }
4020 else
4021 {
4022 _c4dbgp("folded scalar left unfiltered");
4023 m_evt_handler->mark_val_scalar_unfiltered();
4024 }
// only reached when filtering is disabled
4025 return sb.scalar;
4026}
4027
4028
4029//-----------------------------------------------------------------------------
4030//-----------------------------------------------------------------------------
4031//-----------------------------------------------------------------------------
4032
4033#ifdef RYML_DBG // !!! <----------------------------------
4034
4035template<class EventHandler>
4036void ParseEngine<EventHandler>::add_flags(ParserFlag_t on)
4037{
4038 ParserState *s = m_evt_handler->m_curr;
4039 char buf1_[64], buf2_[64], buf3_[64];
4040 csubstr buf1 = detail::_parser_flags_to_str(buf1_, on);
4041 csubstr buf2 = detail::_parser_flags_to_str(buf2_, s->flags);
4042 csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags|on);
4043 _c4dbgpf("state[{}]: add {}: before={} after={}", s->level, buf1, buf2, buf3);
4044 s->flags |= on;
4045}
4046
4047template<class EventHandler>
4048void ParseEngine<EventHandler>::addrem_flags(ParserFlag_t on, ParserFlag_t off)
4049{
4050 ParserState *s = m_evt_handler->m_curr;
4051 char buf1_[64], buf2_[64], buf3_[64], buf4_[64];
4052 csubstr buf1 = detail::_parser_flags_to_str(buf1_, on);
4053 csubstr buf2 = detail::_parser_flags_to_str(buf2_, off);
4054 csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags);
4055 csubstr buf4 = detail::_parser_flags_to_str(buf4_, (~off)&((s->flags|on)));
4056 _c4dbgpf("state[{}]: add {} / rem {}: before={} after={}", s->level, buf1, buf2, buf3, buf4);
4057 _RYML_ASSERT_BASIC((on & off) == ParserFlag_t(0));
4058 s->flags &= ~off;
4059 s->flags |= on;
4060}
4061
4062template<class EventHandler>
4063void ParseEngine<EventHandler>::rem_flags(ParserFlag_t off)
4064{
4065 ParserState *s = m_evt_handler->m_curr;
4066 char buf1_[64], buf2_[64], buf3_[64];
4067 csubstr buf1 = detail::_parser_flags_to_str(buf1_, off);
4068 csubstr buf2 = detail::_parser_flags_to_str(buf2_, s->flags);
4069 csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags&(~off));
4070 _c4dbgpf("state[{}]: rem {}: before={} after={}", s->level, buf1, buf2, buf3);
4071 s->flags &= ~off;
4072}
4073
// Writes the names of the parser flags set in `flags` into `buf`, separated
// by '|', and returns the written prefix of `buf`. When no flag matches, a
// single '0' is written (if the buffer is not empty).
// Characters that do not fit are not written, but `pos` keeps advancing, so
// an undersized buffer is caught by the final _RYML_CHECK_BASIC().
4074inline C4_NO_INLINE csubstr detail::_parser_flags_to_str(substr buf, ParserFlag_t flags)
4075{
4076 size_t pos = 0;
4077 bool gotone = false;
4078
// _prflag(fl): if all bits of `fl` are set in `flags`, append a '|'
// separator (when something was already written) followed by the
// stringified flag name.
// NOTE(review): the separator is guarded by `pos + 1 < buf.len` while the
// name is guarded by `pos + fltxt.len <= buf.len`; the two bounds are not
// of the same form -- confirm this is intended.
4079 #define _prflag(fl) \
4080 if((flags & fl) == (fl)) \
4081 { \
4082 if(gotone) \
4083 { \
4084 if(pos + 1 < buf.len) \
4085 buf[pos] = '|'; \
4086 ++pos; \
4087 } \
4088 csubstr fltxt = #fl; \
4089 if(pos + fltxt.len <= buf.len) \
4090 memcpy(buf.str + pos, fltxt.str, fltxt.len); \
4091 pos += fltxt.len; \
4092 gotone = true; \
4093 }
4094
4095 _prflag(RTOP);
4096 _prflag(RUNK);
4097 _prflag(RMAP);
4098 _prflag(RSEQ);
4099 _prflag(RFLOW);
4100 _prflag(RBLCK);
4101 _prflag(QMRK);
4102 _prflag(RKEY);
4103 _prflag(RVAL);
4104 _prflag(RKCL);
4105 _prflag(RNXT);
4106 _prflag(SSCL);
4107 _prflag(QSCL);
4108 _prflag(RSET);
4109 _prflag(RDOC);
4110 _prflag(NDOC);
4111 _prflag(USTY);
// NOTE(review): listing line 4112 is missing from this listing; there may
// be one more statement here -- confirm against the original file.
4113
4114 #undef _prflag
4115
// no flag was printed: emit a lone '0'
4116 if(pos == 0)
4117 if(buf.len > 0)
4118 buf[pos++] = '0';
4119
// fails when the flag names needed more room than the buffer offers
4120 _RYML_CHECK_BASIC(pos <= buf.len);
4121
4122 return buf.first(pos);
4123}
4124
4125#endif // RYML_DBG !!! <----------------------------------
4126
4127
4128//-----------------------------------------------------------------------------
4129//-----------------------------------------------------------------------------
4130//-----------------------------------------------------------------------------
4131
// Returns the part of the parse buffer that starts at `loc.offset`.
// The offset is asserted to be inside the buffer.
// NOTE(review): the signature line (listing line 4133) is missing from this
// listing; the body reads a `loc` parameter with an `offset` member and
// returns a sub-range of _buf() -- confirm name and signature against the
// original file.
4132template<class EventHandler>
4134{
4135 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, loc.offset < _buf().len);
4136 return _buf().sub(loc.offset);
4137}
4138
// Computes the Location (name, offset, line, col) of the character pointed
// at by `val`, which must point into the buffer that was parsed.
// A null `val` yields a Location with the current name and zeroed
// offset/line/col. Otherwise the stored newline offsets are searched for
// the line holding the offset of `val`; the parser must have been
// configured with locations enabled.
// NOTE(review): the signature line (listing line 4140) is missing from this
// listing; the body reads a pointer parameter `val` and returns a Location
// -- confirm name and signature against the original file.
4139template<class EventHandler>
4141{
4142 if(C4_UNLIKELY(val == nullptr))
4143 return {m_evt_handler->m_curr->pos.name, 0, 0, 0};
4144 _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_options.locations());
4145 // NOTE: if any of these checks fails, the parser needs to be
4146 // instantiated with locations enabled.
// NOTE(review): this assertion repeats the check made just above.
4147 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_options.locations());
4148 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, !_locations_dirty());
4149 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets != nullptr);
4150 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_size > 0);
4151 // NOTE: the pointer needs to belong to the buffer that was used to parse.
4152 csubstr src = _buf();
// NOTE(review): `val` is already known to be non-null here (early return
// above), so the null-related halves of the next two checks cannot trigger.
4153 _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, val != nullptr || src.str == nullptr);
4154 _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, (val >= src.begin() && val <= src.end()) || (src.str == nullptr && val == nullptr));
4155 // ok. search the first stored newline after the given ptr
4156 using lineptr_type = size_t const* C4_RESTRICT;
4157 lineptr_type lineptr = nullptr;
4158 size_t offset = (size_t)(val - src.begin());
4159 if(m_newline_offsets_size < RYML_LOCATIONS_SMALL_THRESHOLD)
4160 {
4161 // just do a linear search if the size is small.
// NOTE(review): when no stored offset is greater than `offset` (which looks
// possible for val == src.end(), admitted by the check above), lineptr
// stays null and is dereferenced below -- confirm.
4162 for(lineptr_type curr = m_newline_offsets, last = m_newline_offsets + m_newline_offsets_size; curr < last; ++curr)
4163 {
4164 if(*curr > offset)
4165 {
4166 lineptr = curr;
4167 break;
4168 }
4169 }
4170 }
4171 else
4172 {
4173 // do a bisection search if the size is not small.
4174 //
4175 // We could use std::lower_bound but this is simple enough and
4176 // spares the costly include of <algorithm>.
// NOTE(review): this finds the first entry that is >= offset, whereas the
// linear search above finds the first entry that is > offset; the two
// differ when `val` points exactly at a stored newline offset, and the
// `*lineptr > offset` assertion below then does not hold -- confirm which
// is intended.
4177 size_t count = m_newline_offsets_size;
4178 lineptr = m_newline_offsets;
4179 while(count)
4180 {
4181 size_t step = count >> 1;
4182 lineptr_type it = lineptr + step;
4183 if(*it < offset)
4184 {
4185 lineptr = ++it;
4186 count -= step + 1;
4187 }
4188 else
4189 {
4190 count = step;
4191 }
4192 }
4193 }
4194 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, lineptr >= m_newline_offsets);
4195 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, lineptr <= m_newline_offsets + m_newline_offsets_size);
4196 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, *lineptr > offset);
4197 Location loc;
4198 loc.name = m_evt_handler->m_curr->pos.name;
4199 loc.offset = offset;
// the line is the index of the found entry in the offsets array
4200 loc.line = (size_t)(lineptr - m_newline_offsets);
// the column is counted from the character after the previous stored
// offset, or from the buffer start on the first line
4201 if(lineptr > m_newline_offsets)
4202 loc.col = (offset - *(lineptr-1) - 1u);
4203 else
4204 loc.col = offset;
4205 return loc;
4206}
4207
4208template<class EventHandler>
4209void ParseEngine<EventHandler>::_prepare_locations()
4210{
4211 csubstr src = _buf();
4212 size_t numnewlines = 1u + src.count('\n');
4213 _resize_locations(numnewlines);
4214 m_newline_offsets_size = 0;
4215 for(size_t i = 0; i < src.len; i++)
4216 if(src.str[i] == '\n')
4217 m_newline_offsets[m_newline_offsets_size++] = i; // NOLINT
4218 m_newline_offsets[m_newline_offsets_size++] = src.len; // NOLINT
4219 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_size == numnewlines);
4220}
4221
4222template<class EventHandler>
4223void ParseEngine<EventHandler>::_resize_locations(size_t numnewlines)
4224{
4225 numnewlines = numnewlines >= 16 ? numnewlines : 16;
4226 if(numnewlines > m_newline_offsets_capacity)
4227 {
4228 if(m_newline_offsets)
4229 _RYML_CB_FREE(m_evt_handler->m_stack.m_callbacks, m_newline_offsets, size_t, m_newline_offsets_capacity);
4230 m_newline_offsets = _RYML_CB_ALLOC_HINT(m_evt_handler->m_stack.m_callbacks, size_t, numnewlines, m_newline_offsets);
4231 m_newline_offsets_capacity = numnewlines;
4232 }
4233}
4234
4235template<class EventHandler>
4236bool ParseEngine<EventHandler>::_locations_dirty() const
4237{
4238 return !m_newline_offsets_size;
4239}
4240
4241
4242//-----------------------------------------------------------------------------
4243//-----------------------------------------------------------------------------
4244//-----------------------------------------------------------------------------
4245
// Skips leading spaces/tabs in the remainder of the current line and, if
// what follows is a '#' comment, consumes the rest of the line.
// Does nothing when the remainder of the line is empty.
4246template<class EventHandler>
4247void ParseEngine<EventHandler>::_handle_flow_skip_whitespace()
4248{
4249 // don't assign to csubstr rem: otherwise, gcc12,13,14 -O3 -m32 misbuilds
4250 if(m_evt_handler->m_curr->line_contents.rem.len > 0)
4251 {
4252 if(m_evt_handler->m_curr->line_contents.rem.str[0] == ' ' || m_evt_handler->m_curr->line_contents.rem.str[0] == '\t')
4253 {
4254 _c4dbgpf("starts with whitespace: '{}'", _c4prc(m_evt_handler->m_curr->line_contents.rem.str[0]));
4255 _skipchars(" \t");
4256 }
4257 // comments
// the comment extends to the end of the line, so consume all that remains
4258 if(m_evt_handler->m_curr->line_contents.rem.begins_with('#'))
4259 {
4260 _c4dbgpf("it's a comment: {}", m_evt_handler->m_curr->line_contents.rem);
4261 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
4262 }
4263 }
4264}
4265
4266
4267template<class EventHandler>
4268void ParseEngine<EventHandler>::_handle_flow_line_beginning()
4269{
4270 _c4dbgpf("flow: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
4271 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->at_line_beginning(), m_evt_handler->m_curr->pos);
4272 if(C4_UNLIKELY(m_evt_handler->m_curr->indentation_lt()))
4273 {
4274 csubstr trimmed = m_evt_handler->m_curr->line_contents.rem.sub(m_evt_handler->m_curr->line_contents.indentation);
4275 _c4dbgpf("flow: after indentation={}", _prs(trimmed));
4276 if(trimmed.len && trimmed.triml(" \t").len)
4277 {
4278 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
4279 _c4err("bad indentation");
4280 }
4281 }
4282}
4283
4284template<class EventHandler>
4285size_t ParseEngine<EventHandler>::_handle_block_skip_leading_whitespace()
4286{
4287 const size_t mark = m_evt_handler->m_curr->pos.offset;
4288 const size_t firstpos = m_evt_handler->m_curr->line_contents.rem.first_not_of(" \t");
4289 _c4dbgpf("block: mark={} firstpos={}", mark, firstpos);
4290 if(firstpos != npos)
4291 {
4292 _c4dbgp("block: non empty line");
4293 _line_progressed(firstpos);
4294 return mark;
4295 }
4296 else
4297 {
4298 _c4dbgp("block: rest of line is whitespace");
4299 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
4300 return npos;
4301 }
4302}
4303
4304template<class EventHandler>
4305void ParseEngine<EventHandler>::_handle_block_check_leading_tabs(size_t start_mark, size_t end_mark)
4306{
4307 _c4dbgpf("block: start_mark={} end_mark={}", start_mark, end_mark);
4308 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, end_mark >= start_mark, m_evt_handler->m_curr->pos);
4309 if(end_mark != start_mark)
4310 {
4311 csubstr leading = _buf().range(start_mark, end_mark);
4312 _c4dbgpf("block: leading[{}-{}]={}", start_mark, end_mark, _prs(leading, true));
4313 size_t pos = leading.find('\t');
4314 if(pos != npos)
4315 {
4316 size_t fno = leading.first_not_of(" \t");
4317 if(fno == npos || pos < fno)
4318 _c4err("invalid tab character to the left");
4319 }
4320 (void)leading;
4321 }
4322}
4323
4324
4325//-----------------------------------------------------------------------------
4326
4327
4328template<class EventHandler>
4329void ParseEngine<EventHandler>::_handle_colon()
4330{
4331 size_t curr = m_evt_handler->m_curr->pos.line;
4332 if(C4_UNLIKELY(m_prev_colon != npos && curr == m_prev_colon))
4333 {
4334 _c4dbgpf("colon: prevline={} currline={}", m_prev_colon, curr);
4335 _c4err("two colons on same line");
4336 }
4337 _c4dbgpf("colon: set prevline={}->{}", m_prev_colon, curr);
4338 m_prev_colon = curr;
4339}
4340
4341template<class EventHandler>
4342void ParseEngine<EventHandler>::_add_annotation(Annotation *C4_RESTRICT dst, csubstr str)
4343{
4344 _c4dbgpf("store annotation[{}]: {}", dst->num_entries, _prs(str));
4345 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, dst->num_entries < C4_COUNTOF(dst->annotations), m_evt_handler->m_curr->pos); // NOLINT(bugprone-sizeof-expression)
4346 dst->annotations[dst->num_entries].str = str;
4347 dst->annotations[dst->num_entries].indentation = {};
4348 dst->annotations[dst->num_entries].line = {};
4349 dst->annotations[dst->num_entries].orig = {};
4350 ++dst->num_entries;
4351}
4352
4353template<class EventHandler>
4354void ParseEngine<EventHandler>::_add_annotation(Annotation *C4_RESTRICT dst, csubstr str, size_t indentation, size_t line)
4355{
4356 _c4dbgpf("store annotation[{}]: '{}' indentation={} line={}", dst->num_entries, _maybe_null_str(str), indentation, line);
4357 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, dst->num_entries < C4_COUNTOF(dst->annotations), m_evt_handler->m_curr->pos); // NOLINT(bugprone-sizeof-expression)
4358 if(C4_UNLIKELY(dst->num_entries && dst->annotations[0].line == line))
4359 {
4360 _c4err("parse error");
4361 }
4362 dst->annotations[dst->num_entries].str = str;
4363 dst->annotations[dst->num_entries].indentation = indentation;
4364 dst->annotations[dst->num_entries].line = line;
4365 dst->annotations[dst->num_entries].orig = {};
4366 ++dst->num_entries;
4367}
4368
4369template<class EventHandler>
4370void ParseEngine<EventHandler>::_add_annotation(Annotation *C4_RESTRICT dst, csubstr str, size_t indentation, size_t line, csubstr orig)
4371{
4372 _c4dbgpf("store annotation[{}]: '{}'->'{}' indentation={} line={}", dst->num_entries, orig, _maybe_null_str(str), indentation, line);
4373 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, dst->num_entries < C4_COUNTOF(dst->annotations), m_evt_handler->m_curr->pos); // NOLINT(bugprone-sizeof-expression)
4374 if(C4_UNLIKELY(dst->num_entries && dst->annotations[0].line == line))
4375 {
4376 _c4err("parse error");
4377 }
4378 dst->annotations[dst->num_entries].str = str;
4379 dst->annotations[dst->num_entries].indentation = indentation;
4380 dst->annotations[dst->num_entries].line = line;
4381 dst->annotations[dst->num_entries].orig = orig;
4382 ++dst->num_entries;
4383}
4384
4385template<class EventHandler>
4386bool ParseEngine<EventHandler>::_annotations_require_key_container() const
4387{
4388 return m_pending_tags.num_entries > 1 || m_pending_anchors.num_entries > 1;
4389}
4390
4391template<class EventHandler>
4392bool ParseEngine<EventHandler>::_handle_annotations_before_unexpected_flow_token_rkey()
4393{
4394 if(!(m_pending_tags.num_entries | m_pending_anchors.num_entries))
4395 return false;
4396 _c4dbgpf("handle_annotations_before_unexpected_flow_comma_rkey, node={}", m_evt_handler->m_curr->node_id);
4397 if(m_pending_tags.num_entries)
4398 {
4399 _c4dbgpf("handle_annotations_before_unexpected_flow_comma_rkey, #tags={}", m_pending_tags.num_entries);
4400 if(C4_LIKELY(m_pending_tags.num_entries == 1))
4401 {
4402 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4403 _clear_annotations(&m_pending_tags);
4404 }
4405 else
4406 {
4407 _c4err("too many tags");
4408 }
4409 }
4410 if(m_pending_anchors.num_entries)
4411 {
4412 _c4dbgpf("handle_annotations_before_unexpected_flow_comma, #anchors={}", m_pending_tags.num_entries);
4413 if(C4_LIKELY(m_pending_anchors.num_entries == 1))
4414 {
4415 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4416 _clear_annotations(&m_pending_anchors);
4417 }
4418 else
4419 {
4420 _c4err("too many anchors");
4421 }
4422 }
4423 m_evt_handler->set_key_scalar_plain_empty();
4424 m_evt_handler->set_val_scalar_plain_empty();
4425 return true;
4426}
4427
4428template<class EventHandler>
4429void ParseEngine<EventHandler>::_handle_annotations_before_blck_key_scalar()
4430{
4431 _c4dbgpf("annotations_before_blck_key_scalar, node={}", m_evt_handler->m_curr->node_id);
4432 if(m_pending_tags.num_entries)
4433 {
4434 _c4dbgpf("annotations_before_blck_key_scalar, #tags={}", m_pending_tags.num_entries);
4435 if(C4_LIKELY(m_pending_tags.num_entries == 1))
4436 {
4437 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4438 _clear_annotations(&m_pending_tags);
4439 }
4440 else
4441 {
4442 _c4err("too many tags"); // LCOV_EXCL_LINE
4443 }
4444 }
4445 if(m_pending_anchors.num_entries)
4446 {
4447 _c4dbgpf("annotations_before_blck_key_scalar, #anchors={}", m_pending_anchors.num_entries);
4448 if(C4_LIKELY(m_pending_anchors.num_entries == 1))
4449 {
4450 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4451 _clear_annotations(&m_pending_anchors);
4452 }
4453 else
4454 {
4455 _c4err("too many anchors"); // LCOV_EXCL_LINE
4456 }
4457 }
4458}
4459
4460template<class EventHandler>
4461void ParseEngine<EventHandler>::_handle_annotations_before_blck_val_scalar()
4462{
4463 _c4dbgpf("annotations_before_blck_val_scalar, node={}", m_evt_handler->m_curr->node_id);
4464 if(m_pending_tags.num_entries)
4465 {
4466 _c4dbgpf("annotations_before_blck_val_scalar, #tags={}", m_pending_tags.num_entries);
4467 if(C4_LIKELY(m_pending_tags.num_entries == 1))
4468 {
4469 m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4470 _clear_annotations(&m_pending_tags);
4471 }
4472 else
4473 {
4474 _c4err("too many tags");
4475 }
4476 }
4477 if(m_pending_anchors.num_entries)
4478 {
4479 _c4dbgpf("annotations_before_blck_val_scalar, #anchors={}", m_pending_anchors.num_entries);
4480 if(C4_LIKELY(m_pending_anchors.num_entries == 1))
4481 {
4482 m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4483 _clear_annotations(&m_pending_anchors);
4484 }
4485 else
4486 {
4487 _c4err("too many anchors");
4488 }
4489 }
4490}
4491
4492template<class EventHandler>
4493void ParseEngine<EventHandler>::_handle_annotations_before_start_mapblck(size_t current_line)
4494{
4495 _c4dbgpf("annotations_before_start_mapblck, current_line={}", current_line);
4496 if(m_pending_tags.num_entries == 2)
4497 {
4498 _c4dbgp("2 tags, setting entry 0");
4499 m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4500 }
4501 else if(m_pending_tags.num_entries == 1)
4502 {
4503 _c4dbgpf("1 tag. line={}, curr={}", m_pending_tags.annotations[0].line, current_line);
4504 if(m_pending_tags.annotations[0].line < current_line)
4505 {
4506 _c4dbgp("...tag is for the map. setting it.");
4507 m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4508 _clear_annotations(&m_pending_tags);
4509 }
4510 }
4511 //
4512 if(m_pending_anchors.num_entries == 2)
4513 {
4514 _c4dbgp("2 anchors, setting entry 0");
4515 m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4516 }
4517 else if(m_pending_anchors.num_entries == 1)
4518 {
4519 _c4dbgpf("1 anchor. line={}, curr={}", m_pending_anchors.annotations[0].line, current_line);
4520 if(m_pending_anchors.annotations[0].line < current_line)
4521 {
4522 _c4dbgp("...anchor is for the map. setting it.");
4523 m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4524 _clear_annotations(&m_pending_anchors);
4525 }
4526 }
4527}
4528
4529template<class EventHandler>
4530void ParseEngine<EventHandler>::_handle_annotations_before_start_mapblck_as_key()
4531{
4532 _c4dbgp("annotations_before_start_mapblck_as_key");
4533 switch(m_pending_tags.num_entries)
4534 {
4535 case 1u:
4536 _c4dbgpf("annotations_after_start_mapblck_as_key: 1 tag={} line={} currline=", _prs(m_pending_tags.annotations[0].str), m_pending_tags.annotations[0].line, m_evt_handler->m_curr->pos.line);
4537 if(m_pending_tags.annotations[0].line != m_evt_handler->m_curr->pos.line)
4538 {
4539 _c4dbgp("annotations_after_start_mapblck_as_key: is map tag");
4540 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4541 _clear_annotations(&m_pending_tags);
4542 }
4543 break;
4544 case 2u:
4545 _c4dbgpf("annotations_after_start_mapblck_as_key: 2 tags: {} -> {}", _prs(m_pending_tags.annotations[0].str), _prs(m_pending_tags.annotations[1].str));
4546 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4547 break;
4548 }
4549 switch(m_pending_anchors.num_entries)
4550 {
4551 case 1u:
4552 _c4dbgpf("annotations_after_start_mapblck_as_key: 1 anchor={} line={} currline=", m_pending_anchors.annotations[0].str, m_pending_anchors.annotations[0].line, m_evt_handler->m_curr->pos.line);
4553 if(m_pending_anchors.annotations[0].line != m_evt_handler->m_curr->pos.line)
4554 {
4555 _c4dbgp("annotations_after_start_mapblck_as_key: is map anchor");
4556 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4557 _clear_annotations(&m_pending_anchors);
4558 }
4559 break;
4560 case 2u:
4561 _c4dbgpf("annotations_after_start_mapblck_as_key: 2 anchors: {} -> {}", m_pending_anchors.annotations[0].str, m_pending_anchors.annotations[1].str);
4562 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4563 break;
4564 }
4565}
4566
4567template<class EventHandler>
4568void ParseEngine<EventHandler>::_handle_annotations_and_indentation_after_start_mapblck(size_t key_indentation, size_t key_line)
4569{
4570 _c4dbgp("annotations_after_start_mapblck");
4571 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries <= 2, m_evt_handler->m_curr->pos);
4572 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.num_entries <= 2, m_evt_handler->m_curr->pos);
4573 if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
4574 {
4575 key_indentation = _select_indentation_from_annotations(key_indentation, key_line);
4576 switch(m_pending_tags.num_entries)
4577 {
4578 case 1u:
4579 _c4dbgpf("annotations_after_start_mapblck: 1 tag: {}", _prs(m_pending_tags.annotations[0].str));
4580 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4581 _clear_annotations(&m_pending_tags);
4582 break;
4583 case 2u:
4584 _c4dbgpf("annotations_after_start_mapblck: 2 tags: {} -> {}", _prs(m_pending_tags.annotations[0].str), _prs(m_pending_tags.annotations[1].str));
4585 m_evt_handler->set_key_tag(m_pending_tags.annotations[1].str);
4586 _clear_annotations(&m_pending_tags);
4587 break;
4588 }
4589 switch(m_pending_anchors.num_entries)
4590 {
4591 case 1u:
4592 _c4dbgpf("annotations_after_start_mapblck: 1 anchors: {} -> {}", m_pending_anchors.annotations[0].str);
4593 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4594 _clear_annotations(&m_pending_anchors);
4595 break;
4596 case 2u:
4597 _c4dbgpf("annotations_after_start_mapblck: 2 anchors: {} -> {}", m_pending_anchors.annotations[0].str, m_pending_anchors.annotations[1].str);
4598 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[1].str);
4599 _clear_annotations(&m_pending_anchors);
4600 break;
4601 }
4602 }
4603 _set_indentation(key_indentation);
4604}
4605
4606template<class EventHandler>
4607size_t ParseEngine<EventHandler>::_select_indentation_from_annotations(size_t val_indentation, size_t val_line)
4608{
4609 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries | m_pending_anchors.num_entries, m_evt_handler->m_curr->pos);
4610 // select the left-most annotation on the max line
4611 auto const *C4_RESTRICT curr = m_pending_anchors.num_entries ? &m_pending_anchors.annotations[0] : &m_pending_tags.annotations[0];
4612 for(size_t i = 0; i < m_pending_anchors.num_entries; ++i)
4613 {
4614 auto const& C4_RESTRICT ann = m_pending_anchors.annotations[i];
4615 if(ann.line > curr->line)
4616 curr = &ann;
4617 else if(ann.indentation < curr->indentation)
4618 curr = &ann;
4619 }
4620 for(size_t j = 0; j < m_pending_tags.num_entries; ++j)
4621 {
4622 auto const& C4_RESTRICT ann = m_pending_tags.annotations[j];
4623 if(ann.line > curr->line)
4624 curr = &ann;
4625 else if(ann.indentation < curr->indentation)
4626 curr = &ann;
4627 }
4628 return curr->line < val_line ? val_indentation : curr->indentation;
4629}
4630
4631template<class EventHandler>
4632void ParseEngine<EventHandler>::_handle_keyref(csubstr alias)
4633{
4634 if(C4_LIKELY(!(m_pending_anchors.num_entries | m_pending_tags.num_entries)))
4635 m_evt_handler->set_key_ref(alias);
4636 else
4637 _c4err("aliases cannot have anchors or tags");
4638}
4639
4640template<class EventHandler>
4641void ParseEngine<EventHandler>::_handle_valref(csubstr alias)
4642{
4643 if(C4_LIKELY(!(m_pending_anchors.num_entries | m_pending_tags.num_entries)))
4644 m_evt_handler->set_val_ref(alias);
4645 else
4646 _c4err("aliases cannot have anchors or tags");
4647}
4648
4649template<class EventHandler>
4650csubstr ParseEngine<EventHandler>::_resolve_tag(csubstr tag)
4651{
4652 _c4dbgpf("resolving tag: {} curr_doc={}", _prs(tag), m_evt_handler->m_curr_doc);
4653 _c4assert(tag.is_sub(_buf()));
4654 TagCache::LookupResult ret = m_evt_handler->tag_cache().find(tag, m_evt_handler->m_curr_doc);
4655 if(ret)
4656 {
4657 _c4dbgpf("resolving tag: found in cache[{}]: {}", ret.pos, _prs(ret.resolved));
4658 return ret.resolved;
4659 }
4660 _c4dbgpf("resolving tag: not in cache: {} curr_doc={}", _prs(tag), m_evt_handler->m_curr_doc);
4661 size_t bufsz = 0;
4662 substr buf = m_evt_handler->arena_rem();
4663 TagDirectives const& C4_RESTRICT tds = m_evt_handler->tag_directives();
4664 csubstr ttag = tds.resolve(buf, &bufsz, tag, m_evt_handler->m_curr_doc,
4665 m_evt_handler->m_curr->pos,
4666 m_evt_handler->m_stack.m_callbacks);
4667 _c4dbgpf("resolving tag: bufsz={} ttag.len={} !!ttag.str={}", bufsz, ttag.len, !!ttag.str);
4668 _c4assert((bufsz > buf.len) == (!ttag.str));
4669 _c4assert(!!bufsz == (ttag.len == bufsz));
4670 // try again if the arena size was not enough
4671 if(!ttag.str)
4672 {
4673 _c4dbgpf("tag requires arena, but it was small. arena.len={} arena.slack={} tag.required={}", m_evt_handler->arena_rem().len, m_evt_handler->arena().len, ttag.len);
4674 _c4assert(ttag.len == bufsz);
4675 buf = _alloc_arena(bufsz, &tag);
4676 if(buf.str) // the alloc may fail eg with the ints handler
4677 {
4678 ttag = tds.resolve(buf, &bufsz, tag, m_evt_handler->m_curr_doc,
4679 m_evt_handler->m_curr->pos,
4680 m_evt_handler->m_stack.m_callbacks);
4681 }
4682 _c4assert(ttag.len == bufsz);
4683 _c4assert(!ttag.str || ttag.is_sub(m_evt_handler->arena()));
4684 }
4685 else if(bufsz) // if we succeeded writing into the arena, grow it as needed
4686 {
4687 _c4dbgp("tag required arena. update size");
4688 _c4assert(ttag.len == bufsz);
4689 _c4assert(ttag.is_sub(buf));
4690 (void)_alloc_arena(bufsz);
4691 }
4692 C4_SUPPRESS_WARNING_MSVC_WITH_PUSH(4127) // conditional expression is constant
4693 if C4_IF_CONSTEXPR (EventHandler::requires_strings_on_buffers) // NOLINT
4694 {
4695 _c4dbgpf("handler requires tags in buffers. !!ttag.str={} in_arena={} in_src={}", !!ttag.str, ttag.is_sub(m_evt_handler->arena()), ttag.is_sub(_buf()));
4696 // is the resolved tag not in any of those buffers?
4697 if(ttag.str && !ttag.is_sub(m_evt_handler->arena()) && !ttag.is_sub(_buf()))
4698 {
4699 _c4dbgpf("copying resolved tag to arena: slack={} required={}", m_evt_handler->arena_rem().len, ttag.len);
4700 buf = _alloc_arena(ttag.len, &tag);
4701 if(buf.str) // the alloc may fail eg with the ints handler
4702 memcpy(buf.str, ttag.str, ttag.len);
4703 ttag.str = buf.str; // keep the current len!
4704 _c4assert(!ttag.str || ttag.is_sub(m_evt_handler->arena()));
4705 }
4706 }
4707 C4_SUPPRESS_WARNING_MSVC_POP
4708 _c4dbgpf("resolved tag: {} --> [{}]~~~{}~~~", _prs(tag), ttag.len, _maybe_null_str(ttag));
4709 _c4assert(ttag.len > 0);
4710 // cache the hard-earned result!
4711 m_evt_handler->tag_cache().add(tag, ttag, m_evt_handler->m_curr_doc, ret.pos);
4712 return ttag;
4713}
4714
4715template<class EventHandler>
4716bool ParseEngine<EventHandler>::_validate_directive_yaml(csubstr *C4_RESTRICT directive, csubstr *C4_RESTRICT version) const
4717{
4718 _c4assert(directive->begins_with("%YAML"));
4719 size_t version_start = directive->first_not_of(" \t", 5);
4720 if(version_start != npos)
4721 {
4722 csubstr digits = "0123456789";
4723 size_t major_end = directive->first_not_of(digits, version_start);
4724 if(major_end != npos && directive->str[major_end] == '.') // single dot
4725 {
4726 size_t minor_end = directive->first_not_of(digits, major_end + 1);
4727 if(minor_end == npos)
4728 minor_end = directive->len;
4729 _set_first_strict(*directive, minor_end);
4730 *version = directive->range(version_start, minor_end);
4731 _c4dbgpf("%YAML: version={} full={}", *version, _prs(*directive, true));
4732 return true;
4733 }
4734 }
4735 return false;
4736}
4737
4738template<class EventHandler>
4739bool ParseEngine<EventHandler>::_validate_directive_tag(csubstr *C4_RESTRICT directive, csubstr *C4_RESTRICT handle, csubstr *C4_RESTRICT prefix) const
4740{
4741 _c4assert(directive->begins_with("%TAG"));
4742 csubstr whitespace = " \t";
4743 size_t handle_start = directive->first_not_of(whitespace, 4);
4744 if(handle_start != npos && directive->str[handle_start] == '!')
4745 {
4746 size_t handle_end = directive->first_of(whitespace, handle_start);
4747 if(handle_end != npos)
4748 {
4749 size_t prefix_start = directive->first_not_of(whitespace, handle_end);
4750 if(prefix_start != npos)
4751 {
4752 size_t prefix_end = directive->first_of(whitespace, prefix_start);
4753 if(prefix_end == npos)
4754 prefix_end = directive->len;
4755 _set_first_strict(*directive, prefix_end);
4756 *handle = directive->range(handle_start, handle_end);
4757 *prefix = directive->range(prefix_start, prefix_end);
4758 _c4dbgpf("%TAG: handle={} prefix={} full={}", *handle, *prefix, _prs(*directive, true));
4759 if(is_valid_tag_handle(*handle))
4760 return true;
4761 }
4762 }
4763 }
4764 return false;
4765}
4766
4767template<class EventHandler>
4768void ParseEngine<EventHandler>::_handle_directive(csubstr directive)
4769{
4770 _c4dbgpf("handle_directive: rem={}", _prs(directive, true));
4771 _c4assert(m_evt_handler->m_curr->line_contents.rem.begins_with('%'));
4772 _c4assert(directive.str == m_evt_handler->m_curr->line_contents.rem.str);
4773 const char *err = nullptr;
4774 csubstr rem;
4775 size_t pos;
4776 auto isdirective = [](csubstr str, csubstr dir) {
4777 if(str.begins_with(dir))
4778 {
4779 csubstr rest = str.sub(dir.len);
4780 return (!rest.len || rest.str[0] == ' ' || rest.str[0] == '\t');
4781 }
4782 return false;
4783 };
4784 if(isdirective(directive, "%TAG"))
4785 {
4786 csubstr handle;
4787 csubstr prefix;
4788 if(C4_UNLIKELY(!_validate_directive_tag(&directive, &handle, &prefix)))
4789 {
4790 err = "invalid %TAG directive";
4791 goto directive_error; // NOLINT
4792 }
4793 m_evt_handler->add_directive_tag(handle, prefix);
4794 }
4795 else if(isdirective(directive, "%YAML"))
4796 {
4798 if(C4_UNLIKELY(!_validate_directive_yaml(&directive, &version)))
4799 {
4800 err = "invalid %YAML directive";
4801 goto directive_error; // NOLINT
4802 }
4803 if(C4_UNLIKELY(m_has_directives_yaml))
4804 {
4805 err = "multiple %YAML directives";
4806 goto directive_error; // NOLINT
4807 }
4808 m_has_directives_yaml = true;
4809 m_evt_handler->add_directive_yaml(version);
4810 }
4811 m_has_directives = true;
4812 rem = m_evt_handler->m_curr->line_contents.rem;
4813 pos = rem.first_not_of(" \t", directive.len);
4814 pos = pos != npos ? pos : rem.len;
4815 _line_progressed(pos);
4816 rem = rem.sub(pos);
4817 _c4dbgpf("handle_directive: rest={}", _prs(rem));
4818 if(C4_UNLIKELY(rem.len && !rem.begins_with('#')))
4819 {
4820 err = "invalid tokens after directive";
4821 goto directive_error; // NOLINT
4822 }
4823directive_error:
4824 if(C4_UNLIKELY(err != nullptr))
4825 _c4err(err);
4826}
4827
4828template<class EventHandler>
4829bool ParseEngine<EventHandler>::_handle_bom()
4830{
4831 const csubstr rem = m_evt_handler->m_curr->line_contents.rem;
4832 if(rem.len)
4833 {
4834 const csubstr rest = rem.sub(1);
4835 // https://yaml.org/spec/1.2.2/#52-character-encodings
4836 #define _rymlisascii(c) ((c) > '\0' && (c) <= '\x7f') // is the character ASCII?
4837 if(rem.begins_with(csubstr{"\x00\x00\xfe\xff", 4}) || (rem.begins_with(csubstr{"\x00\x00\x00", 3}) && rem.len >= 4u && _rymlisascii(rem.str[3])))
4838 {
4839 _c4dbgp("byte order mark: UTF32BE");
4840 _handle_bom(UTF32BE);
4841 _line_progressed(4);
4842 m_bom_len = 4;
4843 return true;
4844 }
4845 else if(rem.begins_with(csubstr{"\xff\xfe\x00\x00", 4}) || (rest.begins_with(csubstr{"\x00\x00\x00", 3}) && rem.len >= 4u && _rymlisascii(rem.str[0])))
4846 {
4847 _c4dbgp("byte order mark: UTF32LE");
4848 _handle_bom(UTF32LE);
4849 _line_progressed(4);
4850 m_bom_len = 4;
4851 return true;
4852 }
4853 else if(rem.begins_with("\xfe\xff") || (rem.begins_with('\x00') && rem.len >= 2u && _rymlisascii(rem.str[1])))
4854 {
4855 _c4dbgp("byte order mark: UTF16BE");
4856 _handle_bom(UTF16BE);
4857 _line_progressed(2);
4858 m_bom_len = 2;
4859 return true;
4860 }
4861 else if(rem.begins_with("\xff\xfe") || (rest.begins_with('\x00') && rem.len >= 2u && _rymlisascii(rem.str[0])))
4862 {
4863 _c4dbgp("byte order mark: UTF16LE");
4864 _handle_bom(UTF16LE);
4865 _line_progressed(2);
4866 m_bom_len = 2;
4867 return true;
4868 }
4869 else if(rem.begins_with("\xef\xbb\xbf"))
4870 {
4871 _c4dbgp("byte order mark: UTF8");
4872 _handle_bom(UTF8);
4873 _line_progressed(3);
4874 m_bom_len = 3;
4875 return true;
4876 }
4877 #undef _rymlisascii
4878 }
4879 return false;
4880}
4881
4882template<class EventHandler>
4883void ParseEngine<EventHandler>::_handle_bom(Encoding_e enc)
4884{
4885 if(m_encoding == NOBOM)
4886 {
4887 if(enc == UTF8 || /*beginning of file*/(m_evt_handler->m_curr->line_contents.rem.str == _buf().str))
4888 m_encoding = enc;
4889 else
4890 _c4err("non-UTF8 byte order mark can appear only at the beginning of the file");
4891 }
4892 else if(enc != m_encoding)
4893 {
4894 _c4err("byte order mark can only be set once");
4895 }
4896}
4897
4898
4899//-----------------------------------------------------------------------------
4900
4901template<class EventHandler>
4902void ParseEngine<EventHandler>::_handle_seq_json()
4903{
4904seqjson_start:
4905 _c4dbgpf("handle2_seq_json: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
4906
4907 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
4908 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RSEQ), m_evt_handler->m_curr->pos);
4909 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RFLOW), m_evt_handler->m_curr->pos);
4910 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT), m_evt_handler->m_curr->pos);
4911 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RVAL) != has_all(RNXT), m_evt_handler->m_curr->pos);
4912
4913 _handle_flow_skip_whitespace();
4914 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
4915 if(!rem.len)
4916 goto seqjson_again;
4917
4918 if(has_any(RVAL))
4919 {
4920 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
4921 const char first = rem.str[0];
4922 _c4dbgpf("mapjson[RVAL]: '{}'", first);
4923 switch(first)
4924 {
4925 case '"':
4926 {
4927 _c4dbgp("seqjson[RVAL]: scanning double-quoted scalar");
4928 ScannedScalar sc = _scan_scalar_dquot();
4929 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
4930 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
4931 addrem_flags(RNXT, RVAL);
4932 break;
4933 }
4934 case '[':
4935 {
4936 _c4dbgp("seqjson[RVAL]: start child seqjson");
4937 addrem_flags(RNXT, RVAL);
4938 m_evt_handler->begin_seq_val_flow();
4939 addrem_flags(RVAL, RNXT);
4940 _line_progressed(1);
4941 break;
4942 }
4943 case '{':
4944 {
4945 _c4dbgp("seqjson[RVAL]: start child mapjson");
4946 addrem_flags(RNXT, RVAL);
4947 m_evt_handler->begin_map_val_flow();
4948 addrem_flags(RMAP|RKEY, RSEQ|RVAL|RNXT);
4949 _line_progressed(1);
4950 goto seqjson_finish;
4951 }
4952 case ']': // this happens on a trailing comma like ", ]"
4953 {
4954 _c4dbgp("seqjson[RVAL]: end!");
4955 rem_flags(RSEQ);
4956 _end_seq_flow();
4957 _line_progressed(1);
4958 if(!has_all(RSEQ|RFLOW))
4959 goto seqjson_finish;
4960 break;
4961 }
4962 default:
4963 {
4964 ScannedScalar sc;
4965 if(_scan_scalar_seq_json(&sc))
4966 {
4967 _c4dbgp("seqjson[RVAL]: it's a plain scalar.");
4968 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
4969 m_evt_handler->set_val_scalar_plain(maybe_filtered);
4970 addrem_flags(RNXT, RVAL);
4971 }
4972 else
4973 {
4974 _c4err("parse error");
4975 }
4976 }
4977 }
4978 }
4979 else // RNXT
4980 {
4981 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RNXT), m_evt_handler->m_curr->pos);
4982 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
4983 const char first = rem.str[0];
4984 _c4dbgpf("mapjson[RNXT]: '{}'", first);
4985 switch(first)
4986 {
4987 case ',':
4988 {
4989 _c4dbgp("seqjson[RNXT]: expect next val");
4990 addrem_flags(RVAL, RNXT);
4991 m_evt_handler->add_sibling();
4992 _line_progressed(1);
4993 break;
4994 }
4995 case ']':
4996 {
4997 _c4dbgp("seqjson[RNXT]: end!");
4998 _end_seq_flow();
4999 _line_progressed(1);
5000 goto seqjson_finish;
5001 }
5002 default:
5003 _c4err("parse error");
5004 }
5005 }
5006
5007 seqjson_again:
5008 _c4dbgt("seqjson: go again", 0);
5009 if(_finished_line())
5010 {
5011 if(C4_LIKELY(!_finished_file()))
5012 {
5013 _line_ended();
5014 _scan_line();
5016 }
5017 else
5018 {
5019 _c4err("missing terminating ]");
5020 }
5021 }
5022 goto seqjson_start;
5023
5024 seqjson_finish:
5025 _c4dbgp("seqjson: finish");
5026}
5027
5028
5029//-----------------------------------------------------------------------------
5030
5031template<class EventHandler>
5032void ParseEngine<EventHandler>::_handle_map_json()
5033{
5034mapjson_start:
5035 _c4dbgpf("handle2_map_json: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5036
5037 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RMAP), m_evt_handler->m_curr->pos);
5038 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RFLOW), m_evt_handler->m_curr->pos);
5039 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5040 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RKCL|RVAL|RNXT), m_evt_handler->m_curr->pos);
5041 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RKEY) + has_any(RKCL) + has_any(RVAL) + has_any(RNXT)), m_evt_handler->m_curr->pos);
5042
5043 _handle_flow_skip_whitespace();
5044 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
5045 if(!rem.len)
5046 goto mapjson_again;
5047
5048 if(has_any(RKEY))
5049 {
5050 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
5051 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5052 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5053 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5054 const char first = rem.str[0];
5055 _c4dbgpf("mapjson[RKEY]: '{}'", first);
5056 switch(first)
5057 {
5058 case '"':
5059 {
5060 _c4dbgp("mapjson[RKEY]: scanning double-quoted scalar");
5061 ScannedScalar sc = _scan_scalar_dquot();
5062 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5063 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5064 addrem_flags(RKCL, RKEY);
5065 break;
5066 }
5067 case '}': // this happens on a trailing comma like ", }"
5068 {
5069 _c4dbgp("mapjson[RKEY]: end!");
5070 _end_map_flow();
5071 _line_progressed(1);
5072 goto mapjson_finish;
5073 }
5074 default:
5075 _c4err("parse error");
5076 }
5077 }
5078 else if(has_any(RVAL))
5079 {
5080 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
5081 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
5082 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5083 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5084 const char first = rem.str[0];
5085 _c4dbgpf("mapjson[RVAL]: '{}'", first);
5086 switch(first)
5087 {
5088 case '"':
5089 {
5090 _c4dbgp("mapjson[RVAL]: scanning double-quoted scalar");
5091 ScannedScalar sc = _scan_scalar_dquot();
5092 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5093 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5094 addrem_flags(RNXT, RVAL);
5095 break;
5096 }
5097 case '[':
5098 {
5099 _c4dbgp("mapjson[RVAL]: start val seqjson");
5100 addrem_flags(RNXT, RVAL);
5101 m_evt_handler->begin_seq_val_flow();
5102 _set_indentation(m_evt_handler->m_parent->indref);
5103 addrem_flags(RSEQ|RVAL, RMAP|RNXT);
5104 _line_progressed(1);
5105 goto mapjson_finish;
5106 }
5107 case '{':
5108 {
5109 _c4dbgp("mapjson[RVAL]: start val mapjson");
5110 addrem_flags(RNXT, RVAL);
5111 m_evt_handler->begin_map_val_flow();
5112 _set_indentation(m_evt_handler->m_parent->indref);
5113 addrem_flags(RKEY, RNXT);
5114 _line_progressed(1);
5115 // keep going in this function
5116 break;
5117 }
5118 default:
5119 {
5120 ScannedScalar sc;
5121 if(_scan_scalar_map_json(&sc))
5122 {
5123 _c4dbgp("mapjson[RVAL]: plain scalar.");
5124 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5125 m_evt_handler->set_val_scalar_plain(maybe_filtered);
5126 addrem_flags(RNXT, RVAL);
5127 }
5128 else
5129 {
5130 _c4err("parse error");
5131 }
5132 break;
5133 }
5134 }
5135 }
5136 else if(has_any(RKCL)) // read the key colon
5137 {
5138 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
5139 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5140 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5141 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5142 const char first = rem.str[0];
5143 _c4dbgpf("mapjson[RKCL]: '{}'", first);
5144 if(first == ':')
5145 {
5146 _c4dbgp("mapjson[RKCL]: found the colon");
5147 addrem_flags(RVAL, RKCL);
5148 _line_progressed(1);
5149 }
5150 else
5151 {
5152 _c4err("parse error");
5153 }
5154 }
5155 else if(has_any(RNXT))
5156 {
5157 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
5158 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
5159 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5160 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5161 _c4dbgpf("mapjson[RNXT]: '{}'", rem.str[0]);
5162 if(rem.begins_with(','))
5163 {
5164 _c4dbgp("mapjson[RNXT]: expect next keyval");
5165 m_evt_handler->add_sibling();
5166 addrem_flags(RKEY, RNXT);
5167 _line_progressed(1);
5168 }
5169 else if(rem.begins_with('}'))
5170 {
5171 _c4dbgp("mapjson[RNXT]: end!");
5172 _end_map_flow();
5173 _line_progressed(1);
5174 goto mapjson_finish;
5175 }
5176 else
5177 {
5178 _c4err("parse error"); // LCOV_EXCL_LINE
5179 }
5180 }
5181
5182 mapjson_again:
5183 _c4dbgt("mapjson: go again", 0);
5184 if(_finished_line())
5185 {
5186 if(C4_LIKELY(!_finished_file()))
5187 {
5188 _line_ended();
5189 _scan_line();
5191 }
5192 else
5193 {
5194 _c4err("missing terminating }");
5195 }
5196 }
5197 goto mapjson_start;
5198
5199 mapjson_finish:
5200 _c4dbgp("mapjson: finish");
5201}
5202
5203
5204//-----------------------------------------------------------------------------
5205
5206template<class EventHandler>
5207void ParseEngine<EventHandler>::_handle_seq_imap()
5208{
5209seqimap_start:
5210 _c4dbgpf("handle2_seq_imap: node_id={} level={} indref={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5211
5212 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RSEQIMAP), m_evt_handler->m_curr->pos);
5213 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
5214 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT|QMRK|RKCL), m_evt_handler->m_curr->pos);
5215 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, 1 == has_all(RVAL) + has_all(RNXT) + has_all(QMRK) + has_all(RKCL), m_evt_handler->m_curr->pos);
5216 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 3, m_evt_handler->m_curr->pos);
5217
5218 _handle_flow_skip_whitespace();
5219 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
5220 if(!rem.len)
5221 goto seqimap_again;
5222
5223 if(has_any(RVAL))
5224 {
5225 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RVAL), m_evt_handler->m_curr->pos);
5226 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5227 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5228 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
5229 const char first = rem.str[0];
5230 _c4dbgpf("seqimap[RVAL]: '{}'", _c4prc(first));
5231 ScannedScalar sc;
5232 if(first == '\'')
5233 {
5234 _c4dbgp("seqimap[RVAL]: scanning single-quoted scalar");
5235 sc = _scan_scalar_squot();
5236 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5237 _handle_annotations_before_blck_val_scalar();
5238 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
5239 _end_map_flow();
5240 goto seqimap_finish;
5241 }
5242 else if(first == '"')
5243 {
5244 _c4dbgp("seqimap[RVAL]: scanning double-quoted scalar");
5245 sc = _scan_scalar_dquot();
5246 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5247 _handle_annotations_before_blck_val_scalar();
5248 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5249 _end_map_flow();
5250 goto seqimap_finish;
5251 }
5252 // block scalars (ie | and >) cannot appear in flow containers
5253 else if(_scan_scalar_plain_map_flow(&sc))
5254 {
5255 _c4dbgp("seqimap[RVAL]: it's a scalar.");
5256 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5257 _handle_annotations_before_blck_val_scalar();
5258 m_evt_handler->set_val_scalar_plain(maybe_filtered);
5259 _end_map_flow();
5260 goto seqimap_finish;
5261 }
5262 else if(first == '[')
5263 {
5264 _c4dbgp("seqimap[RVAL]: start child seqflow");
5265 addrem_flags(RNXT, RVAL);
5266 _handle_annotations_before_blck_val_scalar();
5267 m_evt_handler->begin_seq_val_flow();
5268 addrem_flags(RVAL, RNXT|RSEQIMAP);
5269 _set_indentation(m_evt_handler->m_parent->indref);
5270 _line_progressed(1);
5271 goto seqimap_finish;
5272 }
5273 else if(first == '{')
5274 {
5275 _c4dbgp("seqimap[RVAL]: start child mapflow");
5276 addrem_flags(RNXT, RVAL);
5277 _handle_annotations_before_blck_val_scalar();
5278 m_evt_handler->begin_map_val_flow();
5279 addrem_flags(RMAP|RKEY, RSEQ|RVAL|RSEQIMAP|RNXT);
5280 _set_indentation(m_evt_handler->m_parent->indref);
5281 _line_progressed(1);
5282 goto seqimap_finish;
5283 }
5284 else if(first == ',' || first == ']')
5285 {
5286 _c4dbgp("seqimap[RVAL]: finish without val.");
5287 _handle_annotations_before_blck_val_scalar();
5288 m_evt_handler->set_val_scalar_plain_empty();
5289 _end_map_flow();
5290 goto seqimap_finish;
5291 }
5292 else if(first == '*')
5293 {
5294 csubstr ref = _scan_ref_seq();
5295 _c4dbgpf("seqimap[RVAL]: ref! {}", _prs(ref));
5296 _handle_valref(ref);
5297 addrem_flags(RNXT, RVAL);
5298 }
5299 else if(first == '&')
5300 {
5301 csubstr anchor = _scan_anchor();
5302 _c4dbgpf("seqimap[RVAL]: anchor! {}", _prs(anchor));
5303 _add_annotation(&m_pending_anchors, anchor);
5304 }
5305 else if(first == '!')
5306 {
5307 csubstr tag = _scan_tag();
5308 _c4dbgpf("seqimap[RVAL]: tag! {}", _prs(tag));
5309 _add_annotation(&m_pending_tags, tag);
5310 }
5311 else
5312 {
5313 _c4err("parse error"); // LCOV_EXCL_LINE
5314 }
5315 }
5316 else if(has_any(RNXT))
5317 {
5318 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RNXT), m_evt_handler->m_curr->pos);
5319 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5320 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5321 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
5322 const char first = rem.str[0];
5323 _c4dbgpf("seqimap[RNXT]: '{}'", _c4prc(first));
5324 if(first == ',' || first == ']')
5325 {
5326 // we may get here because a map or a seq started and we
5327 // return later
5328 _c4dbgp("seqimap: done");
5329 _end_map_flow();
5330 goto seqimap_finish;
5331 }
5332 else
5333 {
5334 _c4err("parse error"); // LCOV_EXCL_LINE
5335 }
5336 }
5337 else if(has_any(QMRK))
5338 {
5339 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(QMRK), m_evt_handler->m_curr->pos);
5340 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5341 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5342 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
5343 const char first = rem.str[0];
5344 _c4dbgpf("seqimap[QMRK]: '{}'", _c4prc(first));
5345 ScannedScalar sc;
5346 if(first == '\'')
5347 {
5348 _c4dbgp("seqimap[QMRK]: scanning single-quoted scalar");
5349 sc = _scan_scalar_squot();
5350 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5351 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
5352 addrem_flags(RKCL, QMRK);
5353 goto seqimap_again;
5354 }
5355 else if(first == '"')
5356 {
5357 _c4dbgp("seqimap[QMRK]: scanning double-quoted scalar");
5358 sc = _scan_scalar_dquot();
5359 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5360 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5361 addrem_flags(RKCL, QMRK);
5362 goto seqimap_again;
5363 }
5364 // block scalars (ie | and >) cannot appear in flow containers
5365 else if(_scan_scalar_plain_map_flow(&sc))
5366 {
5367 _c4dbgp("seqimap[QMRK]: it's a scalar.");
5368 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5369 m_evt_handler->set_key_scalar_plain(maybe_filtered);
5370 addrem_flags(RKCL, QMRK);
5371 goto seqimap_again;
5372 }
5373 else if(first == '[')
5374 {
5375 _c4dbgp("seqimap[QMRK]: start child seqflow");
5376 addrem_flags(RKCL, QMRK);
5377 m_evt_handler->begin_seq_key_flow();
5378 addrem_flags(RSEQ|RVAL, RKCL|RSEQIMAP);
5379 _set_indentation(m_evt_handler->m_parent->indref);
5380 _line_progressed(1);
5381 goto seqimap_finish;
5382 }
5383 else if(first == '{')
5384 {
5385 _c4dbgp("seqimap[QMRK]: start child mapflow");
5386 addrem_flags(RKCL, QMRK);
5387 m_evt_handler->begin_map_key_flow();
5388 addrem_flags(RMAP|RKEY, RSEQ|RKCL|RSEQIMAP);
5389 _set_indentation(m_evt_handler->m_parent->indref);
5390 _line_progressed(1);
5391 goto seqimap_finish;
5392 }
5393 else if(first == ',' || first == ']')
5394 {
5395 _c4dbgp("seqimap[QMRK]: finish without key.");
5396 m_evt_handler->set_key_scalar_plain_empty();
5397 m_evt_handler->set_val_scalar_plain_empty();
5398 _end_map_flow();
5399 goto seqimap_finish;
5400 }
5401 else if(first == '&')
5402 {
5403 csubstr anchor = _scan_anchor();
5404 _c4dbgp("seqimap[QMRK]: anchor!");
5405 m_evt_handler->set_key_anchor(anchor);
5406 }
5407 else if(first == '*')
5408 {
5409 csubstr ref = _scan_ref_seq();
5410 _c4dbgp("seqimap[QMRK]: ref!");
5411 _handle_keyref(ref);
5412 addrem_flags(RKCL, QMRK);
5413 }
5414 else
5415 {
5416 _c4err("parse error"); // LCOV_EXCL_LINE
5417 }
5418 }
5419 else if(has_any(RKCL))
5420 {
5421 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5422 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5423 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5424 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RKCL), m_evt_handler->m_curr->pos);
5425 const char first = rem.str[0];
5426 _c4dbgpf("seqimap[RKCL]: '{}'", _c4prc(first));
5427 if(first == ':')
5428 {
5429 _c4dbgp("seqimap[RKCL]: found ':'");
5430 addrem_flags(RVAL, RKCL);
5431 _line_progressed(1);
5432 goto seqimap_again;
5433 }
5434 else if(first == ',' || first == ']')
5435 {
5436 _c4dbgp("seqimap[RKCL]: found ','. finish without val");
5437 m_evt_handler->set_val_scalar_plain_empty();
5438 _end_map_flow();
5439 goto seqimap_finish;
5440 }
5441 else
5442 {
5443 _c4err("parse error"); // LCOV_EXCL_LINE
5444 }
5445 }
5446
5447 seqimap_again:
5448 _c4dbgt("seqimap: go again", 0);
5449 if(_finished_line())
5450 {
5451 if(C4_LIKELY(!_finished_file()))
5452 {
5453 _line_ended();
5454 _scan_line();
5456 }
5457 else
5458 {
5459 _c4err("parse error");
5460 }
5461 }
5462 goto seqimap_start;
5463
5464 seqimap_finish:
5465 _c4dbgp("seqimap: finish");
5466}
5467
5468
5469//-----------------------------------------------------------------------------
5470
5471template<class EventHandler>
5472void ParseEngine<EventHandler>::_handle_seq_flow()
5473{
5474seqflow_start:
5475 _c4dbgpf("handle_seq_flow: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5476
5477 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
5478 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RSEQ), m_evt_handler->m_curr->pos);
5479 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RFLOW), m_evt_handler->m_curr->pos);
5480 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT), m_evt_handler->m_curr->pos);
5481 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RVAL) != has_all(RNXT), m_evt_handler->m_curr->pos);
5482 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref != npos, m_evt_handler->m_curr->pos);
5483
5484 if(m_evt_handler->m_curr->at_line_beginning())
5485 {
5486 _handle_flow_line_beginning();
5487 }
5488
5489 _handle_flow_skip_whitespace();
5490 if(!m_evt_handler->m_curr->line_contents.rem.len)
5491 goto seqflow_again;
5492
5493 if(has_any(RVAL))
5494 {
5495 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5496 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5497 ScannedScalar sc;
5498 if(first == '\'')
5499 {
5500 _c4dbgp("seqflow[RVAL]: scanning single-quoted scalar");
5501 sc = _scan_scalar_squot();
5502 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5503 _handle_annotations_before_blck_val_scalar();
5504 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
5505 addrem_flags(RNXT, RVAL);
5506 _mark_seqflow_val_end();
5507 }
5508 else if(first == '"')
5509 {
5510 _c4dbgp("seqflow[RVAL]: scanning double-quoted scalar");
5511 sc = _scan_scalar_dquot();
5512 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5513 _handle_annotations_before_blck_val_scalar();
5514 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5515 addrem_flags(RNXT, RVAL);
5516 _mark_seqflow_val_end();
5517 }
5518 // block scalars (ie | and >) cannot appear in flow containers
5519 else if(_scan_scalar_plain_seq_flow(&sc))
5520 {
5521 _c4dbgp("seqflow[RVAL]: it's a scalar.");
5522 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5523 _handle_annotations_before_blck_val_scalar();
5524 m_evt_handler->set_val_scalar_plain(maybe_filtered);
5525 addrem_flags(RNXT, RVAL);
5526 _mark_seqflow_val_end();
5527 }
5528 else if(first == '[')
5529 {
5530 _c4dbgp("seqflow[RVAL]: start child seqflow");
5531 addrem_flags(RNXT, RVAL);
5532 _handle_annotations_before_blck_val_scalar();
5533 m_evt_handler->begin_seq_val_flow();
5534 _set_indentation(m_evt_handler->m_parent->indref);
5535 addrem_flags(RVAL, RNXT);
5536 _line_progressed(1);
5537 }
5538 else if(first == '{')
5539 {
5540 _c4dbgp("seqflow[RVAL]: start child mapflow");
5541 addrem_flags(RNXT, RVAL);
5542 _handle_annotations_before_blck_val_scalar();
5543 m_evt_handler->begin_map_val_flow();
5544 _set_indentation(m_evt_handler->m_parent->indref);
5545 addrem_flags(RMAP|RKEY, RSEQ|RVAL|RNXT);
5546 _line_progressed(1);
5547 goto seqflow_finish;
5548 }
5549 else if(first == ']') // this happens on cases such as [] or [.., ]
5550 {
5551 _c4dbgp("seqflow[RVAL]: end!");
5552 if(m_pending_anchors.num_entries | m_pending_tags.num_entries)
5553 {
5554 _c4dbgp("seqflow[RVAL]: add pending annotations");
5555 _handle_annotations_before_blck_val_scalar();
5556 m_evt_handler->set_val_scalar_plain_empty();
5557 }
5558 _line_progressed(1);
5559 _end_seq_flow();
5560 goto seqflow_finish;
5561 }
5562 else if(first == '*')
5563 {
5564 csubstr ref = _scan_ref_seq();
5565 _c4dbgpf("seqflow[RVAL]: ref! {}", _prs(ref));
5566 _handle_valref(ref);
5567 addrem_flags(RNXT, RVAL);
5568 }
5569 else if(first == '&')
5570 {
5571 csubstr anchor = _scan_anchor();
5572 _c4dbgpf("seqflow[RVAL]: anchor! {}", _prs(anchor));
5573 _add_annotation(&m_pending_anchors, anchor);
5574 }
5575 else if(first == '!')
5576 {
5577 csubstr tag = _scan_tag();
5578 _c4dbgpf("seqflow[RVAL]: tag! {}", _prs(tag));
5579 _add_annotation(&m_pending_tags, tag);
5580 }
5581 else if(first == ':')
5582 {
5583 _c4dbgpf("seqflow[RVAL]: actually seqimap at node[{}], with empty key", m_evt_handler->m_curr->node_id);
5584 addrem_flags(RNXT, RVAL);
5585 m_evt_handler->begin_map_val_flow();
5586 _set_indentation(m_evt_handler->m_parent->indref);
5587 _handle_annotations_before_blck_key_scalar();
5588 m_evt_handler->set_key_scalar_plain_empty();
5589 addrem_flags(RSEQIMAP|RVAL, RSEQ|RNXT);
5590 _line_progressed(1);
5591 goto seqflow_finish;
5592 }
5593 else if(first == '?')
5594 {
5595 _c4dbgp("seqflow[RVAL]: start child mapflow, explicit key");
5596 addrem_flags(RNXT, RVAL);
5597 m_evt_handler->begin_map_val_flow();
5598 _set_indentation(m_evt_handler->m_parent->indref);
5599 addrem_flags(RSEQIMAP|QMRK, RSEQ|RNXT);
5600 _line_progressed(1);
5601 _maybe_skip_whitespace_tokens();
5602 goto seqflow_finish;
5603 }
5604 else if(first == ',')
5605 {
5606 if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
5607 {
5608 _c4dbgp("seqflow[RVAL]: add pending annotations");
5609 _handle_annotations_before_blck_val_scalar();
5610 m_evt_handler->set_val_scalar_plain_empty();
5611 addrem_flags(RNXT, RVAL);
5612 _mark_seqflow_val_end();
5613 }
5614 else
5615 {
5616 _c4err("parse error");
5617 }
5618 }
5619 else
5620 {
5621 _c4err("parse error");
5622 }
5623 }
5624 else // RNXT
5625 {
5626 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RNXT), m_evt_handler->m_curr->pos);
5627 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5628 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5629 if(first == ',')
5630 {
5631 _c4dbgp("seqflow[RNXT]: expect next val");
5632 addrem_flags(RVAL, RNXT);
5633 m_evt_handler->add_sibling();
5634 _line_progressed(1);
5635 if(m_evt_handler->m_curr->line_contents.rem.begins_with('#'))
5636 {
5637 _c4err("parse error: invalid comment after comma");
5638 }
5639 _mark_seqflow_val_end();
5640 }
5641 else if(first == ']')
5642 {
5643 _c4dbgp("seqflow[RNXT]: end!");
5644 _line_progressed(1);
5645 _end_seq_flow();
5646 goto seqflow_finish;
5647 }
5648 else if(first == ':')
5649 {
5650 _c4dbgpf("seqflow[RNXT]: line@valend={} line@now={}", m_prev_val_end, m_evt_handler->m_curr->pos.line);
5651 if(m_prev_val_end != NONE && m_evt_handler->m_curr->pos.line == m_prev_val_end)
5652 {
5653 _c4dbgpf("seqflow[RNXT]: actually seqimap at node[{}]", m_evt_handler->m_curr->node_id);
5654 m_evt_handler->actually_val_is_first_key_of_new_map_flow();
5655 _set_indentation(m_evt_handler->m_parent->indref);
5656 _line_progressed(1);
5657 addrem_flags(RSEQIMAP|RVAL, RNXT);
5658 goto seqflow_finish;
5659 }
5660 else
5661 {
5662 _c4err("parse error");
5663 }
5664 }
5665 else
5666 {
5667 _c4err("parse error");
5668 }
5669 }
5670
5671 seqflow_again:
5672 _c4dbgt("seqflow: go again", 0);
5673 if(_finished_line())
5674 {
5675 if(C4_LIKELY(!_finished_file()))
5676 {
5677 _line_ended();
5678 _scan_line();
5680 }
5681 else
5682 {
5683 _c4err("missing terminating ]");
5684 }
5685 }
5686 goto seqflow_start;
5687
5688 seqflow_finish:
5689 _c4dbgp("seqflow: finish");
5690}
5691
5692
5693//-----------------------------------------------------------------------------
5694
// Parse the body of a flow-style map, up to its closing '}'.
//
// Implemented as a goto-driven loop over three labels:
//  - mapflow_start: check the state invariants, then dispatch on the
//    current state flag and on the first character of the rest of the line;
//  - mapflow_again: when the current line is finished, scan the next line
//    (or raise "missing terminating }" when the file is finished as well),
//    then jump back to mapflow_start;
//  - mapflow_finish: leave the function.
//
// RMAP and RFLOW must be set, together with exactly one of:
//  - RKEY: a key is expected
//  - RKCL: a key was read, the ':' is expected
//  - RVAL: a val is expected
//  - RNXT: a val was read, ',' or '}' is expected
//  - QMRK: a '?' was read, an explicit key is expected
//
// The addrem_flags(x, y) calls move between these states; judging by the
// invariant asserted at mapflow_start, x is the set of flags switched on
// and y the set of flags switched off.
//
// The function is left (goto mapflow_finish) when the map is closed with
// '}', or when a child flow seq is opened with '['. In the latter case the
// flags are changed to RSEQ|RVAL; presumably the caller then dispatches to
// the seq-flow handler (NOTE(review): confirm against the caller). A child
// flow map opened with '{' is handled by continuing the loop in here.
5695template<class EventHandler>
5696void ParseEngine<EventHandler>::_handle_map_flow()
5697{
5698mapflow_start:
5699 _c4dbgpf("handle_map_flow: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5700
 // state invariants: a flow map, in exactly one of the five sub-states
5701 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RMAP), m_evt_handler->m_curr->pos);
5702 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RFLOW), m_evt_handler->m_curr->pos);
5703 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RKCL|RVAL|RNXT|QMRK), m_evt_handler->m_curr->pos);
5704 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RKEY) + has_any(RKCL) + has_any(RVAL) + has_any(RNXT) + has_any(QMRK)), m_evt_handler->m_curr->pos);
5705
5706 if(m_evt_handler->m_curr->at_line_beginning())
5707 {
5708 _handle_flow_line_beginning();
5709 }
5710
5711 _handle_flow_skip_whitespace();
 // nothing left on this line: skip the dispatch and go to the
 // line-advancing code
5712 if(!m_evt_handler->m_curr->line_contents.rem.len)
5713 goto mapflow_again;
5714
 // RKEY: a key is expected (or an annotation, or the end of the map)
5715 if(has_any(RKEY))
5716 {
5717 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
5718 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5719 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5720 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5721 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5722 _c4dbgpf("mapflow[RKEY]: '{}'", first);
5723 ScannedScalar sc;
 // scalar keys: scan, filter, attach any pending annotations, emit the
 // key event, then wait for the colon (RKCL)
5724 if(first == '\'')
5725 {
5726 _c4dbgp("mapflow[RKEY]: scanning single-quoted scalar");
5727 sc = _scan_scalar_squot();
5728 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5729 _handle_annotations_before_blck_key_scalar();
5730 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
5731 addrem_flags(RKCL, RKEY|QMRK);
5732 }
5733 else if(first == '"')
5734 {
5735 _c4dbgp("mapflow[RKEY]: scanning double-quoted scalar");
5736 sc = _scan_scalar_dquot();
5737 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5738 _handle_annotations_before_blck_key_scalar();
5739 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5740 addrem_flags(RKCL, RKEY|QMRK);
5741 }
5742 // block scalars (ie | and >) cannot appear in flow containers
 // the plain-scalar scan is attempted before the single-character
 // tokens below; those are only reached when it returns false
5743 else if(_scan_scalar_plain_map_flow(&sc))
5744 {
5745 _c4dbgp("mapflow[RKEY]: plain scalar");
5746 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5747 _handle_annotations_before_blck_key_scalar();
5748 m_evt_handler->set_key_scalar_plain(maybe_filtered);
5749 addrem_flags(RKCL, RKEY|QMRK);
5750 }
5751 else if(first == '?')
5752 {
 // explicit key marker: consume it and read the key in the QMRK state
5753 _c4dbgp("mapflow[RKEY]: explicit key");
5754 _handle_annotations_before_blck_key_scalar();
5755 addrem_flags(QMRK, RKEY);
5756 _line_progressed(1);
5757 _maybe_skip_whitespace_tokens();
5758 }
5759 else if(first == ':')
5760 {
 // colon with no key before it: emit an empty key and go straight
 // to reading the val
5761 _c4dbgp("mapflow[RKEY]: setting empty key");
5762 _handle_annotations_before_blck_key_scalar();
5763 m_evt_handler->set_key_scalar_plain_empty();
5764 addrem_flags(RVAL, RKEY|QMRK);
5765 _line_progressed(1);
5766 _maybe_skip_whitespace_tokens();
5767 }
5768 else if(first == ',')
5769 {
 // a comma where a key was expected is an error unless the helper
 // returns true (NOTE(review): presumably when pending annotations
 // were turned into an empty entry - confirm in the helper).
 // The comma is not consumed here: the RNXT state consumes it on
 // the next iteration.
5770 _c4dbgp("mapflow[RKEY]: comma!");
5771 if(!_handle_annotations_before_unexpected_flow_token_rkey())
5772 _c4err("unexpected comma");
5773 addrem_flags(RNXT, RKEY|QMRK);
5774 // keep going in this function
5775 }
5776 else if(first == '}') // this happens on a trailing comma like ", }"
5777 {
 // the helper's result is deliberately ignored: the map is closed
 // either way
5778 _c4dbgp("mapflow[RKEY]: end!");
5779 (void)_handle_annotations_before_unexpected_flow_token_rkey();
5780 _line_progressed(1);
5781 _end_map_flow();
5782 goto mapflow_finish;
5783 }
5784 else if(first == '&')
5785 {
 // anchors and tags are only buffered; the state stays RKEY so that
 // the node they apply to is read on a following iteration
5786 csubstr anchor = _scan_anchor();
5787 _c4dbgpf("mapflow[RKEY]: key anchor! {}", _prs(anchor));
5788 _add_annotation(&m_pending_anchors, anchor);
5789 }
5790 else if(first == '!')
5791 {
5792 csubstr tag = _scan_tag();
5793 _c4dbgpf("mapflow[RKEY]: tag! {}", _prs(tag));
5794 _add_annotation(&m_pending_tags, tag);
5795 }
5796 else if(first == '*')
5797 {
 // a reference used as the key
5798 csubstr ref = _scan_ref_map();
5799 _c4dbgpf("mapflow[RKEY]: key ref! {}", _prs(ref));
5800 _handle_keyref(ref);
5801 addrem_flags(RKCL, RKEY);
5802 }
5803 else if(first == '[')
5804 {
5805 // RYML's tree cannot store container keys, but that's
5806 // handled inside the tree event handler. Other handler
5807 // types may be able to handle it.
5808 _c4dbgp("mapflow[RKEY]: start child seqflow (!)");
5809 _handle_annotations_before_blck_key_scalar();
 // RKCL is set before the child is begun, and the seq-reading flags
 // after it; presumably the first applies to this map's state and the
 // second to the child's (NOTE(review): confirm in the handler)
5810 addrem_flags(RKCL, RKEY);
5811 m_evt_handler->begin_seq_key_flow();
5812 addrem_flags(RSEQ|RVAL, RMAP|RKCL);
5813 _set_indentation(m_evt_handler->m_parent->indref);
5814 _line_progressed(1);
 // the flags no longer describe a map: leave this function
5815 goto mapflow_finish;
5816 }
5817 else if(first == '{')
5818 {
5819 // RYML's tree cannot store container keys, but that's
5820 // handled inside the tree event handler. Other handler
5821 // types may be able to handle it.
5822 _c4dbgp("mapflow[RKEY]: start child mapflow (!)");
5823 _handle_annotations_before_blck_key_scalar();
5824 addrem_flags(RKCL, RKEY);
5825 m_evt_handler->begin_map_key_flow();
 // the child is also a flow map, starting in the RKEY state
5826 addrem_flags(RKEY, RVAL|RKCL);
5827 _set_indentation(m_evt_handler->m_parent->indref);
5828 _line_progressed(1);
5829 // keep going in this function
5830 }
5831 else
5832 {
5833 _c4err("parse error"); // LCOV_EXCL_LINE
5834 }
5835 }
5836 else if(has_any(RKCL)) // read the key colon
5837 {
5838 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
5839 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5840 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5841 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5842 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5843 _c4dbgpf("mapflow[RKCL]: '{}'", first);
5844 if(first == ':')
5845 {
5846 _c4dbgp("mapflow[RKCL]: found the colon");
5847 addrem_flags(RVAL, RKCL);
5848 _line_progressed(1);
5849 }
5850 else if(first == '}')
5851 {
 // the map is closed right after a key: emit an empty val for it
5852 _c4dbgp("mapflow[RKCL]: end with missing val!");
5853 addrem_flags(RVAL, RKCL);
5854 m_evt_handler->set_val_scalar_plain_empty();
5855 _line_progressed(1);
5856 _end_map_flow();
5857 goto mapflow_finish;
5858 }
5859 else if(first == ',')
5860 {
 // key followed directly by a comma: emit an empty val, start the
 // next sibling and expect its key
5861 _c4dbgp("mapflow[RKCL]: got comma. val is missing");
5862 m_evt_handler->set_val_scalar_plain_empty();
5863 m_evt_handler->add_sibling();
5864 addrem_flags(RKEY, RKCL);
5865 _line_progressed(1);
 // a '#' immediately after the comma is rejected
5866 if(m_evt_handler->m_curr->line_contents.rem.begins_with('#'))
5867 {
5868 _c4err("parse error: invalid comment after comma");
5869 }
5870 }
5871 else
5872 {
5873 _c4err("parse error");
5874 }
5875 }
 // RVAL: a val is expected (or an annotation, or a missing val)
5876 else if(has_any(RVAL))
5877 {
5878 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
5879 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
5880 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5881 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5882 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5883 _c4dbgpf("mapflow[RVAL]: '{}'", first);
5884 ScannedScalar sc;
 // scalar vals: scan, filter, attach any pending annotations, emit the
 // val event, then wait for ',' or '}' (RNXT)
5885 if(first == '\'')
5886 {
5887 _c4dbgp("mapflow[RVAL]: scanning single-quoted scalar");
5888 sc = _scan_scalar_squot();
5889 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5890 _handle_annotations_before_blck_val_scalar();
5891 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
5892 addrem_flags(RNXT, RVAL);
5893 }
5894 else if(first == '"')
5895 {
5896 _c4dbgp("mapflow[RVAL]: scanning double-quoted scalar");
5897 sc = _scan_scalar_dquot();
5898 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5899 _handle_annotations_before_blck_val_scalar();
5900 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5901 addrem_flags(RNXT, RVAL);
5902 }
5903 // block scalars (ie | and >) cannot appear in flow containers
5904 else if(_scan_scalar_plain_map_flow(&sc))
5905 {
5906 _c4dbgp("mapflow[RVAL]: plain scalar.");
5907 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5908 _handle_annotations_before_blck_val_scalar();
5909 m_evt_handler->set_val_scalar_plain(maybe_filtered);
5910 addrem_flags(RNXT, RVAL);
5911 }
5912 else if(first == '[')
5913 {
5914 _c4dbgp("mapflow[RVAL]: start val seqflow");
 // RNXT is set before the child is begun; presumably so that this map
 // resumes at RNXT once the child is closed (NOTE(review): confirm)
5915 addrem_flags(RNXT, RVAL);
5916 _handle_annotations_before_blck_val_scalar();
5917 m_evt_handler->begin_seq_val_flow();
5918 _set_indentation(m_evt_handler->m_parent->indref);
5919 addrem_flags(RSEQ|RVAL, RMAP|RNXT);
5920 _line_progressed(1);
 // the flags no longer describe a map: leave this function
5921 goto mapflow_finish;
5922 }
5923 else if(first == '{')
5924 {
5925 _c4dbgp("mapflow[RVAL]: start val mapflow");
5926 addrem_flags(RNXT, RVAL);
5927 _handle_annotations_before_blck_val_scalar();
5928 m_evt_handler->begin_map_val_flow();
5929 _set_indentation(m_evt_handler->m_parent->indref);
 // the child is also a flow map, starting in the RKEY state
5930 addrem_flags(RKEY, RNXT);
5931 _line_progressed(1);
5932 // keep going in this function
5933 }
5934 else if(first == '}')
5935 {
 // the map is closed where a val was expected: emit an empty val
5936 _c4dbgp("mapflow[RVAL]: end!");
5937 _handle_annotations_before_blck_val_scalar();
5938 m_evt_handler->set_val_scalar_plain_empty();
5939 _line_progressed(1);
5940 _end_map_flow();
5941 goto mapflow_finish;
5942 }
5943 else if(first == ',')
5944 {
 // comma where a val was expected: emit an empty val. The comma is
 // not consumed here: the RNXT state consumes it on the next
 // iteration.
5945 _c4dbgp("mapflow[RVAL]: empty val!");
5946 _handle_annotations_before_blck_val_scalar();
5947 m_evt_handler->set_val_scalar_plain_empty();
5948 addrem_flags(RNXT, RVAL);
5949 // keep going in this function
5950 }
5951 else if(first == '*')
5952 {
 // a reference used as the val (the debug text below says "key",
 // but the ref is passed to _handle_valref())
5953 csubstr ref = _scan_ref_map();
5954 _c4dbgpf("mapflow[RVAL]: key ref! {}", _prs(ref));
5955 _handle_valref(ref);
5956 addrem_flags(RNXT, RVAL);
5957 }
5958 else if(first == '&')
5959 {
 // anchors and tags are only buffered; the state stays RVAL so that
 // the val they apply to is read on a following iteration
5960 csubstr anchor = _scan_anchor();
5961 _c4dbgpf("mapflow[RVAL]: key anchor! {}", _prs(anchor));
5962 _add_annotation(&m_pending_anchors, anchor);
5963 }
5964 else if(first == '!')
5965 {
5966 csubstr tag = _scan_tag();
5967 _c4dbgpf("mapflow[RVAL]: tag! {}", _prs(tag));
5968 _add_annotation(&m_pending_tags, tag);
5969 }
5970 else
5971 {
5972 _c4err("parse error");
5973 }
5974 }
 // RNXT: a key/val pair is complete; only ',' or '}' may follow
5975 else if(has_any(RNXT))
5976 {
5977 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
5978 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
5979 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5980 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5981 _c4dbgpf("mapflow[RNXT]: '{}'", m_evt_handler->m_curr->line_contents.rem.str[0]);
5982 if(m_evt_handler->m_curr->line_contents.rem.begins_with(','))
5983 {
5984 _c4dbgp("mapflow[RNXT]: expect next keyval");
5985 m_evt_handler->add_sibling();
5986 addrem_flags(RKEY, RNXT);
5987 _line_progressed(1);
 // a '#' immediately after the comma is rejected
5988 if(m_evt_handler->m_curr->line_contents.rem.begins_with('#'))
5989 {
5990 _c4err("parse error: invalid comment after comma");
5991 }
5992 }
5993 else if(m_evt_handler->m_curr->line_contents.rem.begins_with('}'))
5994 {
5995 _c4dbgp("mapflow[RNXT]: end!");
5996 _line_progressed(1);
5997 _end_map_flow();
5998 goto mapflow_finish;
5999 }
6000 else
6001 {
6002 _c4err("parse error");
6003 }
6004 }
 // QMRK: a '?' was read; the explicit key is expected. The branches
 // mirror those of RKEY, except that the state left behind is QMRK.
6005 else if(has_any(QMRK))
6006 {
6007 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
6008 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
6009 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
6010 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
6011 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
6012 _c4dbgpf("mapflow[QMRK]: '{}'", first);
6013 ScannedScalar sc;
6014 if(first == '\'')
6015 {
6016 _c4dbgp("mapflow[QMRK]: scanning single-quoted scalar");
6017 sc = _scan_scalar_squot();
6018 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
6019 _handle_annotations_before_blck_key_scalar();
6020 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6021 addrem_flags(RKCL, QMRK);
6022 }
6023 else if(first == '"')
6024 {
6025 _c4dbgp("mapflow[QMRK]: scanning double-quoted scalar");
6026 sc = _scan_scalar_dquot();
6027 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
6028 _handle_annotations_before_blck_key_scalar();
6029 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6030 addrem_flags(RKCL, QMRK);
6031 }
6032 // block scalars (ie | and >) cannot appear in flow containers
6033 else if(_scan_scalar_plain_map_flow(&sc))
6034 {
6035 _c4dbgp("mapflow[QMRK]: plain scalar");
6036 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
6037 _handle_annotations_before_blck_key_scalar();
6038 m_evt_handler->set_key_scalar_plain(maybe_filtered);
6039 addrem_flags(RKCL, QMRK);
6040 }
6041 else if(first == ':')
6042 {
 // colon with no key before it: emit an empty key and go straight
 // to reading the val
6043 _c4dbgp("mapflow[QMRK]: setting empty key");
6044 _handle_annotations_before_blck_key_scalar();
6045 m_evt_handler->set_key_scalar_plain_empty();
6046 addrem_flags(RVAL, QMRK);
6047 _line_progressed(1);
6048 _maybe_skip_whitespace_tokens();
6049 }
6050 else if(first == '}') // this happens on a trailing comma like ", }"
6051 {
 // NOTE(review): in this state no key has been read yet, so both
 // the key and the val are emitted empty before closing the map.
 // Unlike the other '}' branches, _end_map_flow() is called before
 // _line_progressed(1) here.
6052 _c4dbgp("mapflow[QMRK]: end!");
6053 _handle_annotations_before_blck_key_scalar();
6054 m_evt_handler->set_key_scalar_plain_empty();
6055 m_evt_handler->set_val_scalar_plain_empty();
6056 _end_map_flow();
6057 _line_progressed(1);
6058 goto mapflow_finish;
6059 }
6060 else if(first == ',')
6061 {
 // comma where the explicit key was expected: emit an empty key and
 // an empty val. The comma is not consumed here: the RNXT state
 // consumes it on the next iteration.
6062 _c4dbgp("mapflow[QMRK]: empty key+val!");
6063 _handle_annotations_before_blck_key_scalar();
6064 m_evt_handler->set_key_scalar_plain_empty();
6065 m_evt_handler->set_val_scalar_plain_empty();
6066 addrem_flags(RNXT, QMRK);
6067 }
6068 else if(first == '&')
6069 {
 // anchors and tags are only buffered; the state stays QMRK
6070 csubstr anchor = _scan_anchor();
6071 _c4dbgpf("mapflow[QMRK]: key anchor! {}", _prs(anchor));
6072 _add_annotation(&m_pending_anchors, anchor);
6073 }
6074 else if(first == '*')
6075 {
6076 csubstr ref = _scan_ref_map();
6077 _c4dbgpf("mapflow[QMRK]: key ref! {}", _prs(ref));
6078 _handle_keyref(ref);
6079 addrem_flags(RKCL, QMRK);
6080 }
6081 else if(first == '[')
6082 {
6083 // RYML's tree cannot store container keys, but that's
6084 // handled inside the tree sink. Other sink types may be
6085 // able to handle it.
6086 _c4dbgp("mapflow[QMRK]: start child seqflow (!)");
6087 addrem_flags(RKCL, QMRK);
6088 _handle_annotations_before_blck_key_scalar();
6089 m_evt_handler->begin_seq_key_flow();
6090 addrem_flags(RSEQ|RVAL, RMAP|RKCL);
6091 _set_indentation(m_evt_handler->m_parent->indref);
6092 _line_progressed(1);
 // the flags no longer describe a map: leave this function
6093 goto mapflow_finish;
6094 }
6095 else if(first == '{')
6096 {
6097 // RYML's tree cannot store container keys, but that's
6098 // handled inside the tree sink. Other sink types may be
6099 // able to handle it.
6100 _c4dbgp("mapflow[QMRK]: start child mapflow (!)");
6101 addrem_flags(RKCL, QMRK);
6102 _handle_annotations_before_blck_key_scalar();
6103 m_evt_handler->begin_map_key_flow();
6104 _set_indentation(m_evt_handler->m_parent->indref);
 // the child is also a flow map, starting in the RKEY state
6105 addrem_flags(RKEY, RKCL);
6106 _line_progressed(1);
6107 // keep going in this function
6108 }
6109 else if(first == '!')
6110 {
6111 csubstr tag = _scan_tag();
6112 _c4dbgpf("mapflow[QMRK]: tag! {}", _prs(tag));
6113 _add_annotation(&m_pending_tags, tag);
6114 }
6115 else
6116 {
6117 _c4err("parse error"); // LCOV_EXCL_LINE
6118 }
6119 }
6120
 // reached by every branch that did not leave the function, and by the
 // early jump taken when the rest of the line is empty
6121 mapflow_again:
6122 _c4dbgt("mapflow: go again", 0);
6123 if(_finished_line())
6124 {
6125 if(C4_LIKELY(!_finished_file()))
6126 {
6127 _line_ended();
6128 _scan_line();
6130 }
6131 else
6132 {
 // the input ended while the map was still open
6133 _c4err("missing terminating }");
6134 }
6135 }
6136 goto mapflow_start;
6137
6138 mapflow_finish:
6139 _c4dbgp("mapflow: finish");
6140}
6141
6142
6143//-----------------------------------------------------------------------------
6144
6145template<class EventHandler>
6146void ParseEngine<EventHandler>::_handle_seq_block()
6147{
6148seqblck_start:
6149 _c4dbgpf("handle_seq_block: seq_id={} node_id={} level={} indent={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
6150
6151 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RSEQ), m_evt_handler->m_curr->pos);
6152 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RBLCK), m_evt_handler->m_curr->pos);
6153 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT), m_evt_handler->m_curr->pos);
6154 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RVAL) + has_any(RNXT)), m_evt_handler->m_curr->pos);
6155
6156 _maybe_skip_comment_strict();
6157 if(!m_evt_handler->m_curr->line_contents.rem.len)
6158 goto seqblck_again;
6159
6160 if(has_any(RVAL))
6161 {
6162 _c4dbgpf("seqblck[RVAL]: col={}", m_evt_handler->m_curr->pos.col);
6163 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
6164 if(m_evt_handler->m_curr->at_line_beginning())
6165 {
6166 _c4dbgpf("seqblck[RVAL]: indref={} indentation={}", m_evt_handler->m_curr->indref+1, m_evt_handler->m_curr->line_contents.indentation);
6167 if(m_evt_handler->m_curr->indentation_ge_extra())
6168 {
6169 _c4dbgpf("seqblck[RVAL]: skip {} from indentation", m_evt_handler->m_curr->line_contents.indentation);
6170 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6171 if(!m_evt_handler->m_curr->line_contents.rem.len)
6172 goto seqblck_again;
6173 }
6174 else if(m_evt_handler->m_curr->indentation_lt_extra())
6175 {
6176 _c4dbgp("seqblck[RVAL]: smaller indentation than RVAL!");
6177 if(m_evt_handler->m_curr->indentation_eq())
6178 {
6179 _c4dbgp("seqblck[RVAL]: smaller indentation than RVAL!");
6180 _handle_annotations_before_blck_val_scalar();
6181 m_evt_handler->set_val_scalar_plain_empty();
6182 addrem_flags(RNXT, RVAL);
6183 goto seqblck_again;
6184 }
6185 else
6186 {
6187 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_lt(), m_evt_handler->m_curr->pos);
6188 _c4dbgp("seqblck[RVAL]: smaller indentation!");
6189 _handle_indentation_pop_from_block_seq();
6190 goto seqblck_finish;
6191 }
6192 }
6193 else if(m_evt_handler->m_curr->line_contents.indentation == npos)
6194 {
6195 _c4dbgp("seqblck[RVAL]: empty line!");
6196 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
6197 goto seqblck_again;
6198 }
6199 }
6200 _RYML_ASSERT_PARSE_(callbacks(), m_evt_handler->m_curr->line_contents.rem.len, m_evt_handler->m_curr->pos);
6201 const size_t startmark = _handle_block_skip_leading_whitespace();
6202 _c4dbgpf("seqblck[RVAL]: startmark={}", startmark);
6203 if(startmark == npos)
6204 {
6205 _c4dbgp("seqblck[RVAL]: whitespace only");
6206 goto seqblck_again;
6207 }
6208 const size_t tabmark = _handle_block_get_whitespace_mark();
6209 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
6210 _c4dbgpf("seqblck[RVAL]: first='{}' currcol={}", first, m_evt_handler->m_curr->pos.col - 1);
6211 const size_t startline = m_evt_handler->m_curr->pos.line;
6212 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col() - m_bom_len;
6213 ScannedScalar sc;
6214 if(first == '\'')
6215 {
6216 _c4dbgp("seqblck[RVAL]: single-quoted scalar");
6217 sc = _scan_scalar_squot();
6218 if(!_maybe_scan_following_colon())
6219 {
6220 _c4dbgp("seqblck[RVAL]: set as val");
6221 _handle_annotations_before_blck_val_scalar();
6222 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc); // VAL!
6223 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
6224 addrem_flags(RNXT, RVAL);
6225 }
6226 else
6227 {
6228 _c4dbgp("seqblck[RVAL]: start mapblck, set scalar as key");
6229 _handle_block_check_leading_tabs(startmark);
6230 addrem_flags(RNXT, RVAL);
6231 _handle_annotations_before_start_mapblck(startline);
6232 _handle_colon();
6233 m_evt_handler->begin_map_val_block();
6234 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6235 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY!
6236 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6237 addrem_flags(RMAP|RVAL, RSEQ|RNXT);
6238 _maybe_skip_whitespace_tokens();
6239 goto seqblck_finish;
6240 }
6241 }
6242 else if(first == '"')
6243 {
6244 _c4dbgp("seqblck[RVAL]: double-quoted scalar");
6245 sc = _scan_scalar_dquot();
6246 if(!_maybe_scan_following_colon())
6247 {
6248 _c4dbgp("seqblck[RVAL]: set as val");
6249 _handle_annotations_before_blck_val_scalar();
6250 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc); // VAL!
6251 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
6252 addrem_flags(RNXT, RVAL);
6253 }
6254 else
6255 {
6256 _c4dbgp("seqblck[RVAL]: start mapblck, set scalar as key");
6257 addrem_flags(RNXT, RVAL);
6258 _handle_block_check_leading_tabs(startmark);
6259 _handle_annotations_before_start_mapblck(startline);
6260 _handle_colon();
6261 m_evt_handler->begin_map_val_block();
6262 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6263 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY!
6264 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6265 addrem_flags(RMAP|RVAL, RSEQ|RNXT);
6266 _maybe_skip_whitespace_tokens();
6267 goto seqblck_finish;
6268 }
6269 }
6270 // block scalars can only appear as keys when in QMRK scope
6271 // (ie, after ? tokens), so no need to scan following colon in
6272 // here.
6273 else if(first == '|')
6274 {
6275 _c4dbgp("seqblck[RVAL]: block-literal scalar");
6276 ScannedBlock sb;
6277 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6278 _handle_annotations_before_blck_val_scalar();
6279 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
6280 m_evt_handler->set_val_scalar_literal(maybe_filtered);
6281 addrem_flags(RNXT, RVAL);
6282 }
6283 else if(first == '>')
6284 {
6285 _c4dbgp("seqblck[RVAL]: block-folded scalar");
6286 ScannedBlock sb;
6287 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6288 _handle_annotations_before_blck_val_scalar();
6289 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
6290 m_evt_handler->set_val_scalar_folded(maybe_filtered);
6291 addrem_flags(RNXT, RVAL);
6292 }
6293 else if(_scan_scalar_plain_seq_blck(&sc))
6294 {
6295 _c4dbgp("seqblck[RVAL]: plain scalar.");
6296 if(!_maybe_scan_following_colon())
6297 {
6298 _c4dbgp("seqblck[RVAL]: set as val");
6299 _handle_annotations_before_blck_val_scalar();
6300 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref); // VAL!
6301 m_evt_handler->set_val_scalar_plain(maybe_filtered);
6302 addrem_flags(RNXT, RVAL);
6303 }
6304 else
6305 {
6306 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref != npos, m_evt_handler->m_curr->pos);
6307 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, startindent > m_evt_handler->m_curr->indref, m_evt_handler->m_curr->pos);
6308 _c4dbgp("seqblck[RVAL]: start mapblck, set scalar as key");
6309 _handle_block_check_leading_tabs(startmark, tabmark);
6310 addrem_flags(RNXT, RVAL);
6311 _handle_annotations_before_start_mapblck(startline);
6312 _handle_colon();
6313 m_evt_handler->begin_map_val_block();
6314 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6315 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY!
6316 m_evt_handler->set_key_scalar_plain(maybe_filtered);
6317 addrem_flags(RMAP|RVAL, RSEQ|RNXT);
6318 _maybe_skip_whitespace_tokens();
6319 goto seqblck_finish;
6320 }
6321 }
6322 else if(first == '[')
6323 {
6324 _c4dbgp("seqblck[RVAL]: start child seqflow");
6325 addrem_flags(RNXT, RVAL);
6326 _handle_annotations_before_blck_val_scalar();
6327 m_evt_handler->begin_seq_val_flow();
6328 addrem_flags(RFLOW|RVAL, RBLCK|RNXT);
6329 _line_progressed(1);
6330 _set_indentation(m_evt_handler->m_parent->indref + 1u);
6331 goto seqblck_finish;
6332 }
6333 else if(first == '{')
6334 {
6335 _c4dbgp("seqblck[RVAL]: start child mapflow");
6336 addrem_flags(RNXT, RVAL);
6337 _handle_annotations_before_blck_val_scalar();
6338 m_evt_handler->begin_map_val_flow();
6339 addrem_flags(RMAP|RKEY|RFLOW, RBLCK|RSEQ|RVAL|RNXT);
6340 _line_progressed(1);
6341 _set_indentation(m_evt_handler->m_parent->indref + 1u);
6342 goto seqblck_finish;
6343 }
6344 else if(first == '-')
6345 {
6346 _c4dbgp("seqblck[RVAL]: dash");
6347 _handle_block_check_leading_tabs(startmark);
6348 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref != npos, m_evt_handler->m_curr->pos);
6349 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, startindent > m_evt_handler->m_curr->indref, m_evt_handler->m_curr->pos);
6350 _c4dbgp("seqblck[RVAL]: start child seqblck");
6351 _RYML_ASSERT_PARSE_(this->callbacks(), startindent > m_evt_handler->m_curr->indref, m_evt_handler->m_curr->pos);
6352 addrem_flags(RNXT, RVAL);
6353 _handle_annotations_before_blck_val_scalar();
6354 m_evt_handler->begin_seq_val_block();
6355 addrem_flags(RVAL, RNXT);
6356 _set_indentation(startindent);
6357 // keep going on inside this function
6358 _line_progressed(1);
6359 }
6360 else if(first == ':')
6361 {
6362 _c4dbgp("seqblck[RVAL]: start child mapblck with empty key");
6363 addrem_flags(RNXT, RVAL);
6364 _handle_annotations_before_start_mapblck(startline);
6365 _handle_colon();
6366 m_evt_handler->begin_map_val_block();
6367 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6368 m_evt_handler->set_key_scalar_plain_empty();
6369 addrem_flags(RMAP|RVAL, RSEQ|RNXT);
6370 _line_progressed(1);
6371 _maybe_skip_whitespace_tokens();
6372 goto seqblck_finish;
6373 }
6374 else if(first == '&')
6375 {
6376 const csubstr anchor = _scan_anchor();
6377 _c4dbgpf("seqblck[RVAL]: anchor! {}", _prs(anchor));
6378 // we need to buffer the anchors, as there may be two
6379 // consecutive anchors in here
6380 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
6381 }
6382 else if(first == '*')
6383 {
6384 csubstr ref = _scan_ref_seq();
6385 _c4dbgpf("seqblck[RVAL]: ref! {}", _prs(ref));
6386 if(!_maybe_scan_following_colon())
6387 {
6388 _c4dbgp("seqblck[RVAL]: set ref as val!");
6389 _handle_valref(ref);
6390 addrem_flags(RNXT, RVAL);
6391 }
6392 else
6393 {
6394 _c4dbgp("seqblck[RVAL]: ref is key of map");
6395 addrem_flags(RNXT, RVAL);
6396 _handle_annotations_before_start_mapblck(startline);
6397 m_evt_handler->begin_map_val_block();
6398 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6399 _handle_keyref(ref);
6400 addrem_flags(RMAP|RVAL, RSEQ|RNXT);
6401 _set_indentation(startindent);
6402 _maybe_skip_whitespace_tokens();
6403 goto seqblck_finish;
6404 }
6405 }
6406 else if(first == '!')
6407 {
6408 csubstr tag = _scan_tag();
6409 _c4dbgpf("seqblck[RVAL]: val tag! {}", _prs(tag));
6410 // we need to buffer the tags, as there may be two
6411 // consecutive tags in here
6412 _add_annotation(&m_pending_tags, tag, startindent, startline);
6413 }
6414 else if(first == '?')
6415 {
6416 _c4dbgp("seqblck[RVAL]: start child mapblck, explicit key");
6417 addrem_flags(RNXT, RVAL);
6418 m_evt_handler->begin_map_val_block();
6419 addrem_flags(RMAP|QMRK, RSEQ|RNXT);
6420 _set_indentation(startindent);
6421 _line_progressed(1);
6422 _maybe_skipchars(' ');
6423 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
6424 {
6425 _c4dbgp("seqblck[RVAL]: seqblck starts after ?");
6426 addrem_flags(RKCL, QMRK);
6427 m_evt_handler->begin_seq_key_block();
6428 addrem_flags(RSEQ|RVAL, RMAP|RKCL);
6429 _save_indentation();
6430 _line_progressed(1);
6431 _maybe_skipchars(' ');
6432 }
6433 goto seqblck_finish;
6434 }
6435 else
6436 {
6437 _c4err("parse error");
6438 }
6439 }
6440 else // RNXT
6441 {
6442 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RNXT), m_evt_handler->m_curr->pos);
6443 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
6444 //
6445 // handle indentation
6446 //
6447 _c4dbgpf("seqblck[RNXT]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
6448 if(C4_LIKELY(m_evt_handler->m_curr->at_line_beginning()))
6449 {
6450 _c4dbgp("seqblck[RNXT]: at line begin");
6451 if(m_evt_handler->m_curr->indentation_ge())
6452 {
6453 _c4dbgpf("seqblck[RNXT]: skip {} from indref", m_evt_handler->m_curr->indref);
6454 _line_progressed(m_evt_handler->m_curr->indref);
6455 if(!m_evt_handler->m_curr->line_contents.rem.len)
6456 goto seqblck_again;
6457 }
6458 else if(m_evt_handler->m_curr->indentation_lt())
6459 {
6460 _c4dbgp("seqblck[RNXT]: smaller indentation!");
6461 _handle_indentation_pop_from_block_seq();
6462 if(has_all(RSEQ|RBLCK))
6463 {
6464 _c4dbgp("seqblck[RNXT]: still seqblck!");
6465 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RNXT), m_evt_handler->m_curr->pos);
6466 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6467 if(!m_evt_handler->m_curr->line_contents.rem.len)
6468 goto seqblck_again; // LCOV_EXCL_LINE
6469 }
6470 else
6471 {
6472 _c4dbgp("seqblck[RNXT]: no longer seqblck!");
6473 goto seqblck_finish;
6474 }
6475 }
6476 else if(m_evt_handler->m_curr->line_contents.indentation == npos)
6477 {
6478 _c4dbgpf("seqblck[RNXT]: blank line, len={}", m_evt_handler->m_curr->line_contents.rem);
6479 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
6480 if(!m_evt_handler->m_curr->line_contents.rem.len)
6481 goto seqblck_again; // LCOV_EXCL_LINE
6482 }
6483 }
6484 else
6485 {
6486 _c4dbgp("seqblck[RNXT]: NOT at line begin");
6487 if(!m_evt_handler->m_curr->line_contents.rem.begins_with_any(" \t"))
6488 {
6489 _c4err("parse error");
6490 }
6491 else
6492 {
6493 _skipchars(" \t");
6494 if(!m_evt_handler->m_curr->line_contents.rem.len)
6495 {
6496 _c4dbgp("seqblck[RNXT]: again");
6497 goto seqblck_again; // LCOV_EXCL_LINE
6498 }
6499 }
6500 }
6501 //
6502 // now handle the tokens
6503 //
6504 _c4assert(m_evt_handler->m_curr->line_contents.rem.len > 0);
6505 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
6506 _c4dbgpf("seqblck[RNXT]: '{}' node_id={}", _c4prc(first), m_evt_handler->m_curr->node_id);
6507 if(first == '-')
6508 {
6509 if(m_evt_handler->m_curr->indref > 0
6510 || m_evt_handler->m_curr->line_contents.indentation > 0
6511 || !_is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem))
6512 {
6513 if(C4_LIKELY(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem)))
6514 {
6515 _c4dbgp("seqblck[RNXT]: expect next val");
6516 addrem_flags(RVAL, RNXT);
6517 m_evt_handler->add_sibling();
6518 _line_progressed(1);
6519 }
6520 else
6521 {
6522 _c4err("parse error");
6523 }
6524 }
6525 else
6526 {
6527 _c4dbgp("seqblck[RNXT]: start doc");
6528 _start_doc_suddenly();
6529 _line_progressed(3);
6530 _maybe_skip_whitespace_tokens();
6531 goto seqblck_finish;
6532 }
6533 }
6534 else if(first == ':')
6535 {
6536 // This happens for example in `- [a: b]: c` (after
6537 // terminating the seq, ie, after `]`). All other cases
6538 // (ie colon after scalars) are caught elsewhere (ie, in
6539 // RVAL state).
6540 if(C4_LIKELY(m_evt_handler->m_parent && (m_evt_handler->m_parent->flags & RMAP)))
6541 {
6542 _c4dbgp("seqblck[RNXT]: actually this seq was '?' key of parent map");
6543 m_evt_handler->end_seq_block();
6544 goto seqblck_finish;
6545 }
6546 else
6547 {
6548 _c4err("parse error");
6549 }
6550 }
6551 else if(first == '.')
6552 {
6553 _c4dbgp("seqblck[RNXT]: maybe doc?");
6554 if(_is_doc_end_token(m_evt_handler->m_curr->line_contents.rem))
6555 {
6556 _c4dbgp("seqblck[RNXT]: end doc");
6557 _end_doc_suddenly();
6558 _line_progressed(3);
6559 _maybe_skip_whitespace_tokens();
6560 _check_doc_end_tokens();
6561 goto seqblck_finish;
6562 }
6563 else
6564 {
6565 _c4err("parse error");
6566 }
6567 }
6568 else
6569 {
6570 // may be an indentless sequence nested in a map...
6571 #ifdef RYML_DBG
6572 _print_state_stack();
6573 #endif
6574 if(m_evt_handler->m_parent
6575 && has_all(RMAP|RBLCK, m_evt_handler->m_parent)
6576 && m_evt_handler->m_curr->indref == m_evt_handler->m_parent->indref)
6577 {
6578 _c4dbgpf("seqblck[RNXT]: end indentless seq, go to parent={}. node={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id);
6579 _RYML_ASSERT_PARSE_(this->callbacks(), m_evt_handler->m_curr != m_evt_handler->m_parent, m_evt_handler->m_curr->pos);
6580 _handle_indentation_pop(m_evt_handler->m_parent);
6581 _RYML_ASSERT_PARSE_(this->callbacks(), has_all(RMAP|RBLCK), m_evt_handler->m_curr->pos);
6582 m_evt_handler->add_sibling();
6583 addrem_flags(RKEY, RNXT);
6584 goto seqblck_finish;
6585 }
6586 else if(first == '\t')
6587 {
6588 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of('\t');
6589 if(pos == npos)
6590 {
6591 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
6592 goto seqblck_again;
6593 }
6594 }
6595 _c4err("parse error");
6596 }
6597 }
6598
6599 seqblck_again:
6600 _c4dbgt("seqblck: go again", 0);
6601 if(_finished_line())
6602 {
6603 m_bom_len = 0;
6604 _line_ended();
6605 _scan_line();
6606 if(_finished_file())
6607 {
6608 _c4dbgp("seqblck: finish!");
6609 _end_seq_blck();
6610 goto seqblck_finish;
6611 }
6613 }
6614 goto seqblck_start;
6615
6616 seqblck_finish:
6617 _c4dbgp("seqblck: finish");
6618}
6619
6620
6621//-----------------------------------------------------------------------------
6622
// Handle parsing while the current state is a block-style map (RMAP|RBLCK).
//
// The function is a goto-driven loop over three labels:
//  - mapblck_start:  dispatch on the single active sub-state flag
//                    (RKEY, RVAL, RNXT, QMRK or RKCL; exactly one must be set).
//  - mapblck_again:  if the current line is exhausted, move to the next line
//                    (closing the map at end of file), then loop to start.
//  - mapblck_finish: return to the caller. Reached whenever the state stops
//                    being handled here: a child container was started, the
//                    indentation pop left a state that is not a block map, a
//                    document start/end token was consumed, or the file ended.
//
// Takes no arguments and returns nothing; all effects go through
// m_evt_handler and the parser state flags. Malformed input is reported
// through _c4err().
6623template<class EventHandler>
6624void ParseEngine<EventHandler>::_handle_map_block()
6625{
6626mapblck_start:
6627 _c4dbgpf("handle_map_block: map_id={} node_id={} level={} indref={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
6628
6629 // states: RKEY -> RVAL -> RNXT
6630 // states: QMRK -> RKCL -> RVAL -> RNXT
6631 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RMAP), m_evt_handler->m_curr->pos);
6632 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RBLCK), m_evt_handler->m_curr->pos);
6633 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RKCL|RVAL|RNXT|QMRK), m_evt_handler->m_curr->pos);
6634 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RKEY) + has_any(RKCL) + has_any(RVAL) + has_any(RNXT) + has_any(QMRK)), m_evt_handler->m_curr->pos);
6635
 // nothing left on this line once any comment was skipped: go fetch more input
6636 _maybe_skip_comment();
6637 if(!m_evt_handler->m_curr->line_contents.rem.len)
6638 goto mapblck_again;
6639
 // ---- RKEY: a key is expected next
6640 if(has_any(RKEY))
6641 {
6642 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
6643 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
6644 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
6645 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
6646 //
6647 // handle indentation
6648 //
6649 if(m_evt_handler->m_curr->at_line_beginning())
6650 {
6651 if(m_evt_handler->m_curr->indentation_eq())
6652 {
6653 _c4dbgpf("mapblck[RKEY]: skip {} from indref", m_evt_handler->m_curr->indref);
6654 _line_progressed(m_evt_handler->m_curr->indref);
6655 if(!m_evt_handler->m_curr->line_contents.rem.len)
6656 goto mapblck_again;
6657 }
6658 else if(m_evt_handler->m_curr->indentation_lt())
6659 {
6660 _c4dbgp("mapblck[RKEY]: smaller indentation!");
6661 _handle_indentation_pop_from_block_map();
6662 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6663 if(has_all(RMAP|RBLCK))
6664 {
6665 _c4dbgp("mapblck[RKEY]: still mapblck!");
6666 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RKEY), m_evt_handler->m_curr->pos);
6667 if(!m_evt_handler->m_curr->line_contents.rem.len)
6668 goto mapblck_again;
6669 }
6670 else
6671 {
6672 _c4dbgp("mapblck[RKEY]: no longer mapblck!");
6673 goto mapblck_finish;
6674 }
6675 }
6676 else
6677 {
 // a key more indented than the map's reference indentation is rejected
6678 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_gt(), m_evt_handler->m_curr->pos);
6679 _c4err("invalid indentation");
6680 }
6681 }
6682 //
6683 // now handle the tokens
6684 //
6685 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
6686 const size_t startline = m_evt_handler->m_curr->pos.line;
6687 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
6688 _c4dbgpf("mapblck[RKEY]: '{}'", _c4prc(first));
6689 ScannedScalar sc;
 // NOTE(review): the three scalar cases below (squot, dquot, plain) filter
 // a *key* through the _maybe_filter_val_scalar_*() helpers, whereas the
 // QMRK and RVAL->nested-map paths use _maybe_filter_key_scalar_*().
 // Confirm this is intended; the helpers are not visible from here.
6690 if(first == '\'')
6691 {
6692 _c4dbgp("mapblck[RKEY]: scanning single-quoted scalar");
6693 sc = _scan_scalar_squot();
6694 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
6695 _handle_annotations_before_blck_key_scalar();
6696 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6697 addrem_flags(RVAL, RKEY);
6698 if(!_maybe_scan_following_colon())
6699 _c4err("could not find ':' colon after key");
6700 _handle_colon();
6701 _maybe_skip_whitespace_tokens();
6702 }
6703 else if(first == '"')
6704 {
6705 _c4dbgp("mapblck[RKEY]: scanning double-quoted scalar");
6706 sc = _scan_scalar_dquot();
6707 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
6708 _handle_annotations_before_blck_key_scalar();
6709 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6710 addrem_flags(RVAL, RKEY);
6711 if(!_maybe_scan_following_colon())
6712 _c4err("could not find ':' colon after key");
6713 _handle_colon();
6714 _maybe_skip_whitespace_tokens();
6715 }
6716 // block scalars (| and >) can not be used as keys unless they
6717 // appear in an explicit QMRK scope (ie, after the ? token),
6718 else if(C4_UNLIKELY(first == '|'))
6719 {
6720 _c4err("block map: literal keys must be enclosed in '?'");
6721 }
6722 else if(C4_UNLIKELY(first == '>'))
6723 {
6724 _c4err("block map: folded keys must be enclosed in '?'");
6725 }
6726 else if(_scan_scalar_plain_map_blck(&sc))
6727 {
6728 _c4dbgp("mapblck[RKEY]: plain scalar");
6729 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
6730 _handle_annotations_before_blck_key_scalar();
6731 m_evt_handler->set_key_scalar_plain(maybe_filtered);
6732 addrem_flags(RVAL, RKEY);
6733 if(!_maybe_scan_following_colon())
6734 _c4err("could not find ':' colon after key");
6735 _handle_colon();
6736 _maybe_skip_whitespace_tokens();
6737 }
6738 else if(first == '?')
6739 {
 // explicit key marker: switch to QMRK. If a block seq token follows
 // the '?', the seq itself is opened as the key and this handler returns.
6740 _c4dbgp("mapblck[RKEY]: key token!");
6741 addrem_flags(QMRK, RKEY);
6742 _line_progressed(1);
6743 _maybe_skipchars(' ');
6744 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
6745 {
6746 _c4dbgp("mapblck[RKEY]: seqblck starts after ?");
6747 addrem_flags(RKCL, QMRK);
6748 m_evt_handler->begin_seq_key_block();
6749 addrem_flags(RSEQ|RVAL, RMAP|RKCL);
6750 _save_indentation();
6751 _line_progressed(1);
6752 _maybe_skipchars(' ');
6753 goto mapblck_finish;
6754 }
6755 goto mapblck_again;
6756 }
6757 else if(first == ':')
6758 {
 // a colon with no preceding scalar: the key is an empty plain scalar
6759 _c4dbgp("mapblck[RKEY]: setting empty key");
6760 _handle_annotations_before_blck_key_scalar();
6761 m_evt_handler->set_key_scalar_plain_empty();
6762 addrem_flags(RVAL, RKEY);
6763 _line_progressed(1);
6764 _handle_colon();
6765 _maybe_skip_whitespace_tokens();
6766 }
6767 else if(first == '*')
6768 {
6769 csubstr ref = _scan_ref_map();
6770 _c4dbgpf("mapblck[RKEY]: key ref! {}", _prs(ref));
6771 _handle_keyref(ref);
6772 addrem_flags(RVAL, RKEY);
6773 if(!_maybe_scan_following_colon())
6774 _c4err("could not find ':' colon after key");
6775 _handle_colon();
6776 _maybe_skip_whitespace_tokens();
6777 }
 // anchors and tags are only queued here (state stays RKEY); they are
 // consumed later by the _handle_annotations_*() calls
6778 else if(first == '&')
6779 {
6780 csubstr anchor = _scan_anchor();
6781 _c4dbgpf("mapblck[RKEY]: key anchor! {}", _prs(anchor));
6782 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
6783 }
6784 else if(first == '!')
6785 {
6786 csubstr tag = _scan_tag();
6787 _c4dbgpf("mapblck[RKEY]: key tag! {}", _prs(tag));
6788 _add_annotation(&m_pending_tags, tag, startindent, startline);
6789 }
6790 else if(first == '[')
6791 {
6792 // RYML's tree cannot store container keys, but that's
6793 // handled inside the tree handler. Other handlers may be
6794 // able to handle it.
6795 _c4dbgp("mapblck[RKEY]: start child seqflow (!)");
6796 _handle_annotations_before_blck_key_scalar();
6797 m_evt_handler->begin_seq_key_flow();
6798 addrem_flags(RSEQ|RFLOW|RVAL, RKEY|RMAP|RBLCK);
6799 _line_progressed(1);
6800 _set_indentation(startindent);
6801 goto mapblck_finish;
6802 }
6803 else if(first == '{')
6804 {
6805 // RYML's tree cannot store container keys, but that's
6806 // handled inside the tree handler. Other handlers may be
6807 // able to handle it.
6808 _c4dbgp("mapblck[RKEY]: start child mapflow (!)");
6809 _handle_annotations_before_blck_key_scalar();
6810 m_evt_handler->begin_map_key_flow();
6811 addrem_flags(RFLOW|RKEY, RBLCK);
6812 _line_progressed(1);
6813 _set_indentation(startindent);
6814 goto mapblck_finish;
6815 }
6816 else if(first == '-')
6817 {
 // in key position a '-' is only accepted as a document start token
 // at column zero
6818 _c4dbgp("mapblck[RKEY]: maybe doc?");
6819 if(m_evt_handler->m_curr->line_contents.indentation == 0 && _is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem))
6820 {
6821 _c4dbgp("mapblck[RKEY]: end+start doc");
6822 _start_doc_suddenly();
6823 _line_progressed(3);
6824 _maybe_skip_whitespace_tokens();
6825 goto mapblck_finish;
6826 }
6827 else
6828 {
6829 _c4err("parse error");
6830 }
6831 }
6832 else if(first == '.')
6833 {
 // likewise, a '.' is only accepted as a document end token at column zero
6834 _c4dbgp("mapblck[RKEY]: maybe end doc?");
6835 if(m_evt_handler->m_curr->line_contents.indentation == 0 && _is_doc_end_token(m_evt_handler->m_curr->line_contents.rem))
6836 {
6837 _c4dbgp("mapblck[RKEY]: end doc");
6838 _end_doc_suddenly();
6839 _line_progressed(3);
6840 _maybe_skip_whitespace_tokens();
6841 _check_doc_end_tokens();
6842 goto mapblck_finish;
6843 }
6844 else
6845 {
6846 _c4err("parse error"); // LCOV_EXCL_LINE
6847 }
6848 }
6849 else
6850 {
6851 _c4err("parse error");
6852 }
6853 }
 // ---- RVAL: the key and its colon were read; a value is expected next
6854 else if(has_any(RVAL))
6855 {
6856 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
6857 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
6858 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
6859 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
6860 //
6861 // handle indentation
6862 //
6863 if(m_evt_handler->m_curr->at_line_beginning())
6864 {
6865 _c4dbgpf("mapblck[RVAL]: indref={} indentation={}", m_evt_handler->m_curr->indref+1, m_evt_handler->m_curr->line_contents.indentation);
6866 m_evt_handler->m_curr->more_indented = false;
6867 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref != npos, m_evt_handler->m_curr->pos);
6868 if(m_evt_handler->m_curr->indentation_eq_extra())
6869 {
6870 _c4dbgp("mapblck[RVAL]: skip indentation!");
6871 _line_progressed(m_evt_handler->m_curr->indref + 1);
6872 if(!m_evt_handler->m_curr->line_contents.rem.len)
6873 goto mapblck_again;
6874 }
6875 else if(m_evt_handler->m_curr->indentation_gt_extra())
6876 {
6877 _c4dbgp("mapblck[RVAL]: more indented!");
6878 m_evt_handler->m_curr->more_indented = true;
6879 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6880 if(!m_evt_handler->m_curr->line_contents.rem.len)
6881 goto mapblck_again; // LCOV_EXCL_LINE
6882 }
6883 else if(m_evt_handler->m_curr->indentation_lt_extra())
6884 {
6885 if(m_evt_handler->m_curr->indentation_eq())
6886 {
6887 _c4dbgp("mapblck[RVAL]: smaller indentation than RVAL!");
6888 // watchout for indentless seqs
 // same indentation as the key and not a seq token: the pending
 // key receives an empty plain value. When it is a seq token,
 // control falls through to the token handling below.
6889 if(!_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem.sub(m_evt_handler->m_curr->line_contents.indentation)))
6890 {
6891 _c4dbgp("mapblck[RVAL]: smaller indentation than RVAL!");
6892 _handle_annotations_before_blck_val_scalar();
6893 m_evt_handler->set_val_scalar_plain_empty();
6894 addrem_flags(RNXT, RVAL);
6895 goto mapblck_again;
6896 }
6897 }
6898 else
6899 {
6900 _c4dbgp("mapblck[RVAL]: smaller indentation than RKEY!");
6901 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_lt(), m_evt_handler->m_curr->pos);
6902 _handle_indentation_pop_from_block_map();
6903 if(has_all(RMAP|RBLCK))
6904 {
6905 _c4dbgp("mapblck[RVAL]: still mapblck!");
6906 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6907 if(has_any(RNXT))
6908 {
6909 _c4dbgp("mapblck[RVAL]: speculatively expect next keyval");
6910 m_evt_handler->add_sibling();
6911 addrem_flags(RKEY, RNXT);
6912 }
6913 goto mapblck_again;
6914 }
6915 else
6916 {
6917 _c4dbgp("mapblck[RVAL]: no longer mapblck!");
6918 goto mapblck_finish;
6919 }
6920 }
6921 }
6922 }
6923 const size_t startcol = _handle_block_skip_leading_whitespace();
6924 if(startcol == npos)
6925 {
6926 _c4dbgp("mapblck[RVAL]: whitespace only");
6927 goto mapblck_again; // LCOV_EXCL_LINE
6928 }
6929 const size_t tabmark = _handle_block_get_whitespace_mark();
6930 //
6931 // now handle the tokens
6932 //
6933 _c4assert(m_evt_handler->m_curr->line_contents.rem.len);
6934 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
6935 const size_t startline = m_evt_handler->m_curr->pos.line;
6936 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
6937 _c4dbgpf("mapblck[RVAL]: '{}'", _c4prc(first));
6938 ScannedScalar sc;
 // For quoted and plain scalars the meaning depends on what follows:
 // no colon -> the scalar is the value; colon -> the scalar is the first
 // key of a nested block map, which is opened and left in RVAL.
6939 if(first == '\'')
6940 {
6941 _c4dbgp("mapblck[RVAL]: scanning single-quoted scalar");
6942 sc = _scan_scalar_squot();
6943 if(!_maybe_scan_following_colon())
6944 {
6945 _c4dbgp("mapblck[RVAL]: set as val");
6946 _handle_annotations_before_blck_val_scalar();
6947 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc); // VAL!
6948 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
6949 addrem_flags(RNXT, RVAL);
6950 }
6951 else
6952 {
6953 _c4assert(m_evt_handler->m_curr->indref != npos);
6954 _c4assert(startindent > m_evt_handler->m_curr->indref);
6955 _c4dbgp("mapblck[RVAL]: start new block map, set scalar as key");
6956 _handle_block_check_leading_tabs(startcol);
6957 _handle_annotations_before_start_mapblck(startline);
6958 addrem_flags(RNXT, RVAL);
6959 _handle_colon();
6960 m_evt_handler->begin_map_val_block();
6961 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6962 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY!
6963 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6964 _maybe_skip_whitespace_tokens();
6965 // keep the child state on RVAL
6966 addrem_flags(RVAL, RNXT);
6967 }
6968 }
6969 else if(first == '"')
6970 {
6971 _c4dbgp("mapblck[RVAL]: scanning double-quoted scalar");
6972 sc = _scan_scalar_dquot();
6973 if(!_maybe_scan_following_colon())
6974 {
6975 _c4dbgp("mapblck[RVAL]: set as val");
6976 _handle_annotations_before_blck_val_scalar();
6977 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc); // VAL!
6978 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
6979 addrem_flags(RNXT, RVAL);
6980 }
6981 else
6982 {
6983 _c4assert(m_evt_handler->m_curr->indref != npos);
6984 _c4assert(startindent > m_evt_handler->m_curr->indref);
6985 _c4dbgp("mapblck[RVAL]: start new block map, set scalar as key");
6986 _handle_block_check_leading_tabs(startcol);
6987 _handle_annotations_before_start_mapblck(startline);
6988 addrem_flags(RNXT, RVAL);
6989 _handle_colon();
6990 m_evt_handler->begin_map_val_block();
6991 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6992 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY!
6993 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6994 _maybe_skip_whitespace_tokens();
6995 // keep the child state on RVAL
6996 addrem_flags(RVAL, RNXT);
6997 }
6998 }
6999 // block scalars can only appear as keys when in QMRK scope
7000 // (ie, after ? tokens), so no need to scan following colon
7001 else if(first == '|')
7002 {
7003 _c4dbgp("mapblck[RVAL]: scanning block-literal scalar");
7004 ScannedBlock sb;
7005 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
7006 _handle_annotations_before_blck_val_scalar();
7007 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
7008 m_evt_handler->set_val_scalar_literal(maybe_filtered);
7009 addrem_flags(RNXT, RVAL);
7010 }
7011 else if(first == '>')
7012 {
7013 _c4dbgp("mapblck[RVAL]: scanning block-folded scalar");
7014 ScannedBlock sb;
7015 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
7016 _handle_annotations_before_blck_val_scalar();
7017 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
7018 m_evt_handler->set_val_scalar_folded(maybe_filtered);
7019 addrem_flags(RNXT, RVAL);
7020 }
7021 else if(_scan_scalar_plain_map_blck(&sc))
7022 {
7023 _c4dbgp("mapblck[RVAL]: plain scalar.");
7024 if(!_maybe_scan_following_colon())
7025 {
7026 _c4dbgp("mapblck[RVAL]: set as val");
7027 _handle_annotations_before_blck_val_scalar();
7028 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref); // VAL!
7029 m_evt_handler->set_val_scalar_plain(maybe_filtered);
7030 addrem_flags(RNXT, RVAL);
7031 }
7032 else
7033 {
7034 _c4assert(m_evt_handler->m_curr->indref != npos);
7035 _c4assert(startindent > m_evt_handler->m_curr->indref);
7036 _c4dbgpf("mapblck[RVAL]: start new block map, set scalar as key {}", m_evt_handler->m_curr->indref);
7037 _handle_block_check_leading_tabs(startcol, tabmark);
7038 addrem_flags(RNXT, RVAL);
7039 _handle_annotations_before_start_mapblck(startline);
7040 _handle_colon();
7041 m_evt_handler->begin_map_val_block();
7042 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7043 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY!
7044 m_evt_handler->set_key_scalar_plain(maybe_filtered);
7045 _maybe_skip_whitespace_tokens();
7046 // keep the child state on RVAL
7047 addrem_flags(RVAL, RNXT);
7048 }
7049 }
7050 else if(first == '-' && _is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7051 {
 // a block seq as value is only accepted as the first token of its line
7052 if(C4_UNLIKELY(!m_evt_handler->m_curr->at_first_token()))
7053 _c4err("parse error");
7054 _c4dbgp("mapblck[RVAL]: start val seqblck");
7055 _handle_block_check_leading_tabs(startcol);
7056 addrem_flags(RNXT, RVAL);
7057 _handle_annotations_before_blck_val_scalar();
7058 m_evt_handler->begin_seq_val_block();
7059 addrem_flags(RSEQ|RVAL, RMAP|RNXT);
7060 _set_indentation(startindent);
7061 _line_progressed(1);
7062 _maybe_skip_whitespace_tokens();
7063 goto mapblck_finish;
7064 }
7065 else if(first == '[')
7066 {
7067 _c4dbgp("mapblck[RVAL]: start val seqflow");
7068 addrem_flags(RNXT, RVAL);
7069 _handle_annotations_before_blck_val_scalar();
7070 m_evt_handler->begin_seq_val_flow();
7071 addrem_flags(RSEQ|RFLOW|RVAL, RMAP|RBLCK|RNXT);
7072 _set_indentation(m_evt_handler->m_parent->indref + 1u);
7073 _line_progressed(1);
7074 goto mapblck_finish;
7075 }
7076 else if(first == '{')
7077 {
7078 _c4dbgp("mapblck[RVAL]: start val mapflow");
7079 addrem_flags(RNXT, RVAL);
7080 _handle_annotations_before_blck_val_scalar();
7081 m_evt_handler->begin_map_val_flow();
7082 addrem_flags(RKEY|RFLOW, RBLCK|RVAL|RNXT);
7083 m_evt_handler->m_curr->scalar_col = m_evt_handler->m_curr->line_contents.indentation;
7084 _set_indentation(m_evt_handler->m_parent->indref + 1u);
7085 _line_progressed(1);
7086 goto mapblck_finish;
7087 }
7088 else if(first == '*')
7089 {
 // a reference followed by a colon is the first key of a nested block
 // map; otherwise it is the value itself
7090 csubstr ref = _scan_ref_map();
7091 _c4dbgpf("mapblck[RVAL]: ref! {}", _prs(ref));
7092 if(_maybe_scan_following_colon())
7093 {
7094 _c4dbgp("mapblck[RVAL]: start child map, block");
7095 addrem_flags(RNXT, RVAL);
7096 _handle_annotations_before_blck_val_scalar();
7097 m_evt_handler->begin_map_val_block();
7098 _handle_keyref(ref);
7099 _set_indentation(startindent);
7100 // keep going in RVAL
7101 addrem_flags(RVAL, RNXT);
7102 }
7103 else
7104 {
7105 _c4dbgp("mapblck[RVAL]: was val ref");
7106 _handle_valref(ref);
7107 addrem_flags(RNXT, RVAL);
7108 }
7109 _maybe_skip_whitespace_tokens();
7110 }
7111 else if(first == '&')
7112 {
7113 csubstr anchor = _scan_anchor();
7114 _c4dbgpf("mapblck[RVAL]: anchor! {}", _prs(anchor));
7115 // we need to buffer the anchors, as there may be two
7116 // consecutive anchors in here
7117 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
7118 }
7119 else if(first == '!')
7120 {
7121 csubstr tag = _scan_tag();
7122 _c4dbgpf("mapblck[RVAL]: tag! {}", _prs(tag));
7123 // we need to buffer the tags, as there may be two
7124 // consecutive tags in here
7125 _add_annotation(&m_pending_tags, tag, startindent, startline);
7126 }
7127 else if(first == '?')
7128 {
 // explicit key marker in value position: the value is a nested block
 // map that starts in QMRK (or with a block seq as its key)
7129 if(C4_UNLIKELY(!m_evt_handler->m_curr->at_first_token()))
7130 _c4err("parse error");
7131 _c4dbgp("mapblck[RVAL]: start val mapblck");
7132 addrem_flags(RNXT, RVAL);
7133 _handle_annotations_before_blck_val_scalar();
7134 m_evt_handler->begin_map_val_block();
7135 addrem_flags(QMRK, RNXT);
7136 _set_indentation(startindent);
7137 _line_progressed(1);
7138 _maybe_skipchars(' ');
7139 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7140 {
7141 _c4dbgp("mapblck[RVAL]: seqblck starts after ?");
7142 addrem_flags(RKCL, QMRK);
7143 m_evt_handler->begin_seq_key_block();
7144 addrem_flags(RSEQ|RVAL, RMAP|RKCL);
7145 _save_indentation();
7146 _line_progressed(1);
7147 _maybe_skipchars(' ');
7148 goto mapblck_finish;
7149 }
7150 goto mapblck_again;
7151 }
7152 else if(first == ':')
7153 {
 // a bare colon in value position: nested block map whose first key
 // is an empty plain scalar
7154 _c4dbgp("mapblck[RVAL]: start val mapblck");
7155 addrem_flags(RNXT, RVAL);
7156 _handle_annotations_before_start_mapblck(startline);
7157 _handle_colon();
7158 m_evt_handler->begin_map_val_block();
7159 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7160 m_evt_handler->set_key_scalar_plain_empty();
7161 // keep the child state on RVAL
7162 addrem_flags(RVAL, RNXT);
7163 _line_progressed(1);
7164 _maybe_skip_whitespace_tokens();
7165 goto mapblck_again;
7166 }
7167 else
7168 {
7169 _c4err("parse error"); // LCOV_EXCL_LINE
7170 }
7171 }
 // ---- RNXT: a key/val pair was completed; the indentation of the next
 // line decides whether a sibling pair follows or the map is popped
7172 else if(has_any(RNXT))
7173 {
7174 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
7175 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
7176 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
7177 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
7178 //
7179 // handle indentation
7180 //
7181 if(m_evt_handler->m_curr->at_line_beginning())
7182 {
7183 _c4dbgpf("mapblck[RNXT]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
7184 if(m_evt_handler->m_curr->indentation_eq())
7185 {
7186 _c4dbgpf("mapblck[RNXT]: skip {} from indref", m_evt_handler->m_curr->indref);
7187 _line_progressed(m_evt_handler->m_curr->indref);
7188 _c4dbgp("mapblck[RNXT]: speculatively expect next keyval");
7189 m_evt_handler->add_sibling();
7190 addrem_flags(RKEY, RNXT);
7191 goto mapblck_again;
7192 }
7193 else if(m_evt_handler->m_curr->indentation_lt())
7194 {
7195 _c4dbgp("mapblck[RNXT]: smaller indentation!");
7196 _handle_indentation_pop_from_block_map();
7197 if(has_all(RMAP|RBLCK))
7198 {
7199 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
 // the popped-to map may be waiting for its key colon (RKCL);
 // only add a sibling when it is not
7200 if(!has_any(RKCL))
7201 {
7202 _c4dbgp("mapblck[RNXT]: speculatively expect next keyval");
7203 m_evt_handler->add_sibling();
7204 addrem_flags(RKEY, RNXT);
7205 }
7206 goto mapblck_again;
7207 }
7208 else
7209 {
7210 goto mapblck_finish;
7211 }
7212 }
7213 }
7214 else
7215 {
7216 _c4dbgp("mapblck[RNXT]: NOT at line begin");
7217 if(!m_evt_handler->m_curr->line_contents.rem.begins_with_any(" \t"))
7218 {
7219 _c4err("parse error");
7220 }
7221 else
7222 {
7223 _skipchars(" \t");
7224 if(!m_evt_handler->m_curr->line_contents.rem.len)
7225 {
 // (debug label fixed: this is the map handler, not the seq handler)
7226 _c4dbgp("mapblck[RNXT]: again");
7227 goto mapblck_again; // LCOV_EXCL_LINE
7228 }
7229 }
7230 }
7231 //
7232 // handle tokens
7233 //
 // only a leading space is tolerated at this point; any other
 // character is a parse error
7234 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.len > 0, m_evt_handler->m_curr->pos);
7235 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
7236 _c4dbgpf("mapblck[RNXT]: '{}'", _c4prc(first));
7237 if(first == ' ')
7238 {
7239 _c4dbgp("mapblck[RNXT]: skip spaces");
7240 _maybe_skip_whitespace_tokens();
7241 }
7242 else
7243 {
7244 _c4err("parse error");
7245 }
7246 }
 // ---- QMRK / RKCL: delegated to helpers; a true return means "stay in
 // this handler and loop", false means "leave the handler"
7247 else if(has_any(QMRK))
7248 {
7249 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
7250 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
7251 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
7252 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
7253 if(_handle_map_block_qmrk())
7254 goto mapblck_again;
7255 else
7256 goto mapblck_finish;
7257 }
7258 else if(has_any(RKCL)) // read the key colon (after QMRK)
7259 {
7260 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
7261 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
7262 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
7263 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
7264 if(_handle_map_block_rkcl())
7265 goto mapblck_again;
7266 else
7267 goto mapblck_finish;
7268 }
7269
 // When the current line is exhausted, advance to the next one; at end of
 // file close the map and leave. Otherwise loop back to the dispatch.
7270 mapblck_again:
7271 _c4dbgt("mapblck: again", 0);
7272 if(_finished_line())
7273 {
7274 _line_ended();
7275 _scan_line();
7276 if(_finished_file())
7277 {
7278 _c4dbgp("mapblck: file finished!");
7279 _end_map_blck();
7280 goto mapblck_finish;
7281 }
7283 }
7284 goto mapblck_start;
7285
7286 mapblck_finish:
7287 _c4dbgp("mapblck: finish");
7288}
7289
7290
7291//-----------------------------------------------------------------------------
7292
7293// return true if we should remain in map_block
7294template<class EventHandler>
7295bool ParseEngine<EventHandler>::_handle_map_block_qmrk()
7296{
7297 //
7298 // handle indentation
7299 //
7300 if(m_evt_handler->m_curr->at_line_beginning())
7301 {
7302 _c4dbgpf("mapblck[QMRK]: at line beginning. ind={} indref={}", m_evt_handler->m_curr->line_contents.indentation, m_evt_handler->m_curr->indref);
7303 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.indentation != npos, m_evt_handler->m_curr->pos);
7304 if(m_evt_handler->m_curr->indentation_eq_extra())
7305 {
7306 _c4dbgpf("mapblck[QMRK]: skip {} from indref", m_evt_handler->m_curr->indref + 1);
7307 _line_progressed(m_evt_handler->m_curr->indref + 1);
7308 if(!m_evt_handler->m_curr->line_contents.rem.len)
7309 return true; // go again
7310 }
7311 // indentation can be larger in QMRK state
7312 else if(m_evt_handler->m_curr->indentation_gt_extra())
7313 {
7314 _c4dbgp("mapblck[QMRK]: larger indentation !");
7315 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
7316 if(!m_evt_handler->m_curr->line_contents.rem.len)
7317 return true; // go again
7318 }
7319 else
7320 {
7321 _c4dbgp("mapblck[QMRK]: smaller indentation!");
7322 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_lt_extra(), m_evt_handler->m_curr->pos);
7323 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.len > 0, m_evt_handler->m_curr->pos);
7324 if(m_evt_handler->m_curr->indentation_eq()
7325 // defend against docs or indentless seqs
7326 && m_evt_handler->m_curr->line_contents.rem.str[0] != '-')
7327 {
7328 _c4dbgp("mapblck[QMRK]: QMRK finished!");
7329 _handle_annotations_before_blck_key_scalar();
7330 m_evt_handler->set_key_scalar_plain_empty();
7331 addrem_flags(RKCL, QMRK);
7332 return true; // go again
7333 }
7334 else if(m_evt_handler->m_curr->indentation_lt())
7335 {
7336 _c4dbgp("mapblck[QMRK]: indentation pop!");
7337 _handle_indentation_pop_from_block_map();
7338 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
7339 if(has_all(RMAP|RBLCK))
7340 {
7341 _c4dbgp("mapblck[QMRK]: still mapblck!");
7342 return true; // go again
7343 }
7344 else
7345 {
7346 _c4dbgp("mapblck[QMRK]: no longer mapblck!");
7347 return false; // finish mapblck
7348 }
7349 }
7350 }
7351 }
7352 //
7353 // now handle the tokens
7354 //
7355 _c4assert(m_evt_handler->m_curr->line_contents.rem.len);
7356 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
7357 const size_t startline = m_evt_handler->m_curr->pos.line;
7358 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
7359 _c4dbgpf("mapblck[QMRK]: '{}'", first);
7360 ScannedScalar sc;
7361 if(first == '\'')
7362 {
7363 _c4dbgp("mapblck[QMRK]: scanning single-quoted scalar");
7364 sc = _scan_scalar_squot();
7365 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY!
7366 addrem_flags(RKCL, QMRK);
7367 if(!_maybe_scan_following_colon())
7368 {
7369 _c4dbgp("mapblck[QMRK]: set as key");
7370 _handle_annotations_before_blck_key_scalar();
7371 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7372 }
7373 else
7374 {
7375 _c4dbgp("mapblck[QMRK]: start new block map as key (!), set scalar as key");
7376 _handle_annotations_before_start_mapblck_as_key();
7377 m_evt_handler->begin_map_key_block();
7378 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7379 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7380 _maybe_skip_whitespace_tokens();
7381 _set_indentation(startindent);
7382 // keep the child state on RVAL
7383 addrem_flags(RVAL, RKCL);
7384 }
7385 }
7386 else if(first == '"')
7387 {
7388 _c4dbgp("mapblck[QMRK]: scanning double-quoted scalar");
7389 sc = _scan_scalar_dquot();
7390 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY!
7391 addrem_flags(RKCL, QMRK);
7392 if(!_maybe_scan_following_colon())
7393 {
7394 _c4dbgp("mapblck[QMRK]: set as key");
7395 _handle_annotations_before_blck_key_scalar();
7396 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7397 }
7398 else
7399 {
7400 _c4dbgp("mapblck[QMRK]: start new block map as key (!), set scalar as key");
7401 _handle_annotations_before_start_mapblck_as_key();
7402 m_evt_handler->begin_map_key_block();
7403 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7404 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7405 _maybe_skip_whitespace_tokens();
7406 _set_indentation(startindent);
7407 // keep the child state on RVAL
7408 addrem_flags(RVAL, RKCL);
7409 }
7410 }
7411 else if(first == '|')
7412 {
7413 _c4dbgp("mapblck[QMRK]: scanning block-literal scalar");
7414 ScannedBlock sb;
7415 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
7416 csubstr maybe_filtered = _maybe_filter_key_scalar_literal(sb); // KEY!
7417 _handle_annotations_before_blck_key_scalar();
7418 m_evt_handler->set_key_scalar_literal(maybe_filtered);
7419 addrem_flags(RKCL, QMRK);
7420 }
7421 else if(first == '>')
7422 {
7423 _c4dbgp("mapblck[QMRK]: scanning block-literal scalar");
7424 ScannedBlock sb;
7425 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
7426 csubstr maybe_filtered = _maybe_filter_key_scalar_folded(sb); // KEY!
7427 _handle_annotations_before_blck_key_scalar();
7428 m_evt_handler->set_key_scalar_folded(maybe_filtered);
7429 addrem_flags(RKCL, QMRK);
7430 }
7431 else if(_scan_scalar_plain_map_blck(&sc))
7432 {
7433 _c4dbgp("mapblck[QMRK]: plain scalar");
7434 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY!
7435 addrem_flags(RKCL, QMRK);
7436 if(!_maybe_scan_following_colon())
7437 {
7438 _c4dbgp("mapblck[QMRK]: set as key");
7439 _handle_annotations_before_blck_key_scalar();
7440 m_evt_handler->set_key_scalar_plain(maybe_filtered);
7441 }
7442 else
7443 {
7444 _c4dbgp("mapblck[QMRK]: start new block map as key (!), set scalar as key");
7445 _handle_annotations_before_start_mapblck_as_key();
7446 m_evt_handler->begin_map_key_block();
7447 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7448 m_evt_handler->set_key_scalar_plain(maybe_filtered);
7449 _maybe_skip_whitespace_tokens();
7450 _set_indentation(startindent);
7451 // keep the child state on RVAL
7452 addrem_flags(RVAL, RKCL);
7453 }
7454 }
7455 else if(first == ':')
7456 {
7457 _c4dbgp("mapblck[QMRK]: start new block map as key (!), empty key");
7458 addrem_flags(RKCL, QMRK);
7459 _handle_annotations_before_start_mapblck_as_key();
7460 m_evt_handler->begin_map_key_block();
7461 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7462 m_evt_handler->set_key_scalar_plain_empty();
7463 _line_progressed(1);
7464 _maybe_skip_whitespace_tokens();
7465 _set_indentation(startindent);
7466 // keep the child state on RVAL
7467 addrem_flags(RVAL, RKCL);
7468 }
7469 else if(first == '*')
7470 {
7471 csubstr ref = _scan_ref_map();
7472 _c4dbgpf("mapblck[QMRK]: key ref! {}", _prs(ref));
7473 addrem_flags(RKCL, QMRK);
7474 if(!_maybe_scan_following_colon())
7475 {
7476 _c4dbgp("mapblck[QMRK]: set ref as key");
7477 _handle_keyref(ref);
7478 }
7479 else
7480 {
7481 _c4dbgp("mapblck[QMRK]: start new block map as key (!), set ref as key");
7482 _handle_annotations_before_start_mapblck_as_key();
7483 m_evt_handler->begin_map_key_block();
7484 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7485 _handle_keyref(ref);
7486 _set_indentation(startindent);
7487 // keep the child state on RVAL
7488 addrem_flags(RVAL, RKCL|QMRK);
7489 }
7490 _maybe_skip_whitespace_tokens();
7491 }
7492 else if(first == '&')
7493 {
7494 csubstr anchor = _scan_anchor();
7495 _c4dbgpf("mapblck[QMRK]: key anchor! {}", _prs(anchor));
7496 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
7497 }
7498 else if(first == '!')
7499 {
7500 csubstr tag = _scan_tag();
7501 _c4dbgpf("mapblck[QMRK]: key tag! {}", _prs(tag));
7502 _add_annotation(&m_pending_tags, tag, startindent, startline);
7503 }
7504 else if(first == '-')
7505 {
7506 _c4dbgp("mapblck[QMRK]: maybe seq or doc?");
7507 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7508 {
7509 _c4dbgp("mapblck[QMRK]: start child seqblck (!)");
7510 addrem_flags(RKCL, QMRK);
7511 _handle_annotations_before_blck_key_scalar();
7512 m_evt_handler->begin_seq_key_block();
7513 addrem_flags(RVAL|RSEQ, RMAP|RKCL);
7514 _set_indentation(startindent);
7515 _line_progressed(1);
7516 }
7517 else
7518 {
7519 _c4dbgp("mapblck[QMRK]: end+start doc");
7520 _c4assert(_is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem));
7521 _start_doc_suddenly();
7522 _line_progressed(3);
7523 }
7524 _maybe_skip_whitespace_tokens();
7525 return false; // finish mapblck
7526 }
7527 else if(first == '[')
7528 {
7529 _c4dbgp("mapblck[QMRK]: start child seqflow (!)");
7530 addrem_flags(RKCL, QMRK);
7531 _handle_annotations_before_blck_key_scalar();
7532 m_evt_handler->begin_seq_key_flow();
7533 addrem_flags(RVAL|RSEQ|RFLOW, RMAP|RKCL|RBLCK);
7534 _set_indentation(m_evt_handler->m_parent->indref + 1);
7535 _line_progressed(1);
7536 return false; // finish mapblck
7537 }
7538 else if(first == '{')
7539 {
7540 _c4dbgp("mapblck[QMRK]: start child mapflow (!)");
7541 addrem_flags(RKCL, QMRK);
7542 _handle_annotations_before_blck_key_scalar();
7543 m_evt_handler->begin_map_key_flow();
7544 addrem_flags(RKEY|RFLOW, RVAL|RKCL|RBLCK);
7545 _set_indentation(m_evt_handler->m_parent->indref + 1);
7546 _line_progressed(1);
7547 return false; // finish mapblck
7548 }
7549 else if(first == '?')
7550 {
7551 _c4dbgpf("mapblck[QMRK]: another QMRK '?'. ind={} indref={}", startindent, m_evt_handler->m_curr->indref);
7552 _RYML_ASSERT_PARSE_(callbacks(), startindent > m_evt_handler->m_curr->indref, m_evt_handler->m_curr->pos);
7553 _c4dbgp("mapblck[QMRK]: ? indent gt - start child mapblck (!)");
7554 addrem_flags(RKCL, QMRK);
7555 _handle_annotations_before_blck_key_scalar();
7556 m_evt_handler->begin_map_key_block();
7557 addrem_flags(QMRK, RKCL);
7558 _set_indentation(startindent);
7559 // indentation_lt() should be handled elsewhere
7560 _line_progressed(1);
7561 _maybe_skipchars(' ');
7562 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7563 {
7564 _c4dbgp("mapblck[RVAL]: seqblck starts after ?");
7565 addrem_flags(RKCL, QMRK);
7566 m_evt_handler->begin_seq_key_block();
7567 addrem_flags(RSEQ|RVAL, RMAP|RKCL);
7568 _save_indentation();
7569 _line_progressed(1);
7570 _maybe_skipchars(' ');
7571 return false;
7572 }
7573 }
7574 else
7575 {
7576 _c4err("parse error");
7577 }
7578 return true; // continue in mapblck
7579}
7580
7581
7582//-----------------------------------------------------------------------------
7583
7584// return true if we should remain in map_block
7585template<class EventHandler>
7586bool ParseEngine<EventHandler>::_handle_map_block_rkcl()
7587{
7588 //
7589 // handle indentation
7590 //
7591 if(m_evt_handler->m_curr->at_line_beginning())
7592 {
7593 if(m_evt_handler->m_curr->indentation_eq())
7594 {
7595 _c4dbgpf("mapblck[RKCL]: skip {} from indref", m_evt_handler->m_curr->indref);
7596 _line_progressed(m_evt_handler->m_curr->indref);
7597 if(!m_evt_handler->m_curr->line_contents.rem.len)
7598 return true; // continue in mapblck
7599 }
7600 else if(C4_UNLIKELY(m_evt_handler->m_curr->indentation_lt()))
7601 {
7602 _c4err("invalid indentation");
7603 }
7604 }
7605 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
7606 _c4dbgpf("mapblck[RKCL]: '{}'", first);
7607 if(first == ':')
7608 {
7609 _c4dbgp("mapblck[RKCL]: found the colon");
7610 _line_progressed(1);
7611 _maybe_skipchars(' ');
7612 #if defined(__GNUC__) && ( \
7613 ((__GNUC__ >= 12) && ((C4_WORDSIZE == 4) || defined(C4_CPU_S390_X) || defined(C4_CPU_PPC64))) \
7614 || \
7615 (__GNUC__ == 16 && defined(C4_CPU_X86_64)))
7616 C4_DONT_OPTIMIZE(m_evt_handler->m_curr->line_contents.rem);
7617 #endif
7618 // sequence is valid after the RKCL ':'
7619 if(!_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7620 {
7621 addrem_flags(RVAL, RKCL);
7622 return true; // continue in mapblck
7623 }
7624 else
7625 {
7626 _c4dbgp("mapblck[RKCL]: start val seqblck");
7627 addrem_flags(RNXT, RKCL);
7628 m_evt_handler->begin_seq_val_block();
7629 addrem_flags(RSEQ|RVAL, RMAP|RNXT);
7630 _save_indentation();
7631 _line_progressed(1);
7632 _maybe_skipchars(' ');
7633 return false; // finish mapblck
7634 }
7635 }
7636 else if(first == '?')
7637 {
7638 _c4dbgp("mapblck[RKCL]: got '?'. val was empty");
7639 m_evt_handler->set_val_scalar_plain_empty();
7640 m_evt_handler->add_sibling();
7641 addrem_flags(QMRK, RKCL);
7642 _line_progressed(1);
7643 _maybe_skipchars(' ');
7644 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7645 {
7646 _c4dbgp("mapblck[RKCL]: seqblck starts after ?");
7647 addrem_flags(RKCL, QMRK);
7648 m_evt_handler->begin_seq_key_block();
7649 addrem_flags(RSEQ|RVAL, RMAP|QMRK);
7650 _save_indentation();
7651 _line_progressed(1);
7652 _maybe_skipchars(' ');
7653 return false;
7654 }
7655 }
7656 else if(first == '-')
7657 {
7658 if(m_evt_handler->m_curr->indref == 0 || m_evt_handler->m_curr->line_contents.indentation == 0 || _is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem))
7659 {
7660 _c4dbgp("mapblck[RKCL]: end+start doc");
7661 _RYML_CHECK_PARSE_(m_evt_handler->m_stack.m_callbacks, _is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem), m_evt_handler->m_curr->pos);
7662 _start_doc_suddenly();
7663 _line_progressed(3);
7664 _maybe_skip_whitespace_tokens();
7665 return false; // finish mapblck
7666 }
7667 else
7668 {
7669 _c4err("parse error"); // LCOV_EXCL_LINE
7670 }
7671 }
7672 else if(first == '.')
7673 {
7674 _c4dbgp("mapblck[RKCL]: maybe end doc?");
7675 csubstr rs = m_evt_handler->m_curr->line_contents.rem.sub(1);
7676 if(rs == ".." || rs.begins_with(".. "))
7677 {
7678 _c4dbgp("mapblck[RKCL]: end+start doc");
7679 _end_doc_suddenly();
7680 _line_progressed(3);
7681 _maybe_skip_whitespace_tokens();
7682 _check_doc_end_tokens();
7683 return false; // finish mapblck
7684 }
7685 else
7686 {
7687 _c4err("parse error"); // LCOV_EXCL_LINE
7688 }
7689 }
7690 else/* if(m_was_inside_qmrk) */
7691 {
7692 _c4dbgp("mapblck[RKCL]: missing :");
7693 if(C4_UNLIKELY(!m_evt_handler->m_curr->indentation_eq()))
7694 _c4err("parse error"); // LCOV_EXCL_LINE
7695 m_evt_handler->set_val_scalar_plain_empty();
7696 m_evt_handler->add_sibling();
7697 addrem_flags(RKEY, RKCL);
7698 }
7699 return true;
7700}
7701
7702
7703//-----------------------------------------------------------------------------
7704
7705template<class EventHandler>
7706void ParseEngine<EventHandler>::_handle_unk_json()
7707{
7708 _c4dbgpf("handle_unk_json indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
7709
7710 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT|RSEQ|RMAP), m_evt_handler->m_curr->pos);
7711 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RTOP), m_evt_handler->m_curr->pos);
7712
7713 _maybe_skip_comment();
7714 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
7715 if(!rem.len)
7716 return;
7717
7718 size_t pos = rem.first_not_of(" \t");
7719 if(pos)
7720 {
7721 pos = pos != npos ? pos : rem.len;
7722 _c4dbgpf("skipping indentation of {}", pos);
7723 _line_progressed(pos);
7724 rem = m_evt_handler->m_curr->line_contents.rem;
7725 if(!rem.len)
7726 return;
7727 _c4dbgpf("rem is now {}", _prs(rem));
7728 }
7729
7730 if(rem.begins_with('['))
7731 {
7732 _c4dbgp("it's a seq");
7733 _check_trailing_doc_token();
7734 _maybe_begin_doc();
7735 m_evt_handler->begin_seq_val_flow();
7736 addrem_flags(RSEQ|RFLOW|RVAL, RUNK|RTOP|RDOC);
7737 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7738 m_doc_empty = false;
7739 _line_progressed(1);
7740 }
7741 else if(rem.begins_with('{'))
7742 {
7743 _c4dbgp("it's a map");
7744 _check_trailing_doc_token();
7745 _maybe_begin_doc();
7746 m_evt_handler->begin_map_val_flow();
7747 addrem_flags(RMAP|RFLOW|RKEY, RVAL|RTOP|RUNK|RDOC);
7748 m_doc_empty = false;
7749 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7750 _line_progressed(1);
7751 }
7752 else if(_handle_bom())
7753 {
7754 _c4dbgp("byte order mark");
7755 }
7756 else
7757 {
7758 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL), m_evt_handler->m_curr->pos);
7759 _maybe_skip_whitespace_tokens();
7760 csubstr s = m_evt_handler->m_curr->line_contents.rem;
7761 if(!s.len)
7762 return;
7763 const size_t startindent = m_evt_handler->m_curr->line_contents.indentation; // save
7764 const char first = s.str[0];
7765 ScannedScalar sc;
7766 if(first == '"')
7767 {
7768 _c4dbgp("runk_json: scanning double-quoted scalar");
7769 _check_trailing_doc_token();
7770 _maybe_begin_doc();
7771 add_flags(RDOC);
7772 m_doc_empty = false;
7773 sc = _scan_scalar_dquot();
7774 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
7775 if(!_maybe_scan_following_colon())
7776 {
7777 _c4dbgp("runk_json: set as val");
7778 _handle_annotations_before_blck_val_scalar();
7779 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
7780 }
7781 else
7782 {
7783 _c4err("parse error");
7784 }
7785 }
7786 else if(_scan_scalar_plain_unk(&sc))
7787 {
7788 _c4dbgp("runk_json: got a plain scalar");
7789 _check_trailing_doc_token();
7790 _maybe_begin_doc();
7791 add_flags(RDOC);
7792 m_doc_empty = false;
7793 if(!_maybe_scan_following_colon())
7794 {
7795 _c4dbgp("runk_json: set as val");
7796 _handle_annotations_before_blck_val_scalar();
7797 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
7798 m_evt_handler->set_val_scalar_plain(maybe_filtered);
7799 }
7800 else
7801 {
7802 _c4err("parse error"); // LCOV_EXCL_LINE
7803 }
7804 }
7805 else
7806 {
7807 _c4err("parse error"); // LCOV_EXCL_LINE
7808 }
7809 }
7810}
7811
7812
7813//-----------------------------------------------------------------------------
7814
7815template<class EventHandler>
7816void ParseEngine<EventHandler>::_handle_unk()
7817{
7818 _c4dbgpf("handle_unk indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
7819
7820 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT|RSEQ|RMAP), m_evt_handler->m_curr->pos);
7821 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RTOP), m_evt_handler->m_curr->pos);
7822
7823 _maybe_skipchars(' ');
7824 _maybe_skip_comment();
7825
7826 if(!m_evt_handler->m_curr->line_contents.rem.len)
7827 return;
7828
7829 _c4dbgpf("runk: rem is now {}", _prs(m_evt_handler->m_curr->line_contents.rem));
7830
7831 if(m_evt_handler->m_curr->line_contents.indentation == 0u && (m_evt_handler->m_curr->at_line_beginning() || (m_bom_len && (m_evt_handler->m_curr->pos.line == m_bom_line))))
7832 {
7833 _c4dbgpf("runk: rtop: zero indent + at line begin. offset={}", m_evt_handler->m_curr->pos.offset);
7834 _c4dbgp("runk: check BOM");
7835 if(_handle_bom())
7836 {
7837 m_bom_line = m_evt_handler->m_curr->pos.line;
7838 _c4dbgpf("runk: byte order mark! line={} offset={}", m_bom_line, m_evt_handler->m_curr->pos.offset);
7839 return;
7840 }
7841 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
7842 _c4dbgpf("runk: rtop: first={}", _c4prc(first));
7843 if(first == '-')
7844 {
7845 _c4dbgp("runk: rtop: suspecting doc");
7846 if(_is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem))
7847 {
7848 _c4dbgp("runk: rtop: begin doc");
7849 _maybe_end_doc();
7850 _begin2_doc_expl();
7851 _set_indentation(0);
7852 addrem_flags(RDOC|RUNK, NDOC);
7853 _line_progressed(3u);
7854 _maybe_skip_whitespace_tokens();
7855 return;
7856 }
7857 }
7858 else if(first == '.')
7859 {
7860 _c4dbgp("runk: rtop: suspecting doc end");
7861 if(_is_doc_end_token(m_evt_handler->m_curr->line_contents.rem))
7862 {
7863 _c4dbgp("runk: rtop: end doc");
7864 if(has_any(RDOC))
7865 {
7866 _end2_doc_expl();
7867 }
7868 else
7869 {
7870 _c4dbgp("runk: rtop: ignore end doc");
7871 }
7872 addrem_flags(NDOC|RUNK, RDOC);
7873 _line_progressed(3u);
7874 _maybe_skip_whitespace_tokens();
7875 _check_doc_end_tokens();
7876 return;
7877 }
7878 }
7879 else if(first == '%')
7880 {
7881 _c4dbgpf("directive: {}", m_evt_handler->m_curr->line_contents.rem);
7882 if(C4_UNLIKELY(has_any(RDOC) || (!m_doc_empty && has_none(NDOC))))
7883 _c4err("need document footer before directives");
7884 _handle_directive(m_evt_handler->m_curr->line_contents.rem);
7885 return;
7886 }
7887 }
7888
7889 /* no else-if! */
7890
7891 size_t startindent = m_evt_handler->m_curr->line_contents.indentation;
7892 size_t remindent = m_evt_handler->m_curr->line_contents.current_col(m_evt_handler->m_curr->line_contents.rem);
7893 if(m_bom_len)
7894 {
7895 _c4dbgpf("runk: prev BOMlen={}", m_bom_len);
7896 if(m_evt_handler->m_curr->pos.line == m_bom_line)
7897 {
7898 _c4dbgpf("runk: BOM remindent={} offset={}", remindent, m_evt_handler->m_curr->pos.offset);
7899 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, remindent >= m_bom_len, m_evt_handler->m_curr->pos);
7900 remindent -= m_bom_len;
7901 }
7902 else
7903 {
7904 m_bom_len = 0;
7905 }
7906 }
7907
7908 size_t startcol = _handle_block_skip_leading_whitespace();
7909 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
7910
7911 if(first == '[')
7912 {
7913 _c4dbgp("runk: flow seq?");
7914 _handle_unk_begin_doc();
7915 if(C4_LIKELY( ! _annotations_require_key_container()))
7916 {
7917 _c4dbgp("runk: it's a seq, flow");
7918 _handle_annotations_before_blck_val_scalar();
7919 m_evt_handler->begin_seq_val_flow();
7920 addrem_flags(RSEQ|RFLOW|RVAL, RUNK|RTOP|RDOC);
7921 _set_indentation(0);
7922 }
7923 else
7924 {
7925 _c4dbgp("runk: start new block map, set flow seq as key (!)");
7926 _handle_annotations_before_start_mapblck(m_evt_handler->m_curr->pos.line);
7927 m_evt_handler->begin_map_val_block();
7928 addrem_flags(RMAP|RBLCK|RKEY, RUNK|RTOP|RDOC);
7929 _handle_annotations_and_indentation_after_start_mapblck(remindent, m_evt_handler->m_curr->pos.line);
7930 m_evt_handler->begin_seq_key_flow();
7931 addrem_flags(RSEQ|RFLOW|RVAL, RMAP|RBLCK|RKEY);
7932 _set_indentation(0);
7933 }
7934 _line_progressed(1);
7935 }
7936 else if(first == '{')
7937 {
7938 _c4dbgp("runk: flow map?");
7939 _handle_unk_begin_doc();
7940 if(C4_LIKELY( ! _annotations_require_key_container()))
7941 {
7942 _c4dbgp("runk: it's a map, flow");
7943 _handle_annotations_before_blck_val_scalar();
7944 m_evt_handler->begin_map_val_flow();
7945 addrem_flags(RMAP|RFLOW|RKEY, RVAL|RTOP|RUNK|RDOC);
7946 _set_indentation(0);
7947 }
7948 else
7949 {
7950 _c4dbgp("runk: start new block map, set flow map as key (!)");
7951 _handle_annotations_before_start_mapblck(m_evt_handler->m_curr->pos.line);
7952 m_evt_handler->begin_map_val_block();
7953 addrem_flags(RMAP|RBLCK|RKEY, RUNK|RTOP|RDOC);
7954 _handle_annotations_and_indentation_after_start_mapblck(remindent, m_evt_handler->m_curr->pos.line);
7955 m_evt_handler->begin_map_key_flow();
7956 addrem_flags(RMAP|RFLOW, RBLCK);
7957 _set_indentation(0);
7958 }
7959 _line_progressed(1);
7960 }
7961 else if(first == '-' && _is_blck_token(m_evt_handler->m_curr->line_contents.rem))
7962 {
7963 _c4dbgp("runk: it's a seq, block");
7964 if(C4_UNLIKELY(!m_evt_handler->m_curr->at_first_token()))
7965 startindent = _handle_unk_check_left_tokens(startindent, m_evt_handler->m_curr->pos.col, /*skip_annotations*/false);
7966 _handle_unk_begin_doc();
7967 _handle_annotations_before_blck_val_scalar();
7968 m_evt_handler->begin_seq_val_block();
7969 addrem_flags(RSEQ|RBLCK|RVAL, RNXT|RTOP|RUNK|RDOC);
7970 _set_indentation(startindent);
7971 _line_progressed(1);
7972 _maybe_skipchars(' ');
7973 }
7974 else if(first == '?' && _is_blck_token(m_evt_handler->m_curr->line_contents.rem))
7975 {
7976 _c4dbgp("runk: it's a map + this key is complex");
7977 if(C4_UNLIKELY(!m_evt_handler->m_curr->at_first_token()))
7978 startindent = _handle_unk_check_left_tokens(startindent, m_evt_handler->m_curr->pos.col, /*skip_annotations*/false);
7979 _handle_block_check_leading_tabs(startcol);
7980 _handle_unk_begin_doc();
7981 _handle_annotations_before_blck_val_scalar();
7982 m_evt_handler->begin_map_val_block();
7983 addrem_flags(RMAP|RBLCK|QMRK, RKEY|RVAL|RTOP|RUNK|RDOC);
7984 _set_indentation(startindent);
7985 _line_progressed(1);
7986 _maybe_skipchars(' ');
7987 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7988 {
7989 _c4dbgp("runk: seqblck key starts after ?");
7990 addrem_flags(RKCL, QMRK);
7991 m_evt_handler->begin_seq_key_block();
7992 addrem_flags(RSEQ|RVAL, RMAP|RKCL);
7993 _save_indentation();
7994 _line_progressed(1);
7995 _maybe_skipchars(' ');
7996 }
7997 }
7998 else if(first == ':' && _is_blck_token(m_evt_handler->m_curr->line_contents.rem))
7999 {
8000 if(m_doc_empty || (m_pending_anchors.num_entries | m_pending_tags.num_entries))
8001 {
8002 _c4dbgp("runk: it's a map with an empty key");
8003 if(C4_UNLIKELY(!m_evt_handler->m_curr->at_first_token()))
8004 startindent = _handle_unk_check_left_tokens(startindent, m_evt_handler->m_curr->pos.col);
8005 _handle_block_check_leading_tabs(startcol);
8006 const size_t startline = m_evt_handler->m_curr->pos.line; // save
8007 _handle_unk_begin_doc();
8008 _handle_annotations_before_start_mapblck(startline);
8009 _handle_colon();
8010 m_evt_handler->begin_map_val_block();
8011 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8012 m_evt_handler->set_key_scalar_plain_empty();
8013 _set_indentation(startindent);
8014 }
8015 else
8016 {
8017 _c4err("block colon cannot occur on a new line unless ? is used");
8018 }
8019 addrem_flags(RMAP|RBLCK|RVAL, RTOP|RUNK|RDOC);
8020 _line_progressed(1);
8021 _maybe_skip_whitespace_tokens();
8022 }
8023 else if(first == '&')
8024 {
8025 csubstr anchor = _scan_anchor();
8026 _c4dbgpf("anchor! {}", _prs(anchor));
8027 const size_t line = m_evt_handler->m_curr->pos.line;
8028 _handle_unk_begin_doc();
8029 _add_annotation(&m_pending_anchors, anchor, remindent, line);
8030 _set_indentation(0);
8031 }
8032 else if(first == '*')
8033 {
8034 csubstr ref = _scan_ref_map();
8035 _c4dbgpf("runk: ref! {}", _prs(ref));
8036 _handle_unk_begin_doc();
8037 if(!_maybe_scan_following_colon())
8038 {
8039 _c4dbgp("runk: set val ref");
8040 _handle_valref(ref);
8041 }
8042 else
8043 {
8044 _c4dbgp("runk: start new block map, set ref as key");
8045 _handle_block_check_leading_tabs(startcol);
8046 const size_t startline = m_evt_handler->m_curr->pos.line; // save
8047 _handle_annotations_before_start_mapblck(startline);
8048 m_evt_handler->begin_map_val_block();
8049 _handle_keyref(ref);
8050 _maybe_skip_whitespace_tokens();
8051 _set_indentation(0);
8052 addrem_flags(RMAP|RBLCK|RVAL, RTOP|RUNK|RDOC);
8053 }
8054 }
8055 else if(first == '!')
8056 {
8057 csubstr tag_orig;
8058 csubstr tag = _scan_tag(&tag_orig);
8059 _c4dbgpf("runk: val tag! {}", _prs(tag));
8060 // we need to buffer the tags, as there may be two
8061 // consecutive tags in here
8062 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(m_evt_handler->m_curr->line_contents.rem);
8063 const size_t line = m_evt_handler->m_curr->pos.line;
8064 _add_annotation(&m_pending_tags, tag, indentation, line, tag_orig);
8065 }
8066 else
8067 {
8068 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL), m_evt_handler->m_curr->pos);
8069 const size_t startscalar = _handle_block_get_whitespace_mark();
8070 const size_t startline = m_evt_handler->m_curr->pos.line; // save
8071 auto beginmap = [&](size_t startindent_){
8072 if(C4_UNLIKELY(m_evt_handler->m_curr->pos.line > startline))
8073 _c4err("multiline scalars cannot be used as implicit keys");
8074 _handle_block_check_leading_tabs(startcol, startscalar);
8075 _handle_annotations_before_start_mapblck(startline);
8076 _handle_colon();
8077 m_evt_handler->begin_map_val_block();
8078 _handle_annotations_and_indentation_after_start_mapblck(startindent_, startline);
8079 };
8080 auto after_beginmap = [&](size_t startindent_){
8081 _maybe_skip_whitespace_tokens();
8082 _set_indentation(startindent_);
8083 addrem_flags(RMAP|RBLCK|RVAL, RTOP|RUNK|RDOC);
8084 };
8085 if(first == '|')
8086 {
8087 _c4dbgp("runk: block-literal scalar");
8088 _handle_unk_begin_doc();
8089 ScannedBlock sb;
8090 _scan_block(&sb, startindent);
8091 _handle_annotations_before_blck_val_scalar();
8092 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
8093 m_evt_handler->set_val_scalar_literal(maybe_filtered);
8094 }
8095 else if(first == '>')
8096 {
8097 _c4dbgp("runk: block-folded scalar");
8098 _handle_unk_begin_doc();
8099 ScannedBlock sb;
8100 _scan_block(&sb, startindent);
8101 _handle_annotations_before_blck_val_scalar();
8102 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
8103 m_evt_handler->set_val_scalar_folded(maybe_filtered);
8104 }
8105 else if(first == '\'')
8106 {
8107 _c4dbgp("runk: single-quoted scalar");
8108 _handle_unk_begin_doc();
8109 bool firsttoken = m_evt_handler->m_curr->at_first_token();
8110 size_t col = m_evt_handler->m_curr->pos.col;
8111 ScannedScalar sc = _scan_scalar_squot();
8112 if(!_maybe_scan_following_colon())
8113 {
8114 _c4dbgp("runk: set as val");
8115 _handle_annotations_before_blck_val_scalar();
8116 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
8117 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
8118 }
8119 else
8120 {
8121 _c4dbgp("runk: start new block map, set single-quoted scalar as key");
8122 if(!firsttoken)
8123 startindent = _handle_unk_check_left_tokens(startindent, col);
8124 beginmap(startindent);
8125 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
8126 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
8127 after_beginmap(startindent);
8128 }
8129 }
8130 else if(first == '"')
8131 {
8132 _c4dbgp("runk: double-quoted scalar");
8133 _handle_unk_begin_doc();
8134 bool firsttoken = m_evt_handler->m_curr->at_first_token();
8135 size_t col = m_evt_handler->m_curr->pos.col;
8136 ScannedScalar sc = _scan_scalar_dquot();
8137 if(!_maybe_scan_following_colon())
8138 {
8139 _c4dbgp("runk: set as val");
8140 _handle_annotations_before_blck_val_scalar();
8141 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
8142 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
8143 }
8144 else
8145 {
8146 _c4dbgp("runk: start new block map, set double-quoted scalar as key");
8147 if(!firsttoken)
8148 startindent = _handle_unk_check_left_tokens(startindent, col);
8149 beginmap(startindent);
8150 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
8151 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
8152 after_beginmap(startindent);
8153 }
8154 }
8155 else
8156 {
8157 bool firsttoken = m_evt_handler->m_curr->at_first_token();
8158 size_t col = m_evt_handler->m_curr->pos.col;
8159 ScannedScalar sc;
8160 if(_scan_scalar_plain_unk(&sc))
8161 {
8162 _c4dbgp("runk: plain scalar");
8163 _handle_unk_begin_doc();
8164 if(!_maybe_scan_following_colon())
8165 {
8166 _c4dbgp("runk: set as val");
8167 _handle_annotations_before_blck_val_scalar();
8168 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
8169 m_evt_handler->set_val_scalar_plain(maybe_filtered);
8170 }
8171 else
8172 {
8173 _c4dbgp("runk: start new block map, set plain scalar as key");
8174 if(!firsttoken)
8175 startindent = _handle_unk_check_left_tokens(startindent, col);
8176 beginmap(startindent);
8177 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
8178 m_evt_handler->set_key_scalar_plain(maybe_filtered);
8179 after_beginmap(startindent);
8180 }
8181 }
8182 else
8183 {
8184 _c4err("parse error"); // LCOV_EXCL_LINE
8185 }
8186 }
8187 }
8188}
8189
// Common prologue used by the _handle_unk() branches before they emit
// the first content of a document. In order, it:
//  - checks for a trailing document token (_check_trailing_doc_token),
//  - begins the document if needed (_maybe_begin_doc),
//  - sets the RDOC flag,
//  - records that the document is no longer empty (m_doc_empty).
8190template<class EventHandler>
8191void ParseEngine<EventHandler>::_handle_unk_begin_doc()
8192{
8193 _c4dbgp("runk: begin doc");
8194 _check_trailing_doc_token();
8195 _maybe_begin_doc();
8196 add_flags(RDOC);
// from here on the current document has content
8197 m_doc_empty = false;
8198}
8199
8200template<class EventHandler>
8201size_t ParseEngine<EventHandler>::_handle_unk_check_left_tokens(size_t realindent, size_t col, bool skip_annotations)
8202{
8203 _c4assert(col >= 1);
8204 col -= 1;
8205 _c4assert(col >= m_bom_len);
8206 csubstr s = m_evt_handler->m_curr->line_contents.full.range(m_bom_len, col);
8207 size_t pos = 0;
8208 _c4dbgpf("runk: check left tokens: s={}", _prs(s, /*escape*/true));
8209 if(skip_annotations)
8210 {
8211 _handle_unk_get_first_non_pending_token_pos(s, &realindent, &pos);
8212 _c4dbgpf("runk: skip annotations: realindent={} pos={}", realindent, pos);
8213 }
8214 size_t firstns = s.first_not_of(' ', pos);
8215 if(firstns == npos)
8216 firstns = s.len;
8217 _c4dbgpf("runk: check left tokens:\n"
8218 " tokens={} skipped={}\n"
8219 " bomlen={} first={} col={}\n"
8220 " (bomlen+first)={} vs {}=col\n"
8221 " startindent={} lineindent={}"
8222 , _prs(s, /*escape*/true), _prs(s.sub(firstns), /*escape*/true)
8223 , m_bom_len, firstns, col
8224 , m_bom_len+firstns, col,
8225 realindent, m_evt_handler->m_curr->line_contents.indentation);
8226 if(m_bom_len + firstns != col)
8227 _c4err("parse error");
8228 if(!skip_annotations)
8229 realindent = firstns;
8230 _c4dbgpf("runk: pos={} firstns={} -> realindent={}", pos, firstns, realindent);
8231 return realindent;
8232}
8233
8234
8235/** skip annotations which are pending on the same line */
// s: the text in which the annotations are looked for; the caller in
//    this file passes the part of the current line to the left of a token.
// indent (out): position in s of the first character which is not a
//    space or tab (s.len when there is none).
// first_non_token_pos (out): position in s just past the one or two
//    annotations pending on the same line; equal to *indent when there
//    are no such annotations.
8236template<class EventHandler>
8237void ParseEngine<EventHandler>::_handle_unk_get_first_non_pending_token_pos(csubstr s, size_t *indent, size_t *first_non_token_pos)
8238{
8239 csubstr first, second;
// total is the number of annotations on the current line; first and
// second receive their text
8240 uint32_t total = _get_annotations_same_line(s, &first, &second);
8241 _c4dbgpf("runk: before skip: {}", _prs(s, true));
8242 size_t pos = s.first_not_of(" \t");
8243 if(pos == npos)
8244 pos = s.len;
// no same-line annotations: both outputs are the first non-blank position
8245 if(!total)
8246 {
8247 *indent = *first_non_token_pos = pos;
8248 return;
8249 }
8250 _c4assert(!s.sub(pos).begins_with_any(" \t"));
8251 _c4dbgpf("runk: after skip leading {} whitespace: {}", pos, _prs(s.sub(pos), true));
8252 _c4dbgpf("runk: first annotation: {}", first);
// the first annotation must start exactly at the first non-blank position
8253 _c4assert(first.len);
8254 _c4assert(first.is_sub(s));
8255 _c4assert(first.is_sub(s.sub(pos)));
8256 _c4assert(s.sub(pos).begins_with(first));
// the indentation is where the first annotation starts
8257 *indent = pos;
8258 pos += first.len;
8259 _c4dbgpf("runk: after skip first annotation: pos={} {}", pos, _prs(s.sub(pos), true));
8260 if(total > 1)
8261 {
8262 _c4dbgpf("runk: second annotation: {}", second);
8263 _c4assert(total == 2);
8264 _c4assert(second.len);
8265 _c4assert(second.is_sub(s));
8266 _c4assert(second.is_sub(s.sub(pos)));
// skip the blanks between the two annotations, then the second annotation
8267 csubstr spos = s.sub(pos);
8268 size_t more = spos.first_not_of(" \t");
8269 _c4assert(more != npos); // because the annotations are on the same line
8270 _c4dbgpf("runk: next nonspace: {}", pos + more);
8271 pos += more;
8272 _c4dbgpf("runk: after skip annotation whitespace: pos={} {}", pos, _prs(s.sub(pos), true));
8273 _c4assert(s.sub(pos).begins_with(second));
8274 pos += second.len;
8275 _c4dbgpf("runk: after skip annotation 2: pos={} {}", pos, _prs(s.sub(pos), true));
8276 }
8277 *first_non_token_pos = pos;
8278}
8279
8280
8281template<class EventHandler>
8282uint32_t ParseEngine<EventHandler>::_get_annotations_same_line(csubstr token_soup, csubstr *first_, csubstr *second_) const
8283{
8284 _c4assert(!m_evt_handler->m_curr->at_first_token());
8285 (void)token_soup;
8286 using EntryPtr = typename Annotation::Entry const* C4_RESTRICT;
8287 EntryPtr first = nullptr;
8288 EntryPtr second = nullptr;
8289 uint32_t total = (uint32_t)(m_pending_anchors.num_entries + m_pending_tags.num_entries);
8290 if(total)
8291 {
8292 _c4dbgpf("there are {} pending annotations: {} anchors + {} tags", total, m_pending_anchors.num_entries, m_pending_tags.num_entries);
8293 auto valid_if_same_line = [this](EntryPtr entry){
8294 _c4dbgpf("pending: {} indent={} line={} vs currline={}", _maybe_null_str(entry->str), entry->indentation, entry->line, m_evt_handler->m_curr->pos.line);
8295 return (entry->line == m_evt_handler->m_curr->pos.line) ? entry : nullptr;
8296 };
8297 // now select annotations only on the same line
8298 total = 0;
8299 for(size_t i = 0; i < m_pending_anchors.num_entries; ++i)
8300 total += !!valid_if_same_line(&m_pending_anchors.annotations[i]);
8301 for(size_t i = 0; i < m_pending_tags.num_entries; ++i)
8302 total += !!valid_if_same_line(&m_pending_tags.annotations[i]);
8303 _c4dbgpf("{} annotations on same line", total);
8304 _c4assert(total > 0); // because this function is only called
8305 // while not at the first token. That
8306 // means we must have same-line
8307 // annotations.
8308 auto get_first_on_same_line = [this](EntryPtr not_this_one){
8309 for(size_t i = 0; i < m_pending_anchors.num_entries; ++i)
8310 if(&m_pending_anchors.annotations[i] != not_this_one
8311 && m_pending_anchors.annotations[i].line == m_evt_handler->m_curr->pos.line)
8312 return &m_pending_anchors.annotations[i];
8313 for(size_t i = 0; i < m_pending_tags.num_entries; ++i)
8314 if(&m_pending_tags.annotations[i] != not_this_one
8315 && m_pending_tags.annotations[i].line == m_evt_handler->m_curr->pos.line)
8316 return &m_pending_tags.annotations[i];
8317 C4_UNREACHABLE();
8318 return (EntryPtr)nullptr; // LCOV_EXCL_LINE
8319 };
8320 _c4assert(total >= 1);
8321 // assign to first
8322 first = get_first_on_same_line(nullptr);
8323 _c4assert(first);
8324 _c4dbgpf("first annotation: {} indent={} line={}", _maybe_null_str(first->str), first->indentation, first->line);
8325 if(total > 1)
8326 {
8327 _c4assert(total == 2);
8328 // assign to second
8329 second = get_first_on_same_line(first);
8330 _c4assert(second);
8331 _c4dbgpf("second annotation: {} indent={} line={}", _maybe_null_str(second->str), second->indentation, second->line);
8332 }
8333 auto extract_string = [&](EntryPtr e){
8334 // tags can be null when the arena ran out of space
8335 if(!e->str.str || e->str.begins_with_any("!<"))
8336 {
8337 csubstr tag = e->orig;
8338 _c4assert(tag.str);
8339 _c4assert(tag.len);
8340 _c4assert(tag.is_sub(token_soup));
8341 _c4dbgpf("tag: {} -> {}", _maybe_null_str(e->str), tag);
8342 return tag;
8343 }
8344 csubstr anchor = e->str;
8345 _c4assert(anchor.len);
8346 _c4assert(anchor.str);
8347 _c4assert(anchor.is_sub(token_soup));
8348 _c4assert(!anchor.begins_with('&'));
8349 _c4assert(anchor.str - token_soup.str > 0);
8350 // add back the anchor's &
8351 --anchor.str;
8352 ++anchor.len;
8353 _c4assert(anchor.begins_with('&'));
8354 _c4dbgpf("anchor: {} -> {}", e->str, anchor);
8355 return anchor;
8356 };
8357 *first_ = first ? extract_string(first) : nullptr;
8358 *second_ = second ? extract_string(second) : nullptr;
8359 if(total > 1 && (first_->str > second_->str))
8360 {
8361 csubstr tmp = *first_;
8362 *first_ = *second_;
8363 *second_ = tmp;
8364 _c4dbgpf("swap first and second: {} -> {}", *first_, *second_);
8365 }
8366 }
8367 return total;
8368}
8369
8370
8371//-----------------------------------------------------------------------------
8372
/** Handle the remainder of the current line while neither RBLCK nor
 * RFLOW is set (asserted on entry); this is the handler dispatched
 * for the USTY flag.
 *
 * After skipping a comment and any leading spaces/tabs, the first
 * remaining character selects what happens, depending on the
 * destination flags:
 *  - RSEQ set: only '[' or a block '-' token is accepted; a new state
 *    is pushed and switched to flow or block reading.
 *  - RMAP set: '{', a block '?' or a block ':' push a new map state;
 *    '&' and '!' are buffered as pending annotations; '*' and scalars
 *    are accepted only when _maybe_scan_following_colon() succeeds,
 *    and then become the key of a new block map; '[' and a block '-'
 *    are errors.
 *  - otherwise (unknown destination): a seq or map is begun through
 *    the event handler, annotations are buffered, and a ref or scalar
 *    without a following colon is set as val.
 *
 * Takes no parameters and returns nothing; it works on the parser
 * state reachable through m_evt_handler->m_curr.
 */
8373template<class EventHandler>
8374C4_COLD void ParseEngine<EventHandler>::_handle_usty()
8375{
 // NOTE(review): the trace below has one {} placeholder but is given two arguments.
8376 _c4dbgpf("handle_usty target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
8377
8378 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RBLCK|RFLOW), m_evt_handler->m_curr->pos);
8379
8380 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
8381 if(has_any(RNXT))
8382 {
8383 _c4dbgp("usty[RNXT]: finishing!");
8384 _end_stream();
8385 }
8386 #endif
8387
 // nothing to do when no content remains on the line
8388 _maybe_skip_comment();
8389 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
8390 if(!rem.len)
8391 return;
8392
 // consume leading spaces/tabs (the whole remainder when it is all blank)
8393 size_t pos = rem.first_not_of(" \t");
8394 if(pos)
8395 {
8396 pos = pos != npos ? pos : rem.len;
8397 _c4dbgpf("skipping indentation of {}", pos);
8398 _line_progressed(pos);
8399 rem = m_evt_handler->m_curr->line_contents.rem;
8400 if(!rem.len)
8401 return;
8402 _c4dbgpf("rem is now {}", _prs(rem));
8403 }
8404
8405 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, rem.len > 0, m_evt_handler->m_curr->pos);
8406 size_t startindent = m_evt_handler->m_curr->line_contents.indentation; // save
 // the first remaining character selects the branch taken below
8407 char first = rem.str[0];
8408 if(has_any(RSEQ)) // destination is a sequence
8409 {
8410 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(RMAP), m_evt_handler->m_curr->pos);
8411 _c4dbgpf("usty[RSEQ]: first='{}'", _c4prc(first));
8412 if(first == '[')
8413 {
8414 _c4dbgp("usty[RSEQ]: it's a flow seq. merging it");
 // push a new state, switch it to RFLOW|RVAL and consume the '['
8415 add_flags(RNXT);
8416 m_evt_handler->_push();
8417 addrem_flags(RFLOW|RVAL, RNXT|USTY);
8418 _set_indentation(startindent);
8419 _line_progressed(1);
8420 _maybe_skip_whitespace_tokens();
8421 }
8422 else if(first == '-' && _is_blck_token(rem))
8423 {
8424 _c4dbgp("usty[RSEQ]: it's a block seq. merging it");
 // push a new state, switch it to RBLCK|RVAL and consume the '-'
8425 add_flags(RNXT);
8426 m_evt_handler->_push();
8427 addrem_flags(RBLCK|RVAL, RNXT|USTY);
8428 _set_indentation(startindent);
8429 _line_progressed(1);
8430 _maybe_skip_whitespace_tokens();
8431 }
8432 else
8433 {
8434 _c4err("can only parse a seq into an existing seq");
8435 }
8436 }
8437 else if(has_any(RMAP)) // destination is a map
8438 {
8439 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(RSEQ), m_evt_handler->m_curr->pos);
8440 _c4dbgpf("usty[RMAP]: first='{}'", _c4prc(first));
8441 if(first == '{')
8442 {
8443 _c4dbgp("usty[RMAP]: it's a flow map. merging it");
8444 add_flags(RNXT);
8445 _handle_annotations_before_blck_val_scalar();
8446 m_evt_handler->_push();
8447 addrem_flags(RMAP|RFLOW|RKEY, RNXT|USTY);
8448 _set_indentation(startindent);
8449 _line_progressed(1);
8450 _maybe_skip_whitespace_tokens();
8451 }
8452 else if(first == '?' && _is_blck_token(rem))
8453 {
8454 _c4dbgp("usty[RMAP]: it's a block map + this key is complex");
8455 add_flags(RNXT);
8456 _handle_annotations_before_blck_val_scalar();
8457 m_evt_handler->_push();
8458 addrem_flags(RMAP|RBLCK|QMRK, RNXT|USTY);
8459 _save_indentation();
8460 _line_progressed(1);
8461 _maybe_skip_whitespace_tokens();
8462 }
8463 else if(first == ':' && _is_blck_token(rem))
8464 {
8465 _c4dbgp("usty[RMAP]: it's a map with an empty key");
8466 add_flags(RNXT);
8467 _handle_annotations_before_blck_val_scalar();
8468 m_evt_handler->_push();
8469 m_evt_handler->set_key_scalar_plain_empty();
8470 addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8471 _save_indentation();
8472 _line_progressed(1);
8473 _maybe_skip_whitespace_tokens();
8474 }
8475 else if(rem.begins_with('&'))
8476 {
 // buffer the anchor as pending, together with the column and line where it was found
8477 csubstr anchor = _scan_anchor();
8478 _c4dbgpf("usty[RMAP]: anchor! {}", _prs(anchor));
8479 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8480 const size_t line = m_evt_handler->m_curr->pos.line;
8481 _add_annotation(&m_pending_anchors, anchor, indentation, line);
8482 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
8483 }
8484 else if(first == '*')
8485 {
 // with a map destination a ref is only accepted as a key, ie when
 // _maybe_scan_following_colon() succeeds
8486 csubstr ref = _scan_ref_map();
8487 _c4dbgpf("usty[RMAP]: ref! {}", _prs(ref));
8488 if(!_maybe_scan_following_colon())
8489 {
8490 _c4err("cannot read a VAL to a map");
8491 }
8492 else
8493 {
8494 _c4dbgp("usty[RMAP]: start new block map, set ref as key");
8495 const size_t startline = m_evt_handler->m_curr->pos.line; // save
8496 add_flags(RNXT);
8497 _handle_annotations_before_start_mapblck(startline);
8498 m_evt_handler->_push();
8499 _handle_keyref(ref);
8500 _maybe_skip_whitespace_tokens();
8501 _set_indentation(startindent);
8502 addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8503 }
8504 }
8505 else if(first == '!')
8506 {
8507 csubstr tag = _scan_tag();
8508 _c4dbgpf("usty[RMAP]: val tag! {}", _prs(tag));
8509 // we need to buffer the tags, as there may be two
8510 // consecutive tags in here
8511 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8512 const size_t line = m_evt_handler->m_curr->pos.line;
8513 _add_annotation(&m_pending_tags, tag, indentation, line);
8514 }
8515 else if(first == '[' || (first == '-' && _is_blck_token(rem)))
8516 {
8517 _c4err("cannot parse a seq into an existing map");
8518 }
8519 else
8520 {
 // anything else is tried as a scalar; with a map destination it
 // must be followed by a colon so that it can be used as a key
8521 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL), m_evt_handler->m_curr->pos);
8522 startindent = m_evt_handler->m_curr->line_contents.indentation; // save
8523 const size_t startline = m_evt_handler->m_curr->pos.line; // save
8524 ScannedScalar sc;
8525 _c4dbgpf("usty[RMAP]: maybe scalar. first='{}'", _c4prc(first));
8526 if(first == '\'')
8527 {
8528 _c4dbgp("usty[RMAP]: scanning single-quoted scalar");
8529 sc = _scan_scalar_squot();
8530 if(!_maybe_scan_following_colon())
8531 {
8532 _c4err("cannot read a VAL to a map");
8533 }
8534 else
8535 {
8536 _c4dbgp("usty[RMAP]: start new block map, set scalar as key");
8537 add_flags(RNXT);
8538 _handle_annotations_before_start_mapblck(startline);
8539 m_evt_handler->_push();
8540 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8541 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
8542 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
8543 _set_indentation(startindent);
8544 addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8545 _maybe_skip_whitespace_tokens();
8546 }
8547 }
8548 else if(first == '"')
8549 {
8550 _c4dbgp("usty[RMAP]: scanning double-quoted scalar");
8551 sc = _scan_scalar_dquot();
8552 if(!_maybe_scan_following_colon())
8553 {
8554 _c4err("cannot read a VAL to a map");
8555 }
8556 else
8557 {
8558 _c4dbgp("usty[RMAP]: start new block map, set double-quoted scalar as key");
8559 add_flags(RNXT);
8560 _handle_annotations_before_start_mapblck(startline);
8561 m_evt_handler->_push();
8562 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8563 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
8564 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
8565 _set_indentation(startindent);
8566 addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8567 _maybe_skip_whitespace_tokens();
8568 }
8569 }
8570 else if(first == '|')
8571 {
8572 _c4err("block literal keys must be enclosed in '?'");
8573 }
8574 else if(first == '>')
8575 {
 // NOTE(review): same message as for '|', although the unknown-destination
 // branch of this function calls the '>' scalar "block-folded".
8576 _c4err("block literal keys must be enclosed in '?'");
8577 }
8578 else if(_scan_scalar_plain_unk(&sc))
8579 {
8580 _c4dbgp("usty[RMAP]: got a plain scalar");
8581 if(!_maybe_scan_following_colon())
8582 {
8583 _c4err("cannot read a VAL to a map");
8584 }
8585 else
8586 {
8587 _c4dbgp("usty[RMAP]: start new block map, set scalar as key");
8588 add_flags(RNXT);
8589 _handle_annotations_before_start_mapblck(startline);
8590 m_evt_handler->_push();
8591 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8592 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
8593 m_evt_handler->set_key_scalar_plain(maybe_filtered);
8594 _set_indentation(startindent);
8595 addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8596 _maybe_skip_whitespace_tokens();
8597 }
8598 }
8599 else
8600 {
8601 _c4err("parse error"); // LCOV_EXCL_LINE
8602 }
8603 }
8604 }
8605 else // destination is unknown
8606 {
 // unlike the branches above, containers are begun through the
 // event handler (begin_seq_val_* / begin_map_val_*) instead of _push()
8607 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(RSEQ), m_evt_handler->m_curr->pos);
8608 _c4dbgpf("usty[UNK]: first='{}'", _c4prc(first));
8609 if(first == '[')
8610 {
8611 _c4dbgp("usty[UNK]: it's a flow seq");
8612 add_flags(RNXT);
8613 _handle_annotations_before_blck_val_scalar();
8614 m_evt_handler->begin_seq_val_flow();
8615 addrem_flags(RSEQ|RFLOW|RVAL, RNXT|USTY);
8616 _set_indentation(startindent);
8617 _line_progressed(1);
8618 _maybe_skip_whitespace_tokens();
8619 }
8620 else if(first == '-' && _is_blck_token(rem))
8621 {
8622 _c4dbgp("usty[UNK]: it's a block seq");
8623 add_flags(RNXT);
8624 _handle_annotations_before_blck_val_scalar();
8625 m_evt_handler->begin_seq_val_block();
8626 addrem_flags(RSEQ|RBLCK|RVAL, RNXT|USTY);
8627 _set_indentation(startindent);
8628 _line_progressed(1);
8629 _maybe_skip_whitespace_tokens();
8630 }
8631 else if(first == '{')
8632 {
8633 _c4dbgp("usty[UNK]: it's a flow map");
8634 add_flags(RNXT);
8635 _handle_annotations_before_blck_val_scalar();
8636 m_evt_handler->begin_map_val_flow();
8637 addrem_flags(RMAP|RFLOW|RKEY, RNXT|USTY);
8638 _set_indentation(startindent);
8639 _line_progressed(1);
8640 _maybe_skip_whitespace_tokens();
8641 }
8642 else if(first == '?' && _is_blck_token(rem))
8643 {
8644 _c4dbgp("usty[UNK]: it's a map + this key is complex");
8645 add_flags(RNXT);
8646 _handle_annotations_before_blck_val_scalar();
8647 m_evt_handler->begin_map_val_block();
8648 addrem_flags(RMAP|RBLCK|QMRK, RNXT|USTY);
8649 _save_indentation();
8650 _line_progressed(1);
8651 _maybe_skip_whitespace_tokens();
8652 }
8653 else if(first == ':' && _is_blck_token(rem))
8654 {
8655 _c4dbgp("usty[UNK]: it's a map with an empty key");
8656 add_flags(RNXT);
8657 _handle_annotations_before_blck_val_scalar();
8658 m_evt_handler->begin_map_val_block();
8659 m_evt_handler->set_key_scalar_plain_empty();
8660 addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8661 _save_indentation();
8662 _line_progressed(1);
8663 _maybe_skip_whitespace_tokens();
8664 }
8665 else if(first == '&')
8666 {
8667 csubstr anchor = _scan_anchor();
8668 _c4dbgpf("usty[UNK]: anchor! {}", _prs(anchor));
8669 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8670 const size_t line = m_evt_handler->m_curr->pos.line;
8671 _add_annotation(&m_pending_anchors, anchor, indentation, line);
8672 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
8673 }
8674 else if(first == '*')
8675 {
 // a ref not followed by a colon is handed to _handle_valref();
 // otherwise it becomes the key of a new block map
8676 csubstr ref = _scan_ref_map();
8677 _c4dbgpf("usty[UNK]: ref! {}", _prs(ref));
8678 if(!_maybe_scan_following_colon())
8679 {
8680 _c4dbgp("usty[UNK]: set val ref");
8681 _handle_valref(ref);
8682 }
8683 else
8684 {
8685 _c4dbgp("usty[UNK]: start new block map, set ref as key");
8686 const size_t startline = m_evt_handler->m_curr->pos.line; // save
8687 add_flags(RNXT);
8688 _handle_annotations_before_start_mapblck(startline);
8689 m_evt_handler->begin_map_val_block();
8690 _handle_keyref(ref);
8691 _maybe_skip_whitespace_tokens();
8692 _set_indentation(startindent);
8693 addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8694 }
8695 }
8696 else if(first == '!')
8697 {
8698 csubstr tag = _scan_tag();
8699 _c4dbgpf("usty[UNK]: val tag! {}", _prs(tag));
8700 // we need to buffer the tags, as there may be two
8701 // consecutive tags in here
8702 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8703 const size_t line = m_evt_handler->m_curr->pos.line;
8704 _add_annotation(&m_pending_tags, tag, indentation, line);
8705 }
8706 else
8707 {
 // anything else is tried as a scalar: when no colon follows it is
 // set as val and _end_stream() is called; otherwise it becomes the
 // key of a new block map
8708 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL), m_evt_handler->m_curr->pos);
8709 startindent = m_evt_handler->m_curr->line_contents.indentation; // save
8710 const size_t startline = m_evt_handler->m_curr->pos.line; // save
 // NOTE(review): first already holds rem.str[0] and rem was not reassigned since
8711 first = rem.str[0];
8712 ScannedScalar sc;
8713 _c4dbgpf("usty[UNK]: maybe scalar. first='{}'", _c4prc(first));
8714 if(first == '\'')
8715 {
8716 _c4dbgp("usty[UNK]: scanning single-quoted scalar");
8717 sc = _scan_scalar_squot();
8718 if(!_maybe_scan_following_colon())
8719 {
8720 _c4dbgp("usty[UNK]: set as val");
8721 _handle_annotations_before_blck_val_scalar();
8722 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
8723 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
8724 _end_stream();
8725 }
8726 else
8727 {
8728 _c4dbgp("usty[UNK]: start new block map, set scalar as key");
8729 add_flags(RNXT);
8730 _handle_annotations_before_start_mapblck(startline);
8731 m_evt_handler->begin_map_val_block();
8732 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8733 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
8734 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
8735 _set_indentation(startindent);
8736 addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8737 _maybe_skip_whitespace_tokens();
8738 }
8739 }
8740 else if(first == '"')
8741 {
8742 _c4dbgp("usty[UNK]: scanning double-quoted scalar");
8743 sc = _scan_scalar_dquot();
8744 if(!_maybe_scan_following_colon())
8745 {
8746 _c4dbgp("usty[UNK]: set as val");
8747 _handle_annotations_before_blck_val_scalar();
8748 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
8749 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
8750 _end_stream();
8751 }
8752 else
8753 {
8754 _c4dbgp("usty[UNK]: start new block map, set double-quoted scalar as key");
8755 add_flags(RNXT);
8756 _handle_annotations_before_start_mapblck(startline);
8757 m_evt_handler->begin_map_val_block();
8758 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8759 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
8760 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
8761 _set_indentation(startindent);
8762 addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8763 _maybe_skip_whitespace_tokens();
8764 }
8765 }
8766 else if(first == '|')
8767 {
 // block scalars are always set as val here; no colon check is made
8768 _c4dbgp("usty[UNK]: scanning block-literal scalar");
8769 ScannedBlock sb;
8770 _scan_block(&sb, startindent);
8771 _c4dbgp("usty[UNK]: set as val");
8772 _handle_annotations_before_blck_val_scalar();
8773 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
8774 m_evt_handler->set_val_scalar_literal(maybe_filtered);
8775 _end_stream();
8776 }
8777 else if(first == '>')
8778 {
8779 _c4dbgp("usty[UNK]: scanning block-folded scalar");
8780 ScannedBlock sb;
8781 _scan_block(&sb, startindent);
8782 _c4dbgp("usty[UNK]: set as val");
8783 _handle_annotations_before_blck_val_scalar();
8784 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
8785 m_evt_handler->set_val_scalar_folded(maybe_filtered);
8786 _end_stream();
8787 }
8788 else if(_scan_scalar_plain_unk(&sc))
8789 {
8790 _c4dbgp("usty[UNK]: got a plain scalar");
8791 if(!_maybe_scan_following_colon())
8792 {
8793 _c4dbgp("usty[UNK]: set as val");
8794 _handle_annotations_before_blck_val_scalar();
8795 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
8796 m_evt_handler->set_val_scalar_plain(maybe_filtered);
8797 _end_stream();
8798 }
8799 else
8800 {
8801 _c4dbgp("usty[UNK]: start new block map, set scalar as key");
8802 add_flags(RNXT);
8803 _handle_annotations_before_start_mapblck(startline);
8804 m_evt_handler->begin_map_val_block();
8805 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8806 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
8807 m_evt_handler->set_key_scalar_plain(maybe_filtered);
8808 _set_indentation(startindent);
8809 addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8810 _maybe_skip_whitespace_tokens();
8811 }
8812 }
8813 else
8814 {
8815 _c4err("parse error"); // LCOV_EXCL_LINE
8816 }
8817 }
8818 }
8819}
8820
8821
8822//-----------------------------------------------------------------------------
8823
8824template<class EventHandler>
8826{
8827 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
8828 _RYML_SAVE_TEST_JSON(filename, src);
8829 m_evt_handler->start_parse(filename.str, src);
8830 m_evt_handler->begin_stream();
8831 _reset();
8832 while( ! _finished_file())
8833 {
8834 _scan_line();
8835 while( ! _finished_line())
8836 {
8838 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! m_evt_handler->m_curr->line_contents.rem.empty(), m_evt_handler->m_curr->pos);
8839 if(has_any(RSEQ))
8840 {
8841 _handle_seq_json();
8842 }
8843 else if(has_any(RMAP))
8844 {
8845 _handle_map_json();
8846 }
8847 else if(has_any(RUNK))
8848 {
8849 _handle_unk_json();
8850 }
8851 else
8852 {
8853 _c4err("internal error"); // LCOV_EXCL_LINE
8854 }
8855 }
8856 if(_finished_file())
8857 break; // it may have finished because of multiline blocks
8858 _line_ended();
8859 }
8860 _end_stream();
8861 m_evt_handler->finish_parse();
8862}
8863
8864
8865//-----------------------------------------------------------------------------
8866
8867template<class EventHandler>
8869{
8870 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
8871 _RYML_SAVE_TEST_YAML(filename, src);
8872 m_evt_handler->start_parse(filename.str, src);
8873 m_evt_handler->begin_stream();
8874 _reset();
8875 while( ! _finished_file())
8876 {
8877 _scan_line();
8878 while( ! _finished_line())
8879 {
8881 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! m_evt_handler->m_curr->line_contents.rem.empty(), m_evt_handler->m_curr->pos);
8882 if(has_any(RFLOW))
8883 {
8884 if(has_none(RSEQIMAP))
8885 {
8886 if(has_any(RSEQ))
8887 {
8888 _handle_seq_flow();
8889 }
8890 else
8891 {
8892 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RMAP), m_evt_handler->m_curr->pos);
8893 _handle_map_flow();
8894 }
8895 }
8896 else
8897 {
8898 _handle_seq_imap();
8899 }
8900 }
8901 else if(has_any(RBLCK))
8902 {
8903 if(has_any(RSEQ))
8904 {
8905 _handle_seq_block();
8906 }
8907 else
8908 {
8909 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RMAP), m_evt_handler->m_curr->pos);
8910 _handle_map_block();
8911 }
8912 }
8913 else if(has_any(RUNK))
8914 {
8915 _handle_unk();
8916 }
8917 else if(has_any(USTY))
8918 {
8919 _handle_usty();
8920 }
8921 else
8922 {
8923 _c4err("internal error"); // LCOV_EXCL_LINE
8924 }
8925 }
8926 if(_finished_file())
8927 break; // it may have finished because of multiline blocks
8928 _line_ended();
8929 }
8930 _end_stream();
8931 m_evt_handler->finish_parse();
8932}
8933/** @endcond */
8934
8935} // namespace yml
8936} // namespace c4
8937
8938// NOLINTEND(hicpp-signed-bitwise,cppcoreguidelines-avoid-goto,hicpp-avoid-goto,hicpp-multiway-paths-covered,modernize-avoid-c-style-cast)
8939
8940#undef _c4dbgnextline
8941#undef _c4assert
8942#undef _c4err
8943
8944C4_SUPPRESS_WARNING_MSVC_POP
8945C4_SUPPRESS_WARNING_GCC_CLANG_POP
8946
8947#endif // _C4_YML_PARSE_ENGINE_DEF_HPP_
Lightweight generic type-safe wrappers for converting individual values to/from strings.
This is the main driver of parsing logic: it scans the YAML or JSON source for tokens,...
FilterResult filter_scalar_plain(csubstr scalar, substr dst, size_t indentation)
filter a plain scalar
csubstr location_contents(Location const &loc) const
Get the string starting at a particular location, to the end of the parsed source buffer.
FilterResult filter_scalar_squoted(csubstr scalar, substr dst)
filter a single-quoted scalar
ParseEngine(EventHandler *evt_handler, ParserOptions opts={})
FilterResult filter_scalar_dquoted(csubstr scalar, substr dst)
filter a double-quoted scalar
void parse_json_in_place_ev(csubstr filename, substr src)
parse JSON in place, emitting events to the current handler
Location val_location(const char *val) const
Given a pointer to a buffer position, get the location.
FilterResult filter_scalar_plain_in_place(substr scalar, size_t cap, size_t indentation)
filter a plain scalar in place
FilterResult filter_scalar_squoted_in_place(substr scalar, size_t cap)
filter a single-quoted scalar in place
FilterResultExtending filter_scalar_dquoted_in_place(substr scalar, size_t cap)
filter a double-quoted scalar in place
void parse_in_place_ev(csubstr filename, substr src)
parse YAML in place, emitting events to the current handler
FilterResult filter_scalar_block_literal_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
filter a block-literal scalar in place
FilterResult filter_scalar_block_literal(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
filter a block-literal scalar
FilterResult filter_scalar_block_folded_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
filter a block-folded scalar in place
ParseEngine & operator=(ParseEngine &&) noexcept
FilterResult filter_scalar_block_folded(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
filter a block-folded scalar
#define RYML_LOCATIONS_SMALL_THRESHOLD
threshold at which a location search will revert from linear to binary search.
Definition common.hpp:28
#define RYML_NOEXCEPT
Conditionally expands to noexcept when RYML_USE_ASSERT is 0 and is empty otherwise.
Definition common.hpp:197
bool atou(csubstr str, T *v) noexcept
Convert a trimmed string to an unsigned integral value.
void err_parse(ErrorDataParse const &errdata, const char *msg)
trigger a parse error to its respective handler, with a non-formatted error message.
Definition common.cpp:210
bool read_hex(csubstr s, I *v) noexcept
read a hexadecimal integer from a string.
Definition charconv.hpp:903
basic_substring< char > substr
a mutable string view
Definition substr.hpp:2356
basic_substring< const char > csubstr
an immutable string view
Definition substr.hpp:2357
bool is_valid_tag_handle(csubstr handle)
Definition tag.cpp:210
bool is_custom_tag(csubstr tag)
is a tag of the form !handle!tag?
Definition tag.cpp:9
substr decode_code_point(substr out, csubstr code_point)
decode the given code_point, writing into the output string in out.
int ParserFlag_t
data type for ParserState_e
@ RTOP
reading at top level
@ RSET
the (implicit) map being read is a !!set.
@ RSEQ
reading a seq
@ RNXT
read next sibling
@ RUNK
reading unknown state (when starting): must determine whether scalar, map or seq
@ RKEY
reading a key
@ RKCL
reading the key colon (ie the : after the key in the map)
@ NDOC
no document mode. a document has ended and another has not started yet.
@ RDOC
reading a document
@ QSCL
stored scalar was quoted
@ RBLCK
reading in block mode
@ RMAP
reading a map
@ USTY
reading in unknown style mode - must determine FLOW or BLCK reading an implicit map nested in an expl...
@ QMRK
reading an explicit key (? key)
@ SSCL
there's a stored scalar
@ RVAL
reading a val
@ RFLOW
reading is inside explicit flow chars: [] or {}
size_t adjust_pos_with_escapes(csubstr scalar, size_t pos, bool keep_newlines=false)
Adjust a position in a scalar, increasing it to account for any escaped characters.
size_t to_chars(substr buf, escaped_scalar e)
formatting implementation to escape a scalar with escape_scalar()
RYML_ID_TYPE id_type
The type of a node id in the YAML tree; to override the default type, define the macro RYML_ID_TYPE t...
Definition common.hpp:249
@ npos
a null string position
Definition common.hpp:263
@ UTF16BE
UTF16, Big-Endian.
Definition common.hpp:271
@ UTF8
UTF8.
Definition common.hpp:269
@ UTF16LE
UTF16, Little-Endian.
Definition common.hpp:270
@ NOBOM
No Byte Order Mark was found.
Definition common.hpp:268
@ UTF32BE
UTF32, Big-Endian.
Definition common.hpp:273
@ UTF32LE
UTF32, Little-Endian.
Definition common.hpp:272
@ NONE
an index to none
Definition common.hpp:256
enum c4::yml::Encoding_ Encoding_e
csubstr version()
Definition version.cpp:6
(Undefined by default) Use shorter error message from checks/asserts: do not show the check condition...
Definition common.cpp:14
#define _prflag(fl, txt)
#define _c4dbgnextline()
#define _ryml_relocate(s)
#define _c4err(...)
#define _RYML_SAVE_TEST_YAML(filename, src)
#define _c4assert(...)
#define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without)
#define _RYML_WITH_TAB_TOKENS(...)
#define _RYML_SAVE_TEST_JSON(filename, src)
basic_substring range(size_t first, size_t last=npos) const noexcept
return [first,last[.
Definition substr.hpp:520
size_t first_not_of(const C c) const
Definition substr.hpp:994
basic_substring triml(const C c) const
trim left
Definition substr.hpp:630
size_t first_of(const C c, size_t start=0) const
Definition substr.hpp:935
basic_substring first(size_t num) const noexcept
return the first num elements: [0,num[
Definition substr.hpp:530
basic_substring sub(size_t first) const noexcept
return [first,len[
Definition substr.hpp:503
basic_substring trimr(const C c) const
trim the character c from the right
Definition substr.hpp:654
C * str
a restricted pointer to the first character of the substring
Definition substr.hpp:216
Data for a parse error.
Definition common.hpp:329
Filters an input string into a different output string.
Abstracts the fact that a scalar filter result may not fit in the intended memory.
Abstracts the fact that a scalar filter result may not fit in the intended memory.
Helper to control the line contents while parsing a buffer.
holds a source or yaml file position, for example when an error is detected; See also location_format...
Definition common.hpp:289
csubstr name
name of the file
Definition common.hpp:293
Options to give to the parser to control its behavior.
Definition common.hpp:355
Accelerator structure to reduce memory requirements by enabling reuse of resolved tags.
Definition tag.hpp:71
formatting helper to escape a scalar with escape_scalar_fn()
utilities for UTF and Byte Order Mark