rapidyaml 0.15.2
parse and emit YAML, and do it fast
Loading...
Searching...
No Matches
parse_engine.def.hpp
Go to the documentation of this file.
1#ifndef _C4_YML_PARSE_ENGINE_DEF_HPP_
2#define _C4_YML_PARSE_ENGINE_DEF_HPP_
3
4#ifndef _C4_YML_PARSE_ENGINE_HPP_
6#endif
7#ifndef _C4_CHARCONV_HPP_
8#include "c4/charconv.hpp"
9#endif
10#ifndef C4_UTF_HPP_
11#include "c4/utf.hpp"
12#endif
13#ifndef _C4_YML_FILTER_PROCESSOR_HPP_
15#endif
16#ifndef _C4_YML_TAG_HPP_
17#include "c4/yml/tag.hpp"
18#endif
19#ifndef _C4_YML_NODE_TYPE_HPP_
20#include "c4/yml/node_type.hpp"
21#endif
22
23#ifndef _C4_YML_DETAIL_DBGPRINT_HPP_
24#include "c4/yml/detail/dbgprint.hpp"
25#endif
26
27#ifdef RYML_DBG
28#ifndef C4_DUMP_HPP_
29#include <c4/dump.hpp>
30#endif
// _c4err(fmt, ...): report a parse error through this->_err(), tagging it
// with the C++ source location given by RYML_LOC_HERE(). In RYML_DBG
// builds RYML_DEBUG_BREAK() is invoked first. Only usable inside
// member functions, since it expands to `this->_err(...)`.
31#define _c4err(...) \
32 do { RYML_DEBUG_BREAK(); this->_err(RYML_LOC_HERE(), __VA_ARGS__); } while(0)
33#else
// non-debug flavour: same call, without the debug break
34#define _c4err(...) \
35 this->_err(RYML_LOC_HERE(), __VA_ARGS__)
36#endif
// _c4assert(cond): forwards to _RYML_ASSERT_PARSE_, passing the handler's
// callbacks and the current parse position (m_evt_handler->m_curr->pos).
37#define _c4assert(...) \
38 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, __VA_ARGS__, m_evt_handler->m_curr->pos)
39
40
// Compile-time selection of code depending on RYML_WITH_TAB_TOKENS:
//  - _RYML_WITH_TAB_TOKENS(code): expands to `code` only when the option is defined
//  - _RYML_WITHOUT_TAB_TOKENS(code): expands to `code` only when it is not defined
//  - _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without): picks one of the two arguments
// In this file these are used to optionally accept '\t' wherever ' ' follows a token.
41#if defined(RYML_WITH_TAB_TOKENS)
42#define _RYML_WITH_TAB_TOKENS(...) __VA_ARGS__
43#define _RYML_WITHOUT_TAB_TOKENS(...)
44#define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) with
45#else
46#define _RYML_WITH_TAB_TOKENS(...)
47#define _RYML_WITHOUT_TAB_TOKENS(...) __VA_ARGS__
48#define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) without
49#endif
50
51// helper to export cases to the YAML test suite
// When RYML_SAVE_TEST_YAML is not defined, both macros expand to nothing.
// When it is defined, they forward to c4::yml::ryml_save_test_yaml() /
// ryml_save_test_json(), which are only declared here; their definitions
// are not in this part of the file.
52#ifndef RYML_SAVE_TEST_YAML
53#define _RYML_SAVE_TEST_YAML(filename, src)
54#define _RYML_SAVE_TEST_JSON(filename, src)
55#else
56#define _RYML_SAVE_TEST_YAML(filename, src) c4::yml::ryml_save_test_yaml(filename, src)
57#define _RYML_SAVE_TEST_JSON(filename, src) c4::yml::ryml_save_test_json(filename, src)
58namespace c4 {
59namespace yml {
60void ryml_save_test_yaml(csubstr filename, csubstr src);
61void ryml_save_test_json(csubstr filename, csubstr src);
62} // namespace yml
63} // namespace c4
64#endif
65
66
67// scaffold:
// _c4dbgnextline(): debug trace emitted when a new line is handled; prints
// a separator followed by the current line number and byte offset taken
// from m_evt_handler->m_curr->pos. Relies on the _c4dbgq/_c4dbgt macros.
68#define _c4dbgnextline() \
69 do { \
70 _c4dbgq("\n-----------"); \
71 _c4dbgt("handling line={}, offset={}B", \
72 m_evt_handler->m_curr->pos.line, \
73 m_evt_handler->m_curr->pos.offset); \
74 } while(0)
75
76
77C4_SUPPRESS_WARNING_MSVC_PUSH
78C4_SUPPRESS_WARNING_MSVC(4296) // expression is always 'boolean_value'
79C4_SUPPRESS_WARNING_MSVC(4702) // unreachable code
80C4_SUPPRESS_WARNING_GCC_CLANG_PUSH
81C4_SUPPRESS_WARNING_GCC_CLANG("-Wtype-limits") // to remove a warning on an assertion that a size_t >= 0. Later on, this size_t will turn into a template argument, and then it can become < 0.
82C4_SUPPRESS_WARNING_GCC_CLANG("-Wformat-nonliteral")
83C4_SUPPRESS_WARNING_GCC_CLANG("-Wold-style-cast")
84#if defined(__GNUC__) && (__GNUC__ >= 6)
85C4_SUPPRESS_WARNING_GCC("-Wnull-dereference")
86#endif
87#if defined(__GNUC__) && (__GNUC__ >= 7)
88C4_SUPPRESS_WARNING_GCC("-Wduplicated-branches")
89#endif
90
91// NOLINTBEGIN(hicpp-signed-bitwise,cppcoreguidelines-avoid-goto,hicpp-avoid-goto,hicpp-multiway-paths-covered,modernize-avoid-c-style-cast)
92
93namespace c4 {
94namespace yml {
95
96namespace { // NOLINT
97
98C4_HOT C4_ALWAYS_INLINE void _set_first(substr &C4_RESTRICT subject, size_t pos) noexcept
99{
100 // avoids reassigning the ptr in substr
101 subject.len = pos != npos ? pos : subject.len;
102}
103C4_HOT C4_ALWAYS_INLINE void _set_first(csubstr &C4_RESTRICT subject, size_t pos) noexcept
104{
105 // avoids reassigning the ptr in substr
106 subject.len = pos != npos ? pos : subject.len;
107}
108C4_HOT C4_ALWAYS_INLINE void _set_first_strict(substr &C4_RESTRICT subject, size_t pos) RYML_NOEXCEPT
109{
110 // avoids reassigning the ptr in substr
111 _RYML_ASSERT_BASIC(pos != npos); // LCOV_EXCL_LINE
112 subject.len = pos;
113}
114C4_HOT C4_ALWAYS_INLINE void _set_first_strict(csubstr &C4_RESTRICT subject, size_t pos) RYML_NOEXCEPT
115{
116 // avoids reassigning the ptr in substr
117 _RYML_ASSERT_BASIC(pos != npos); // LCOV_EXCL_LINE
118 subject.len = pos;
119}
120
121C4_HOT C4_ALWAYS_INLINE bool _is_blck_token(csubstr s) RYML_NOEXCEPT
122{
123 _RYML_ASSERT_BASIC(s.len > 0);
124 _RYML_ASSERT_BASIC(s.str[0] == '-' || s.str[0] == ':' || s.str[0] == '?');
125 return ((s.len == 1) || ((s.str[1] == ' ') _RYML_WITH_TAB_TOKENS( || (s.str[1] == '\t'))));
126}
127
128C4_HOT C4_ALWAYS_INLINE bool _is_blck_seq_token_maybe(csubstr const& C4_RESTRICT s) noexcept
129{
130 return ((s.len >= 1) && (s.str[0] == '-') && ((s.len == 1) || ((s.str[1] == ' ') _RYML_WITH_TAB_TOKENS( || (s.str[1] == '\t')))));
131}
132
133inline bool _is_doc_begin_token(csubstr s) RYML_NOEXCEPT
134{
135 _RYML_ASSERT_BASIC(s.begins_with('-'));
136 _RYML_ASSERT_BASIC(!s.ends_with("\n"));
137 _RYML_ASSERT_BASIC(!s.ends_with("\r"));
138 return (s.len >= 3 && s.str[1] == '-' && s.str[2] == '-')
139 && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
140}
141
142inline bool _is_doc_end_token(csubstr s) RYML_NOEXCEPT
143{
144 _RYML_ASSERT_BASIC(s.begins_with('.'));
145 _RYML_ASSERT_BASIC(!s.ends_with("\n"));
146 _RYML_ASSERT_BASIC(!s.ends_with("\r"));
147 return (s.len >= 3 && s.str[1] == '.' && s.str[2] == '.')
148 && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
149}
150
151inline bool _is_doc_token(csubstr s) noexcept
152{
153 if(s.len >= 3)
154 {
155 switch(s.str[0])
156 {
157 case '-':
158 //return _is_doc_begin_token(s); // this was failing with gcc -O2
159 return (s.str[1] == '-' && s.str[2] == '-')
160 && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
161 case '.':
162 //return _is_doc_end_token(s); // this was failing with gcc -O2
163 return (s.str[1] == '.' && s.str[2] == '.')
164 && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
165 }
166 }
167 return false;
168}
169
170inline size_t _begins_with_special_json_scalar(csubstr s) RYML_NOEXCEPT
171{
172 _RYML_ASSERT_BASIC(s.len);
173 switch(s.str[0])
174 {
175 case 'f':
176 return s.begins_with("false") ? 5u : 0u;
177 case 't':
178 return s.begins_with("true") ? 4u : 0u;
179 case 'n':
180 return s.begins_with("null") ? 4u : 0u;
181 }
182 return 0u;
183}
184
185
186//-----------------------------------------------------------------------------
187
188C4_ALWAYS_INLINE size_t _extend_from_combined_newline(char nl, char following)
189{
190 return (nl == '\n' && following == '\r') || (nl == '\r' && following == '\n');
191}
192
193//! look for the next newline chars, and jump to the right of those
194inline substr _from_next_line(substr rem)
195{
196 size_t nlpos = rem.first_of("\r\n");
197 if(nlpos == csubstr::npos)
198 return {};
199 const char nl = rem[nlpos];
200 rem = rem.right_of(nlpos);
201 if(rem.empty())
202 return {};
203 if(_extend_from_combined_newline(nl, rem.front()))
204 rem = rem.sub(1);
205 return rem;
206}
207
208
209//-----------------------------------------------------------------------------
210
211inline size_t _count_following_newlines(csubstr r, size_t *C4_RESTRICT i)
212{
213 _RYML_ASSERT_BASIC(r[*i] == '\n');
214 size_t numnl_following = 0;
215 ++(*i);
216 for( ; *i < r.len; ++(*i))
217 {
218 if(r.str[*i] == '\n')
219 ++numnl_following;
220 // skip leading whitespace
221 else if(r.str[*i] == ' ' || r.str[*i] == '\t' || r.str[*i] == '\r')
222 ;
223 else
224 break;
225 }
226 return numnl_following;
227}
228
229/** @p i is set to the first non whitespace character after the line
230 * @return the number of empty lines after the initial position */
231inline size_t _count_following_newlines(csubstr r, size_t *C4_RESTRICT i, size_t indentation)
232{
233 _RYML_ASSERT_BASIC(r[*i] == '\n');
234 size_t numnl_following = 0;
235 ++(*i);
236 if(indentation == 0)
237 {
238 for( ; *i < r.len; ++(*i))
239 {
240 const char c = r.str[*i];
241 if(c == '\n')
242 ++numnl_following;
243 // skip leading whitespace
244 else if(c != ' ' && c != '\t' && c != '\r')
245 break;
246 }
247 }
248 else
249 {
250 for( ; *i < r.len; ++(*i))
251 {
252 char c = r.str[*i];
253 if(c == '\n')
254 {
255 ++numnl_following;
256 // skip the indentation after the newline
257 size_t stop = *i + indentation;
258 for( ; *i < r.len; ++(*i))
259 {
260 c = r.str[*i];
261 if(c != ' ' && c != '\r')
262 break;
263 _RYML_ASSERT_BASIC(*i < stop); // LCOV_EXCL_LINE
264 }
265 C4_UNUSED(stop);
266 }
267 // skip leading whitespace
268 else if(c != ' ' && c != '\t' && c != '\r')
269 {
270 break;
271 }
272 }
273 }
274 return numnl_following;
275}
276
277} // anon namespace
278
279
280//-----------------------------------------------------------------------------
281//-----------------------------------------------------------------------------
282//-----------------------------------------------------------------------------
283
// NOTE(review): the declarator line (listing line 285) is missing from
// this extract; presumably this is the ParseEngine destructor — confirm
// against the header. The body releases the newline-offsets buffer via
// _free() and then resets every member via _clr().
284template<class EventHandler>
286{
287 _free();
288 _clr();
289}
290
// Construct an engine that reports events to @p evt_handler and is
// configured by @p opts. The engine starts with no pending anchors or
// tags, no directives seen, an empty document, no previous colon or
// value-end position, no BOM encoding detected, and no newline-offset
// storage. @p evt_handler must not be null (checked in the body).
// NOTE(review): m_bom_len and m_bom_line, which _reset() assigns, do not
// appear in this initializer list — presumably they have in-class
// initializers or are always set by _reset() before use; confirm.
291template<class EventHandler>
292ParseEngine<EventHandler>::ParseEngine(EventHandler *evt_handler, ParserOptions const& opts)
293 : m_options(opts)
294 , m_evt_handler(evt_handler)
295 , m_pending_anchors()
296 , m_pending_tags()
297 , m_has_directives_yaml(false)
298 , m_has_directives(false)
299 , m_doc_empty(true)
300 , m_prev_colon(npos)
301 , m_prev_val_end(npos)
302 , m_encoding(NOBOM)
303 , m_newline_offsets()
304 , m_newline_offsets_size(0)
305 , m_newline_offsets_capacity(0)
306{
307 _RYML_CHECK_BASIC(evt_handler);
308}
309
// NOTE(review): the declarator line (listing line 311) is missing from
// this extract; presumably this is the move constructor, since the body
// takes over that.m_newline_offsets (pointer, size and capacity) and
// then calls that._clr() so that `that` no longer refers to the buffer.
// NOTE(review): m_prev_colon, m_prev_val_end and m_encoding are reset to
// npos/npos/NOBOM instead of being taken from `that`, whereas the
// assignment bodies further below copy them from `that` — confirm this
// difference is intended.
310template<class EventHandler>
312 : m_options(that.m_options)
313 , m_evt_handler(that.m_evt_handler)
314 , m_pending_anchors(that.m_pending_anchors)
315 , m_pending_tags(that.m_pending_tags)
316 , m_has_directives_yaml(that.m_has_directives_yaml)
317 , m_has_directives(that.m_has_directives)
318 , m_doc_empty(that.m_doc_empty)
319 , m_prev_colon(npos)
320 , m_prev_val_end(npos)
321 , m_encoding(NOBOM)
322 , m_newline_offsets(that.m_newline_offsets)
323 , m_newline_offsets_size(that.m_newline_offsets_size)
324 , m_newline_offsets_capacity(that.m_newline_offsets_capacity)
325{
326 that._clr();
327}
328
// NOTE(review): the declarator line (listing line 330) is missing from
// this extract; presumably this is the copy constructor, since `that`
// is only read and its newline offsets are duplicated into a buffer
// owned by this object.
// NOTE(review): as in the preceding constructor, m_prev_colon,
// m_prev_val_end and m_encoding are not copied from `that` — confirm.
329template<class EventHandler>
331 : m_options(that.m_options)
332 , m_evt_handler(that.m_evt_handler)
333 , m_pending_anchors(that.m_pending_anchors)
334 , m_pending_tags(that.m_pending_tags)
335 , m_has_directives_yaml(that.m_has_directives_yaml)
336 , m_has_directives(that.m_has_directives)
337 , m_doc_empty(that.m_doc_empty)
338 , m_prev_colon(npos)
339 , m_prev_val_end(npos)
340 , m_encoding(NOBOM)
341 , m_newline_offsets()
342 , m_newline_offsets_size()
343 , m_newline_offsets_capacity()
344{
// only allocate when the source actually has storage
345 if(that.m_newline_offsets_capacity)
346 {
// get a buffer of the same capacity, then copy the used entries
347 _resize_locations(that.m_newline_offsets_capacity);
348 _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity == that.m_newline_offsets_capacity);
349 memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size * sizeof(size_t));
350 m_newline_offsets_size = that.m_newline_offsets_size;
351 }
352}
353
// NOTE(review): the declarator line (listing line 355) is missing from
// this extract; presumably this is the move assignment operator: the
// body releases this object's buffer, takes every field (including the
// newline-offsets pointer) from `that`, clears `that`, and returns *this.
// NOTE(review): there is no `&that != this` guard here, unlike the
// assignment body that follows. On self-assignment _free() would release
// the buffer before the fields are read, and that._clr() would then wipe
// this object — confirm callers never self-assign, or add a guard.
354template<class EventHandler>
356{
357 _free();
358 m_options = (that.m_options);
359 m_evt_handler = that.m_evt_handler;
360 m_pending_anchors = that.m_pending_anchors;
361 m_pending_tags = that.m_pending_tags;
362 m_has_directives_yaml = that.m_has_directives_yaml;
363 m_has_directives = that.m_has_directives;
364 m_doc_empty = that.m_doc_empty;
365 m_prev_colon = that.m_prev_colon;
366 m_prev_val_end = that.m_prev_val_end;
367 m_encoding = that.m_encoding;
// take over the buffer; `that` is detached from it by _clr() below
368 m_newline_offsets = (that.m_newline_offsets);
369 m_newline_offsets_size = (that.m_newline_offsets_size);
370 m_newline_offsets_capacity = (that.m_newline_offsets_capacity);
371 that._clr();
372 return *this;
373}
374
// NOTE(review): the declarator line (listing line 376) is missing from
// this extract; presumably this is the copy assignment operator: `that`
// is only read, and self-assignment is a no-op.
375template<class EventHandler>
377{
378 if(&that != this)
379 {
// drop our own buffer first; it is re-created below if needed
380 _free();
381 m_options = (that.m_options);
382 m_evt_handler = that.m_evt_handler;
383 m_pending_anchors = that.m_pending_anchors;
384 m_pending_tags = that.m_pending_tags;
385 m_has_directives_yaml = that.m_has_directives_yaml;
386 m_has_directives = that.m_has_directives;
387 m_doc_empty = that.m_doc_empty;
388 m_prev_colon = that.m_prev_colon;
389 m_prev_val_end = that.m_prev_val_end;
390 m_encoding = that.m_encoding;
// make room for the source's offsets, then copy the used entries
391 if(that.m_newline_offsets_capacity > m_newline_offsets_capacity)
392 _resize_locations(that.m_newline_offsets_capacity);
393 _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_capacity);
394 _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_size);
// NOTE(review): when neither side has a buffer this looks like a
// zero-sized memcpy with null pointers — confirm that is acceptable.
395 memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size * sizeof(size_t));
396 m_newline_offsets_size = that.m_newline_offsets_size;
397 }
398 return *this;
399}
400
401template<class EventHandler>
402void ParseEngine<EventHandler>::_clr()
403{
404 m_options = {};
405 m_evt_handler = {};
406 m_pending_anchors = {};
407 m_pending_tags = {};
408 m_has_directives_yaml = false;
409 m_has_directives = false;
410 m_doc_empty = true;
411 m_prev_colon = npos;
412 m_prev_val_end = npos;
413 m_encoding = NOBOM;
414 m_newline_offsets = {};
415 m_newline_offsets_size = {};
416 m_newline_offsets_capacity = {};
417}
418
419template<class EventHandler>
420void ParseEngine<EventHandler>::_free()
421{
422 if(m_newline_offsets)
423 {
424 _RYML_CB_FREE(m_evt_handler->m_stack.m_callbacks, m_newline_offsets, size_t, m_newline_offsets_capacity);
425 m_newline_offsets = nullptr;
426 m_newline_offsets_size = 0u;
427 m_newline_offsets_capacity = 0u;
428 }
429}
430
431
432//-----------------------------------------------------------------------------
433
434template<class EventHandler>
435void ParseEngine<EventHandler>::_reset()
436{
437 m_pending_anchors = {};
438 m_pending_tags = {};
439 m_has_directives_yaml = false;
440 m_has_directives = false;
441 m_doc_empty = true;
442 m_prev_colon = npos;
443 m_prev_val_end = npos;
444 m_bom_len = 0;
445 m_encoding = NOBOM;
446 m_bom_line = 0;
447 if(m_options.locations())
448 {
449 _prepare_locations();
450 }
451}
452
453
454//-----------------------------------------------------------------------------
455
456template<class EventHandler>
457void ParseEngine<EventHandler>::_relocate_arena(csubstr prev_arena, substr next_arena, substr *other)
458{
459 _c4dbgp("relocate to new arena");
460 const char *pb = prev_arena.str;
461 const char *pe = prev_arena.str + prev_arena.len;
462 #define _ryml_relocate(s) \
463 if((s).str >= pb && (s).str <= pe) \
464 { \
465 (s).str = next_arena.str + ((s).str - pb); \
466 } \
467 ((void)0)
468 for(ParserState &st : m_evt_handler->m_stack)
469 {
470 _ryml_relocate(st.line_contents.rem);
471 _ryml_relocate(st.line_contents.full);
472 }
473 _ryml_relocate(m_evt_handler->m_src);
474 for(size_t i = 0; i < m_pending_tags.num_entries; ++i)
475 {
476 _ryml_relocate(m_pending_tags.annotations[i].str); // LCOV_EXCL_LINE
477 _ryml_relocate(m_pending_tags.annotations[i].orig); // LCOV_EXCL_LINE
478 }
479 for(size_t i = 0; i < m_pending_anchors.num_entries; ++i)
480 {
481 _ryml_relocate(m_pending_anchors.annotations[i].str);
482 _ryml_relocate(m_pending_anchors.annotations[i].orig);
483 }
484 {
485 TagDirectives &tds = m_evt_handler->tag_directives();
486 for(size_t i = 0, sz = tds.size(); i < sz; ++i)
487 {
488 _ryml_relocate(tds.m_directives[i].handle);
489 _ryml_relocate(tds.m_directives[i].prefix);
490 }
491 }
492 {
493 TagCache &tch = m_evt_handler->tag_cache();
494 for(id_type i = 0, sz = tch.m_entries.size(); i < sz; ++i)
495 {
496 _ryml_relocate(tch.m_entries[i].tag);
497 _ryml_relocate(tch.m_entries[i].resolved);
498 }
499 }
500 if(other)
501 {
502 _ryml_relocate(*other);
503 }
504 #undef _ryml_relocate
505}
506
507/** @cond dev */
// NOTE(review): the declarator line (listing line 509) is missing from
// this extract; the body uses `len` and `other`, presumably its
// parameters, and returns a substr — confirm against the header.
// Requests `len` characters from the event handler's arena and returns
// the allocated span. If the arena's base pointer changed as a result,
// every tracked string (and `other`) is rebased via _relocate_arena().
508template<class EventHandler>
510{
// remember the arena before allocating, to detect a move
511 csubstr prev = m_evt_handler->arena();
512 substr out = m_evt_handler->alloc_arena(len);
513 substr curr = m_evt_handler->arena();
514 if(curr.str != prev.str)
515 _relocate_arena(prev, curr, other);
516 return out;
517}
519
520
521//-----------------------------------------------------------------------------
522
523#ifdef RYML_DBG
// Debug-only helper: writes a description of the current parser position
// through @p dumpfn. When the current line has content, it prints
// "[file:]line:col: <escaped line> (size=N)" (the line is cut at 80
// characters and followed by "..."), then a "^~~~" marker under the part
// of the line still to be consumed, with its column range. Otherwise it
// prints just a newline. Finally it prints the flags of the top state.
524template<class EventHandler>
525template<class DumpFn>
526C4_NO_INLINE void ParseEngine<EventHandler>::_fmt_msg(DumpFn &&dumpfn) const
527{
528 ParserState const *const C4_RESTRICT st = m_evt_handler->m_curr;
529 LineContents const& C4_RESTRICT lc = st->line_contents;
530 csubstr contents = lc.full.first(lc.num_cols);
531 if(contents.len)
532 {
533 // print the yaml src line
// offs accumulates the width of the "file:line:col: " prefix so that
// the marker line below can be padded to start under the source text
534 size_t offs = 3u + to_chars(substr{}, st->pos.line) + to_chars(substr{}, st->pos.col);
535 csubstr m_file = m_evt_handler->m_curr->pos.name;
536 if(m_file.len)
537 {
538 _dbg_dump(std::forward<DumpFn>(dumpfn), "{}:", m_file);
539 offs += m_file.len + 1;
540 }
541 _dbg_dump(std::forward<DumpFn>(dumpfn), "{}:{}: ", st->pos.line, st->pos.col);
542 csubstr maybe_full_content = (contents.len < 80u ? contents : contents.first(80u));
543 csubstr maybe_ellipsis = (contents.len < 80u ? csubstr{} : csubstr("..."));
544 _dbg_dump(std::forward<DumpFn>(dumpfn), "{}{} (size={})\n", escaped_scalar(maybe_full_content, /*escape*/true), maybe_ellipsis, contents.len);
545 // highlight the remaining portion of the previous line
546 size_t firstcol = (size_t)(lc.rem.str - lc.full.str);
547 size_t lastcol = firstcol + lc.rem.len;
// the printed line was escaped, so positions are adjusted accordingly
548 size_t firstcol_adj = adjust_pos_with_escapes(lc.full, firstcol);
549 size_t len = adjust_pos_with_escapes(lc.rem, lc.rem.len);
550 for(size_t i = 0; i < offs + firstcol_adj; ++i)
551 std::forward<DumpFn>(dumpfn)(" ");
552 std::forward<DumpFn>(dumpfn)("^");
553 for(size_t i = 1, e = (len < 80u ? len : 80u); i < e; ++i)
554 std::forward<DumpFn>(dumpfn)("~");
555 _dbg_dump(std::forward<DumpFn>(dumpfn), "{} (cols {}-{})\n", maybe_ellipsis, firstcol+1, lastcol+1);
556 }
557 else
558 {
559 std::forward<DumpFn>(dumpfn)("\n");
560 }
561 // next line: print the state flags
562 {
563 char flagbuf_[128];
564 _dbg_dump(std::forward<DumpFn>(dumpfn), "top state: {}\n", detail::_parser_flags_to_str(flagbuf_, m_evt_handler->m_curr->flags));
565 }
566}
567
// NOTE(review): the declarator line (listing line 569) is missing from
// this extract; the body uses `buf`, presumably a caller-provided scratch
// buffer parameter for the flags string — confirm against the header.
// Debug-only: when debug output is enabled, prints one line per state in
// the handler's stack with its level, indentation reference, node id and
// flags.
568template<class EventHandler>
570{
571 if(_dbg_enabled())
572 {
573 for(ParserState const& s : m_evt_handler->m_stack)
574 _dbg_printf("state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(buf, s.flags));
575 }
576}
577
// NOTE(review): the declarator line (listing line 579) is missing from
// this extract; presumably this is the convenience overload taking no
// buffer — confirm against the header.
// Debug-only: provides a local 128-character scratch buffer and forwards
// to _print_state_stack(buf).
578template<class EventHandler>
580{
581 char buf[128];
582 _print_state_stack(buf);
583}
584#endif
585
586
587//-----------------------------------------------------------------------------
588
589template<class EventHandler>
590template<class ...Args>
591C4_NORETURN C4_NO_INLINE void ParseEngine<EventHandler>::_err(Location const& cpploc, Location const& ymlloc, const char* fmt, Args const& ...args) const
592{
593 m_evt_handler->cancel_parse();
594 err_parse(m_evt_handler->m_stack.m_callbacks, ErrorDataParse{cpploc, ymlloc}, fmt, args...);
595}
596
597template<class EventHandler>
598template<class ...Args>
599C4_NORETURN C4_NO_INLINE void ParseEngine<EventHandler>::_err(Location const& cpploc, const char *fmt, Args const& ...args) const
600{
601 m_evt_handler->cancel_parse();
602 err_parse(m_evt_handler->m_stack.m_callbacks, ErrorDataParse{cpploc, m_evt_handler->m_curr->pos}, fmt, args...);
603}
604
605
606//-----------------------------------------------------------------------------
607#ifdef RYML_DBG
608template<class EventHandler>
609template<class ...Args>
610void ParseEngine<EventHandler>::_dbg(csubstr fmt, Args const& ...args) const
611{
612 if(_dbg_enabled())
613 {
614 _dbg_printf(fmt, args...);
615 _dbg_dumper("\n");
616 _fmt_msg(_dbg_dumper);
617 }
618}
619#endif
620
621
622//-----------------------------------------------------------------------------
623template<class EventHandler>
624bool ParseEngine<EventHandler>::_finished_file() const
625{
626 bool ret = m_evt_handler->m_curr->pos.offset >= _buf().len;
627 #ifdef RYML_DBG
628 if(ret)
629 {
630 _c4dbgp("finished file!!!");
631 }
632 #endif
633 return ret;
634}
635
636template<class EventHandler>
637C4_HOT C4_ALWAYS_INLINE bool ParseEngine<EventHandler>::_finished_line() const // LCOV_EXCL_LINE
638{
639 return m_evt_handler->m_curr->line_contents.rem.empty();
640}
641
642
643//-----------------------------------------------------------------------------
644
645template<class EventHandler>
646void ParseEngine<EventHandler>::_maybe_skip_whitespace_tokens()
647{
648 if(m_evt_handler->m_curr->line_contents.rem.len && (m_evt_handler->m_curr->line_contents.rem.str[0] == ' ' _RYML_WITH_TAB_TOKENS(|| m_evt_handler->m_curr->line_contents.rem.str[0] == '\t')))
649 {
650 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' '));
651 if(pos == npos)
652 pos = m_evt_handler->m_curr->line_contents.rem.len; // maybe the line is just all whitespace
653 _c4dbgpf("skip {} whitespace characters", pos);
654 _line_progressed(pos);
655 }
656}
657
658template<class EventHandler>
659void ParseEngine<EventHandler>::_maybe_skipchars(char c)
660{
661 if(m_evt_handler->m_curr->line_contents.rem.len && m_evt_handler->m_curr->line_contents.rem.str[0] == c)
662 {
663 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(c);
664 if(pos == npos)
665 pos = m_evt_handler->m_curr->line_contents.rem.len; // maybe the line is just all c
666 _c4dbgpf("skip {}x'{}'", pos, _c4prc(c));
667 _line_progressed(pos);
668 }
669}
670
671template<class EventHandler>
672template<size_t N>
673void ParseEngine<EventHandler>::_skipchars(const char (&chars)[N])
674{
675 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.begins_with_any(chars), m_evt_handler->m_curr->pos);
676 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(chars);
677 if(pos == npos)
678 pos = m_evt_handler->m_curr->line_contents.rem.len; // maybe the line is just whitespace
679 _c4dbgpf("skip {} characters", pos);
680 _line_progressed(pos);
681}
682
683template<class EventHandler>
684void ParseEngine<EventHandler>::_skip_comment()
685{
686 LineContents const& C4_RESTRICT lc = m_evt_handler->m_curr->line_contents;
687 const size_t col = m_evt_handler->m_curr->pos.col - 1u;
688 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, lc.rem.begins_with('#'), m_evt_handler->m_curr->pos);
689 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, lc.rem.is_sub(lc.full), m_evt_handler->m_curr->pos);
690 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col >= 1, m_evt_handler->m_curr->pos); // 1-based
691 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, col == ((size_t)(lc.rem.str - lc.full.str)), m_evt_handler->m_curr->pos);
692 // raise an error if the comment is not preceded by whitespace
693 if(lc.rem.str != lc.full.str) // not at line beginning
694 {
695 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, col > 0, m_evt_handler->m_curr->pos);
696 const char prev = lc.full.str[col - 1u];
697 if(C4_UNLIKELY(prev != ' ' && prev != '\t'))
698 _c4err("comment not preceded by whitespace");
699 }
700 _c4dbgpf("comment was '{}'", m_evt_handler->m_curr->line_contents.rem);
701 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
702}
703
704template<class EventHandler>
705void ParseEngine<EventHandler>::_maybe_skip_comment_strict()
706{
707 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(" \t");
708 if(pos != npos)
709 {
710 if('#' == m_evt_handler->m_curr->line_contents.rem[pos])
711 {
712 _line_progressed(pos);
713 _skip_comment();
714 }
715 }
716}
717
718template<class EventHandler>
719void ParseEngine<EventHandler>::_maybe_skip_comment()
720{
721 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(" \t");
722 if(pos != npos)
723 {
724 if('#' == m_evt_handler->m_curr->line_contents.rem[pos])
725 {
726 _line_progressed(pos);
727 _skip_comment();
728 }
729 }
730 else
731 {
732 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
733 }
734}
735
736template<class EventHandler>
737bool ParseEngine<EventHandler>::_maybe_scan_following_colon() noexcept
738{
739 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(" \t");
740 if(pos != npos)
741 {
742 if(':' == m_evt_handler->m_curr->line_contents.rem[pos])
743 {
744 // bump pos to skip the colon as well, and check the colon
745 // is followed by space or tab
746 if(++pos < m_evt_handler->m_curr->line_contents.rem.len)
747 {
748 const char next = m_evt_handler->m_curr->line_contents.rem.str[pos];
749 if(next == ' ' _RYML_WITH_TAB_TOKENS(|| next == '\t'))
750 ++pos;
751 else
752 return false;
753 }
754 _line_progressed(pos);
755 return true;
756 }
757 }
758 else
759 {
760 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
761 }
762 return false;
763}
764
765
766//-----------------------------------------------------------------------------
767
768template<class EventHandler>
769csubstr ParseEngine<EventHandler>::_scan_anchor()
770{
771 csubstr s = m_evt_handler->m_curr->line_contents.rem;
772 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with('&'), m_evt_handler->m_curr->pos);
773 csubstr anchor = s.range(1, s.first_of(" ,]}\t"));
774 _line_progressed(1u + anchor.len);
775 _maybe_skipchars(' ');
776 return anchor;
777}
778
779template<class EventHandler>
780csubstr ParseEngine<EventHandler>::_scan_ref_seq()
781{
782 csubstr s = m_evt_handler->m_curr->line_contents.rem;
783 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with('*'), m_evt_handler->m_curr->pos);
784 _set_first(s, s.first_of(" ,]\t"));
785 _line_progressed(s.len);
786 return s;
787}
788
789template<class EventHandler>
790csubstr ParseEngine<EventHandler>::_scan_ref_map()
791{
792 csubstr s = m_evt_handler->m_curr->line_contents.rem;
793 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with('*'), m_evt_handler->m_curr->pos);
794 _set_first(s, s.first_of(" ,}\t"));
795 _line_progressed(s.len);
796 return s;
797}
798
799template<class EventHandler>
800csubstr ParseEngine<EventHandler>::_scan_tag()
801{
802 csubstr t = m_evt_handler->m_curr->line_contents.rem;
803 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, t.begins_with('!'), m_evt_handler->m_curr->pos);
804 if(!t.begins_with("!<"))
805 {
806 _c4dbgp("begins with '!'");
807 _set_first(t, t.first_of(" ,]}\t"));
808 if(C4_UNLIKELY(t.first_of("[{") != npos))
809 _c4err("invalid tag");
810 _line_progressed(t.len);
811 if(m_options.resolve_tags_all() || (m_options.resolve_tags() && is_custom_tag(t)))
812 t = _resolve_tag(t);
813 }
814 else
815 {
816 _c4dbgp("begins with '!<'");
817 size_t pos = t.find('>');
818 if(C4_UNLIKELY(pos == npos))
819 _c4err("invalid tag");
820 _set_first_strict(t, pos+1);
821 _line_progressed(t.len);
822 t = t.sub(1);
823 }
824 _maybe_skip_whitespace_tokens();
825 return t;
826}
827
828template<class EventHandler>
829csubstr ParseEngine<EventHandler>::_scan_tag(csubstr *orig)
830{
831 csubstr t = m_evt_handler->m_curr->line_contents.rem;
832 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, t.begins_with('!'), m_evt_handler->m_curr->pos);
833 if(!t.begins_with("!<"))
834 {
835 _c4dbgp("begins with '!'");
836 _set_first(t, t.first_of(" ,\t"));
837 if(C4_UNLIKELY(t.first_of("[{") != npos))
838 _c4err("invalid tag");
839 _line_progressed(t.len);
840 *orig = t;
841 if(m_options.resolve_tags_all() || (m_options.resolve_tags() && is_custom_tag(t)))
842 t = _resolve_tag(t);
843 }
844 else
845 {
846 _c4dbgp("begins with '!<'");
847 size_t pos = t.find('>');
848 if(C4_UNLIKELY(pos == npos))
849 _c4err("invalid tag");
850 _set_first_strict(t, pos+1);
851 _line_progressed(t.len);
852 *orig = t;
853 t = t.sub(1);
854 }
855 _maybe_skip_whitespace_tokens();
856 return t;
857}
858
859
860//-----------------------------------------------------------------------------
861
862template<class EventHandler>
863bool ParseEngine<EventHandler>::_is_valid_start_scalar_plain_flow_check_block_token(csubstr s)
864{
865 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.len > 0, m_evt_handler->m_curr->pos);
866 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with_any(":-"), m_evt_handler->m_curr->pos);
867 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.count('\n') == 0, m_evt_handler->m_curr->pos);
868 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.count('\r') == 0, m_evt_handler->m_curr->pos);
869 if(s.len > 1)
870 {
871 switch(s.str[1])
872 {
873 case ' ':
874 case ',':
875 case '}':
876 case ']':
877 case '\t':
878 if(s.str[0] == ':')
879 {
880 _c4dbgpf("not a scalar: found non-scalar token '{}{}'", s.str[0], s.str[1]);
881 return false;
882 }
883 else
884 {
885 _c4err("invalid scalar");
886 }
887 break;
888 case '{':
889 case '[':
890 _c4err("invalid token \":{}\"", _c4prc(s.str[1]));
891 break;
892 default:
893 break;
894 }
895 }
896 else
897 {
898 if(s.str[0] == '-')
899 _c4err("invalid scalar");
900 return false;
901 }
902 return true;
903}
904
905template<class EventHandler>
906bool ParseEngine<EventHandler>::_is_valid_start_scalar_plain_flow_check_qmrk(csubstr s)
907{
908 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.len > 0, m_evt_handler->m_curr->pos);
909 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s[0] == '?', m_evt_handler->m_curr->pos);
910 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.count('\n') == 0, m_evt_handler->m_curr->pos);
911 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.count('\r') == 0, m_evt_handler->m_curr->pos);
912 if(s.len > 1)
913 {
914 switch(s.str[1])
915 {
916 case ' ':
917 case '\t':
918 _c4dbgpf("not a scalar: found non-scalar token '?{}'", _c4prc(s.str[1]));
919 return false;
920 case '{':
921 case '}':
922 case '[':
923 case ']':
924 _c4err("invalid token \"?{}\"", _c4prc(s.str[1]));
925 break;
926 default:
927 break;
928 }
929 }
930 else
931 {
932 return false;
933 }
934 return true;
935}
936
937
938template<class EventHandler>
939bool ParseEngine<EventHandler>::_is_valid_start_scalar_plain_flow(csubstr s)
940{
941 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !s.empty(), m_evt_handler->m_curr->pos);
942 // it's not a scalar if it starts with any of these characters:
943 switch(s.str[0])
944 {
945 // these are all legal tokens which mean no scalar is starting:
946 case '[':
947 case ']':
948 case '{':
949 case '}':
950 case '&':
951 case '*':
952 case '!':
953 case '|':
954 case '>':
955 case '#':
956 case ',':
957 _c4dbgpf("not a scalar: found non-scalar token '{}'", _c4prc(s.str[0]));
958 return false;
959 // '-' and ':' are illegal at the beginning if not followed by a scalar character
960 case '-':
961 case ':':
962 _c4dbgpf("suspicious token='{}' len={}", _c4prc(s.str[0]), s.len);
963 return _is_valid_start_scalar_plain_flow_check_block_token(s);
964 case '?':
965 _c4dbgpf("qmrk='{}' len={}", _c4prc(s.str[0]), s.len);
966 return _is_valid_start_scalar_plain_flow_check_qmrk(s);
967 // everything else is a legal starting character
968 default:
969 return true;
970 }
971}
972
973
// Called while scanning a plain scalar, when a '\n' was found at offset
// @p offs of @p s. Peeks at the line following the newline to decide
// whether the scalar continues there.
//  - returns false, consuming nothing, when the next line (after its
//    indentation and any spaces/tabs) starts with ',', ']' or '#', or
//    with a ':' that ends the line or is followed by whitespace: the
//    scalar ends before the newline
//  - raises a parse error when the next line is indented less than the
//    current indentation reference and is not blank
//  - otherwise consumes the rest of the current line, ends it, scans the
//    next line and returns true: the scalar continues
974template<class EventHandler>
975bool ParseEngine<EventHandler>::_scan_scalar_plain_handle_newline(csubstr s, size_t offs)
976{
977 _c4dbgpf("newl[PLAIN]: found '\\n'. offs={} line={} sofar={}", offs, m_evt_handler->m_curr->pos.line, _prs(s.first(offs), true));
// is there anything after the newline?
978 if(s.len > offs + 1)
979 {
980 _c4dbgp("newl[PLAIN]: buffer continues");
981 csubstr next_line = s.sub(offs + 1);
// number of leading spaces; npos when only spaces remain in the buffer
982 size_t next_line_indentation = next_line.first_not_of(' ');
983 if(next_line_indentation != npos)
984 {
985 _c4dbgpf("newl[PLAIN]: line={} indentation={} indref={}", m_evt_handler->m_curr->pos.line + 1, next_line_indentation, m_evt_handler->m_curr->indref);
// cut next_line at its own newline, if any
986 next_line = next_line.first(next_line.first_of("\n\r"));
987 _c4dbgpf("newl[PLAIN]: has indentation. next_line={}", _prs(next_line));
988 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, next_line_indentation <= next_line.len, m_evt_handler->m_curr->pos);
989 if(C4_LIKELY(next_line_indentation >= m_evt_handler->m_curr->indref))
990 {
991 _c4dbgp("newl[PLAIN]: larger indentation");
992 next_line = next_line.sub(next_line_indentation);
993 }
// smaller indentation is only tolerated for blank lines
994 else if(C4_UNLIKELY(next_line.len && next_line.triml(' ').len))
995 {
996 _c4dbgp("newl[PLAIN]: err, smaller indentation");
// move to the offending line before raising, so that the error is
// reported at that position
997 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
998 _line_ended();
999 _scan_line();
1000 if(m_evt_handler->m_curr->line_contents.indentation != npos)
1001 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
1002 _c4err("parse error"); // cannot reduce indentation here
1003 }
1004 _c4dbgpf("newl[PLAIN]: next_line.len={}", next_line.len);
1005 if(next_line.len)
1006 {
// look at the first character that is neither a space nor a tab
1007 size_t fno = next_line.first_not_of(" \t");
1008 if(fno != csubstr::npos)
1009 {
1010 _c4assert(fno < next_line.len);
1011 switch(next_line.str[fno])
1012 {
1013 case ',': case ']': case '#':
1014 _c4dbgpf("newl[PLAIN]: found terminating character beginning next line: '{}'", next_line.str[fno]);
1015 return false;
1016 case ':': // cannot be succeeded by whitespace
1017 _c4dbgp("newl[PLAIN]: found :");
1018 if(fno + 1 == next_line.len || _is_blck_token(next_line.sub(fno)))
1019 {
1020 _c4dbgpf("newl[PLAIN]: found terminating character beginning next line: '{}'", next_line.str[fno]);
1021 return false;
1022 }
1023 break;
1024 }
1025 }
1026 }
1027 }
1028 }
// the scalar continues: move on to the next line
1029 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
1030 _line_ended();
1031 _scan_line();
1032 return true;
1033}
1034
1035template<class EventHandler>
1036bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_flow(ScannedScalar *C4_RESTRICT sc)
1037{
1038 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RMAP), m_evt_handler->m_curr->pos);
1039 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RBLCK), m_evt_handler->m_curr->pos);
1040 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ|RSEQIMAP), m_evt_handler->m_curr->pos);
1041 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RFLOW), m_evt_handler->m_curr->pos);
1042 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RVAL), m_evt_handler->m_curr->pos);
1043
1044 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->line_contents.rem.begins_with(' '), m_evt_handler->m_curr->pos);
1045 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->line_contents.rem.begins_with('\n'), m_evt_handler->m_curr->pos);
1046
1047 if(!m_evt_handler->m_curr->line_contents.rem.len || !_is_valid_start_scalar_plain_flow(m_evt_handler->m_curr->line_contents.rem))
1048 return false;
1049
1050 substr s = _buf().sub(m_evt_handler->m_curr->pos.offset);
1051 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with(m_evt_handler->m_curr->line_contents.rem), m_evt_handler->m_curr->pos);
1052
1053 _c4dbgp("scanning seqflow scalar...");
1054
1055 bool needs_filter = false;
1056 size_t col = 0; // zero-based column
1057 size_t offs = 0; // offset
1058 for( ; offs < s.len; ++offs, ++col)
1059 {
1060 const char c = s.str[offs];
1061 switch(c)
1062 {
1063 case ',':
1064 case ']':
1065 _c4dbgpf("found terminating character at {}: '{}'", offs, c);
1066 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, offs > 0, m_evt_handler->m_curr->pos);
1067 goto ended_scalar;
1068 case '\n':
1069 _c4dbgpf("found '\\n' at col={}", col);
1070 if(!_scan_scalar_plain_handle_newline(s, offs))
1071 goto ended_scalar;
1072 col = (size_t)-1; // so that col is 0 in the next loop iteration
1073 needs_filter = true;
1074 break;
1075 case '\r':
1076 --col; // don't count \r when calling _line_progressed()
1077 needs_filter = true;
1078 break;
1079 case ':':
1080 _c4dbgp("found suspicious ':'");
1081 if(s.len > offs + 1)
1082 {
1083 char next = s.str[offs + 1];
1084 _c4dbgpf("next char is '{}'", _c4prc(next));
1085 if(next == '\r')
1086 {
1087 csubstr after = s.sub(offs + 1).triml('\r');
1088 if(after.len)
1089 {
1090 next = after.str[0];
1091 _c4dbgpf("skip \\r to '{}'", _c4prc(next));
1092 }
1093 }
1094 // no else here.
1095 if(next == ' ' _RYML_WITH_TAB_TOKENS(|| next == '\t') || next == ',' || next == '\n' || next == ']')
1096 {
1097 _c4dbgp("map starting!");
1098 goto ended_scalar;
1099 }
1100 else
1101 {
1102 _c4dbgp("':' nothing to see here");
1103 }
1104 }
1105 else
1106 {
1107 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.len == offs + 1, m_evt_handler->m_curr->pos);
1108 _line_progressed(col);
1109 _c4err("missing termination: '{}'", c); // noreturn
1110 }
1111 break;
1112 case '#':
1113 {
1114 _c4dbgp("found suspicious '#'");
1115 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, offs > 0, m_evt_handler->m_curr->pos);
1116 char prev = s.str[offs - 1];
1117 if(prev == ' ' _RYML_WITH_TAB_TOKENS(|| prev == '\t'))
1118 {
1119 _c4dbgpf("found terminating character at {}: '{}'", offs, c);
1120 goto ended_scalar;
1121 }
1122 }
1123 break;
1124 case '[':
1125 case '{':
1126 case '}':
1127 _line_progressed(col); // advance to report the proper position in the error
1128 _c4err("invalid character: '{}'", c); // noreturn
1129 case '-':
1130 case '.':
1131 _c4dbgpf("doc token character: '{}', offs={}", c, offs);
1132 if(offs == 0 && m_evt_handler->m_curr->at_line_beginning())
1133 {
1134 _c4dbgp("at line beginning");
1135 if(s.len >= 3 && s.str[1] == c && s.str[2] == c)
1136 {
1137 _c4err("parse error"); // no return
1138 }
1139 }
1140 break;
1141 default:
1142 ;
1143 }
1144 }
1145
1146ended_scalar:
1147
1148 _line_progressed(col);
1149 _set_first(s, offs);
1150 sc->scalar = s.trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' '));
1151 sc->needs_filter = needs_filter;
1152
1153 _c4prscalar("scanned plain scalar", sc->scalar, /*keep_newlines*/true);
1154
1155 return true;
1156}
1157
1158template<class EventHandler>
1159bool ParseEngine<EventHandler>::_scan_scalar_plain_map_flow(ScannedScalar *C4_RESTRICT sc)
1160{
1161 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RSEQ) || has_any(RSEQIMAP), m_evt_handler->m_curr->pos);
1162 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RBLCK), m_evt_handler->m_curr->pos);
1163 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RMAP|RSEQIMAP), m_evt_handler->m_curr->pos);
1164 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RFLOW), m_evt_handler->m_curr->pos);
1165 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RVAL|QMRK), m_evt_handler->m_curr->pos);
1166
1167 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->line_contents.rem.begins_with(' '), m_evt_handler->m_curr->pos);
1168 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->line_contents.rem.begins_with('\n'), m_evt_handler->m_curr->pos);
1169
1170 if(!m_evt_handler->m_curr->line_contents.rem.len || !_is_valid_start_scalar_plain_flow(m_evt_handler->m_curr->line_contents.rem))
1171 return false;
1172
1173 substr s = _buf().sub(m_evt_handler->m_curr->pos.offset);
1174 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with(m_evt_handler->m_curr->line_contents.rem), m_evt_handler->m_curr->pos);
1175
1176 _c4dbgp("scanning mapflow scalar...");
1177
1178 bool needs_filter = false;
1179 size_t col = 0; // zero-based column
1180 size_t offs = 0; // offset
1181 for( ; offs < s.len; ++offs, ++col)
1182 {
1183 const char c = s.str[offs];
1184 switch(c)
1185 {
1186 case ',':
1187 case '}':
1188 _c4dbgpf("found terminating character at {}: '{}'", offs, c);
1189 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, offs > 0, m_evt_handler->m_curr->pos);
1190 goto ended_scalar;
1191 case '\n':
1192 _c4dbgpf("found '\\n' at col={}", col);
1193 if(!_scan_scalar_plain_handle_newline(s, offs))
1194 goto ended_scalar;
1195 col = (size_t)-1; // so that col is 0 in the next loop iteration
1196 needs_filter = true;
1197 break;
1198 case '\r':
1199 --col; // don't count \r when calling _line_progressed()
1200 needs_filter = true;
1201 break;
1202 case ':':
1203 _c4dbgpf("found ':'", c);
1204 if(s.len == offs+1)
1205 break;
1206 {
1207 const char next = s.str[offs+1];
1208 _c4dbgpf("next='{}'", c);
1209 if(next == ' ' || next == ',' || next == '}' || next == '\n' || next == '\r' _RYML_WITH_TAB_TOKENS(|| next == '\t'))
1210 {
1211 _c4dbgpf("found terminating character: '{}'", c);
1212 goto ended_scalar;
1213 }
1214 }
1215 break;
1216 case '{':
1217 case '[':
1218 _line_progressed(col);
1219 _c4err("invalid character: '{}'", c); // noreturn
1220 break;
1221 case ']':
1222 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RSEQIMAP), m_evt_handler->m_curr->pos);
1223 goto ended_scalar;
1224 default:
1225 ;
1226 }
1227 }
1228
1229ended_scalar:
1230
1231 _line_progressed(col);
1232 s = s.first(offs);
1233 sc->scalar = s.trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' '));
1234 sc->needs_filter = needs_filter;
1235
1236 _c4prscalar("scanned plain scalar", sc->scalar, /*keep_newlines*/true);
1237
1238 return sc->scalar.len > 0u;
1239}
1240
1241template<class EventHandler>
1242bool ParseEngine<EventHandler>::_scan_scalar_seq_json(ScannedScalar *C4_RESTRICT sc)
1243{
1244 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RMAP), m_evt_handler->m_curr->pos);
1245 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RBLCK), m_evt_handler->m_curr->pos);
1246 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ), m_evt_handler->m_curr->pos);
1247 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RFLOW), m_evt_handler->m_curr->pos);
1248
1249 substr s = m_evt_handler->m_curr->line_contents.rem;
1250 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '), m_evt_handler->m_curr->pos);
1251 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.len > 0, m_evt_handler->m_curr->pos);
1252
1253 _c4dbgp("seq_json: scanning scalar...");
1254
1255 switch(s.str[0])
1256 {
1257 case ']':
1258 case '{':
1259 case ',':
1260 _c4dbgp("seq_json: not a scalar.");
1261 return false;
1262 }
1263
1264 {
1265 const size_t len = _begins_with_special_json_scalar(s);
1266 if(len)
1267 {
1268 char c = s.len > len ? s.str[len] : ',';
1269 if(c == ',' || c == ']' || c == ' ' || c == '\n' || c == '\t' || c == '\r')
1270 {
1271 sc->scalar = s.first(len);
1272 sc->needs_filter = false;
1273 _c4dbgpf("seq_json: special scalar: '{}'", sc->scalar);
1274 _line_progressed(len);
1275 return true;
1276 }
1277 else
1278 {
1279 return false;
1280 }
1281 }
1282 }
1283
1284 // must be a number or special scalar
1285 size_t i = 0;
1286 for( ; i < s.len; ++i)
1287 {
1288 const char c = s.str[i];
1289 switch(c)
1290 {
1291 case ',':
1292 case ']':
1293 case ' ':
1294 case '\t':
1295 _c4dbgpf("seq_json: found terminating character: '{}'", c);
1296 goto ended_scalar;
1297 default:
1298 ;
1299 }
1300 }
1301
1302ended_scalar:
1303
1304 if(C4_LIKELY(i > 0))
1305 {
1306 _line_progressed(i);
1307 sc->scalar = s.first(i);
1308 sc->needs_filter = false;
1309 _c4dbgpf("seq_json: scalar was {}", _prs(sc->scalar, /*escape*/true));
1310 }
1311
1312 return true;
1313}
1314
1315template<class EventHandler>
1316bool ParseEngine<EventHandler>::_scan_scalar_map_json(ScannedScalar *C4_RESTRICT sc)
1317{
1318 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RSEQ), m_evt_handler->m_curr->pos);
1319 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RBLCK), m_evt_handler->m_curr->pos);
1320 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RMAP), m_evt_handler->m_curr->pos);
1321 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RFLOW), m_evt_handler->m_curr->pos);
1322 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RVAL), m_evt_handler->m_curr->pos);
1323
1324 substr s = m_evt_handler->m_curr->line_contents.rem;
1325 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '), m_evt_handler->m_curr->pos);
1326 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.len > 0, m_evt_handler->m_curr->pos);
1327
1328 _c4dbgp("scanning scalar...");
1329
1330 {
1331 const size_t len = _begins_with_special_json_scalar(s);
1332 if(len)
1333 {
1334 char c = s.len > len ? s.str[len] : ',';
1335 _c4dbgpf("begins with special scalar: {} next='{}'", s.first(len), _c4prc(c));
1336 if(c == ',' || c == '}' || c == ' ' || c == '\n' || c == '\t' || c == '\r')
1337 {
1338 sc->scalar = s.first(len);
1339 sc->needs_filter = false;
1340 _c4dbgpf("special json scalar: '{}'", _prs(sc->scalar));
1341 _line_progressed(len);
1342 return true;
1343 }
1344 else
1345 {
1346 return false;
1347 }
1348 }
1349 }
1350
1351 // must be a number
1352 size_t i = 0;
1353 for( ; i < s.len; ++i)
1354 {
1355 const char c = s.str[i];
1356 switch(c)
1357 {
1358 case ',':
1359 case '}':
1360 case ' ':
1361 case '\t':
1362 _c4dbgpf("found terminating character: '{}'", c);
1363 goto ended_scalar;
1364 default:
1365 ;
1366 }
1367 }
1368
1369ended_scalar:
1370
1371 if(C4_LIKELY(i > 0))
1372 {
1373 _line_progressed(i);
1374 sc->scalar = s.first(i);
1375 sc->needs_filter = false;
1376 _c4dbgpf("scalar was {}", _prs(sc->scalar));
1377 return true;
1378 }
1379
1380 return false;
1381}
1382
1383template<class EventHandler>
1384bool ParseEngine<EventHandler>::_is_doc_begin(csubstr s)
1385{
1386 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s[0] == '-', m_evt_handler->m_curr->pos);
1387 return (m_evt_handler->m_curr->line_contents.indentation == 0u && m_evt_handler->m_curr->at_line_beginning() && _is_doc_begin_token(s));
1388}
1389
1390template<class EventHandler>
1391bool ParseEngine<EventHandler>::_is_doc_end(csubstr s)
1392{
1393 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s[0] == '.', m_evt_handler->m_curr->pos);
1394 return (m_evt_handler->m_curr->line_contents.indentation == 0u && m_evt_handler->m_curr->at_line_beginning() && _is_doc_end_token(s));
1395}
1396
1397template<class EventHandler>
1398bool ParseEngine<EventHandler>::_scan_scalar_plain_blck(ScannedScalar *C4_RESTRICT sc, size_t indentation)
1399{
1400 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RFLOW), m_evt_handler->m_curr->pos);
1401 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RSEQIMAP), m_evt_handler->m_curr->pos);
1402 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RBLCK|RUNK|USTY), m_evt_handler->m_curr->pos);
1403
1404 substr s = m_evt_handler->m_curr->line_contents.rem;
1405 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '), m_evt_handler->m_curr->pos);
1406 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.len > 0, m_evt_handler->m_curr->pos);
1407
1408 switch(s.str[0])
1409 {
1410 case '-':
1411 if(_is_blck_token(s))
1412 {
1413 return false;
1414 }
1415 else if(_is_doc_begin(s))
1416 {
1417 _c4dbgp("token is doc start");
1418 return false;
1419 }
1420 break;
1421 case ':':
1422 case '?':
1423 if(_is_blck_token(s))
1424 return false;
1425 break;
1426 case '[':
1427 case '{':
1428 case '&':
1429 case '*':
1430 case '!':
1431 case '\t':
1432 case ',':
1433 case '%':
1434 return false;
1435 case '.':
1436 if(_is_doc_end(s))
1437 {
1438 _c4dbgp("token is doc end");
1439 return false;
1440 }
1441 break;
1442 }
1443
1444 _c4dbgpf("plain scalar! indentation={}", indentation);
1445
1446 const size_t start_offset = m_evt_handler->m_curr->pos.offset;
1447 const size_t start_line = m_evt_handler->m_curr->pos.line;
1448
1449 bool needs_filter = false;
1450 while(true)
1451 {
1452 _c4dbgpf("plain scalar line: {}", _prs(s));
1453 for(size_t i = 0; i < s.len; ++i)
1454 {
1455 const char curr = s.str[i];
1456 //_c4dbgpf("[{}]='{}'", i, _c4prc(curr));
1457 switch(curr)
1458 {
1459 case ':':
1460 _c4dbgpf("[{}]: got suspicious ':'", i);
1461 // are there more characters?
1462 if((i + 1 == s.len) || ((s.str[i+1] == ' ') _RYML_WITH_TAB_TOKENS( || (s.str[i+1] == '\t'))))
1463 {
1464 _c4dbgpf("followed by '{}'", i+1 == s.len ? csubstr("\\n") : _c4prc(s.str[i+1]));
1465 _line_progressed(i);
1466 // ': ' is accepted only on the first line
1467 if(C4_LIKELY(m_evt_handler->m_curr->pos.line == start_line))
1468 {
1469 _c4dbgp("start line. scalar ends here");
1470 goto ended_scalar;
1471 }
1472 else
1473 {
1474 _c4err("multiline scalars cannot be used as keys");
1475 }
1476 }
1477 else
1478 {
1479 size_t j = i;
1480 while(j + 1 < s.len && s.str[j+1] == ':')
1481 {
1482 _c4dbgp("skip colon");
1483 ++j;
1484 }
1485 i = j > i ? j-1 : i;
1486 _c4dbgp("nothing to see here");
1487 }
1488 break;
1489 case '#':
1490 _c4dbgp("got suspicious '#'");
1491 if(!i || (s.str[i-1] == ' ' || s.str[i-1] == '\t'))
1492 {
1493 _c4dbgp("comment! scalar ends here");
1494 _line_progressed(i);
1495 goto ended_scalar;
1496 }
1497 else
1498 {
1499 _c4dbgp("nothing to see here");
1500 }
1501 break;
1502 }
1503 }
1504 _line_progressed(s.len);
1505 csubstr next_peeked = _peek_next_line(m_evt_handler->m_curr->pos.offset);
1506 next_peeked = next_peeked.trimr("\n\r");
1507 const size_t next_indentation = next_peeked.first_not_of(' ');
1508 _c4dbgpf("indentation curr={} next={}", indentation, next_indentation);
1509 if(next_indentation < indentation)
1510 {
1511 _c4dbgp("smaller indentation! scalar ended");
1512 goto ended_scalar;
1513 }
1514 else if(next_indentation == 0 && next_peeked.len > 0)
1515 {
1516 const char first = next_peeked.str[0];
1517 switch(first)
1518 {
1519 case '-':
1520 _c4dbgpf("doc begin? peeked={}", _prs(next_peeked, size_t(3)));
1521 if(_is_doc_begin_token(next_peeked))
1522 {
1523 _c4dbgp("doc begin! scalar ended");
1524 goto ended_scalar;
1525 }
1526 break;
1527 case '.':
1528 _c4dbgpf("doc end? peeked={}", _prs(next_peeked, size_t(3)));
1529 if(_is_doc_end_token(next_peeked))
1530 {
1531 _c4dbgp("doc end! scalar ended");
1532 goto ended_scalar;
1533 }
1534 break;
1535 }
1536 }
1537 // load with next line
1538 _c4dbgp("next line!");
1539 if(!_finished_file())
1540 {
1541 _c4dbgp("next line!");
1542 _line_ended();
1543 _scan_line();
1544 }
1545 else
1546 {
1547 _c4dbgp("file finished!");
1548 goto ended_scalar;
1549 }
1550 s = m_evt_handler->m_curr->line_contents.rem;
1551 needs_filter = true;
1552 }
1553
1554ended_scalar:
1555
1556 sc->scalar = _buf().range(start_offset, m_evt_handler->m_curr->pos.offset).trimr(" \n\r\t");
1557 sc->needs_filter = needs_filter;
1558
1559 _c4dbgpf("scalar was {}", _prs(sc->scalar));
1560
1561 return true;
1562}
1563
1564template<class EventHandler>
1565C4_ALWAYS_INLINE bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_blck(ScannedScalar *C4_RESTRICT sc) // LCOV_EXCL_LINE
1566{
1567 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RMAP), m_evt_handler->m_curr->pos);
1568 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RFLOW), m_evt_handler->m_curr->pos);
1569 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RSEQIMAP), m_evt_handler->m_curr->pos);
1570 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ), m_evt_handler->m_curr->pos);
1571 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RBLCK), m_evt_handler->m_curr->pos);
1572 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RVAL), m_evt_handler->m_curr->pos);
1573 return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref + 1u);
1574}
1575
1576template<class EventHandler>
1577C4_ALWAYS_INLINE bool ParseEngine<EventHandler>::_scan_scalar_plain_map_blck(ScannedScalar *C4_RESTRICT sc) // LCOV_EXCL_LINE
1578{
1579 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RSEQ), m_evt_handler->m_curr->pos);
1580 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RFLOW), m_evt_handler->m_curr->pos);
1581 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RMAP), m_evt_handler->m_curr->pos);
1582 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RBLCK), m_evt_handler->m_curr->pos);
1583 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RVAL|QMRK), m_evt_handler->m_curr->pos);
1584 return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref + 1u);
1585}
1586
1587template<class EventHandler>
1588C4_ALWAYS_INLINE bool ParseEngine<EventHandler>::_scan_scalar_plain_unk(ScannedScalar *C4_RESTRICT sc) // LCOV_EXCL_LINE
1589{
1590 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RUNK|USTY), m_evt_handler->m_curr->pos);
1591 return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref);
1592}
1593
1594
1595//-----------------------------------------------------------------------------
1596
1597template<class EventHandler>
1598substr ParseEngine<EventHandler>::_peek_next_line(size_t pos) const
1599{
1600 substr rem{}; // declare here because of the goto
1601 size_t nlpos{}; // declare here because of the goto
1602 pos = pos == npos ? m_evt_handler->m_curr->pos.offset : pos;
1603 if(pos >= _buf().len)
1604 goto next_is_empty;
1605
1606 // look for the next newline chars, and jump to the right of those
1607 rem = _from_next_line(_buf().sub(pos));
1608 if(rem.empty())
1609 goto next_is_empty;
1610
1611 // now get everything up to and including the following newline chars
1612 nlpos = rem.first_of("\r\n");
1613 if((nlpos != csubstr::npos) && (nlpos + 1 < rem.len))
1614 nlpos += _extend_from_combined_newline(rem[nlpos], rem[nlpos+1]);
1615 rem = rem.left_of(nlpos, /*include_pos*/true);
1616
1617 _c4dbgpf("peek next line @ {}: (len={})'{}'", pos, rem.len, rem.trimr("\r\n"));
1618 return rem;
1619
1620next_is_empty:
1621 _c4dbgpf("peek next line @ {}: (len=0)''", pos);
1622 return rem;
1623}
1624
1625//-----------------------------------------------------------------------------
1626
1627template<class EventHandler>
1628void ParseEngine<EventHandler>::_scan_line()
1629{
1630 if(C4_LIKELY(m_evt_handler->m_curr->pos.offset < _buf().len))
1631 m_evt_handler->m_curr->line_contents.reset_with_next_line(_buf(), m_evt_handler->m_curr->pos.offset);
1632 else
1633 m_evt_handler->m_curr->line_contents.reset_with_next_line(_buf().last(0), 0);
1634}
1635
1636template<class EventHandler>
1637void ParseEngine<EventHandler>::_line_progressed(size_t ahead)
1638{
1639 _c4dbgpf("line[{}] ({} cols) progressed by {}: col {}-->{} offset {}-->{}",
1640 m_evt_handler->m_curr->pos.line,
1641 m_evt_handler->m_curr->line_contents.full.len,
1642 ahead, m_evt_handler->m_curr->pos.col,
1643 m_evt_handler->m_curr->pos.col+ahead,
1644 m_evt_handler->m_curr->pos.offset,
1645 m_evt_handler->m_curr->pos.offset+ahead);
1646 m_evt_handler->m_curr->pos.offset += ahead;
1647 m_evt_handler->m_curr->pos.col += ahead;
1648 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col <= m_evt_handler->m_curr->line_contents.num_cols+1, m_evt_handler->m_curr->pos);
1649 m_evt_handler->m_curr->line_contents.rem = m_evt_handler->m_curr->line_contents.rem.sub(ahead);
1650}
1651
1652template<class EventHandler>
1653void ParseEngine<EventHandler>::_line_ended()
1654{
1655 _c4dbgpf("line[{}] ({} cols) ended! offset {}-->{} / col {}-->{}",
1656 m_evt_handler->m_curr->pos.line,
1657 m_evt_handler->m_curr->line_contents.full.len,
1658 m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset + m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols,
1659 m_evt_handler->m_curr->pos.col, 1);
1660 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col == m_evt_handler->m_curr->line_contents.num_cols + 1, m_evt_handler->m_curr->pos);
1661 m_evt_handler->m_curr->pos.offset += m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols;
1662 ++m_evt_handler->m_curr->pos.line;
1663 m_evt_handler->m_curr->pos.col = 1;
1664}
1665
1666template<class EventHandler>
1667void ParseEngine<EventHandler>::_line_ended_undo()
1668{
1669 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col == 1u, m_evt_handler->m_curr->pos);
1670 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.line > 0u, m_evt_handler->m_curr->pos);
1671 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.offset >= m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols, m_evt_handler->m_curr->pos);
1672 const size_t delta = m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.num_cols;
1673 _c4dbgpf("line[{}] undo ended! line {}-->{}, offset {}-->{}", m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->pos.line - 1, m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset - delta);
1674 m_evt_handler->m_curr->pos.offset -= delta;
1675 --m_evt_handler->m_curr->pos.line;
1676 m_evt_handler->m_curr->pos.col = m_evt_handler->m_curr->line_contents.num_cols + 1u;
1677 // don't forget to undo also the changes to the remainder of the line
1678 //_RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.offset >= _buf().len || _buf()[m_evt_handler->m_curr->pos.offset] == '\n' || _buf()[m_evt_handler->m_curr->pos.offset] == '\r', m_evt_handler->m_curr->pos);
1679 m_evt_handler->m_curr->line_contents.rem = _buf().sub(m_evt_handler->m_curr->pos.offset, 0);
1680}
1681
1682
1683//-----------------------------------------------------------------------------
1684template<class EventHandler>
1685void ParseEngine<EventHandler>::_set_indentation(size_t indentation) noexcept
1686{
1687 m_evt_handler->m_curr->indref = indentation;
1688 _c4dbgpf("state[{}]: saving indentation: {}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
1689}
1690
1691template<class EventHandler>
1692void ParseEngine<EventHandler>::_save_indentation()
1693{
1694 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.is_sub(m_evt_handler->m_curr->line_contents.full), m_evt_handler->m_curr->pos);
1695 m_evt_handler->m_curr->indref = m_evt_handler->m_curr->line_contents.current_col();
1696 _c4dbgpf("state[{}]: saving indentation: {}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
1697}
1698
1699template<class EventHandler>
1700void ParseEngine<EventHandler>::_mark_seqflow_val_end() noexcept
1701{
1702 _c4dbgpf("SEQFLOW. mark val end at line={}", m_evt_handler->m_curr->pos.line);
1703 m_prev_val_end = m_evt_handler->m_curr->pos.line;
1704}
1705
1706
1707//-----------------------------------------------------------------------------
1708
1709template<class EventHandler>
1710void ParseEngine<EventHandler>::_flow_container_was_a_key(size_t orig_indent)
1711{
1712 _c4dbgpf("flow container is followed by colon! orig_indent={}", orig_indent);
1713 m_evt_handler->actually_val_is_first_key_of_new_map_block();
1714 addrem_flags(RMAP|RVAL|RBLCK, RKCL|RSEQ|RUNK);
1715 _set_indentation(orig_indent);
1716 _maybe_skip_whitespace_tokens();
1717}
1718
1719template<class EventHandler>
1720void ParseEngine<EventHandler>::_end_flow_container(size_t orig_indent, bool multiline)
1721{
1722 // this is called AFTER ending the flow container,
1723 // so now we're at the parent container's scope
1724 if(has_all(RMAP|RBLCK) && has_none(RKCL|RVAL|RNXT))
1725 {
1726 _c4dbgp("flow container: end as vanilla block map key!");
1727 if(C4_UNLIKELY(multiline))
1728 _c4err("multiline key is invalid");
1729 if(C4_UNLIKELY(!_maybe_scan_following_colon()))
1730 _c4err("could not find ':' colon after key");
1731 _maybe_skip_whitespace_tokens();
1732 addrem_flags(RVAL, RKEY|RKCL|RNXT);
1733 }
1734 else if(has_none(RFLOW))
1735 {
1736 _c4dbgp("end_flow_container: now not in flow!");
1737 if(has_any(RUNK|RSEQ|RKCL) && _maybe_scan_following_colon())
1738 {
1739 if(C4_UNLIKELY(multiline))
1740 _c4err("multiline key is invalid");
1741 _flow_container_was_a_key(orig_indent);
1742 }
1743 else
1744 {
1745 _c4dbgp("end_flow_container: end map as key!");
1746 }
1747 }
1748 else if(has_any(RSEQ))
1749 {
1750 _c4dbgp("end_flow_container: now in a flow seq");
1751 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RFLOW), m_evt_handler->m_curr->pos);
1752 _mark_seqflow_val_end();
1753 }
1754}
1755
1756template<class EventHandler>
1757void ParseEngine<EventHandler>::_end_map_flow()
1758{
1759 bool multiline = m_evt_handler->m_parent->pos.line < m_evt_handler->m_curr->pos.line;
1760 size_t orig_indent = m_evt_handler->m_curr->indref;
1761 _c4dbgpf("mapflow: end, multiline={}", multiline);
1762 m_evt_handler->end_map_flow(multiline && m_options.detect_flow_ml(), m_options.flow_ml_style().type);
1763 _end_flow_container(orig_indent, multiline);
1764}
1765
1766template<class EventHandler>
1767void ParseEngine<EventHandler>::_end_seq_flow()
1768{
1769 bool multiline = m_evt_handler->m_parent->pos.line < m_evt_handler->m_curr->pos.line;
1770 size_t orig_indent = m_evt_handler->m_curr->indref;
1771 _c4dbgpf("seqflow: end, multiline={}", multiline);
1772 m_evt_handler->end_seq_flow(multiline && m_options.detect_flow_ml(), m_options.flow_ml_style().type);
1773 _end_flow_container(orig_indent, multiline);
1774}
1775
1776template<class EventHandler>
1777void ParseEngine<EventHandler>::_end_map_blck()
1778{
1779 _c4dbgp("mapblck: end");
1780 if(has_any(RKCL|RVAL))
1781 {
1782 _c4dbgp("mapblck: set missing val");
1783 _handle_annotations_before_blck_val_scalar();
1784 m_evt_handler->set_val_scalar_plain_empty();
1785 }
1786 else if(has_any(QMRK))
1787 {
1788 _c4dbgp("mapblck: set missing keyval");
1789 _handle_annotations_before_blck_key_scalar();
1790 m_evt_handler->set_key_scalar_plain_empty();
1791 _handle_annotations_before_blck_val_scalar();
1792 m_evt_handler->set_val_scalar_plain_empty();
1793 }
1794 m_evt_handler->end_map_block();
1795}
1796
1797template<class EventHandler>
1798void ParseEngine<EventHandler>::_end_seq_blck()
1799{
1800 if(has_any(RVAL))
1801 {
1802 _c4dbgp("seqblck: set missing val");
1803 _handle_annotations_before_blck_val_scalar();
1804 m_evt_handler->set_val_scalar_plain_empty();
1805 }
1806 m_evt_handler->end_seq_block();
1807}
1808
1809template<class EventHandler>
1810void ParseEngine<EventHandler>::_end2_map()
1811{
1812 _c4dbgp("map: end");
1813 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RMAP), m_evt_handler->m_curr->pos);
1814 if(has_any(RBLCK))
1815 {
1816 _end_map_blck();
1817 }
1818 else
1819 {
1820 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RFLOW), m_evt_handler->m_curr->pos);
1821 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(USTY), m_evt_handler->m_curr->pos);
1822 m_evt_handler->_pop();
1823 }
1824}
1825
1826template<class EventHandler>
1827void ParseEngine<EventHandler>::_end2_seq()
1828{
1829 _c4dbgp("seq: end");
1830 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ), m_evt_handler->m_curr->pos);
1831 if(has_any(RBLCK))
1832 {
1833 _end_seq_blck();
1834 }
1835 else
1836 {
1837 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RFLOW), m_evt_handler->m_curr->pos);
1838 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(USTY), m_evt_handler->m_curr->pos);
1839 m_evt_handler->_pop();
1840 }
1841}
1842
1843template<class EventHandler>
1844void ParseEngine<EventHandler>::_begin2_doc()
1845{
1846 _c4dbgp("begin_doc");
1847 m_has_directives_yaml = false;
1848 m_has_directives = false;
1849 m_doc_empty = true;
1850 add_flags(RDOC);
1851 m_evt_handler->begin_doc();
1852 m_evt_handler->m_curr->indref = 0; // ?
1853}
1854
1855template<class EventHandler>
1856void ParseEngine<EventHandler>::_begin2_doc_expl()
1857{
1858 _c4dbgp("begin_doc_expl");
1859 m_has_directives_yaml = false;
1860 m_has_directives = false;
1861 m_doc_empty = true;
1862 add_flags(RDOC);
1863 m_evt_handler->begin_doc_expl();
1864 m_evt_handler->m_curr->indref = 0; // ?
1865}
1866
1867template<class EventHandler>
1868void ParseEngine<EventHandler>::_end2_doc()
1869{
1870 _c4dbgp("doc: end");
1871 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RDOC), m_evt_handler->m_curr->pos);
1872 if(m_doc_empty || (m_pending_tags.num_entries || m_pending_anchors.num_entries))
1873 {
1874 _c4dbgp("doc was empty; add empty val");
1875 _handle_annotations_before_blck_val_scalar();
1876 m_evt_handler->set_val_scalar_plain_empty();
1877 }
1878 m_evt_handler->end_doc();
1879 m_bom_len = 0;
1880}
1881
1882template<class EventHandler>
1883void ParseEngine<EventHandler>::_end2_doc_expl()
1884{
1885 _c4dbgp("doc: end");
1886 if(m_doc_empty || (m_pending_tags.num_entries || m_pending_anchors.num_entries))
1887 {
1888 _c4dbgp("doc: no children; add empty val");
1889 _handle_annotations_before_blck_val_scalar();
1890 m_evt_handler->set_val_scalar_plain_empty();
1891 }
1892 m_evt_handler->end_doc_expl();
1893 m_bom_len = 0;
1894}
1895
1896template<class EventHandler>
1897void ParseEngine<EventHandler>::_maybe_begin_doc()
1898{
1899 if(has_none(RDOC))
1900 {
1901 _c4dbgp("doc must be started");
1902 _begin2_doc();
1903 }
1904}
1905template<class EventHandler>
1906void ParseEngine<EventHandler>::_maybe_end_doc()
1907{
1908 if(has_any(RDOC))
1909 {
1910 _c4dbgp("doc must be finished");
1911 _end2_doc();
1912 }
1913 else if(m_doc_empty && (m_pending_tags.num_entries || m_pending_anchors.num_entries))
1914 {
1915 _c4dbgp("no doc to finish, but pending annotations");
1916 m_evt_handler->begin_doc();
1917 _handle_annotations_before_blck_val_scalar();
1918 m_evt_handler->set_val_scalar_plain_empty();
1919 m_evt_handler->end_doc();
1920 }
1921}
1922
1923template<class EventHandler>
1924void ParseEngine<EventHandler>::_end_doc_suddenly__pop()
1925{
1926 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1, m_evt_handler->m_curr->pos);
1927 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack[0].flags & RDOC, m_evt_handler->m_curr->pos);
1928 _c4dbgp("root is RDOC");
1929 if(m_evt_handler->m_curr->level != 0)
1930 _handle_indentation_pop(&m_evt_handler->m_stack[0]);
1931 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RDOC), m_evt_handler->m_curr->pos);
1932}
1933
1934/** Check whether the current parse tokens are trailing on the
1935 * previous doc, and raise an error if they are */
1936template<class EventHandler>
1937void ParseEngine<EventHandler>::_check_trailing_doc_token()
1938{
1939 const bool is_root = (m_evt_handler->m_stack.size() == 1u);
1940 const bool isndoc = (m_evt_handler->m_curr->flags & NDOC) != 0;
1941 const bool suspicious = m_evt_handler->template _has_any__<MAP|SEQ|VAL>();
1942 _c4dbgpf("target={} isroot={} suspicious={} ndoc={}", m_evt_handler->m_curr->node_id, is_root, suspicious, isndoc);
1943 if((is_root || m_evt_handler->template _has_any__<DOC>()) && suspicious && !isndoc)
1944 _c4err("parse error");
1945}
1946
1947template<class EventHandler>
1948void ParseEngine<EventHandler>::_end_doc_suddenly()
1949{
1950 _c4dbgp("end doc suddenly");
1951 _end_doc_suddenly__pop();
1952 _end2_doc_expl();
1953 addrem_flags(RUNK|RTOP|NDOC, RMAP|RSEQ|RDOC);
1954}
1955
1956template<class EventHandler>
1957void ParseEngine<EventHandler>::_check_doc_end_tokens() const
1958{
1959 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
1960 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !rem.begins_with_any(". \t"), m_evt_handler->m_curr->pos);
1961 if(C4_UNLIKELY(rem.len && !rem.begins_with('#')))
1962 {
1963 _c4err("parse error");
1964 }
1965}
1966
1967template<class EventHandler>
1968void ParseEngine<EventHandler>::_start_doc_suddenly()
1969{
1970 _c4dbgp("start doc suddenly");
1971 _end_doc_suddenly__pop();
1972 _end2_doc();
1973 _begin2_doc_expl();
1974}
1975
1976template<class EventHandler>
1977void ParseEngine<EventHandler>::_end_stream()
1978{
1979 _c4dbgpf("end_stream, level={} node_id={}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->node_id);
1980 if(C4_UNLIKELY(has_all(RSEQ|RFLOW)))
1981 _c4err("missing terminating ]");
1982 else if(C4_UNLIKELY(has_all(RMAP|RFLOW)))
1983 _c4err("missing terminating }");
1984 if(m_evt_handler->m_stack.size() > 1)
1985 _handle_indentation_pop(m_evt_handler->m_stack.begin());
1986 if(has_all(RDOC))
1987 {
1988 _end2_doc();
1989 }
1990 else if(has_all(RTOP|RUNK))
1991 {
1992 if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
1993 {
1994 if(m_doc_empty)
1995 {
1996 m_evt_handler->begin_doc();
1997 _handle_annotations_before_blck_val_scalar();
1998 m_evt_handler->set_val_scalar_plain_empty();
1999 m_evt_handler->end_doc();
2000 }
2001 }
2002 }
2003 m_evt_handler->end_stream();
2004 if(C4_UNLIKELY(m_has_directives))
2005 _c4err("directives cannot be used without a document");
2006}
2007
2008
2009template<class EventHandler>
2010void ParseEngine<EventHandler>::_handle_indentation_pop(ParserState const* popto)
2011{
2012 _c4dbgpf("popping {} level{}: from level {}(@ind={}) to level {}(@ind={})", m_evt_handler->m_curr->level - popto->level, (((m_evt_handler->m_curr->level - popto->level) > 1) ? "s" : ""), m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, popto->level, popto->indref);
2013 while(m_evt_handler->m_curr != popto)
2014 {
2015 if(has_any(RSEQ))
2016 {
2017 _c4dbgpf("popping seq at level {} (indentation={},addr={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, m_evt_handler->m_curr);
2018 _end2_seq();
2019 }
2020 else if(has_any(RMAP))
2021 {
2022 _c4dbgpf("popping map at level {} (indentation={},addr={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, m_evt_handler->m_curr);
2023 _end2_map();
2024 }
2025 else
2026 {
2027 break;
2028 }
2029 }
2030 _c4dbgpf("current level is {} (indentation={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
2031}
2032
2033template<class EventHandler>
2034void ParseEngine<EventHandler>::_handle_indentation_pop_from_block_seq()
2035{
2036 // search the stack frame to jump to based on its indentation
2037 using state_type = typename EventHandler::state;
2038 state_type const* popto = nullptr;
2039 auto &stack = m_evt_handler->m_stack;
2040 _RYML_ASSERT_PARSE_(stack.m_callbacks, stack.is_contiguous(), m_evt_handler->m_curr->pos); // this search relies on the stack being contiguous
2041 _RYML_ASSERT_PARSE_(stack.m_callbacks, m_evt_handler->m_curr >= stack.begin() && m_evt_handler->m_curr < stack.end(), m_evt_handler->m_curr->pos);
2042 const size_t ind = m_evt_handler->m_curr->line_contents.indentation;
2043 #ifdef RYML_DBG
2044 _print_state_stack();
2045 #endif
2046 for(state_type const* s = m_evt_handler->m_curr-1; s >= stack.begin(); --s)
2047 {
2048 _c4dbgpf("searching for state with indentation {}. curr={} (level={},node={})", ind, s->indref, s->level, s->node_id);
2049 if(s->indref == ind)
2050 {
2051 _c4dbgpf("gotit!!! level={} node={}", s->level, s->node_id);
2052 popto = s;
2053 break;
2054 }
2055 }
2056 if(!popto || popto >= m_evt_handler->m_curr || popto->level >= m_evt_handler->m_curr->level)
2057 {
2058 _c4err("parse error: incorrect indentation?");
2059 }
2060 _handle_indentation_pop(popto);
2061}
2062
2063template<class EventHandler>
2064void ParseEngine<EventHandler>::_handle_indentation_pop_from_block_map()
2065{
2066 // search the stack frame to jump to based on its indentation
2067 using state_type = typename EventHandler::state;
2068 auto &stack = m_evt_handler->m_stack;
2069 _RYML_ASSERT_PARSE_(stack.m_callbacks, stack.is_contiguous(), m_evt_handler->m_curr->pos); // this search relies on the stack being contiguous
2070 _RYML_ASSERT_PARSE_(stack.m_callbacks, m_evt_handler->m_curr >= stack.begin() && m_evt_handler->m_curr < stack.end(), m_evt_handler->m_curr->pos);
2071 const size_t ind = m_evt_handler->m_curr->line_contents.indentation;
2072 state_type const* popto = nullptr;
2073 #ifdef RYML_DBG
2074 char flagbuf_[128];
2075 _print_state_stack(flagbuf_);
2076 #endif
2077 for(state_type const* s = m_evt_handler->m_curr-1; s > stack.begin(); --s) // never go to the stack bottom. that's the root
2078 {
2079 _c4dbgpf("searching for state with indentation {}. current: ind={},level={},node={},flags={}", ind, s->indref, s->level, s->node_id, detail::_parser_flags_to_str(flagbuf_, s->flags));
2080 if(s->indref < ind)
2081 {
2082 break;
2083 }
2084 else if(s->indref == ind)
2085 {
2086 _c4dbgpf("same indentation!!! level={} node={}", s->level, s->node_id);
2087 if(popto && has_any(RTOP, s) && has_none(RMAP|RSEQ, s))
2088 {
2089 break;
2090 }
2091 popto = s;
2092 if(has_all(RSEQ|RBLCK, s))
2093 {
2094 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
2095 const size_t first = rem.first_not_of(' ');
2096 _RYML_ASSERT_PARSE_(stack.m_callbacks, first == ind || first == npos, m_evt_handler->m_curr->pos);
2097 rem = rem.right_of(first, true);
2098 _c4dbgpf("indentless? rem='{}' first={}", rem, first);
2099 if(rem.begins_with('-') && _is_blck_token(rem))
2100 {
2101 _c4dbgp("parent was indentless seq");
2102 break;
2103 }
2104 }
2105 }
2106 }
2107 if(!popto || popto >= m_evt_handler->m_curr || popto->level >= m_evt_handler->m_curr->level)
2108 {
2109 _c4err("parse error: incorrect indentation?");
2110 }
2111 _handle_indentation_pop(popto);
2112}
2113
2114
2115//-----------------------------------------------------------------------------
2116template<class EventHandler>
2117void ParseEngine<EventHandler>::_check_valid_newline_in_quoted_scalar()
2118{
2119 if(C4_UNLIKELY(has_all(RMAP|RBLCK|RKEY)))
2120 {
2121 _c4err("multiline quoted keys are invalid");
2122 }
2123 else // check contextual indentation
2124 {
2125 const size_t minindent = m_evt_handler->m_curr->indref + ((has_any(RMAP|RSEQ) && has_any(RBLCK)));
2126 _c4dbgpf("indent={} vs minindent={} indref={}", m_evt_handler->m_curr->line_contents.indentation, minindent, m_evt_handler->m_curr->indref);
2127 if(m_evt_handler->m_curr->line_contents.indentation < minindent)
2128 {
2129 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks,
2130 m_evt_handler->m_curr->line_contents.indentation == m_evt_handler->m_curr->line_contents.rem.first_not_of(' '),
2131 m_evt_handler->m_curr->pos);
2132 csubstr trimmed = m_evt_handler->m_curr->line_contents.rem.sub(m_evt_handler->m_curr->line_contents.indentation);
2133 _c4dbgpf("trimmed.len={} line={}", trimmed.len, _prs(m_evt_handler->m_curr->line_contents.rem, true));
2134 if(C4_UNLIKELY(!!trimmed.len))
2135 {
2136 _c4err("bad indentation");
2137 }
2138 }
2139 }
2140}
2141
2142
2143//-----------------------------------------------------------------------------
2144template<class EventHandler>
2145typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_scan_scalar_squot()
2146{
2147 // quoted scalars can spread over multiple lines!
2148 // nice explanation here: http://yaml-multiline.info/
2149
2150 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, _buf().sub(m_evt_handler->m_curr->pos.offset).begins_with('\''), m_evt_handler->m_curr->pos);
2151
2152 // a span to the end of the file, skipping the opening quote
2153 substr s = _buf().sub(m_evt_handler->m_curr->pos.offset + 1);
2154 _line_progressed(1); // advance over the opening quote
2155 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->at_line_beginning(), m_evt_handler->m_curr->pos);
2156
2157 bool needs_filter = false;
2158 size_t pos = npos; // find the pos of the matching quote
2159 while( ! _finished_file())
2160 {
2161 const csubstr line = m_evt_handler->m_curr->line_contents.rem;
2162 _c4dbgpf("scanning single quoted scalar @ line[{}]: {}", m_evt_handler->m_curr->pos.line, _prs(line));
2163 if(C4_UNLIKELY(m_evt_handler->m_curr->at_line_beginning() && _is_doc_token(line)))
2164 _c4err("token can not appear at line begin");
2165 for(size_t i = 0; i < line.len; ++i)
2166 {
2167 const char curr = line.str[i];
2168 if(curr == '\'') // single quotes are escaped with two single quotes
2169 {
2170 const char next = i+1 < line.len ? line.str[i+1] : '~';
2171 if(next != '\'') // so just look for the first quote
2172 { // without another after it
2173 _line_progressed(i + 1); // progress beyond the quote
2174 pos = i + (size_t)(line.str - s.str); // set pos to before the quote
2175 goto found_close;
2176 }
2177 else
2178 {
2179 needs_filter = true; // needs filter to remove escaped quotes
2180 ++i; // skip the escaped quote
2181 }
2182 }
2183 }
2184
2185 needs_filter = true;
2186 _line_progressed(line.len);
2187 _line_ended();
2188 _scan_line();
2189 _check_valid_newline_in_quoted_scalar();
2190 }
2191
2192 _c4err("reached end of file while looking for closing quote");
2193
2194found_close:
2195
2196 _c4dbgpf("found closing quote at: {}", pos);
2197 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, pos != npos, m_evt_handler->m_curr->pos);
2198 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, pos >= 0, m_evt_handler->m_curr->pos);
2199 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.end() >= _buf().begin() && s.end() <= _buf().end(), m_evt_handler->m_curr->pos);
2200 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.end() == _buf().end() || *s.end() == '\'', m_evt_handler->m_curr->pos);
2201 _set_first_strict(s, pos);
2202
2203 _c4prscalar("scanned squoted scalar", s, /*keep_newlines*/true);
2204
2205 return ScannedScalar { s, needs_filter };
2206}
2207
2208
2209//-----------------------------------------------------------------------------
2210template<class EventHandler>
2211typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_scan_scalar_dquot()
2212{
2213 // quoted scalars can spread over multiple lines!
2214 // nice explanation here: http://yaml-multiline.info/
2215
2216 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, _buf().sub(m_evt_handler->m_curr->pos.offset).begins_with('"'), m_evt_handler->m_curr->pos);
2217
2218 // a span to the end of the file, skipping the opening quote
2219 substr s = _buf().sub(m_evt_handler->m_curr->pos.offset + 1);
2220 _line_progressed(1); // advance over the opening quote
2221 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, !m_evt_handler->m_curr->at_line_beginning(), m_evt_handler->m_curr->pos);
2222
2223 bool needs_filter = false;
2224 size_t pos = npos; // find the pos of the matching quote
2225 while( ! _finished_file())
2226 {
2227 #if defined(__GNUC__) && (/*__GNUC__ == 12 || */__GNUC__ == 13)
2228 C4_DONT_OPTIMIZE(m_evt_handler->m_curr->line_contents.rem); // prevent hoisting
2229 #endif
2230 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
2231 _c4dbgpf("scanning double quoted scalar @ line[{}]: line='{}'", m_evt_handler->m_curr->pos.line, rem);
2232 if(C4_UNLIKELY(m_evt_handler->m_curr->at_line_beginning() && _is_doc_token(rem)))
2233 _c4err("token can not appear at line begin");
2234 for(size_t i = 0; i < rem.len; ++i)
2235 {
2236 const char curr = rem.str[i];
2237 // every \ is an escape
2238 if(curr == '\\')
2239 {
2240 const char next = i+1 < rem.len ? rem.str[i+1] : '~';
2241 needs_filter = true;
2242 if(next == '"' || next == '\\')
2243 ++i;
2244 }
2245 else if(curr == '"')
2246 {
2247 _line_progressed(i + 1); // progress beyond the quote
2248 pos = i + (size_t)(rem.str - s.str); // set pos to before the quote
2249 goto found_close;
2250 }
2251 }
2252
2253 // leading whitespace also needs filtering
2254 needs_filter = true;
2255 _line_progressed(rem.len);
2256 _line_ended();
2257 _scan_line();
2258 _check_valid_newline_in_quoted_scalar();
2259 }
2260
2261 _c4err("reached end of file while looking for closing quote");
2262
2263found_close:
2264
2265 _c4dbgpf("found closing quote at: {}", pos);
2266 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, pos != npos, m_evt_handler->m_curr->pos);
2267 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, pos >= 0, m_evt_handler->m_curr->pos);
2268 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.end() >= _buf().begin() && s.end() <= _buf().end(), m_evt_handler->m_curr->pos);
2269 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.end() == _buf().end() || *s.end() == '"', m_evt_handler->m_curr->pos);
2270 _set_first_strict(s, pos);
2271
2272 _c4prscalar("scanned dquoted scalar", s, /*keep_newlines*/true);
2273
2274 return ScannedScalar{s, needs_filter};
2275}
2276
2277
2278//-----------------------------------------------------------------------------
/** Scan a block scalar (introduced by '|' or '>') starting at the
 * remainder of the current line.
 *
 * First the header is parsed: an optional chomping indicator ('-'
 * selects CHOMP_STRIP, '+' selects CHOMP_KEEP, otherwise CHOMP_CLIP)
 * and an optional single-digit indentation indicator, which is added
 * to the indentation reference of the current state. Anything else
 * in the header must be whitespace followed by a comment.
 *
 * Then the following lines are accumulated, as full lines, into a raw
 * span of the source buffer until a termination condition is met. When
 * no indentation indicator was given, the indentation is taken from
 * the first non-empty line, or else from a provisional value derived
 * from the leading empty lines.
 *
 * @param sb receives the raw (unfiltered) block, its indentation and
 *        its chomping mode
 * @param indref the reference indentation of the enclosing context;
 *        asserted not to be npos */
2279template<class EventHandler>
2280void ParseEngine<EventHandler>::_scan_block(ScannedBlock *C4_RESTRICT sb, size_t indref)
2281{
2282 _c4dbgpf("blck: indref={}", indref);
2283 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, indref != npos, m_evt_handler->m_curr->pos);
2284
2285 // nice explanation here: http://yaml-multiline.info/
2286 csubstr s = m_evt_handler->m_curr->line_contents.rem;
2287 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with('|') || s.begins_with('>'), m_evt_handler->m_curr->pos);
2288
2289 _c4dbgpf("blck: specs={}", _prs(s));
2290
2291 // parse the spec
2292 BlockChomp_e chomp = CHOMP_CLIP; // default to clip unless + or - are used
2293 size_t indentation = npos; // have to find out if no spec is given
2294 if(s.len > 1)
2295 {
2296 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.begins_with_any("|>"), m_evt_handler->m_curr->pos);
 // t is everything after the '|' or '>' style character
2297 csubstr t = s.sub(1);
2298 _c4dbgpf("blck: spec is multichar: {}", _prs(t));
2299 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, t.len >= 1, m_evt_handler->m_curr->pos);
2300 size_t pos = t.first_of("-+");
2301 _c4dbgpf("blck: spec chomp char: pos={}", pos);
2302 if(pos != npos)
2303 {
2304 _c4dbgpf("blck: spec chomp char: {}", _c4prc(t[pos]));
2305 if(t[pos] == '-')
2306 {
2307 _c4dbgp("blck: chomp=STRIP");
2308 chomp = CHOMP_STRIP;
2309 }
2310 else if(t[pos] == '+')
2311 {
2312 _c4dbgp("blck: chomp=KEEP");
2313 chomp = CHOMP_KEEP;
2314 }
 // keep only the side of the chomp character where the digits
 // are looked for: after it when it comes first, before it otherwise
2315 if(pos == 0)
2316 t = t.sub(1);
2317 else
2318 t = t.first(pos);
2319 _c4dbgpf("blck: spec is now: {}", _prs(t));
2320 }
2321 // from here to the end, only digits are considered
2322 pos = t.first_not_of("0123456789");
2323 csubstr rest = t.first(pos);
2324 if( ! rest.empty())
2325 {
2326 _c4dbgpf("blck: parse indentation digits: {}", _prs(rest));
 // the indentation indicator must be exactly one nonzero digit
2327 if(C4_UNLIKELY(rest.len > 1))
2328 _c4err("parse error: invalid indentation");
2329 if(C4_UNLIKELY( ! c4::atou(rest, &indentation)))
2330 _c4err("parse error: could not read indentation as decimal"); // LCOV_EXCL_LINE
2331 if(C4_UNLIKELY( ! indentation))
2332 _c4err("parse error: null indentation");
2333 _c4dbgpf("blck: indentation specified: {}. add {} from curr state -> {}", indentation, m_evt_handler->m_curr->indref, indentation+indref);
 // NOTE(review): the value added here is m_curr->indref, while the
 // debug message above computes its sum with the indref parameter --
 // confirm that the two are always equal at this point
2334 indentation += m_evt_handler->m_curr->indref;
2335 }
2336 else
2337 {
 // no digits: only whitespace followed by a comment may remain
2338 rest = t.triml(" \t");
2339 _c4dbgpf("blck: digits empty. t={} trimmed={} iscomm={} t.iscomm={}", _prs(t), _prs(rest), rest.begins_with('#'), t.begins_with('#'));
2340 if(C4_UNLIKELY(rest.len && (rest.str[0] != '#' || t.str[0] == '#')))
2341 _c4err("parse error: invalid token");
2342 }
2343 }
2344
2345 _c4dbgpf("blck: style={} chomp={} indentation={}", s.begins_with('>') ? "fold" : "literal", chomp==CHOMP_CLIP ? "clip" : (chomp==CHOMP_STRIP ? "strip" : "keep"), indentation);
2346
2347 // finish the current line
2348 _line_progressed(s.len);
2349 _line_ended();
2350 _scan_line();
2351
2352 // start with a zero-length block, already pointing at the right place
2353 substr raw_block(_buf().data() + m_evt_handler->m_curr->pos.offset, size_t(0));
2354 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, raw_block.begin() == m_evt_handler->m_curr->line_contents.full.str, m_evt_handler->m_curr->pos);
2355
2356 // read every full line into a raw block,
2357 // from which newlines are to be stripped as needed.
2358 //
2359 // If no explicit indentation was given, pick it from the first
2360 // non-empty line. See
2361 // https://yaml.org/spec/1.2.2/#8111-block-indentation-indicator
2362 size_t num_lines = 0;
2363 size_t first = m_evt_handler->m_curr->pos.line;
2364 size_t provisional_indentation = npos;
2365 LineContents lc;
2366 while(( ! _finished_file()))
2367 {
2368 // peek next line, but do not advance immediately
2369 lc.reset_with_next_line(_buf(), m_evt_handler->m_curr->pos.offset);
2370 #if defined(__GNUC__) && (__GNUC__ == 12 || __GNUC__ == 13)
2371 C4_DONT_OPTIMIZE(lc.rem);
2372 #endif
2373 _c4dbgpf("blck: peeking at {}", _prs(lc.rem.trimr("\r\n"), true));
2374 // evaluate termination conditions
2375 if(indentation != npos)
2376 {
 // the block indentation is already known
2377 _c4dbgpf("blck: indentation={}", indentation);
2378 // stop when the line is deindented and not empty
2379 if(lc.indentation < indentation && ( ! lc.rem.trim(" \t").empty()))
2380 {
2381 if(raw_block.len)
2382 {
2383 _c4dbgpf("blck: indentation decreased ref={} thisline={}", indentation, lc.indentation);
2384 }
2385 else
2386 {
2387 _c4err("indentation decreased without any scalar");
2388 }
2389 break;
2390 }
2391 else if(indentation == 0)
2392 {
 // at zero indentation, a document token also ends the block
2393 _c4dbgpf("blck: noindent. lc.rem={}", _prs(lc.rem));
2394 if(_is_doc_token(lc.rem))
2395 {
2396 _c4dbgp("blck: stop. indentation=0 and doc ended");
2397 break;
2398 }
2399 }
2400 }
2401 else
2402 {
 // the block indentation is still to be determined
2403 const size_t fns = lc.rem.first_not_of(' ');
2404 _c4dbgpf("blck: indentation ref not set. firstnonws={}", fns);
2405 if(fns != npos) // non-empty line
2406 {
2407 _c4dbgpf("blck: line not empty. indref={} indprov={} indentation={}", indref, provisional_indentation, lc.indentation);
2408 if(C4_UNLIKELY(lc.full.begins_with('\t')))
2409 _c4err("parse error");
2410 if(provisional_indentation == npos)
2411 {
 // no empty line was seen before this first non-empty line
2412 if(lc.indentation < indref)
2413 {
2414 _c4dbgpf("blck: block terminated indentation={} < indref={}", lc.indentation, indref);
2415 if(raw_block.len == 0)
2416 {
2417 _c4dbgp("blck: was empty, undo next line");
2418 _line_ended_undo();
2419 }
2420 break;
2421 }
2422 else if(lc.indentation == m_evt_handler->m_curr->indref)
2423 {
2424 if(has_any(RSEQ|RMAP))
2425 {
2426 _c4dbgpf("blck: block terminated. reading container and indentation={}==indref={}", lc.indentation, m_evt_handler->m_curr->indref);
2427 break;
2428 }
2429 }
2430 _c4dbgpf("blck: set indentation ref from this line: ref={}", lc.indentation);
2431 indentation = lc.indentation;
2432 }
2433 else
2434 {
 // empty lines came first: compare against the provisional value
2435 if(lc.indentation >= provisional_indentation)
2436 {
2437 _c4dbgpf("blck: set indentation ref from provisional indentation: provisional_ref={}, thisline={}", provisional_indentation, lc.indentation);
2438 //indentation = provisional_indentation ? provisional_indentation : lc.indentation;
2439 indentation = lc.indentation;
2440 }
2441 else
2442 {
2443 if(lc.indentation >= indref)
2444 _c4err("parse error: first non-empty block line should have at least the original indentation");
2445 _c4dbgp("blck: finished");
2446 break;
2447 }
2448 }
2449 }
2450 else // empty line
2451 {
2452 _c4dbgpf("blck: line empty or {} spaces. line_indentation={} prov_indentation={}", lc.rem.len, lc.indentation, provisional_indentation);
2453 if(provisional_indentation != npos)
2454 {
 // the provisional indentation only ever grows
2455 if(lc.rem.len >= provisional_indentation)
2456 {
2457 _c4dbgpf("blck: increase provisional_ref {} -> {}", provisional_indentation, lc.rem.len);
2458 provisional_indentation = lc.rem.len;
2459 }
2460 }
2461 else
2462 {
 // first empty line: initialize the provisional indentation,
 // which is then raised to at least indref
2463 provisional_indentation = lc.indentation ? lc.indentation : has_any(RSEQ|RVAL);
2464 _c4dbgpf("blck: initialize provisional_ref={}", provisional_indentation);
2465 if(provisional_indentation == npos)
2466 {
2467 provisional_indentation = lc.rem.len ? lc.rem.len : has_any(RSEQ|RVAL);
2468 _c4dbgpf("blck: initialize provisional_ref={}", provisional_indentation);
2469 }
2470 if(provisional_indentation < indref)
2471 {
2472 provisional_indentation = indref;
2473 _c4dbgpf("blck: initialize provisional_ref={}", provisional_indentation);
2474 }
2475 }
2476 }
2477 }
2478 // advance now that we know the folded scalar continues
2479 m_evt_handler->m_curr->line_contents = lc;
2480 _c4dbgpf("blck: append '{}'", m_evt_handler->m_curr->line_contents.rem);
2481 raw_block.len += m_evt_handler->m_curr->line_contents.full.len;
2482 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
2483 _line_ended();
2484 ++num_lines;
2485 }
2486 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.line == (first + num_lines) || (raw_block.len == 0), m_evt_handler->m_curr->pos);
2487 C4_UNUSED(num_lines);
2488 C4_UNUSED(first);
2489
 // no non-empty line fixed the indentation: use the provisional value
2490 if(indentation == npos)
2491 {
2492 _c4dbgpf("blck: set indentation from provisional: {}", provisional_indentation);
2493 indentation = provisional_indentation;
2494 }
2495
 // undo the last line ending when at least one line was consumed
2496 if(num_lines)
2497 _line_ended_undo();
2498
2499 _c4prscalar("scanned block", raw_block, /*keep_newlines*/true);
2500
 // hand the results over to the caller
2501 sb->scalar = raw_block;
2502 sb->indentation = indentation;
2503 sb->chomp = chomp;
2504}
2505
2506
2507//-----------------------------------------------------------------------------
2508//-----------------------------------------------------------------------------
2509//-----------------------------------------------------------------------------
2510/** @cond dev */
2511
2512// a debugging scaffold:
2513#if 0
2514#define _c4dbgfws(fmt, ...) _c4dbgpf("filt_ws[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2515#else
2516#define _c4dbgfws(...)
2517#endif
2518
// NOTE(review): the signature line of this member template (listing
// line 2521) is missing from this listing. Presumably this is
// _filter_ws_handle_to_first_non_space(), which the trailing-whitespace
// helpers call with `proc` and test as a boolean -- confirm against
// the original source.
//
// With the processor positioned on a space or tab, look for the first
// following character that is not whitespace. If there is one, either
// skip the whitespace run (when it is followed by a line ending) or
// copy from the current position, and return true. Return false, with
// the processor untouched, when only whitespace remains in proc.src.
2519template<class EventHandler>
2520template<class FilterProcessor>
2522{
2523 _c4dbgfws("found whitespace '{}'", _c4prc(proc.curr()));
2524 _RYML_ASSERT_PARSE_(this->callbacks(), proc.curr() == ' ' || proc.curr() == '\t', m_evt_handler->m_curr->pos);
2525
 // at the very start of the source only spaces are looked past;
 // elsewhere both spaces and tabs are
2526 const size_t first_pos = proc.rpos > 0 ? proc.src.first_not_of(" \t", proc.rpos) : proc.src.first_not_of(' ', proc.rpos);
2527 if(first_pos != npos)
2528 {
2529 const char first_char = proc.src[first_pos];
2530 _c4dbgfws("firstnonws='{}'@{}", _c4prc(first_char), first_pos);
2531 if(first_char == '\n' || first_char == '\r') // skip trailing whitespace
2532 {
2533 _c4dbgfws("whitespace is trailing on line", "");
2534 proc.skip(first_pos - proc.rpos);
2535 }
2536 else // a legit whitespace
2537 {
2538 proc.copy();
2539 _c4dbgfws("legit whitespace. sofar={}", _prs(proc.sofar()));
2540 }
2541 return true;
2542 }
2543 _c4dbgfws("whitespace is trailing on line", "");
2544 return false;
2545}
2546
// NOTE(review): the signature line of this member template (listing
// line 2549) is missing from this listing. Presumably this is
// _filter_ws_copy_trailing(), called from the single-quoted filter --
// confirm against the original source.
//
// Handle a whitespace character; when nothing but whitespace remains
// in the source, copy all of it.
2547template<class EventHandler>
2548template<class FilterProcessor>
2550{
2551 if(!_filter_ws_handle_to_first_non_space(proc))
2552 {
2553 _c4dbgfws("... everything else is trailing whitespace - copy {} chars", proc.src.len - proc.rpos);
2554 proc.copy(proc.src.len - proc.rpos);
2555 }
2556}
2557
// NOTE(review): the signature line of this member template (listing
// line 2560) is missing from this listing. Presumably this is
// _filter_ws_skip_trailing(), called from the plain-scalar filter --
// confirm against the original source.
//
// Handle a whitespace character; when nothing but whitespace remains
// in the source, skip all of it.
2558template<class EventHandler>
2559template<class FilterProcessor>
2561{
2562 if(!_filter_ws_handle_to_first_non_space(proc))
2563 {
2564 _c4dbgfws("... everything else is trailing whitespace - skip {} chars", proc.src.len - proc.rpos);
2565 proc.skip(proc.src.len - proc.rpos);
2566 }
2567}
2568
2569#undef _c4dbgfws
2570
2571
2572//-----------------------------------------------------------------------------
2573//-----------------------------------------------------------------------------
2574//-----------------------------------------------------------------------------
2575/* plain scalars */
2576
2577// a debugging scaffold:
2578#if 0
2579#define _c4dbgfps(fmt, ...) _c4dbgpf("filt_plain[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2580#else
2581#define _c4dbgfps(fmt, ...)
2582#endif
2583
2584template<class EventHandler>
2585template<class FilterProcessor>
2586void ParseEngine<EventHandler>::_filter_nl_plain(FilterProcessor &C4_RESTRICT proc, size_t indentation)
2587{
2588 _RYML_ASSERT_PARSE_(this->callbacks(), proc.curr() == '\n', m_evt_handler->m_curr->pos);
2589
2590 _c4dbgfps("found newline. sofar={}", _prs(proc.sofar()));
2591 size_t ii = proc.rpos;
2592 const size_t numnl_following = _count_following_newlines(proc.src, &ii, indentation);
2593 if(numnl_following)
2594 {
2595 proc.set('\n', numnl_following);
2596 _c4dbgfps("{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ? "in the middle" : "at the end", proc.rpos-ii);
2597 }
2598 else
2599 {
2600 const size_t ret = proc.src.first_not_of(" \t", proc.rpos+1);
2601 if(ret != npos)
2602 {
2603 proc.set(' ');
2604 _c4dbgfps("single newline. convert to space. ret={}/{}. sofar={}", ii, proc.src.len, _prs(proc.sofar()));
2605 }
2606 else
2607 {
2608 _c4dbgfps("last newline, everything else is whitespace. ii={}/{}", ii, proc.src.len);
2609 ii = proc.src.len;
2610 }
2611 }
2612 proc.rpos = ii;
2613}
2614
2615template<class EventHandler>
2616template<class FilterProcessor>
2617auto ParseEngine<EventHandler>::_filter_plain(FilterProcessor &C4_RESTRICT proc, size_t indentation) -> decltype(proc.result())
2618{
2619 _RYML_ASSERT_PARSE_(this->callbacks(), indentation != npos, m_evt_handler->m_curr->pos);
2620 _c4dbgfps("before={}", _prs(proc.src));
2621
2622 while(proc.has_more_chars())
2623 {
2624 const char curr = proc.curr();
2625 _c4dbgfps("'{}', sofar={}", _c4prc(curr), _prs(proc.sofar()));
2626 switch(curr)
2627 {
2628 case ' ':
2629 _RYML_WITH_TAB_TOKENS(case '\t':)
2630 _c4dbgfps("whitespace", curr);
2631 _filter_ws_skip_trailing(proc);
2632 break;
2633 case '\n':
2634 _c4dbgfps("newline", curr);
2635 _filter_nl_plain(proc, /*indentation*/indentation);
2636 break;
2637 case '\r': // skip \r --- https://stackoverflow.com/questions/1885900
2638 _c4dbgfps("carriage return, ignore", curr);
2639 proc.skip();
2640 break;
2641 default:
2642 proc.copy();
2643 break;
2644 }
2645 }
2646
2647 _c4dbgfps("after={}", _prs(proc.sofar()));
2648
2649 return proc.result();
2650}
2651
2652#undef _c4dbgfps
2653
2654
// NOTE(review): the signature line of this function (listing line
// 2656) is missing from this listing; restore it from the original
// source.
//
// Filter a plain scalar from `scalar` into `dst` by running
// _filter_plain() on a source/destination processor.
2655template<class EventHandler>
2657{
2658 FilterProcessorSrcDst proc(scalar, dst);
2659 return _filter_plain(proc, indentation);
2660}
2661
// NOTE(review): two lines of this function are missing from this
// listing: the signature (listing line 2663) and the declaration of
// `proc` (listing line 2665). Restore them from the original source.
//
// Runs _filter_plain() with the given indentation on `proc`.
2662template<class EventHandler>
2664{
2666 return _filter_plain(proc, indentation);
2667}
2668
2669
2670//-----------------------------------------------------------------------------
2671//-----------------------------------------------------------------------------
2672//-----------------------------------------------------------------------------
2673/* single quoted */
2674
2675// a debugging scaffold:
2676#if 0
2677#define _c4dbgfsq(fmt, ...) _c4dbgpf("filt_squo[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2678#else
2679#define _c4dbgfsq(fmt, ...)
2680#endif
2681
2682template<class EventHandler>
2683template<class FilterProcessor>
2684void ParseEngine<EventHandler>::_filter_nl_squoted(FilterProcessor &C4_RESTRICT proc)
2685{
2686 _RYML_ASSERT_PARSE_(this->callbacks(), proc.curr() == '\n', m_evt_handler->m_curr->pos);
2687
2688 _c4dbgfsq("found newline. sofar={}", _prs(proc.sofar()));
2689 size_t ii = proc.rpos;
2690 const size_t numnl_following = _count_following_newlines(proc.src, &ii);
2691 if(numnl_following)
2692 {
2693 proc.set('\n', numnl_following);
2694 _c4dbgfsq("{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ? "in the middle" : "at the end", proc.rpos-ii);
2695 }
2696 else
2697 {
2698 const size_t ret = proc.src.first_not_of(" \t", proc.rpos+1);
2699 if(ret != npos)
2700 {
2701 proc.set(' ');
2702 _c4dbgfsq("single newline. convert to space. ret={}/{}. sofar={}", ii, proc.src.len, _prs(proc.sofar()));
2703 }
2704 else
2705 {
2706 proc.set(' ');
2707 _c4dbgfsq("single newline. convert to space. ii={}/{}. sofar={}", ii, proc.src.len, _prs(proc.sofar()));
2708 }
2709 }
2710 proc.rpos = ii;
2711}
2712
2713template<class EventHandler>
2714template<class FilterProcessor>
2715auto ParseEngine<EventHandler>::_filter_squoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result())
2716{
2717 _c4dbgfsq("before={}", _prs(proc.src));
2718
2719 // from the YAML spec for double-quoted scalars:
2720 // https://yaml.org/spec/1.2-old/spec.html#style/flow/single-quoted
2721 while(proc.has_more_chars())
2722 {
2723 const char curr = proc.curr();
2724 _c4dbgfsq("'{}', sofar={}", _c4prc(curr), _prs(proc.sofar()));
2725 switch(curr)
2726 {
2727 case ' ':
2728 case '\t':
2729 _c4dbgfsq("whitespace", curr);
2730 _filter_ws_copy_trailing(proc);
2731 break;
2732 case '\n':
2733 _c4dbgfsq("newline", curr);
2734 _filter_nl_squoted(proc);
2735 break;
2736 case '\r': // skip \r --- https://stackoverflow.com/questions/1885900
2737 _c4dbgfsq("skip cr", curr);
2738 proc.skip();
2739 break;
2740 case '\'':
2741 _c4dbgfsq("squote", curr);
2742 if(proc.next() == '\'')
2743 {
2744 _c4dbgfsq("two consecutive squotes", curr);
2745 proc.skip();
2746 proc.copy();
2747 }
2748 else
2749 {
2750 _c4err("filter error");
2751 }
2752 break;
2753 default:
2754 proc.copy();
2755 break;
2756 }
2757 }
2758
2759 _c4dbgfsq(": #filteredchars={} after={}", proc.src.len-proc.sofar().len, _prs(proc.sofar()));
2760
2761 return proc.result();
2762}
2763
2764#undef _c4dbgfsq
2765
2766template<class EventHandler>
2768{
2769 FilterProcessorSrcDst proc(scalar, dst);
2770 return _filter_squoted(proc);
2771}
2772
2773template<class EventHandler>
2775{
2777 return _filter_squoted(proc);
2778}
2779
2780
2781//-----------------------------------------------------------------------------
2782//-----------------------------------------------------------------------------
2783//-----------------------------------------------------------------------------
2784/* double quoted */
2785
2786// a debugging scaffold:
2787#if 0
2788#define _c4dbgfdq(fmt, ...) _c4dbgpf("filt_dquo[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
2789#else
2790#define _c4dbgfdq(...)
2791#endif
2792
2793template<class EventHandler>
2794template<class FilterProcessor>
2795void ParseEngine<EventHandler>::_filter_nl_dquoted(FilterProcessor &C4_RESTRICT proc)
2796{
2797 _RYML_ASSERT_PARSE_(this->callbacks(), proc.curr() == '\n', m_evt_handler->m_curr->pos);
2798
2799 _c4dbgfdq("found newline. sofar={}", _prs(proc.sofar()));
2800 size_t ii = proc.rpos;
2801 const size_t numnl_following = _count_following_newlines(proc.src, &ii);
2802 if(numnl_following)
2803 {
2804 proc.set('\n', numnl_following);
2805 _c4dbgfdq("{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ? "in the middle" : "at the end", proc.rpos-ii);
2806 }
2807 else
2808 {
2809 const size_t ret = proc.src.first_not_of(" \t", proc.rpos+1);
2810 if(ret != npos)
2811 {
2812 proc.set(' ');
2813 _c4dbgfdq("single newline. convert to space. ret={}/{}. sofar={}", ii, proc.src.len, _prs(proc.sofar()));
2814 }
2815 else
2816 {
2817 proc.set(' ');
2818 _c4dbgfdq("single newline. convert to space. ii={}/{}. sofar={}", ii, proc.src.len, _prs(proc.sofar()));
2819 }
2820 if(ii < proc.src.len && proc.src.str[ii] == '\\')
2821 {
2822 _c4dbgfdq("backslash at [{}]", ii);
2823 const char next = ii+1 < proc.src.len ? proc.src.str[ii+1] : '\0';
2824 if(next == ' ' || next == '\t')
2825 {
2826 _c4dbgfdq("extend skip to backslash", "");
2827 ++ii;
2828 }
2829 }
2830 }
2831 proc.rpos = ii;
2832}
2833
2834template<class EventHandler>
2835template<class FilterProcessor>
2836void ParseEngine<EventHandler>::_filter_dquoted_backslash_decode(FilterProcessor &C4_RESTRICT proc, size_t sz)
2837{
2838 const size_t szp1 = sz + 1u;
2839 if(C4_UNLIKELY(proc.rpos + szp1 >= proc.src.len))
2840 _c4err("codepoint requires {} hex digits. scalar pos={}", sz, proc.rpos);
2841 char readbuf[8];
2842 csubstr codepoint = proc.src.sub(proc.rpos + 2u, sz);
2843 _c4dbgfdq("utf8 ~~~{}~~~ rpos={} rem=~~~{}~~~", codepoint, proc.rpos, proc.src.sub(proc.rpos));
2844 uint32_t codepoint_val = {};
2845 if(C4_UNLIKELY(!read_hex(codepoint, &codepoint_val)))
2846 _c4err("failed to parse codepoint. scalar pos={}", proc.rpos);
2847 const size_t numbytes = decode_code_point((uint8_t*)readbuf, sizeof(readbuf), codepoint_val);
2848 if(C4_UNLIKELY(numbytes == 0))
2849 _c4err("failed to decode code point={}", proc.rpos);
2850 _RYML_ASSERT_PARSE_(callbacks(), numbytes <= 4, m_evt_handler->m_curr->pos);
2851 proc.translate_esc_bulk(readbuf, numbytes, /*nread*/szp1);
2852 _c4dbgfdq("utf8 after rpos={} rem=~~~{}~~~", proc.rpos, proc.src.sub(proc.rpos));
2853}
2854
2855template<class EventHandler>
2856template<class FilterProcessor>
2857void ParseEngine<EventHandler>::_filter_dquoted_backslash(FilterProcessor &C4_RESTRICT proc)
2858{
2859 char next = proc.next();
2860 _c4dbgfdq("backslash, next='{}'", _c4prc(next));
2861 if(next == '\r')
2862 {
2863 if(proc.rpos+2 < proc.src.len && proc.src.str[proc.rpos+2] == '\n')
2864 {
2865 proc.skip(); // newline escaped with \ -- skip both (add only one as i is loop-incremented)
2866 next = '\n';
2867 _c4dbgfdq("[{}]: was \\r\\n, now next='\\n'", proc.rpos);
2868 }
2869 }
2870
2871 if(next == '\n')
2872 {
2873 size_t ii = proc.rpos + 2;
2874 for( ; ii < proc.src.len; ++ii)
2875 {
2876 // skip leading whitespace
2877 if(proc.src.str[ii] == ' ' || proc.src.str[ii] == '\t')
2878 ;
2879 else
2880 break;
2881 }
2882 proc.skip(ii - proc.rpos);
2883 }
2884 else if(next == '"' || next == '/' || next == ' ' || next == '\t')
2885 {
2886 // escapes for json compatibility
2887 proc.translate_esc(next);
2888 _c4dbgfdq("here, used '{}'", _c4prc(next));
2889 }
2890 else if(next == '\r')
2891 {
2892 proc.skip();
2893 }
2894 else if(next == 'n')
2895 {
2896 proc.translate_esc('\n');
2897 }
2898 else if(next == 'r')
2899 {
2900 proc.translate_esc('\r');
2901 }
2902 else if(next == 't')
2903 {
2904 proc.translate_esc('\t');
2905 }
2906 else if(next == '\\')
2907 {
2908 proc.translate_esc('\\');
2909 }
2910 else if(next == 'x') // 2-digit Unicode escape (\xXX), code point 0x00–0xFF
2911 {
2912 _filter_dquoted_backslash_decode(proc, 2u);
2913 }
2914 else if(next == 'u') // 4-digit Unicode escape (\uXXXX), code point 0x0000–0xFFFF
2915 {
2916 _filter_dquoted_backslash_decode(proc, 4u);
2917 }
2918 else if(next == 'U') // 8-digit Unicode escape (\UXXXXXXXX), full 32-bit code point
2919 {
2920 _filter_dquoted_backslash_decode(proc, 8u);
2921 }
2922 // https://yaml.org/spec/1.2.2/#rule-c-ns-esc-char
2923 else if(next == '0')
2924 {
2925 proc.translate_esc('\0');
2926 }
2927 else if(next == 'b') // backspace
2928 {
2929 proc.translate_esc('\b');
2930 }
2931 else if(next == 'f') // form feed
2932 {
2933 proc.translate_esc('\f');
2934 }
2935 else if(next == 'a') // bell character
2936 {
2937 proc.translate_esc('\a');
2938 }
2939 else if(next == 'v') // vertical tab
2940 {
2941 proc.translate_esc('\v');
2942 }
2943 else if(next == 'e') // escape character
2944 {
2945 proc.translate_esc('\x1b');
2946 }
2947 else if(next == '_') // unicode non breaking space \u00a0
2948 {
2949 // https://www.compart.com/en/unicode/U+00a0
2950 const char payload[] = {
2951 _RYML_CHCONST(-0x3e, 0xc2),
2952 _RYML_CHCONST(-0x60, 0xa0),
2953 };
2954 proc.translate_esc_bulk(payload, /*nwrite*/2, /*nread*/1);
2955 }
2956 else if(next == 'N') // unicode next line \u0085
2957 {
2958 // https://www.compart.com/en/unicode/U+0085
2959 const char payload[] = {
2960 _RYML_CHCONST(-0x3e, 0xc2),
2961 _RYML_CHCONST(-0x7b, 0x85),
2962 };
2963 proc.translate_esc_bulk(payload, /*nwrite*/2, /*nread*/1);
2964 }
2965 else if(next == 'L') // unicode line separator \u2028
2966 {
2967 // https://www.utf8-chartable.de/unicode-utf8-table.pl?start=8192&number=1024&names=-&utf8=0x&unicodeinhtml=hex
2968 const char payload[] = {
2969 _RYML_CHCONST(-0x1e, 0xe2),
2970 _RYML_CHCONST(-0x80, 0x80),
2971 _RYML_CHCONST(-0x58, 0xa8),
2972 };
2973 proc.translate_esc_extending(payload, /*nwrite*/3, /*nread*/1);
2974 }
2975 else if(next == 'P') // unicode paragraph separator \u2029
2976 {
2977 // https://www.utf8-chartable.de/unicode-utf8-table.pl?start=8192&number=1024&names=-&utf8=0x&unicodeinhtml=hex
2978 const char payload[] = {
2979 _RYML_CHCONST(-0x1e, 0xe2),
2980 _RYML_CHCONST(-0x80, 0x80),
2981 _RYML_CHCONST(-0x57, 0xa9),
2982 };
2983 proc.translate_esc_extending(payload, /*nwrite*/3, /*nread*/1);
2984 }
2985 else if(next == '\0')
2986 {
2987 proc.skip();
2988 }
2989 else
2990 {
2991 _c4err("unknown character '{}' after '\\' pos={}", _c4prc(next), proc.rpos);
2992 }
2993 _c4dbgfdq("backslash...sofar={}", _prs(proc.sofar()));
2994}
2995
2996
2997template<class EventHandler>
2998template<class FilterProcessor>
2999auto ParseEngine<EventHandler>::_filter_dquoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result())
3000{
3001 _c4dbgfdq("before={}", _prs(proc.src));
3002 // from the YAML spec for double-quoted scalars:
3003 // https://yaml.org/spec/1.2-old/spec.html#style/flow/double-quoted
3004 while(proc.has_more_chars())
3005 {
3006 const char curr = proc.curr();
3007 _c4dbgfdq("'{}' sofar={}", _c4prc(curr), _prs(proc.sofar()));
3008 switch(curr)
3009 {
3010 case ' ':
3011 case '\t':
3012 {
3013 _c4dbgfdq("whitespace", curr);
3014 _filter_ws_copy_trailing(proc);
3015 break;
3016 }
3017 case '\n':
3018 {
3019 _c4dbgfdq("newline", curr);
3020 _filter_nl_dquoted(proc);
3021 break;
3022 }
3023 case '\r': // skip \r --- https://stackoverflow.com/questions/1885900
3024 {
3025 _c4dbgfdq("carriage return, ignore", curr);
3026 proc.skip();
3027 break;
3028 }
3029 case '\\':
3030 {
3031 _filter_dquoted_backslash(proc);
3032 break;
3033 }
3034 default:
3035 {
3036 proc.copy();
3037 break;
3038 }
3039 }
3040 }
3041 _c4dbgfdq("after={}", _prs(proc.sofar()));
3042 return proc.result();
3043}
3044
3045#undef _c4dbgfdq
3046
3047
3048template<class EventHandler>
3050{
3051 FilterProcessorSrcDst proc(scalar, dst);
3052 return _filter_dquoted(proc);
3053}
3054
3055template<class EventHandler>
3057{
3059 return _filter_dquoted(proc);
3060}
3061
3062
3063//-----------------------------------------------------------------------------
3064//-----------------------------------------------------------------------------
3065//-----------------------------------------------------------------------------
3066// block filtering helpers
3067
3068C4_NO_INLINE inline size_t _find_last_newline_and_larger_indentation(csubstr s, size_t indentation) noexcept
3069{
3070 if(indentation + 1 > s.len)
3071 return npos;
3072 for(size_t i = s.len-indentation-1; i != size_t(-1); --i)
3073 {
3074 if(s.str[i] == '\n')
3075 {
3076 csubstr rem = s.sub(i + 1);
3077 size_t first = rem.first_not_of(' ');
3078 first = (first != npos) ? first : rem.len;
3079 if(first > indentation)
3080 return i;
3081 }
3082 }
3083 return npos;
3084}
3085
3086template<class EventHandler>
3087template<class FilterProcessor>
3088void ParseEngine<EventHandler>::_filter_chomp(FilterProcessor &C4_RESTRICT proc, BlockChomp_e chomp, size_t indentation)
3089{
3090 _RYML_ASSERT_PARSE_(this->callbacks(), chomp == CHOMP_CLIP || chomp == CHOMP_KEEP || chomp == CHOMP_STRIP, m_evt_handler->m_curr->pos);
3091 _RYML_ASSERT_PARSE_(this->callbacks(), proc.rem().first_not_of(" \n\r") == npos, m_evt_handler->m_curr->pos);
3092
3093 // a debugging scaffold:
3094 #if 0
3095 #define _c4dbgchomp(fmt, ...) _c4dbgpf("chomp[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3096 #else
3097 #define _c4dbgchomp(...)
3098 #endif
3099
3100 // advance to the last line having spaces beyond the indentation
3101 {
3102 size_t last = _find_last_newline_and_larger_indentation(proc.rem(), indentation);
3103 if(last != npos)
3104 {
3105 _c4dbgchomp("found newline and larger indentation. last={}", last);
3106 last = proc.rpos + last + size_t(1) + indentation; // last started at to-be-read.
3107 _RYML_ASSERT_PARSE_(this->callbacks(), last <= proc.src.len, m_evt_handler->m_curr->pos);
3108 // remove indentation spaces, copy the rest
3109 while((proc.rpos < last) && proc.has_more_chars())
3110 {
3111 const char curr = proc.curr();
3112 _c4dbgchomp("curr='{}'", _c4prc(curr));
3113 switch(curr)
3114 {
3115 case '\n':
3116 {
3117 _c4dbgchomp("newline! remlen={}", proc.rem().len);
3118 proc.copy();
3119 // are there spaces after the newline?
3120 csubstr at_next_line = proc.rem();
3121 if(at_next_line.begins_with(' '))
3122 {
3123 _c4dbgchomp("next line begins with spaces. indentation={}", indentation);
3124 // there are spaces.
3125 size_t first_non_space = at_next_line.first_not_of(' ');
3126 _c4dbgchomp("first_non_space={}", first_non_space);
3127 if(first_non_space == npos)
3128 {
3129 _c4dbgchomp("{} spaces, to the end", at_next_line.len);
3130 first_non_space = at_next_line.len;
3131 }
3132 if(first_non_space <= indentation)
3133 {
3134 _c4dbgchomp("skip spaces={}<=indentation={}", first_non_space, indentation);
3135 proc.skip(first_non_space);
3136 }
3137 else
3138 {
3139 _c4dbgchomp("skip indentation={}<spaces={}", indentation, first_non_space);
3140 proc.skip(indentation);
3141 // copy the spaces after the indentation
3142 _c4dbgchomp("copy {}={}-{} spaces", first_non_space - indentation, first_non_space, indentation);
3143 proc.copy(first_non_space - indentation);
3144 }
3145 }
3146 break;
3147 }
3148 case '\r':
3149 proc.skip();
3150 break;
3151 }
3152 }
3153 }
3154 }
3155
3156 // from now on, we only have line ends (or indentation spaces)
3157 switch(chomp)
3158 {
3159 case CHOMP_CLIP:
3160 {
3161 bool had_one = false;
3162 while(proc.has_more_chars())
3163 {
3164 const char curr = proc.curr();
3165 _c4dbgchomp("CLIP: '{}'", _c4prc(curr));
3166 switch(curr)
3167 {
3168 case '\n':
3169 {
3170 _c4dbgchomp("copy newline!", curr);
3171 proc.copy();
3172 proc.set_at_end();
3173 had_one = true;
3174 break;
3175 }
3176 case ' ':
3177 case '\r':
3178 _c4dbgchomp("skip!", curr);
3179 proc.skip();
3180 break;
3181 }
3182 }
3183 if(!had_one) // there were no newline characters. add one.
3184 {
3185 _c4dbgchomp("chomp=CLIP: add missing newline @{}", proc.wpos);
3186 proc.set('\n');
3187 }
3188 break;
3189 }
3190 case CHOMP_KEEP:
3191 {
3192 _c4dbgchomp("chomp=KEEP: copy all remaining new lines of {} characters", proc.rem().len);
3193 while(proc.has_more_chars())
3194 {
3195 const char curr = proc.curr();
3196 _c4dbgchomp("KEEP: '{}'", _c4prc(curr));
3197 switch(curr)
3198 {
3199 case '\n':
3200 _c4dbgchomp("copy newline!", curr);
3201 proc.copy();
3202 break;
3203 case ' ':
3204 case '\r':
3205 _c4dbgchomp("skip!", curr);
3206 proc.skip();
3207 break;
3208 }
3209 }
3210 break;
3211 }
3212 case CHOMP_STRIP:
3213 {
3214 _c4dbgchomp("chomp=STRIP: strip {} characters", proc.rem().len);
3215 // nothing to do!
3216 break;
3217 }
3218 }
3219
3220 #undef _c4dbgchomp
3221}
3222
3223
3224// a debugging scaffold:
3225#if 0
3226#define _c4dbgfb(fmt, ...) _c4dbgpf("filt_block[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3227#else
3228#define _c4dbgfb(...)
3229#endif
3230
3231template<class EventHandler>
3232template<class FilterProcessor>
3233void ParseEngine<EventHandler>::_filter_block_indentation(FilterProcessor &C4_RESTRICT proc, size_t indentation)
3234{
3235 csubstr rem = proc.rem(); // remaining
3236 if(rem.len)
3237 {
3238 size_t first = rem.first_not_of(' ');
3239 if(first != npos)
3240 {
3241 _c4dbgfb("{} spaces follow before next nonws character", first);
3242 if(first < indentation)
3243 {
3244 _c4dbgfb("skip {}<{} spaces from indentation", first, indentation);
3245 proc.skip(first);
3246 }
3247 else
3248 {
3249 _c4dbgfb("skip {} spaces from indentation", indentation);
3250 proc.skip(indentation);
3251 }
3252 }
3253 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
3254 else
3255 {
3256 _c4dbgfb("all spaces to the end: {} spaces", first);
3257 first = rem.len;
3258 if(first)
3259 {
3260 if(first < indentation)
3261 {
3262 _c4dbgfb("skip everything", first);
3263 proc.skip(proc.src.len - proc.rpos);
3264 }
3265 else
3266 {
3267 _c4dbgfb("skip {} spaces from indentation", indentation);
3268 proc.skip(indentation);
3269 }
3270 }
3271 }
3272 #endif
3273 }
3274}
3275
3276template<class EventHandler>
3277template<class FilterProcessor>
3278size_t ParseEngine<EventHandler>::_handle_all_whitespace(FilterProcessor &C4_RESTRICT proc, BlockChomp_e chomp)
3279{
3280 csubstr contents = proc.src.trimr(" \n\r");
3281 _c4dbgfb("ws: contents_len={} wslen={}", contents.len, proc.src.len-contents.len);
3282 if(!contents.len)
3283 {
3284 _c4dbgfb("ws: all whitespace: len={}", proc.src.len);
3285 if(chomp == CHOMP_KEEP && proc.src.len)
3286 {
3287 _c4dbgfb("ws: chomp=KEEP all {} newlines", proc.src.count('\n'));
3288 while(proc.has_more_chars())
3289 {
3290 const char curr = proc.curr();
3291 if(curr == '\n')
3292 proc.copy();
3293 else
3294 proc.skip();
3295 }
3296 if(!proc.wpos)
3297 {
3298 proc.set('\n');
3299 }
3300 }
3301 }
3302 return contents.len;
3303}
3304
3305template<class EventHandler>
3306template<class FilterProcessor>
3307size_t ParseEngine<EventHandler>::_extend_to_chomp(FilterProcessor &C4_RESTRICT proc, size_t contents_len)
3308{
3309 _c4dbgfb("contents_len={}", contents_len);
3310
3311 _RYML_ASSERT_PARSE_(this->callbacks(), contents_len > 0u, m_evt_handler->m_curr->pos);
3312
3313 // extend contents to just before the first newline at the end,
3314 // in case it is preceded by spaces
3315 size_t firstnewl = proc.src.first_of('\n', contents_len);
3316 if(firstnewl != npos)
3317 {
3318 contents_len = firstnewl;
3319 _c4dbgfb("contents_len={} <--- firstnewl={}", contents_len, firstnewl);
3320 }
3321 else
3322 {
3323 contents_len = proc.src.len;
3324 _c4dbgfb("contents_len={} <--- src.len={}", contents_len, proc.src.len);
3325 }
3326
3327 return contents_len;
3328}
3329
3330#undef _c4dbgfb
3331
3332
3333//-----------------------------------------------------------------------------
3334//-----------------------------------------------------------------------------
3335//-----------------------------------------------------------------------------
3336
3337// a debugging scaffold:
3338#if 0
3339#define _c4dbgfbl(fmt, ...) _c4dbgpf("filt_block_lit[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3340#else
3341#define _c4dbgfbl(...)
3342#endif
3343
3344template<class EventHandler>
3345template<class FilterProcessor>
3346auto ParseEngine<EventHandler>::_filter_block_literal(FilterProcessor &C4_RESTRICT proc, size_t indentation, BlockChomp_e chomp) -> decltype(proc.result())
3347{
3348 _c4dbgfbl("indentation={} before={}", indentation, _prs(proc.src));
3349
3350 size_t contents_len = _handle_all_whitespace(proc, chomp);
3351 if(!contents_len)
3352 return proc.result();
3353
3354 contents_len = _extend_to_chomp(proc, contents_len);
3355
3356 _c4dbgfbl("to filter={}", _prs(proc.src.first(contents_len)));
3357
3358 _filter_block_indentation(proc, indentation);
3359
3360 // now filter the bulk
3361 while(proc.has_more_chars(/*maxpos*/contents_len))
3362 {
3363 const char curr = proc.curr();
3364 _c4dbgfbl("'{}' sofar={}", _c4prc(curr), _prs(proc.sofar()));
3365 switch(curr)
3366 {
3367 case '\n':
3368 {
3369 _c4dbgfbl("found newline. skip indentation on the next line", curr);
3370 proc.copy(); // copy the newline
3371 _filter_block_indentation(proc, indentation);
3372 break;
3373 }
3374 case '\r':
3375 proc.skip();
3376 break;
3377 default:
3378 proc.copy();
3379 break;
3380 }
3381 }
3382
3383 _c4dbgfbl("before chomp: #tochomp={} sofar={}", proc.rem().len, _prs(proc.sofar()));
3384
3385 _filter_chomp(proc, chomp, indentation);
3386
3387 _c4dbgfbl("final={}", _prs(proc.sofar()));
3388
3389 return proc.result();
3390}
3391
3392#undef _c4dbgfbl
3393
3394template<class EventHandler>
3395FilterResult ParseEngine<EventHandler>::filter_scalar_block_literal(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
3396{
3397 FilterProcessorSrcDst proc(scalar, dst);
3398 return _filter_block_literal(proc, indentation, chomp);
3399}
3400
3401template<class EventHandler>
3402FilterResult ParseEngine<EventHandler>::filter_scalar_block_literal_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
3403{
3404 FilterProcessorInplaceEndExtending proc(scalar, cap);
3405 return _filter_block_literal(proc, indentation, chomp);
3406}
3407
3408
3409//-----------------------------------------------------------------------------
3410//-----------------------------------------------------------------------------
3411//-----------------------------------------------------------------------------
3412
3413// a debugging scaffold:
3414#if 0
3415#define _c4dbgfbf(fmt, ...) _c4dbgpf("filt_block_folded[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
3416#else
3417#define _c4dbgfbf(...)
3418#endif
3419
3420
3421template<class EventHandler>
3422template<class FilterProcessor>
3423void ParseEngine<EventHandler>::_filter_block_folded_newlines_leading(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len)
3424{
3425 _filter_block_indentation(proc, indentation);
3426 while(proc.has_more_chars(len))
3427 {
3428 const char curr = proc.curr();
3429 _c4dbgfbf("'{}' sofar={}", _c4prc(curr), _prs(proc.sofar()));
3430 switch(curr)
3431 {
3432 case '\n':
3433 _c4dbgfbf("newline.", curr);
3434 proc.copy();
3435 _filter_block_indentation(proc, indentation);
3436 break;
3437 case '\r':
3438 proc.skip();
3439 break;
3440 case ' ':
3441 case '\t':
3442 {
3443 size_t first = proc.rem().first_not_of(" \t");
3444 _c4dbgfbf("space. first={}", first);
3445 if(first == npos)
3446 first = proc.rem().len;
3447 _c4dbgfbf("... indentation increased to {}", first);
3448 _filter_block_folded_indented_block(proc, indentation, len, first);
3449 break;
3450 }
3451 default:
3452 _c4dbgfbf("newl leading: not space, not newline. stop.", 0);
3453 return;
3454 }
3455 }
3456}
3457
// Handle one newline of a run of consecutive newlines in a folded
// block scalar. The read position is expected to be at that newline.
//
// proc: the filter processor.
// num_newl: which newline of the run this is, counting from 1 (the
//   caller in this file passes ++num_newl).
// wpos_at_first_newl: the write position recorded when the first
//   newline of the run was handled; the caller passes npos before then.
//
// Returns the (possibly updated) write position of the first newline,
// which the caller stores and passes back on the next call.
//
// NOTE: this code lives in its own function as a workaround for wrong
// results seen in some optimized builds; see the long comment at its
// call site before changing its form.
3458template<class EventHandler>
3459template<class FilterProcessor>
3460size_t ParseEngine<EventHandler>::_filter_block_folded_newlines_compress(FilterProcessor &C4_RESTRICT proc, size_t num_newl, size_t wpos_at_first_newl)
3461{
3462 switch(num_newl)
3463 {
3464 case 1u:
 // first newline of the run: remember where it is written, then
 // replace it with a space
3465 _c4dbgfbf("... this is the first newline. turn into space. wpos={}", proc.wpos);
3466 wpos_at_first_newl = proc.wpos;
3467 proc.skip();
3468 proc.set(' ');
3469 break;
3470 case 2u:
 // second newline of the run: drop it, and turn the space written
 // for the first newline back into a newline
3471 _c4dbgfbf("... this is the second newline. prev space (at wpos={}) must be newline", wpos_at_first_newl);
3472 _RYML_ASSERT_PARSE_(this->callbacks(), wpos_at_first_newl != npos, m_evt_handler->m_curr->pos);
3473 _RYML_ASSERT_PARSE_(this->callbacks(), proc.sofar()[wpos_at_first_newl] == ' ', m_evt_handler->m_curr->pos);
3474 _RYML_ASSERT_PARSE_(this->callbacks(), wpos_at_first_newl + 1u == proc.wpos, m_evt_handler->m_curr->pos);
3475 proc.skip();
3476 proc.set_at(wpos_at_first_newl, '\n');
3477 _RYML_ASSERT_PARSE_(this->callbacks(), proc.sofar()[wpos_at_first_newl] == '\n', m_evt_handler->m_curr->pos);
3478 break;
3479 default:
 // third and later newlines of the run are copied as they are
3480 _c4dbgfbf("... subsequent newline (num_newl={}). copy", num_newl);
3481 proc.copy();
3482 break;
3483 }
3484 return wpos_at_first_newl;
3485}
3486
// Process a run of newlines in a folded block scalar, starting at the
// newline under the read position.
//
// Each newline is handed to _filter_block_folded_newlines_compress()
// and followed by indentation removal. A space or tab starts a
// more-indented block: the space written for the first newline (if any)
// is turned back into a newline, one more newline is written when the
// run had more than one, and the block is forwarded to
// _filter_block_folded_indented_block(), after which the newline count
// restarts. '\r' is dropped. The function returns at the first
// character of any other kind, or when len is reached.
//
// proc: the filter processor; proc.curr() must be '\n'.
// indentation: the indentation of the block scalar.
// len: the limit passed to proc.has_more_chars().
3487template<class EventHandler>
3488template<class FilterProcessor>
3489void ParseEngine<EventHandler>::_filter_block_folded_newlines(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len)
3490{
3491 _RYML_ASSERT_PARSE_(this->callbacks(), proc.curr() == '\n', m_evt_handler->m_curr->pos);
 // number of newlines seen in the current run
3492 size_t num_newl = 0;
 // write position of the first newline of the current run
3493 size_t wpos_at_first_newl = npos;
3494 while(proc.has_more_chars(len))
3495 {
3496 const char curr = proc.curr();
3497 _c4dbgfbf("'{}' sofar={}", _c4prc(curr), _prs(proc.sofar()));
3498 switch(curr)
3499 {
3500 case '\n':
3501 {
3502 _c4dbgfbf("newline. sofar={}", num_newl);
3503 // NOTE: vs2022-32bit-release builds were giving wrong
3504 // results in this block, if it was written either
3505 // as a switch(num_newl) or its equivalent if-form.
3506 //
3507 // For this reason, we're using a dedicated function
3508 // (**_compress), which seems to work around the issue.
3509 //
3510 // The manifested problem was that somewhere between the
3511 // assignment to curr and this point, proc.wpos (the
3512 // write-position of the processor) jumped to npos, which
3513 // made the write wrap-around! To make things worse,
3514 // enabling prints via _c4dbgpf() and _c4dbgfbf() made the
3515 // problem go away!
3516 //
3517 // The only way to make the problem appear with prints
3518 // enabled was by disabling all prints in this function
3519 // (including in the block which was moved to the compress
3520 // function) and then selectively enabling only some of
3521 // those prints.
3522 //
3523 // This may be due to some bug in the cl-x86 optimizer; or
3524 // it may be triggered by some UB which may be
3525 // inadvertently present in this function or in the filter
3526 // processor. This is despite our best efforts to weed out
3527 // any such UB problem: neither clang-tidy, nor any of the
3528 // sanitizers, nor gcc's -fanalyzer pointed to any problems
3529 // in this code.
3530 //
3531 // In the end, moving this block to a separate function
3532 // was the only way to bury the problem. But it may
3533 // resurface again, as The Undead, rising from the
3534 // grave to haunt us with his terrible presence.
3535 //
3536 // We may have to revisit this. With a stake, and lots of
3537 // garlic.
3538 wpos_at_first_newl = _filter_block_folded_newlines_compress(proc, ++num_newl, wpos_at_first_newl);
3539 _filter_block_indentation(proc, indentation);
3540 break;
3541 }
3542 case ' ':
3543 case '\t':
3544 {
3545 size_t first = proc.rem().first_not_of(" \t");
3546 _c4dbgfbf("space. first={}", first);
 // only whitespace remains: all of it counts
3547 if(first == npos)
3548 first = proc.rem().len;
3549 _c4dbgfbf("... indentation increased to {}", first);
3550 if(num_newl)
3551 {
3552 _c4dbgfbf("... prev space (at wpos={}) must be newline", wpos_at_first_newl);
3553 proc.set_at(wpos_at_first_newl, '\n');
3554 }
3555 if(num_newl > 1u)
3556 {
3557 _c4dbgfbf("... add missing newline", wpos_at_first_newl);
3558 proc.set('\n');
3559 }
3560 _filter_block_folded_indented_block(proc, indentation, len, first);
 // the indented block ends the run of newlines; start over
3561 num_newl = 0;
3562 wpos_at_first_newl = npos;
3563 break;
3564 }
3565 case '\r':
3566 proc.skip();
3567 break;
3568 default:
3569 _c4dbgfbf("not space, not newline. stop.", 0);
3570 return;
3571 }
3572 }
3573}
3574
3575
3576template<class EventHandler>
3577template<class FilterProcessor>
3578void ParseEngine<EventHandler>::_filter_block_folded_indented_block(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len, size_t curr_indentation) noexcept
3579{
3580 _RYML_ASSERT_PARSE_(this->callbacks(), (proc.rem().first_not_of(" \t") == curr_indentation) || (proc.rem().first_not_of(" \t") == npos), m_evt_handler->m_curr->pos);
3581 if(curr_indentation)
3582 proc.copy(curr_indentation);
3583 while(proc.has_more_chars(len))
3584 {
3585 const char curr = proc.curr();
3586 _c4dbgfbf("'{}' sofar={}", _c4prc(curr), _prs(proc.sofar()));
3587 switch(curr)
3588 {
3589 case '\n':
3590 {
3591 proc.copy();
3592 _filter_block_indentation(proc, indentation);
3593 csubstr rem = proc.rem();
3594 const size_t first = rem.first_not_of(' ');
3595 _c4dbgfbf("newline. firstns={}", first);
3596 if(first == 0)
3597 {
3598 const char c = rem[first];
3599 _c4dbgfbf("firstns={}='{}'", first, _c4prc(c));
3600 if(c != '\n' && c != '\r')
3601 {
3602 _c4dbgfbf("done with indented block", first);
3603 goto endloop;
3604 }
3605 }
3606 else if(first != npos)
3607 {
3608 proc.copy(first);
3609 _c4dbgfbf("copy all {} spaces", first);
3610 }
3611 break;
3612 }
3613 break;
3614 case '\r':
3615 proc.skip();
3616 break;
3617 default:
3618 proc.copy();
3619 break;
3620 }
3621 }
3622 endloop:
3623 return;
3624}
3625
3626
3627template<class EventHandler>
3628template<class FilterProcessor>
3629auto ParseEngine<EventHandler>::_filter_block_folded(FilterProcessor &C4_RESTRICT proc, size_t indentation, BlockChomp_e chomp) -> decltype(proc.result())
3630{
3631 _c4dbgfbf("indentation={} before={}", indentation, _prs(proc.src));
3632
3633 size_t contents_len = _handle_all_whitespace(proc, chomp);
3634 if(!contents_len)
3635 return proc.result();
3636
3637 contents_len = _extend_to_chomp(proc, contents_len);
3638
3639 _c4dbgfbf("to filter={}", _prs(proc.src.first(contents_len)));
3640
3641 _filter_block_folded_newlines_leading(proc, indentation, contents_len);
3642
3643 // now filter the bulk
3644 while(proc.has_more_chars(/*maxpos*/contents_len))
3645 {
3646 const char curr = proc.curr();
3647 _c4dbgfbf("'{}' sofar={}", _c4prc(curr), _prs(proc.sofar()));
3648 switch(curr)
3649 {
3650 case '\n':
3651 {
3652 _c4dbgfbf("found newline", curr);
3653 _filter_block_folded_newlines(proc, indentation, contents_len);
3654 break;
3655 }
3656 case '\r':
3657 proc.skip();
3658 break;
3659 default:
3660 proc.copy();
3661 break;
3662 }
3663 }
3664
3665 _c4dbgfbf("before chomp: #tochomp={} sofar={}", proc.rem().len, _prs(proc.sofar()));
3666
3667 _filter_chomp(proc, chomp, indentation);
3668
3669 _c4dbgfbf("final={}", proc.sofar().len, _prs(proc.sofar()));
3670
3671 return proc.result();
3672}
3673
3674#undef _c4dbgfbf
3675
3676template<class EventHandler>
3677FilterResult ParseEngine<EventHandler>::filter_scalar_block_folded(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
3678{
3679 FilterProcessorSrcDst proc(scalar, dst);
3680 return _filter_block_folded(proc, indentation, chomp);
3681}
3682
3683template<class EventHandler>
3684FilterResult ParseEngine<EventHandler>::filter_scalar_block_folded_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
3685{
3686 FilterProcessorInplaceEndExtending proc(scalar, cap);
3687 return _filter_block_folded(proc, indentation, chomp);
3688}
3689
3690
3691//-----------------------------------------------------------------------------
3692//-----------------------------------------------------------------------------
3693//-----------------------------------------------------------------------------
3694
3695template<class EventHandler>
3697{
3698 _c4dbgpf("filtering plain scalar: s={}", _prs(s));
3699 FilterResult r = this->filter_scalar_plain_in_place(s, s.len, indentation);
3700 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, r.valid(), m_evt_handler->m_curr->pos);
3701 _c4dbgpf("filtering plain scalar: success! s={}", _prs(r.get()));
3702 return r.get();
3703}
3704
3705//-----------------------------------------------------------------------------
3706
3707template<class EventHandler>
3709{
3710 _c4dbgpf("filtering squo scalar: s={}", _prs(s));
3711 FilterResult r = this->filter_scalar_squoted_in_place(s, s.len);
3712 _RYML_ASSERT_PARSE_(this->callbacks(), r.valid(), m_evt_handler->m_curr->pos);
3713 _c4dbgpf("filtering squo scalar: success! s={}", _prs(r.get()));
3714 return r.get();
3715}
3716
3717
3718//-----------------------------------------------------------------------------
3719
3720template<class EventHandler>
3722{
3723 _c4dbgpf("filtering dquo scalar: s={}", _prs(s));
3724 FilterResultExtending r = this->filter_scalar_dquoted_in_place(s, s.len);
3725 if(C4_LIKELY(r.valid()))
3726 {
3727 _c4dbgpf("filtering dquo scalar: success! s={}", _prs(r.get()));
3728 return r.get();
3729 }
3730 else
3731 {
3732 const size_t len = r.required_len();
3733 _c4dbgpf("filtering dquo scalar: not enough space: needs {}, have {}", len, s.len);
3734 substr dst = _alloc_arena(len, &s);
3735 _c4dbgpf("filtering dquo scalar: dst.len={}", dst.len);
3736 if(dst.str)
3737 {
3738 _RYML_ASSERT_PARSE_(this->callbacks(), dst.len == len, m_evt_handler->m_curr->pos);
3739 FilterResult rsd = this->filter_scalar_dquoted(s, dst);
3740 _c4dbgpf("filtering dquo scalar: ... result now needs {} was {}", rsd.required_len(), len);
3741 _RYML_ASSERT_PARSE_(this->callbacks(), rsd.required_len() <= len, m_evt_handler->m_curr->pos); // may be smaller!
3742 _RYML_CHECK_PARSE_(m_evt_handler->m_stack.m_callbacks, rsd.valid(), m_evt_handler->m_curr->pos);
3743 _c4dbgpf("filtering dquo scalar: success! s={}", _prs(rsd.get()));
3744 return rsd.get();
3745 }
3746 return dst;
3747 }
3748}
3749
3750
3751//-----------------------------------------------------------------------------
3752
3753template<class EventHandler>
3755{
3756 if(s.is_sub(_buf()))
3757 {
3758 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.str > _buf().str, m_evt_handler->m_curr->pos);
3759 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, s.str-1 >= _buf().str, m_evt_handler->m_curr->pos);
3760 if(s.len)
3761 memmove(s.str - 1, s.str, s.len);
3762 --s.str;
3763 s.str[s.len] = '\n';
3764 ++s.len;
3765 return s;
3766 }
3767 else
3768 {
3769 substr dst = _alloc_arena(s.len + 1, &s);
3770 if(s.len)
3771 memcpy(dst.str, s.str, s.len);
3772 dst[s.len] = '\n';
3773 return dst;
3774 }
3775}
3776
3777template<class EventHandler>
3778csubstr ParseEngine<EventHandler>::_filter_scalar_literal(substr s, size_t indentation, BlockChomp_e chomp)
3779{
3780 _c4dbgpf("filtering block literal scalar: s={}", _prs(s));
3781 FilterResult r = this->filter_scalar_block_literal_in_place(s, s.len, indentation, chomp);
3782 csubstr result;
3783 if(C4_LIKELY(r.valid()))
3784 {
3785 result = r.get();
3786 }
3787 else
3788 {
3789 _c4dbgpf("filtering block literal scalar: not enough space: needs {}, have {}", r.required_len(), s.len);
3790 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, r.required_len() == s.len + 1, m_evt_handler->m_curr->pos);
3791 // this can only happen when adding a single newline in clip mode.
3792 // so we shift left the scalar by one place
3793 result = _move_scalar_left_and_add_newline(s);
3794 }
3795 _c4dbgpf("filtering block literal scalar: success! s={}", _prs(result));
3796 return result;
3797}
3798
3799
3800//-----------------------------------------------------------------------------
3801template<class EventHandler>
3802csubstr ParseEngine<EventHandler>::_filter_scalar_folded(substr s, size_t indentation, BlockChomp_e chomp)
3803{
3804 _c4dbgpf("filtering block folded scalar: s={}", _prs(s));
3805 FilterResult r = this->filter_scalar_block_folded_in_place(s, s.len, indentation, chomp);
3806 csubstr result;
3807 if(C4_LIKELY(r.valid()))
3808 {
3809 result = r.get();
3810 }
3811 else
3812 {
3813 _c4dbgpf("filtering block folded scalar: not enough space: needs {}, have {}", r.required_len(), s.len);
3814 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, r.required_len() == s.len + 1, m_evt_handler->m_curr->pos);
3815 // this can only happen when adding a single newline in clip mode.
3816 // so we shift left the scalar by one place
3817 result = _move_scalar_left_and_add_newline(s);
3818 }
3819 _c4dbgpf("filtering block folded scalar: success! s={}", _prs(result));
3820 return result;
3821}
3822
3823
3824//-----------------------------------------------------------------------------
3825
3826template<class EventHandler>
3827csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_plain(ScannedScalar const& C4_RESTRICT sc, size_t indentation)
3828{
3829 if(sc.needs_filter)
3830 {
3831 if(m_options.scalar_filtering())
3832 {
3833 return _filter_scalar_plain(sc.scalar, indentation);
3834 }
3835 else
3836 {
3837 _c4dbgp("plain scalar left unfiltered");
3838 m_evt_handler->mark_key_scalar_unfiltered();
3839 }
3840 }
3841 else
3842 {
3843 _c4dbgp("plain scalar doesn't need filtering");
3844 }
3845 return sc.scalar;
3846}
3847
3848template<class EventHandler>
3849csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_plain(ScannedScalar const& C4_RESTRICT sc, size_t indentation)
3850{
3851 if(sc.needs_filter)
3852 {
3853 if(m_options.scalar_filtering())
3854 {
3855 return _filter_scalar_plain(sc.scalar, indentation);
3856 }
3857 else
3858 {
3859 _c4dbgp("plain scalar left unfiltered");
3860 m_evt_handler->mark_val_scalar_unfiltered();
3861 }
3862 }
3863 else
3864 {
3865 _c4dbgp("plain scalar doesn't need filtering");
3866 }
3867 return sc.scalar;
3868}
3869
3870
3871//-----------------------------------------------------------------------------
3872
3873template<class EventHandler>
3874csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_squot(ScannedScalar const& C4_RESTRICT sc)
3875{
3876 if(sc.needs_filter)
3877 {
3878 if(m_options.scalar_filtering())
3879 {
3880 return _filter_scalar_squot(sc.scalar);
3881 }
3882 else
3883 {
3884 _c4dbgp("squo key scalar left unfiltered");
3885 m_evt_handler->mark_key_scalar_unfiltered();
3886 }
3887 }
3888 else
3889 {
3890 _c4dbgp("squo key scalar doesn't need filtering");
3891 }
3892 return sc.scalar;
3893}
3894
3895template<class EventHandler>
3896csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_squot(ScannedScalar const& C4_RESTRICT sc)
3897{
3898 if(sc.needs_filter)
3899 {
3900 if(m_options.scalar_filtering())
3901 {
3902 return _filter_scalar_squot(sc.scalar);
3903 }
3904 else
3905 {
3906 _c4dbgp("squo val scalar left unfiltered");
3907 m_evt_handler->mark_val_scalar_unfiltered();
3908 }
3909 }
3910 else
3911 {
3912 _c4dbgp("squo val scalar doesn't need filtering");
3913 }
3914 return sc.scalar;
3915}
3916
3917
3918//-----------------------------------------------------------------------------
3919
3920template<class EventHandler>
3921csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_dquot(ScannedScalar const& C4_RESTRICT sc)
3922{
3923 if(sc.needs_filter)
3924 {
3925 if(m_options.scalar_filtering())
3926 {
3927 return _filter_scalar_dquot(sc.scalar);
3928 }
3929 else
3930 {
3931 _c4dbgp("dquo scalar left unfiltered");
3932 m_evt_handler->mark_key_scalar_unfiltered();
3933 }
3934 }
3935 else
3936 {
3937 _c4dbgp("dquo scalar doesn't need filtering");
3938 }
3939 return sc.scalar;
3940}
3941
3942template<class EventHandler>
3943csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_dquot(ScannedScalar const& C4_RESTRICT sc)
3944{
3945 if(sc.needs_filter)
3946 {
3947 if(m_options.scalar_filtering())
3948 {
3949 return _filter_scalar_dquot(sc.scalar);
3950 }
3951 else
3952 {
3953 _c4dbgp("dquo scalar left unfiltered");
3954 m_evt_handler->mark_val_scalar_unfiltered();
3955 }
3956 }
3957 else
3958 {
3959 _c4dbgp("dquo scalar doesn't need filtering");
3960 }
3961 return sc.scalar;
3962}
3963
3964
3965//-----------------------------------------------------------------------------
3966
// Filters a literal block scalar used as a key, when scalar filtering is
// enabled in the parser options.
//
// NOTE(review): the signature line (listing line 3968) is absent from this
// extract; presumably this is the key/literal sibling of
// _maybe_filter_key_scalar_plain(), taking a scanned block `sb` -- confirm
// against the header.
3967template<class EventHandler>
3969{
3970 if(m_options.scalar_filtering())
3971 {
3972 return _filter_scalar_literal(sb.scalar, sb.indentation, sb.chomp);
3973 }
3974 else
3975 {
// filtering is disabled: tell the handler that the key scalar is raw
3976 _c4dbgp("literal scalar left unfiltered");
3977 m_evt_handler->mark_key_scalar_unfiltered();
3978 }
// reached only when filtering is disabled: return the scalar as scanned
3979 return sb.scalar;
3980}
3981
// Filters a literal block scalar used as a val, when scalar filtering is
// enabled in the parser options.
//
// NOTE(review): the signature line (listing line 3983) is absent from this
// extract; presumably this is the val/literal sibling of
// _maybe_filter_val_scalar_plain(), taking a scanned block `sb` -- confirm
// against the header.
3982template<class EventHandler>
3984{
3985 if(m_options.scalar_filtering())
3986 {
3987 return _filter_scalar_literal(sb.scalar, sb.indentation, sb.chomp);
3988 }
3989 else
3990 {
// filtering is disabled: tell the handler that the val scalar is raw
3991 _c4dbgp("literal scalar left unfiltered");
3992 m_evt_handler->mark_val_scalar_unfiltered();
3993 }
// reached only when filtering is disabled: return the scalar as scanned
3994 return sb.scalar;
3995}
3996
3997
3998//-----------------------------------------------------------------------------
3999
// Filters a folded block scalar used as a key, when scalar filtering is
// enabled in the parser options.
//
// NOTE(review): the signature line (listing line 4001) is absent from this
// extract; presumably this is the key/folded sibling of
// _maybe_filter_key_scalar_plain(), taking a scanned block `sb` -- confirm
// against the header.
4000template<class EventHandler>
4002{
4003 if(m_options.scalar_filtering())
4004 {
4005 return _filter_scalar_folded(sb.scalar, sb.indentation, sb.chomp);
4006 }
4007 else
4008 {
// filtering is disabled: tell the handler that the key scalar is raw
4009 _c4dbgp("folded scalar left unfiltered");
4010 m_evt_handler->mark_key_scalar_unfiltered();
4011 }
// reached only when filtering is disabled: return the scalar as scanned
4012 return sb.scalar;
4013}
4014
// Filters a folded block scalar used as a val, when scalar filtering is
// enabled in the parser options.
//
// NOTE(review): the signature line (listing line 4016) is absent from this
// extract; presumably this is the val/folded sibling of
// _maybe_filter_val_scalar_plain(), taking a scanned block `sb` -- confirm
// against the header.
4015template<class EventHandler>
4017{
4018 if(m_options.scalar_filtering())
4019 {
4020 return _filter_scalar_folded(sb.scalar, sb.indentation, sb.chomp);
4021 }
4022 else
4023 {
// filtering is disabled: tell the handler that the val scalar is raw
4024 _c4dbgp("folded scalar left unfiltered");
4025 m_evt_handler->mark_val_scalar_unfiltered();
4026 }
// reached only when filtering is disabled: return the scalar as scanned
4027 return sb.scalar;
4028}
4029
4030
4031//-----------------------------------------------------------------------------
4032//-----------------------------------------------------------------------------
4033//-----------------------------------------------------------------------------
4034
4035#ifdef RYML_DBG // !!! <----------------------------------
4036
4037template<class EventHandler>
4038void ParseEngine<EventHandler>::add_flags(ParserFlag_t on)
4039{
4040 ParserState *s = m_evt_handler->m_curr;
4041 char buf1_[64], buf2_[64], buf3_[64];
4042 csubstr buf1 = detail::_parser_flags_to_str(buf1_, on);
4043 csubstr buf2 = detail::_parser_flags_to_str(buf2_, s->flags);
4044 csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags|on);
4045 _c4dbgpf("state[{}]: add {}: before={} after={}", s->level, buf1, buf2, buf3);
4046 s->flags |= on;
4047}
4048
4049template<class EventHandler>
4050void ParseEngine<EventHandler>::addrem_flags(ParserFlag_t on, ParserFlag_t off)
4051{
4052 ParserState *s = m_evt_handler->m_curr;
4053 char buf1_[64], buf2_[64], buf3_[64], buf4_[64];
4054 csubstr buf1 = detail::_parser_flags_to_str(buf1_, on);
4055 csubstr buf2 = detail::_parser_flags_to_str(buf2_, off);
4056 csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags);
4057 csubstr buf4 = detail::_parser_flags_to_str(buf4_, (~off)&((s->flags|on)));
4058 _c4dbgpf("state[{}]: add {} / rem {}: before={} after={}", s->level, buf1, buf2, buf3, buf4);
4059 _RYML_ASSERT_BASIC((on & off) == ParserFlag_t(0));
4060 s->flags &= ~off;
4061 s->flags |= on;
4062}
4063
4064template<class EventHandler>
4065void ParseEngine<EventHandler>::rem_flags(ParserFlag_t off)
4066{
4067 ParserState *s = m_evt_handler->m_curr;
4068 char buf1_[64], buf2_[64], buf3_[64];
4069 csubstr buf1 = detail::_parser_flags_to_str(buf1_, off);
4070 csubstr buf2 = detail::_parser_flags_to_str(buf2_, s->flags);
4071 csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags&(~off));
4072 _c4dbgpf("state[{}]: rem {}: before={} after={}", s->level, buf1, buf2, buf3);
4073 s->flags &= ~off;
4074}
4075
// Writes into `buf` the names of the parser flags set in `flags`, separated
// by '|', and returns the written prefix of `buf`. When no flag matches, a
// single '0' is written instead (if buf is not empty).
//
// Writes that would not fit are skipped, but `pos` keeps advancing, so a
// too-small buffer is caught by the final _RYML_CHECK_BASIC(pos <= buf.len).
//
// NOTE(review): listing line 4114 (between _prflag(USTY) and the #undef) is
// absent from this extract; it is presumably one more _prflag() entry --
// confirm against the original file.
4076inline C4_NO_INLINE csubstr detail::_parser_flags_to_str(substr buf, ParserFlag_t flags)
4077{
4078 size_t pos = 0;
4079 bool gotone = false;
4080
// _prflag(fl): when all bits of `fl` are set in `flags`, append its
// stringified name (#fl), preceded by '|' if another flag was already
// written.
4081 #define _prflag(fl) \
4082 if((flags & fl) == (fl)) \
4083 { \
4084 if(gotone) \
4085 { \
4086 if(pos + 1 < buf.len) \
4087 buf[pos] = '|'; \
4088 ++pos; \
4089 } \
4090 csubstr fltxt = #fl; \
4091 if(pos + fltxt.len <= buf.len) \
4092 memcpy(buf.str + pos, fltxt.str, fltxt.len); \
4093 pos += fltxt.len; \
4094 gotone = true; \
4095 }
4096
4097 _prflag(RTOP);
4098 _prflag(RUNK);
4099 _prflag(RMAP);
4100 _prflag(RSEQ);
4101 _prflag(RFLOW);
4102 _prflag(RBLCK);
4103 _prflag(QMRK);
4104 _prflag(RKEY);
4105 _prflag(RVAL);
4106 _prflag(RKCL);
4107 _prflag(RNXT);
4108 _prflag(SSCL);
4109 _prflag(QSCL);
4110 _prflag(RSET);
4111 _prflag(RDOC);
4112 _prflag(NDOC);
4113 _prflag(USTY);
4115
4116 #undef _prflag
4117
// nothing matched: emit a lone '0'
4118 if(pos == 0)
4119 if(buf.len > 0)
4120 buf[pos++] = '0';
4121
// fails when the flag names did not fit in buf
4122 _RYML_CHECK_BASIC(pos <= buf.len);
4123
4124 return buf.first(pos);
4125}
4126
4127#endif // RYML_DBG !!! <----------------------------------
4128
4129
4130//-----------------------------------------------------------------------------
4131//-----------------------------------------------------------------------------
4132//-----------------------------------------------------------------------------
4133
// Returns the parsed buffer starting at `loc.offset` (asserted to be inside
// the buffer).
//
// NOTE(review): the signature line (listing line 4135) is absent from this
// extract; presumably this is location_contents(Location const& loc) const --
// confirm against the header.
4134template<class EventHandler>
4136{
4137 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, loc.offset < _buf().len);
4138 return _buf().sub(loc.offset);
4139}
4140
// Computes the Location (name, offset, line, col) of the pointer `val`,
// which must point into the parsed buffer, by searching the stored newline
// offsets. A null `val` yields a location with offset, line and col all 0.
//
// NOTE(review): the signature line (listing line 4142) is absent from this
// extract; presumably this is val_location(const char *val) const -- confirm
// against the header.
4141template<class EventHandler>
4143{
4144 if(C4_UNLIKELY(val == nullptr))
4145 return {m_evt_handler->m_curr->pos.name, 0, 0, 0};
4146 _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, m_options.locations());
4147 // NOTE: if any of these checks fails, the parser needs to be
4148 // instantiated with locations enabled.
4149 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_options.locations());
4150 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, !_locations_dirty());
4151 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets != nullptr);
4152 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_size > 0);
4153 // NOTE: the pointer needs to belong to the buffer that was used to parse.
4154 csubstr src = _buf();
4155 _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, val != nullptr || src.str == nullptr);
4156 _RYML_CHECK_BASIC_(m_evt_handler->m_stack.m_callbacks, (val >= src.begin() && val <= src.end()) || (src.str == nullptr && val == nullptr));
4157 // ok. search the first stored newline after the given ptr
4158 using lineptr_type = size_t const* C4_RESTRICT;
4159 lineptr_type lineptr = nullptr;
4160 size_t offset = (size_t)(val - src.begin());
// NOTE(review): the two searches below are not equivalent. The linear one
// picks the first stored offset strictly greater than `offset` and leaves
// lineptr null when there is none (e.g. offset == src.len, which the range
// check above admits and which _prepare_locations() stores as the last
// entry). The bisection picks the first stored offset not less than
// `offset`. They disagree whenever `offset` equals a stored value -- confirm
// which behavior is intended.
4161 if(m_newline_offsets_size < RYML_LOCATIONS_SMALL_THRESHOLD)
4162 {
4163 // just do a linear search if the size is small.
4164 for(lineptr_type curr = m_newline_offsets, last = m_newline_offsets + m_newline_offsets_size; curr < last; ++curr)
4165 {
4166 if(*curr > offset)
4167 {
4168 lineptr = curr;
4169 break;
4170 }
4171 }
4172 }
4173 else
4174 {
4175 // do a bisection search if the size is not small.
4176 //
4177 // We could use std::lower_bound but this is simple enough and
4178 // spares the costly include of <algorithm>.
4179 size_t count = m_newline_offsets_size;
4180 lineptr = m_newline_offsets;
4181 while(count)
4182 {
4183 size_t step = count >> 1;
4184 lineptr_type it = lineptr + step;
4185 if(*it < offset)
4186 {
4187 lineptr = ++it;
4188 count -= step + 1;
4189 }
4190 else
4191 {
4192 count = step;
4193 }
4194 }
4195 }
4196 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, lineptr >= m_newline_offsets);
4197 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, lineptr <= m_newline_offsets + m_newline_offsets_size);
4198 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, *lineptr > offset);
4199 Location loc;
4200 loc.name = m_evt_handler->m_curr->pos.name;
4201 loc.offset = offset;
// the line is the index of the entry that was found
4202 loc.line = (size_t)(lineptr - m_newline_offsets);
// the column is counted from the byte after the previous stored offset;
// with no previous entry (first line) the offset itself is the column
4203 if(lineptr > m_newline_offsets)
4204 loc.col = (offset - *(lineptr-1) - 1u);
4205 else
4206 loc.col = offset;
4207 return loc;
4208}
4209
4210template<class EventHandler>
4211void ParseEngine<EventHandler>::_prepare_locations()
4212{
4213 csubstr src = _buf();
4214 size_t numnewlines = 1u + src.count('\n');
4215 _resize_locations(numnewlines);
4216 m_newline_offsets_size = 0;
4217 for(size_t i = 0; i < src.len; i++)
4218 if(src.str[i] == '\n')
4219 m_newline_offsets[m_newline_offsets_size++] = i; // NOLINT
4220 m_newline_offsets[m_newline_offsets_size++] = src.len; // NOLINT
4221 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_size == numnewlines);
4222}
4223
4224template<class EventHandler>
4225void ParseEngine<EventHandler>::_resize_locations(size_t numnewlines)
4226{
4227 numnewlines = numnewlines >= 16 ? numnewlines : 16;
4228 if(numnewlines > m_newline_offsets_capacity)
4229 {
4230 if(m_newline_offsets)
4231 _RYML_CB_FREE(m_evt_handler->m_stack.m_callbacks, m_newline_offsets, size_t, m_newline_offsets_capacity);
4232 m_newline_offsets = _RYML_CB_ALLOC_HINT(m_evt_handler->m_stack.m_callbacks, size_t, numnewlines, m_newline_offsets);
4233 m_newline_offsets_capacity = numnewlines;
4234 }
4235}
4236
4237template<class EventHandler>
4238bool ParseEngine<EventHandler>::_locations_dirty() const
4239{
4240 return !m_newline_offsets_size;
4241}
4242
4243
4244//-----------------------------------------------------------------------------
4245//-----------------------------------------------------------------------------
4246//-----------------------------------------------------------------------------
4247
// In flow context: skips leading spaces/tabs in the rest of the current
// line, then consumes the rest of the line if it begins with a '#'.
// The remainder of the line is deliberately re-read through
// m_evt_handler->m_curr on every access; see the compiler note below.
4248template<class EventHandler>
4249void ParseEngine<EventHandler>::_handle_flow_skip_whitespace()
4250{
4251 // don't assign to csubstr rem: otherwise, gcc12,13,14 -O3 -m32 misbuilds
4252 if(m_evt_handler->m_curr->line_contents.rem.len > 0)
4253 {
4254 if(m_evt_handler->m_curr->line_contents.rem.str[0] == ' ' || m_evt_handler->m_curr->line_contents.rem.str[0] == '\t')
4255 {
4256 _c4dbgpf("starts with whitespace: '{}'", _c4prc(m_evt_handler->m_curr->line_contents.rem.str[0]));
4257 _skipchars(" \t");
4258 }
4259 // comments
4260 if(m_evt_handler->m_curr->line_contents.rem.begins_with('#'))
4261 {
4262 _c4dbgpf("it's a comment: {}", m_evt_handler->m_curr->line_contents.rem);
// the comment extends to the end of the line: consume all of it
4263 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
4264 }
4265 }
4266}
4267
4268
4269template<class EventHandler>
4270void ParseEngine<EventHandler>::_handle_flow_line_beginning()
4271{
4272 _c4dbgpf("flow: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
4273 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->at_line_beginning(), m_evt_handler->m_curr->pos);
4274 if(C4_UNLIKELY(m_evt_handler->m_curr->indentation_lt()))
4275 {
4276 csubstr trimmed = m_evt_handler->m_curr->line_contents.rem.sub(m_evt_handler->m_curr->line_contents.indentation);
4277 _c4dbgpf("flow: after indentation={}", _prs(trimmed));
4278 if(trimmed.len && trimmed.triml(" \t").len)
4279 {
4280 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
4281 _c4err("bad indentation");
4282 }
4283 }
4284}
4285
4286template<class EventHandler>
4287size_t ParseEngine<EventHandler>::_handle_block_skip_leading_whitespace()
4288{
4289 const size_t mark = m_evt_handler->m_curr->pos.offset;
4290 const size_t firstpos = m_evt_handler->m_curr->line_contents.rem.first_not_of(" \t");
4291 _c4dbgpf("block: mark={} firstpos={}", mark, firstpos);
4292 if(firstpos != npos)
4293 {
4294 _c4dbgp("block: non empty line");
4295 _line_progressed(firstpos);
4296 return mark;
4297 }
4298 else
4299 {
4300 _c4dbgp("block: rest of line is whitespace");
4301 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
4302 return npos;
4303 }
4304}
4305
4306template<class EventHandler>
4307void ParseEngine<EventHandler>::_handle_block_check_leading_tabs(size_t start_mark, size_t end_mark)
4308{
4309 _c4dbgpf("block: start_mark={} end_mark={}", start_mark, end_mark);
4310 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, end_mark >= start_mark, m_evt_handler->m_curr->pos);
4311 if(end_mark != start_mark)
4312 {
4313 csubstr leading = _buf().range(start_mark, end_mark);
4314 _c4dbgpf("block: leading[{}-{}]={}", start_mark, end_mark, _prs(leading, true));
4315 size_t pos = leading.find('\t');
4316 if(pos != npos)
4317 {
4318 size_t fno = leading.first_not_of(" \t");
4319 if(fno == npos || pos < fno)
4320 _c4err("invalid tab character to the left");
4321 }
4322 (void)leading;
4323 }
4324}
4325
4326
4327//-----------------------------------------------------------------------------
4328
4329
4330template<class EventHandler>
4331void ParseEngine<EventHandler>::_handle_colon()
4332{
4333 size_t curr = m_evt_handler->m_curr->pos.line;
4334 if(C4_UNLIKELY(m_prev_colon != npos && curr == m_prev_colon))
4335 {
4336 _c4dbgpf("colon: prevline={} currline={}", m_prev_colon, curr);
4337 _c4err("two colons on same line");
4338 }
4339 _c4dbgpf("colon: set prevline={}->{}", m_prev_colon, curr);
4340 m_prev_colon = curr;
4341}
4342
4343template<class EventHandler>
4344void ParseEngine<EventHandler>::_add_annotation(Annotation *C4_RESTRICT dst, csubstr str)
4345{
4346 _c4dbgpf("store annotation[{}]: {}", dst->num_entries, _prs(str));
4347 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, dst->num_entries < C4_COUNTOF(dst->annotations), m_evt_handler->m_curr->pos); // NOLINT(bugprone-sizeof-expression)
4348 dst->annotations[dst->num_entries].str = str;
4349 dst->annotations[dst->num_entries].indentation = {};
4350 dst->annotations[dst->num_entries].line = {};
4351 dst->annotations[dst->num_entries].orig = {};
4352 ++dst->num_entries;
4353}
4354
4355template<class EventHandler>
4356void ParseEngine<EventHandler>::_add_annotation(Annotation *C4_RESTRICT dst, csubstr str, size_t indentation, size_t line)
4357{
4358 _c4dbgpf("store annotation[{}]: '{}' indentation={} line={}", dst->num_entries, _maybe_null_str(str), indentation, line);
4359 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, dst->num_entries < C4_COUNTOF(dst->annotations), m_evt_handler->m_curr->pos); // NOLINT(bugprone-sizeof-expression)
4360 if(C4_UNLIKELY(dst->num_entries && dst->annotations[0].line == line))
4361 {
4362 _c4err("parse error");
4363 }
4364 dst->annotations[dst->num_entries].str = str;
4365 dst->annotations[dst->num_entries].indentation = indentation;
4366 dst->annotations[dst->num_entries].line = line;
4367 dst->annotations[dst->num_entries].orig = {};
4368 ++dst->num_entries;
4369}
4370
4371template<class EventHandler>
4372void ParseEngine<EventHandler>::_add_annotation(Annotation *C4_RESTRICT dst, csubstr str, size_t indentation, size_t line, csubstr orig)
4373{
4374 _c4dbgpf("store annotation[{}]: '{}'->'{}' indentation={} line={}", dst->num_entries, orig, _maybe_null_str(str), indentation, line);
4375 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, dst->num_entries < C4_COUNTOF(dst->annotations), m_evt_handler->m_curr->pos); // NOLINT(bugprone-sizeof-expression)
4376 if(C4_UNLIKELY(dst->num_entries && dst->annotations[0].line == line))
4377 {
4378 _c4err("parse error");
4379 }
4380 dst->annotations[dst->num_entries].str = str;
4381 dst->annotations[dst->num_entries].indentation = indentation;
4382 dst->annotations[dst->num_entries].line = line;
4383 dst->annotations[dst->num_entries].orig = orig;
4384 ++dst->num_entries;
4385}
4386
4387template<class EventHandler>
4388bool ParseEngine<EventHandler>::_annotations_require_key_container() const
4389{
4390 return m_pending_tags.num_entries > 1 || m_pending_anchors.num_entries > 1;
4391}
4392
4393template<class EventHandler>
4394bool ParseEngine<EventHandler>::_handle_annotations_before_unexpected_flow_token_rkey()
4395{
4396 if(!(m_pending_tags.num_entries | m_pending_anchors.num_entries))
4397 return false;
4398 _c4dbgpf("handle_annotations_before_unexpected_flow_comma_rkey, node={}", m_evt_handler->m_curr->node_id);
4399 if(m_pending_tags.num_entries)
4400 {
4401 _c4dbgpf("handle_annotations_before_unexpected_flow_comma_rkey, #tags={}", m_pending_tags.num_entries);
4402 if(C4_LIKELY(m_pending_tags.num_entries == 1))
4403 {
4404 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4405 _clear_annotations(&m_pending_tags);
4406 }
4407 else
4408 {
4409 _c4err("too many tags");
4410 }
4411 }
4412 if(m_pending_anchors.num_entries)
4413 {
4414 _c4dbgpf("handle_annotations_before_unexpected_flow_comma, #anchors={}", m_pending_tags.num_entries);
4415 if(C4_LIKELY(m_pending_anchors.num_entries == 1))
4416 {
4417 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4418 _clear_annotations(&m_pending_anchors);
4419 }
4420 else
4421 {
4422 _c4err("too many anchors");
4423 }
4424 }
4425 m_evt_handler->set_key_scalar_plain_empty();
4426 m_evt_handler->set_val_scalar_plain_empty();
4427 return true;
4428}
4429
4430template<class EventHandler>
4431void ParseEngine<EventHandler>::_handle_annotations_before_blck_key_scalar()
4432{
4433 _c4dbgpf("annotations_before_blck_key_scalar, node={}", m_evt_handler->m_curr->node_id);
4434 if(m_pending_tags.num_entries)
4435 {
4436 _c4dbgpf("annotations_before_blck_key_scalar, #tags={}", m_pending_tags.num_entries);
4437 if(C4_LIKELY(m_pending_tags.num_entries == 1))
4438 {
4439 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4440 _clear_annotations(&m_pending_tags);
4441 }
4442 else
4443 {
4444 _c4err("too many tags"); // LCOV_EXCL_LINE
4445 }
4446 }
4447 if(m_pending_anchors.num_entries)
4448 {
4449 _c4dbgpf("annotations_before_blck_key_scalar, #anchors={}", m_pending_anchors.num_entries);
4450 if(C4_LIKELY(m_pending_anchors.num_entries == 1))
4451 {
4452 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4453 _clear_annotations(&m_pending_anchors);
4454 }
4455 else
4456 {
4457 _c4err("too many anchors"); // LCOV_EXCL_LINE
4458 }
4459 }
4460}
4461
4462template<class EventHandler>
4463void ParseEngine<EventHandler>::_handle_annotations_before_blck_val_scalar()
4464{
4465 _c4dbgpf("annotations_before_blck_val_scalar, node={}", m_evt_handler->m_curr->node_id);
4466 if(m_pending_tags.num_entries)
4467 {
4468 _c4dbgpf("annotations_before_blck_val_scalar, #tags={}", m_pending_tags.num_entries);
4469 if(C4_LIKELY(m_pending_tags.num_entries == 1))
4470 {
4471 m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4472 _clear_annotations(&m_pending_tags);
4473 }
4474 else
4475 {
4476 _c4err("too many tags");
4477 }
4478 }
4479 if(m_pending_anchors.num_entries)
4480 {
4481 _c4dbgpf("annotations_before_blck_val_scalar, #anchors={}", m_pending_anchors.num_entries);
4482 if(C4_LIKELY(m_pending_anchors.num_entries == 1))
4483 {
4484 m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4485 _clear_annotations(&m_pending_anchors);
4486 }
4487 else
4488 {
4489 _c4err("too many anchors");
4490 }
4491 }
4492}
4493
4494template<class EventHandler>
4495void ParseEngine<EventHandler>::_handle_annotations_before_start_mapblck(size_t current_line)
4496{
4497 _c4dbgpf("annotations_before_start_mapblck, current_line={}", current_line);
4498 if(m_pending_tags.num_entries == 2)
4499 {
4500 _c4dbgp("2 tags, setting entry 0");
4501 m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4502 }
4503 else if(m_pending_tags.num_entries == 1)
4504 {
4505 _c4dbgpf("1 tag. line={}, curr={}", m_pending_tags.annotations[0].line, current_line);
4506 if(m_pending_tags.annotations[0].line < current_line)
4507 {
4508 _c4dbgp("...tag is for the map. setting it.");
4509 m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
4510 _clear_annotations(&m_pending_tags);
4511 }
4512 }
4513 //
4514 if(m_pending_anchors.num_entries == 2)
4515 {
4516 _c4dbgp("2 anchors, setting entry 0");
4517 m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4518 }
4519 else if(m_pending_anchors.num_entries == 1)
4520 {
4521 _c4dbgpf("1 anchor. line={}, curr={}", m_pending_anchors.annotations[0].line, current_line);
4522 if(m_pending_anchors.annotations[0].line < current_line)
4523 {
4524 _c4dbgp("...anchor is for the map. setting it.");
4525 m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
4526 _clear_annotations(&m_pending_anchors);
4527 }
4528 }
4529}
4530
4531template<class EventHandler>
4532void ParseEngine<EventHandler>::_handle_annotations_before_start_mapblck_as_key()
4533{
4534 _c4dbgp("annotations_before_start_mapblck_as_key");
4535 switch(m_pending_tags.num_entries)
4536 {
4537 case 1u:
4538 _c4dbgpf("annotations_after_start_mapblck_as_key: 1 tag={} line={} currline=", _prs(m_pending_tags.annotations[0].str), m_pending_tags.annotations[0].line, m_evt_handler->m_curr->pos.line);
4539 if(m_pending_tags.annotations[0].line != m_evt_handler->m_curr->pos.line)
4540 {
4541 _c4dbgp("annotations_after_start_mapblck_as_key: is map tag");
4542 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4543 _clear_annotations(&m_pending_tags);
4544 }
4545 break;
4546 case 2u:
4547 _c4dbgpf("annotations_after_start_mapblck_as_key: 2 tags: {} -> {}", _prs(m_pending_tags.annotations[0].str), _prs(m_pending_tags.annotations[1].str));
4548 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4549 break;
4550 }
4551 switch(m_pending_anchors.num_entries)
4552 {
4553 case 1u:
4554 _c4dbgpf("annotations_after_start_mapblck_as_key: 1 anchor={} line={} currline=", m_pending_anchors.annotations[0].str, m_pending_anchors.annotations[0].line, m_evt_handler->m_curr->pos.line);
4555 if(m_pending_anchors.annotations[0].line != m_evt_handler->m_curr->pos.line)
4556 {
4557 _c4dbgp("annotations_after_start_mapblck_as_key: is map anchor");
4558 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4559 _clear_annotations(&m_pending_anchors);
4560 }
4561 break;
4562 case 2u:
4563 _c4dbgpf("annotations_after_start_mapblck_as_key: 2 anchors: {} -> {}", m_pending_anchors.annotations[0].str, m_pending_anchors.annotations[1].str);
4564 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4565 break;
4566 }
4567}
4568
4569template<class EventHandler>
4570void ParseEngine<EventHandler>::_handle_annotations_and_indentation_after_start_mapblck(size_t key_indentation, size_t key_line)
4571{
4572 _c4dbgp("annotations_after_start_mapblck");
4573 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries <= 2, m_evt_handler->m_curr->pos);
4574 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.num_entries <= 2, m_evt_handler->m_curr->pos);
4575 if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
4576 {
4577 key_indentation = _select_indentation_from_annotations(key_indentation, key_line);
4578 switch(m_pending_tags.num_entries)
4579 {
4580 case 1u:
4581 _c4dbgpf("annotations_after_start_mapblck: 1 tag: {}", _prs(m_pending_tags.annotations[0].str));
4582 m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
4583 _clear_annotations(&m_pending_tags);
4584 break;
4585 case 2u:
4586 _c4dbgpf("annotations_after_start_mapblck: 2 tags: {} -> {}", _prs(m_pending_tags.annotations[0].str), _prs(m_pending_tags.annotations[1].str));
4587 m_evt_handler->set_key_tag(m_pending_tags.annotations[1].str);
4588 _clear_annotations(&m_pending_tags);
4589 break;
4590 }
4591 switch(m_pending_anchors.num_entries)
4592 {
4593 case 1u:
4594 _c4dbgpf("annotations_after_start_mapblck: 1 anchors: {} -> {}", m_pending_anchors.annotations[0].str);
4595 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
4596 _clear_annotations(&m_pending_anchors);
4597 break;
4598 case 2u:
4599 _c4dbgpf("annotations_after_start_mapblck: 2 anchors: {} -> {}", m_pending_anchors.annotations[0].str, m_pending_anchors.annotations[1].str);
4600 m_evt_handler->set_key_anchor(m_pending_anchors.annotations[1].str);
4601 _clear_annotations(&m_pending_anchors);
4602 break;
4603 }
4604 }
4605 _set_indentation(key_indentation);
4606}
4607
4608template<class EventHandler>
4609size_t ParseEngine<EventHandler>::_select_indentation_from_annotations(size_t val_indentation, size_t val_line)
4610{
4611 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries | m_pending_anchors.num_entries, m_evt_handler->m_curr->pos);
4612 // select the left-most annotation on the max line
4613 auto const *C4_RESTRICT curr = m_pending_anchors.num_entries ? &m_pending_anchors.annotations[0] : &m_pending_tags.annotations[0];
4614 for(size_t i = 0; i < m_pending_anchors.num_entries; ++i)
4615 {
4616 auto const& C4_RESTRICT ann = m_pending_anchors.annotations[i];
4617 if(ann.line > curr->line)
4618 curr = &ann;
4619 else if(ann.indentation < curr->indentation)
4620 curr = &ann;
4621 }
4622 for(size_t j = 0; j < m_pending_tags.num_entries; ++j)
4623 {
4624 auto const& C4_RESTRICT ann = m_pending_tags.annotations[j];
4625 if(ann.line > curr->line)
4626 curr = &ann;
4627 else if(ann.indentation < curr->indentation)
4628 curr = &ann;
4629 }
4630 return curr->line < val_line ? val_indentation : curr->indentation;
4631}
4632
4633template<class EventHandler>
4634void ParseEngine<EventHandler>::_handle_keyref(csubstr alias)
4635{
4636 if(C4_LIKELY(!(m_pending_anchors.num_entries | m_pending_tags.num_entries)))
4637 m_evt_handler->set_key_ref(alias);
4638 else
4639 _c4err("aliases cannot have anchors or tags");
4640}
4641
4642template<class EventHandler>
4643void ParseEngine<EventHandler>::_handle_valref(csubstr alias)
4644{
4645 if(C4_LIKELY(!(m_pending_anchors.num_entries | m_pending_tags.num_entries)))
4646 m_evt_handler->set_val_ref(alias);
4647 else
4648 _c4err("aliases cannot have anchors or tags");
4649}
4650
4651template<class EventHandler>
4652csubstr ParseEngine<EventHandler>::_resolve_tag(csubstr tag)
4653{
4654 _c4dbgpf("resolving tag: {} curr_doc={}", _prs(tag), m_evt_handler->m_curr_doc);
4655 _c4assert(tag.is_sub(_buf()));
4656 TagCache::LookupResult ret = m_evt_handler->tag_cache().find(tag, m_evt_handler->m_curr_doc);
4657 if(ret)
4658 {
4659 _c4dbgpf("resolving tag: found in cache[{}]: {}", ret.pos, _prs(ret.resolved));
4660 return ret.resolved;
4661 }
4662 _c4dbgpf("resolving tag: not in cache: {} curr_doc={}", _prs(tag), m_evt_handler->m_curr_doc);
4663 size_t bufsz = 0;
4664 substr buf = m_evt_handler->arena_rem();
4665 TagDirectives const& C4_RESTRICT tds = m_evt_handler->tag_directives();
4666 csubstr ttag = tds.resolve(buf, &bufsz, tag, m_evt_handler->m_curr_doc,
4667 m_evt_handler->m_curr->pos,
4668 m_evt_handler->m_stack.m_callbacks);
4669 _c4dbgpf("resolving tag: bufsz={} ttag.len={} !!ttag.str={}", bufsz, ttag.len, !!ttag.str);
4670 _c4assert((bufsz > buf.len) == (!ttag.str));
4671 _c4assert(!!bufsz == (ttag.len == bufsz));
4672 // try again if the arena size was not enough
4673 if(!ttag.str)
4674 {
4675 _c4dbgpf("tag requires arena, but it was small. arena.len={} arena.slack={} tag.required={}", m_evt_handler->arena_rem().len, m_evt_handler->arena().len, ttag.len);
4676 _c4assert(ttag.len == bufsz);
4677 buf = _alloc_arena(bufsz, &tag);
4678 if(buf.str) // the alloc may fail eg with the ints handler
4679 {
4680 ttag = tds.resolve(buf, &bufsz, tag, m_evt_handler->m_curr_doc,
4681 m_evt_handler->m_curr->pos,
4682 m_evt_handler->m_stack.m_callbacks);
4683 }
4684 _c4assert(ttag.len == bufsz);
4685 _c4assert(!ttag.str || ttag.is_sub(m_evt_handler->arena()));
4686 }
4687 else if(bufsz) // if we succeeded writing into the arena, grow it as needed
4688 {
4689 _c4dbgp("tag required arena. update size");
4690 _c4assert(ttag.len == bufsz);
4691 _c4assert(ttag.is_sub(buf));
4692 (void)_alloc_arena(bufsz);
4693 }
4694 C4_SUPPRESS_WARNING_MSVC_WITH_PUSH(4127) // conditional expression is constant
4695 if C4_IF_CONSTEXPR (EventHandler::requires_strings_on_buffers) // NOLINT
4696 {
4697 _c4dbgpf("handler requires tags in buffers. !!ttag.str={} in_arena={} in_src={}", !!ttag.str, ttag.is_sub(m_evt_handler->arena()), ttag.is_sub(_buf()));
4698 // is the resolved tag not in any of those buffers?
4699 if(ttag.str && !ttag.is_sub(m_evt_handler->arena()) && !ttag.is_sub(_buf()))
4700 {
4701 _c4dbgpf("copying resolved tag to arena: slack={} required={}", m_evt_handler->arena_rem().len, ttag.len);
4702 buf = _alloc_arena(ttag.len, &tag);
4703 if(buf.str) // the alloc may fail eg with the ints handler
4704 memcpy(buf.str, ttag.str, ttag.len);
4705 ttag.str = buf.str; // keep the current len!
4706 _c4assert(!ttag.str || ttag.is_sub(m_evt_handler->arena()));
4707 }
4708 }
4709 C4_SUPPRESS_WARNING_MSVC_POP
4710 _c4dbgpf("resolved tag: {} --> [{}]~~~{}~~~", _prs(tag), ttag.len, _maybe_null_str(ttag));
4711 _c4assert(ttag.len > 0);
4712 // cache the hard-earned result!
4713 m_evt_handler->tag_cache().add(tag, ttag, m_evt_handler->m_curr_doc, ret.pos);
4714 return ttag;
4715}
4716
4717template<class EventHandler>
4718bool ParseEngine<EventHandler>::_validate_directive_yaml(csubstr *C4_RESTRICT directive, csubstr *C4_RESTRICT version) const
4719{
4720 _c4assert(directive->begins_with("%YAML"));
4721 size_t version_start = directive->first_not_of(" \t", 5);
4722 if(version_start != npos)
4723 {
4724 csubstr digits = "0123456789";
4725 size_t major_end = directive->first_not_of(digits, version_start);
4726 if(major_end != npos && directive->str[major_end] == '.') // single dot
4727 {
4728 size_t minor_end = directive->first_not_of(digits, major_end + 1);
4729 if(minor_end == npos)
4730 minor_end = directive->len;
4731 _set_first_strict(*directive, minor_end);
4732 *version = directive->range(version_start, minor_end);
4733 _c4dbgpf("%YAML: version={} full={}", *version, _prs(*directive, true));
4734 return true;
4735 }
4736 }
4737 return false;
4738}
4739
4740template<class EventHandler>
4741bool ParseEngine<EventHandler>::_validate_directive_tag(csubstr *C4_RESTRICT directive, csubstr *C4_RESTRICT handle, csubstr *C4_RESTRICT prefix) const
4742{
4743 _c4assert(directive->begins_with("%TAG"));
4744 csubstr whitespace = " \t";
4745 size_t handle_start = directive->first_not_of(whitespace, 4);
4746 if(handle_start != npos && directive->str[handle_start] == '!')
4747 {
4748 size_t handle_end = directive->first_of(whitespace, handle_start);
4749 if(handle_end != npos)
4750 {
4751 size_t prefix_start = directive->first_not_of(whitespace, handle_end);
4752 if(prefix_start != npos)
4753 {
4754 size_t prefix_end = directive->first_of(whitespace, prefix_start);
4755 if(prefix_end == npos)
4756 prefix_end = directive->len;
4757 _set_first_strict(*directive, prefix_end);
4758 *handle = directive->range(handle_start, handle_end);
4759 *prefix = directive->range(prefix_start, prefix_end);
4760 _c4dbgpf("%TAG: handle={} prefix={} full={}", *handle, *prefix, _prs(*directive, true));
4761 if(is_valid_tag_handle(*handle))
4762 return true;
4763 }
4764 }
4765 }
4766 return false;
4767}
4768
4769template<class EventHandler>
4770void ParseEngine<EventHandler>::_handle_directive(csubstr directive)
4771{
4772 _c4dbgpf("handle_directive: rem={}", _prs(directive, true));
4773 _c4assert(m_evt_handler->m_curr->line_contents.rem.begins_with('%'));
4774 _c4assert(directive.str == m_evt_handler->m_curr->line_contents.rem.str);
4775 const char *err = nullptr;
4776 csubstr rem;
4777 size_t pos;
4778 auto isdirective = [](csubstr str, csubstr dir) {
4779 if(str.begins_with(dir))
4780 {
4781 csubstr rest = str.sub(dir.len);
4782 return (!rest.len || rest.str[0] == ' ' || rest.str[0] == '\t');
4783 }
4784 return false;
4785 };
4786 if(isdirective(directive, "%TAG"))
4787 {
4788 csubstr handle;
4789 csubstr prefix;
4790 if(C4_UNLIKELY(!_validate_directive_tag(&directive, &handle, &prefix)))
4791 {
4792 err = "invalid %TAG directive";
4793 goto directive_error; // NOLINT
4794 }
4795 m_evt_handler->add_directive_tag(handle, prefix);
4796 }
4797 else if(isdirective(directive, "%YAML"))
4798 {
4800 if(C4_UNLIKELY(!_validate_directive_yaml(&directive, &version)))
4801 {
4802 err = "invalid %YAML directive";
4803 goto directive_error; // NOLINT
4804 }
4805 if(C4_UNLIKELY(m_has_directives_yaml))
4806 {
4807 err = "multiple %YAML directives";
4808 goto directive_error; // NOLINT
4809 }
4810 m_has_directives_yaml = true;
4811 m_evt_handler->add_directive_yaml(version);
4812 }
4813 m_has_directives = true;
4814 rem = m_evt_handler->m_curr->line_contents.rem;
4815 pos = rem.first_not_of(" \t", directive.len);
4816 pos = pos != npos ? pos : rem.len;
4817 _line_progressed(pos);
4818 rem = rem.sub(pos);
4819 _c4dbgpf("handle_directive: rest={}", _prs(rem));
4820 if(C4_UNLIKELY(rem.len && !rem.begins_with('#')))
4821 {
4822 err = "invalid tokens after directive";
4823 goto directive_error; // NOLINT
4824 }
4825directive_error:
4826 if(C4_UNLIKELY(err != nullptr))
4827 _c4err(err);
4828}
4829
4830template<class EventHandler>
4831bool ParseEngine<EventHandler>::_handle_bom()
4832{
4833 const csubstr rem = m_evt_handler->m_curr->line_contents.rem;
4834 if(rem.len)
4835 {
4836 const csubstr rest = rem.sub(1);
4837 // https://yaml.org/spec/1.2.2/#52-character-encodings
4838 #define _rymlisascii(c) ((c) > '\0' && (c) <= '\x7f') // is the character ASCII?
4839 if(rem.begins_with(csubstr{"\x00\x00\xfe\xff", 4}) || (rem.begins_with(csubstr{"\x00\x00\x00", 3}) && rem.len >= 4u && _rymlisascii(rem.str[3])))
4840 {
4841 _c4dbgp("byte order mark: UTF32BE");
4842 _handle_bom(UTF32BE);
4843 _line_progressed(4);
4844 m_bom_len = 4;
4845 return true;
4846 }
4847 else if(rem.begins_with(csubstr{"\xff\xfe\x00\x00", 4}) || (rest.begins_with(csubstr{"\x00\x00\x00", 3}) && rem.len >= 4u && _rymlisascii(rem.str[0])))
4848 {
4849 _c4dbgp("byte order mark: UTF32LE");
4850 _handle_bom(UTF32LE);
4851 _line_progressed(4);
4852 m_bom_len = 4;
4853 return true;
4854 }
4855 else if(rem.begins_with("\xfe\xff") || (rem.begins_with('\x00') && rem.len >= 2u && _rymlisascii(rem.str[1])))
4856 {
4857 _c4dbgp("byte order mark: UTF16BE");
4858 _handle_bom(UTF16BE);
4859 _line_progressed(2);
4860 m_bom_len = 2;
4861 return true;
4862 }
4863 else if(rem.begins_with("\xff\xfe") || (rest.begins_with('\x00') && rem.len >= 2u && _rymlisascii(rem.str[0])))
4864 {
4865 _c4dbgp("byte order mark: UTF16LE");
4866 _handle_bom(UTF16LE);
4867 _line_progressed(2);
4868 m_bom_len = 2;
4869 return true;
4870 }
4871 else if(rem.begins_with("\xef\xbb\xbf"))
4872 {
4873 _c4dbgp("byte order mark: UTF8");
4874 _handle_bom(UTF8);
4875 _line_progressed(3);
4876 m_bom_len = 3;
4877 return true;
4878 }
4879 #undef _rymlisascii
4880 }
4881 return false;
4882}
4883
4884template<class EventHandler>
4885void ParseEngine<EventHandler>::_handle_bom(Encoding_e enc)
4886{
4887 if(m_encoding == NOBOM)
4888 {
4889 if(enc == UTF8 || /*beginning of file*/(m_evt_handler->m_curr->line_contents.rem.str == _buf().str))
4890 m_encoding = enc;
4891 else
4892 _c4err("non-UTF8 byte order mark can appear only at the beginning of the file");
4893 }
4894 else if(enc != m_encoding)
4895 {
4896 _c4err("byte order mark can only be set once");
4897 }
4898}
4899
4900
4901//-----------------------------------------------------------------------------
4902
4903template<class EventHandler>
4904void ParseEngine<EventHandler>::_handle_seq_json()
4905{
4906seqjson_start:
4907 _c4dbgpf("handle2_seq_json: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
4908
4909 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
4910 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RSEQ), m_evt_handler->m_curr->pos);
4911 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RFLOW), m_evt_handler->m_curr->pos);
4912 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT), m_evt_handler->m_curr->pos);
4913 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RVAL) != has_all(RNXT), m_evt_handler->m_curr->pos);
4914
4915 _handle_flow_skip_whitespace();
4916 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
4917 if(!rem.len)
4918 goto seqjson_again;
4919
4920 if(has_any(RVAL))
4921 {
4922 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
4923 const char first = rem.str[0];
4924 _c4dbgpf("mapjson[RVAL]: '{}'", first);
4925 switch(first)
4926 {
4927 case '"':
4928 {
4929 _c4dbgp("seqjson[RVAL]: scanning double-quoted scalar");
4930 ScannedScalar sc = _scan_scalar_dquot();
4931 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
4932 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
4933 addrem_flags(RNXT, RVAL);
4934 break;
4935 }
4936 case '[':
4937 {
4938 _c4dbgp("seqjson[RVAL]: start child seqjson");
4939 addrem_flags(RNXT, RVAL);
4940 m_evt_handler->begin_seq_val_flow();
4941 addrem_flags(RVAL, RNXT);
4942 _line_progressed(1);
4943 break;
4944 }
4945 case '{':
4946 {
4947 _c4dbgp("seqjson[RVAL]: start child mapjson");
4948 addrem_flags(RNXT, RVAL);
4949 m_evt_handler->begin_map_val_flow();
4950 addrem_flags(RMAP|RKEY, RSEQ|RVAL|RNXT);
4951 _line_progressed(1);
4952 goto seqjson_finish;
4953 }
4954 case ']': // this happens on a trailing comma like ", ]"
4955 {
4956 _c4dbgp("seqjson[RVAL]: end!");
4957 rem_flags(RSEQ);
4958 _end_seq_flow();
4959 _line_progressed(1);
4960 if(!has_all(RSEQ|RFLOW))
4961 goto seqjson_finish;
4962 break;
4963 }
4964 default:
4965 {
4966 ScannedScalar sc;
4967 if(_scan_scalar_seq_json(&sc))
4968 {
4969 _c4dbgp("seqjson[RVAL]: it's a plain scalar.");
4970 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
4971 m_evt_handler->set_val_scalar_plain(maybe_filtered);
4972 addrem_flags(RNXT, RVAL);
4973 }
4974 else
4975 {
4976 _c4err("parse error");
4977 }
4978 }
4979 }
4980 }
4981 else // RNXT
4982 {
4983 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RNXT), m_evt_handler->m_curr->pos);
4984 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
4985 const char first = rem.str[0];
4986 _c4dbgpf("mapjson[RNXT]: '{}'", first);
4987 switch(first)
4988 {
4989 case ',':
4990 {
4991 _c4dbgp("seqjson[RNXT]: expect next val");
4992 addrem_flags(RVAL, RNXT);
4993 m_evt_handler->add_sibling();
4994 _line_progressed(1);
4995 break;
4996 }
4997 case ']':
4998 {
4999 _c4dbgp("seqjson[RNXT]: end!");
5000 _end_seq_flow();
5001 _line_progressed(1);
5002 goto seqjson_finish;
5003 }
5004 default:
5005 _c4err("parse error");
5006 }
5007 }
5008
5009 seqjson_again:
5010 _c4dbgt("seqjson: go again", 0);
5011 if(_finished_line())
5012 {
5013 if(C4_LIKELY(!_finished_file()))
5014 {
5015 _line_ended();
5016 _scan_line();
5018 }
5019 else
5020 {
5021 _c4err("missing terminating ]");
5022 }
5023 }
5024 goto seqjson_start;
5025
5026 seqjson_finish:
5027 _c4dbgp("seqjson: finish");
5028}
5029
5030
5031//-----------------------------------------------------------------------------
5032
/** Handle the contents of a JSON map (ie, `{...}`).
 *
 * This is a state machine where exactly one of these flags is set:
 *   - RKEY: expecting a double-quoted key (or `}` after a trailing comma)
 *   - RKCL: expecting the `:` after a key
 *   - RVAL: expecting a value
 *   - RNXT: expecting `,` or `}`
 *
 * It keeps consuming tokens, scanning further lines as needed, and
 * returns when the map is closed by `}`, or when a child sequence is
 * opened with `[` (the state is then RSEQ|RVAL). A child map opened
 * with `{` is handled by continuing in this function. Reaching the
 * end of the file before the closing `}` is an error. */
5033 template<class EventHandler>
5034 void ParseEngine<EventHandler>::_handle_map_json()
5035 {
5036mapjson_start:
5037 _c4dbgpf("handle2_map_json: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5038
5039 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RMAP), m_evt_handler->m_curr->pos);
5040 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RFLOW), m_evt_handler->m_curr->pos);
5041 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5042 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RKCL|RVAL|RNXT), m_evt_handler->m_curr->pos);
// exactly one of the state flags must be set
5043 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RKEY) + has_any(RKCL) + has_any(RVAL) + has_any(RNXT)), m_evt_handler->m_curr->pos);
5044
5045 _handle_flow_skip_whitespace();
5046 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// nothing left in this line: go get the next one
5047 if(!rem.len)
5048 goto mapjson_again;
5049
// ---- state RKEY: read the key
5050 if(has_any(RKEY))
5051 {
5052 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
5053 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5054 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5055 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5056 const char first = rem.str[0];
5057 _c4dbgpf("mapjson[RKEY]: '{}'", first);
5058 switch(first)
5059 {
5060 case '"':
5061 {
5062 _c4dbgp("mapjson[RKEY]: scanning double-quoted scalar");
5063 ScannedScalar sc = _scan_scalar_dquot();
5064 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5065 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5066 addrem_flags(RKCL, RKEY);
5067 break;
5068 }
5069 case '}': // this happens on a trailing comma like ", }"
5070 {
5071 _c4dbgp("mapjson[RKEY]: end!");
5072 _end_map_flow();
5073 _line_progressed(1);
5074 goto mapjson_finish;
5075 }
// only double-quoted keys are accepted here
5076 default:
5077 _c4err("parse error");
5078 }
5079 }
// ---- state RVAL: read the value
5080 else if(has_any(RVAL))
5081 {
5082 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
5083 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
5084 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5085 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5086 const char first = rem.str[0];
5087 _c4dbgpf("mapjson[RVAL]: '{}'", first);
5088 switch(first)
5089 {
5090 case '"':
5091 {
5092 _c4dbgp("mapjson[RVAL]: scanning double-quoted scalar");
5093 ScannedScalar sc = _scan_scalar_dquot();
5094 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5095 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5096 addrem_flags(RNXT, RVAL);
5097 break;
5098 }
5099 case '[':
5100 {
5101 _c4dbgp("mapjson[RVAL]: start val seqjson");
// the flags set before begin_seq_val_flow() are for this map;
// the flags set after it are for the child seq
5102 addrem_flags(RNXT, RVAL);
5103 m_evt_handler->begin_seq_val_flow();
5104 _set_indentation(m_evt_handler->m_parent->indref);
5105 addrem_flags(RSEQ|RVAL, RMAP|RNXT);
5106 _line_progressed(1);
// now in a seq: leave this function
5107 goto mapjson_finish;
5108 }
5109 case '{':
5110 {
5111 _c4dbgp("mapjson[RVAL]: start val mapjson");
5112 addrem_flags(RNXT, RVAL);
5113 m_evt_handler->begin_map_val_flow();
5114 _set_indentation(m_evt_handler->m_parent->indref);
5115 addrem_flags(RKEY, RNXT);
5116 _line_progressed(1);
5117 // keep going in this function
5118 break;
5119 }
5120 default:
5121 {
5122 ScannedScalar sc;
5123 if(_scan_scalar_map_json(&sc))
5124 {
5125 _c4dbgp("mapjson[RVAL]: plain scalar.");
5126 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5127 m_evt_handler->set_val_scalar_plain(maybe_filtered);
5128 addrem_flags(RNXT, RVAL);
5129 }
5130 else
5131 {
5132 _c4err("parse error");
5133 }
5134 break;
5135 }
5136 }
5137 }
// ---- state RKCL
5138 else if(has_any(RKCL)) // read the key colon
5139 {
5140 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
5141 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5142 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5143 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5144 const char first = rem.str[0];
5145 _c4dbgpf("mapjson[RKCL]: '{}'", first);
5146 if(first == ':')
5147 {
5148 _c4dbgp("mapjson[RKCL]: found the colon");
5149 addrem_flags(RVAL, RKCL);
5150 _line_progressed(1);
5151 }
5152 else
5153 {
5154 _c4err("parse error");
5155 }
5156 }
// ---- state RNXT: after a value, expect ',' or '}'
5157 else if(has_any(RNXT))
5158 {
5159 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
5160 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
5161 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5162 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5163 _c4dbgpf("mapjson[RNXT]: '{}'", rem.str[0]);
5164 if(rem.begins_with(','))
5165 {
5166 _c4dbgp("mapjson[RNXT]: expect next keyval");
5167 m_evt_handler->add_sibling();
5168 addrem_flags(RKEY, RNXT);
5169 _line_progressed(1);
5170 }
5171 else if(rem.begins_with('}'))
5172 {
5173 _c4dbgp("mapjson[RNXT]: end!");
5174 _end_map_flow();
5175 _line_progressed(1);
5176 goto mapjson_finish;
5177 }
5178 else
5179 {
5180 _c4err("parse error"); // LCOV_EXCL_LINE
5181 }
5182 }
5183
// move to the next line if the current one is exhausted, then loop
5184 mapjson_again:
5185 _c4dbgt("mapjson: go again", 0);
5186 if(_finished_line())
5187 {
5188 if(C4_LIKELY(!_finished_file()))
5189 {
5190 _line_ended();
5191 _scan_line();
5193 }
5194 else
5195 {
// the file ended while the map was still open
5196 _c4err("missing terminating }");
5197 }
5198 }
5199 goto mapjson_start;
5200
5201 mapjson_finish:
5202 _c4dbgp("mapjson: finish");
5203}
5204
5205
5206//-----------------------------------------------------------------------------
5207
/** Handle the state flagged as RSEQIMAP.
 *
 * NOTE(review): judging from the names and debug messages, this is a
 * single-pair map written directly inside a flow sequence, as in
 * `[key: val]` -- confirm against the caller.
 *
 * This is a state machine where exactly one of these flags is set:
 *   - QMRK: expecting the key
 *   - RKCL: expecting the `:` after the key
 *   - RVAL: expecting the value
 *   - RNXT: expecting `,` or `]`
 *
 * It keeps consuming tokens, scanning further lines as needed, and
 * returns when the map is ended (via _end_map_flow()), or when a
 * child flow container is opened with `[` or `{`. In the latter case
 * RSEQIMAP is removed from the flags of the new state. Reaching the
 * end of the file is an error. */
5208 template<class EventHandler>
5209 void ParseEngine<EventHandler>::_handle_seq_imap()
5210 {
5211seqimap_start:
5212 _c4dbgpf("handle2_seq_imap: node_id={} level={} indref={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5213
5214 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RSEQIMAP), m_evt_handler->m_curr->pos);
5215 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
5216 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT|QMRK|RKCL), m_evt_handler->m_curr->pos);
// exactly one of the state flags must be set
5217 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, 1 == has_all(RVAL) + has_all(RNXT) + has_all(QMRK) + has_all(RKCL), m_evt_handler->m_curr->pos);
5218 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 3, m_evt_handler->m_curr->pos);
5219
5220 _handle_flow_skip_whitespace();
5221 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
// nothing left in this line: go get the next one
5222 if(!rem.len)
5223 goto seqimap_again;
5224
// ---- state RVAL: read the value. Any scalar (or an empty value)
// ends the map.
5225 if(has_any(RVAL))
5226 {
5227 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RVAL), m_evt_handler->m_curr->pos);
5228 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5229 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5230 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
5231 const char first = rem.str[0];
5232 _c4dbgpf("seqimap[RVAL]: '{}'", _c4prc(first));
5233 ScannedScalar sc;
5234 if(first == '\'')
5235 {
5236 _c4dbgp("seqimap[RVAL]: scanning single-quoted scalar");
5237 sc = _scan_scalar_squot();
5238 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5239 _handle_annotations_before_blck_val_scalar();
5240 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
5241 _end_map_flow();
5242 goto seqimap_finish;
5243 }
5244 else if(first == '"')
5245 {
5246 _c4dbgp("seqimap[RVAL]: scanning double-quoted scalar");
5247 sc = _scan_scalar_dquot();
5248 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5249 _handle_annotations_before_blck_val_scalar();
5250 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5251 _end_map_flow();
5252 goto seqimap_finish;
5253 }
5254 // block scalars (ie | and >) cannot appear in flow containers
5255 else if(_scan_scalar_plain_map_flow(&sc))
5256 {
5257 _c4dbgp("seqimap[RVAL]: it's a scalar.");
5258 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5259 _handle_annotations_before_blck_val_scalar();
5260 m_evt_handler->set_val_scalar_plain(maybe_filtered);
5261 _end_map_flow();
5262 goto seqimap_finish;
5263 }
5264 else if(first == '[')
5265 {
5266 _c4dbgp("seqimap[RVAL]: start child seqflow");
// the flags set before begin_seq_val_flow() are for this map;
// the flags set after it are for the child seq
5267 addrem_flags(RNXT, RVAL);
5268 _handle_annotations_before_blck_val_scalar();
5269 m_evt_handler->begin_seq_val_flow();
5270 addrem_flags(RVAL, RNXT|RSEQIMAP);
5271 _set_indentation(m_evt_handler->m_parent->indref);
5272 _line_progressed(1);
5273 goto seqimap_finish;
5274 }
5275 else if(first == '{')
5276 {
5277 _c4dbgp("seqimap[RVAL]: start child mapflow");
5278 addrem_flags(RNXT, RVAL);
5279 _handle_annotations_before_blck_val_scalar();
5280 m_evt_handler->begin_map_val_flow();
5281 addrem_flags(RMAP|RKEY, RSEQ|RVAL|RSEQIMAP|RNXT);
5282 _set_indentation(m_evt_handler->m_parent->indref);
5283 _line_progressed(1);
5284 goto seqimap_finish;
5285 }
// the value is missing. Note that the ',' or ']' is not consumed here.
5286 else if(first == ',' || first == ']')
5287 {
5288 _c4dbgp("seqimap[RVAL]: finish without val.");
5289 _handle_annotations_before_blck_val_scalar();
5290 m_evt_handler->set_val_scalar_plain_empty();
5291 _end_map_flow();
5292 goto seqimap_finish;
5293 }
// an alias as the value: moves to RNXT and keeps going in this function
5294 else if(first == '*')
5295 {
5296 csubstr ref = _scan_ref_seq();
5297 _c4dbgpf("seqimap[RVAL]: ref! {}", _prs(ref));
5298 _handle_valref(ref);
5299 addrem_flags(RNXT, RVAL);
5300 }
// anchors and tags are stored as pending; the state stays in RVAL
5301 else if(first == '&')
5302 {
5303 csubstr anchor = _scan_anchor();
5304 _c4dbgpf("seqimap[RVAL]: anchor! {}", _prs(anchor));
5305 _add_annotation(&m_pending_anchors, anchor);
5306 }
5307 else if(first == '!')
5308 {
5309 csubstr tag = _scan_tag();
5310 _c4dbgpf("seqimap[RVAL]: tag! {}", _prs(tag));
5311 _add_annotation(&m_pending_tags, tag);
5312 }
5313 else
5314 {
5315 _c4err("parse error"); // LCOV_EXCL_LINE
5316 }
5317 }
// ---- state RNXT: the value is done; expect the ',' or ']' of the
// enclosing seq
5318 else if(has_any(RNXT))
5319 {
5320 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RNXT), m_evt_handler->m_curr->pos);
5321 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5322 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5323 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
5324 const char first = rem.str[0];
5325 _c4dbgpf("seqimap[RNXT]: '{}'", _c4prc(first));
5326 if(first == ',' || first == ']')
5327 {
5328 // we may get here because a map or a seq started and we
5329 // return later
5330 _c4dbgp("seqimap: done");
// the ',' or ']' is not consumed here
5331 _end_map_flow();
5332 goto seqimap_finish;
5333 }
5334 else
5335 {
5336 _c4err("parse error"); // LCOV_EXCL_LINE
5337 }
5338 }
// ---- state QMRK: read the key
5339 else if(has_any(QMRK))
5340 {
5341 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(QMRK), m_evt_handler->m_curr->pos);
5342 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5343 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5344 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
5345 const char first = rem.str[0];
5346 _c4dbgpf("seqimap[QMRK]: '{}'", _c4prc(first));
5347 ScannedScalar sc;
5348 if(first == '\'')
5349 {
5350 _c4dbgp("seqimap[QMRK]: scanning single-quoted scalar");
5351 sc = _scan_scalar_squot();
5352 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5353 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
5354 addrem_flags(RKCL, QMRK);
5355 goto seqimap_again;
5356 }
5357 else if(first == '"')
5358 {
5359 _c4dbgp("seqimap[QMRK]: scanning double-quoted scalar");
5360 sc = _scan_scalar_dquot();
5361 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5362 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5363 addrem_flags(RKCL, QMRK);
5364 goto seqimap_again;
5365 }
5366 // block scalars (ie | and >) cannot appear in flow containers
5367 else if(_scan_scalar_plain_map_flow(&sc))
5368 {
5369 _c4dbgp("seqimap[QMRK]: it's a scalar.");
5370 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5371 m_evt_handler->set_key_scalar_plain(maybe_filtered);
5372 addrem_flags(RKCL, QMRK);
5373 goto seqimap_again;
5374 }
// a flow container as the key
5375 else if(first == '[')
5376 {
5377 _c4dbgp("seqimap[QMRK]: start child seqflow");
5378 addrem_flags(RKCL, QMRK);
5379 m_evt_handler->begin_seq_key_flow();
5380 addrem_flags(RSEQ|RVAL, RKCL|RSEQIMAP);
5381 _set_indentation(m_evt_handler->m_parent->indref);
5382 _line_progressed(1);
5383 goto seqimap_finish;
5384 }
5385 else if(first == '{')
5386 {
5387 _c4dbgp("seqimap[QMRK]: start child mapflow");
5388 addrem_flags(RKCL, QMRK);
5389 m_evt_handler->begin_map_key_flow();
5390 addrem_flags(RMAP|RKEY, RSEQ|RKCL|RSEQIMAP);
5391 _set_indentation(m_evt_handler->m_parent->indref);
5392 _line_progressed(1);
5393 goto seqimap_finish;
5394 }
// both the key and the value are missing
5395 else if(first == ',' || first == ']')
5396 {
5397 _c4dbgp("seqimap[QMRK]: finish without key.");
5398 m_evt_handler->set_key_scalar_plain_empty();
5399 m_evt_handler->set_val_scalar_plain_empty();
5400 _end_map_flow();
5401 goto seqimap_finish;
5402 }
// a key anchor is set directly in the handler; the state stays in QMRK
5403 else if(first == '&')
5404 {
5405 csubstr anchor = _scan_anchor();
5406 _c4dbgp("seqimap[QMRK]: anchor!");
5407 m_evt_handler->set_key_anchor(anchor);
5408 }
5409 else if(first == '*')
5410 {
5411 csubstr ref = _scan_ref_seq();
5412 _c4dbgp("seqimap[QMRK]: ref!");
5413 _handle_keyref(ref);
5414 addrem_flags(RKCL, QMRK);
5415 }
5416 else
5417 {
5418 _c4err("parse error"); // LCOV_EXCL_LINE
5419 }
5420 }
// ---- state RKCL: the key is done; expect ':'
5421 else if(has_any(RKCL))
5422 {
5423 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5424 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5425 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5426 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RKCL), m_evt_handler->m_curr->pos);
5427 const char first = rem.str[0];
5428 _c4dbgpf("seqimap[RKCL]: '{}'", _c4prc(first));
5429 if(first == ':')
5430 {
5431 _c4dbgp("seqimap[RKCL]: found ':'");
5432 addrem_flags(RVAL, RKCL);
5433 _line_progressed(1);
5434 goto seqimap_again;
5435 }
// a key without ':' gets an empty value
5436 else if(first == ',' || first == ']')
5437 {
5438 _c4dbgp("seqimap[RKCL]: found ','. finish without val");
5439 m_evt_handler->set_val_scalar_plain_empty();
5440 _end_map_flow();
5441 goto seqimap_finish;
5442 }
5443 else
5444 {
5445 _c4err("parse error"); // LCOV_EXCL_LINE
5446 }
5447 }
5448
// move to the next line if the current one is exhausted, then loop
5449 seqimap_again:
5450 _c4dbgt("seqimap: go again", 0);
5451 if(_finished_line())
5452 {
5453 if(C4_LIKELY(!_finished_file()))
5454 {
5455 _line_ended();
5456 _scan_line();
5458 }
5459 else
5460 {
// the file ended before this state was left
5461 _c4err("parse error");
5462 }
5463 }
5464 goto seqimap_start;
5465
5466 seqimap_finish:
5467 _c4dbgp("seqimap: finish");
5468}
5469
5470
5471//-----------------------------------------------------------------------------
5472
5473template<class EventHandler>
5474void ParseEngine<EventHandler>::_handle_seq_flow()
5475{
5476seqflow_start:
5477 _c4dbgpf("handle_seq_flow: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5478
5479 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
5480 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RSEQ), m_evt_handler->m_curr->pos);
5481 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RFLOW), m_evt_handler->m_curr->pos);
5482 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT), m_evt_handler->m_curr->pos);
5483 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RVAL) != has_all(RNXT), m_evt_handler->m_curr->pos);
5484 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref != npos, m_evt_handler->m_curr->pos);
5485
5486 if(m_evt_handler->m_curr->at_line_beginning())
5487 {
5488 _handle_flow_line_beginning();
5489 }
5490
5491 _handle_flow_skip_whitespace();
5492 if(!m_evt_handler->m_curr->line_contents.rem.len)
5493 goto seqflow_again;
5494
5495 if(has_any(RVAL))
5496 {
5497 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5498 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5499 ScannedScalar sc;
5500 if(first == '\'')
5501 {
5502 _c4dbgp("seqflow[RVAL]: scanning single-quoted scalar");
5503 sc = _scan_scalar_squot();
5504 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5505 _handle_annotations_before_blck_val_scalar();
5506 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
5507 addrem_flags(RNXT, RVAL);
5508 _mark_seqflow_val_end();
5509 }
5510 else if(first == '"')
5511 {
5512 _c4dbgp("seqflow[RVAL]: scanning double-quoted scalar");
5513 sc = _scan_scalar_dquot();
5514 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5515 _handle_annotations_before_blck_val_scalar();
5516 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5517 addrem_flags(RNXT, RVAL);
5518 _mark_seqflow_val_end();
5519 }
5520 // block scalars (ie | and >) cannot appear in flow containers
5521 else if(_scan_scalar_plain_seq_flow(&sc))
5522 {
5523 _c4dbgp("seqflow[RVAL]: it's a scalar.");
5524 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5525 _handle_annotations_before_blck_val_scalar();
5526 m_evt_handler->set_val_scalar_plain(maybe_filtered);
5527 addrem_flags(RNXT, RVAL);
5528 _mark_seqflow_val_end();
5529 }
5530 else if(first == '[')
5531 {
5532 _c4dbgp("seqflow[RVAL]: start child seqflow");
5533 addrem_flags(RNXT, RVAL);
5534 _handle_annotations_before_blck_val_scalar();
5535 m_evt_handler->begin_seq_val_flow();
5536 _set_indentation(m_evt_handler->m_parent->indref);
5537 addrem_flags(RVAL, RNXT);
5538 _line_progressed(1);
5539 }
5540 else if(first == '{')
5541 {
5542 _c4dbgp("seqflow[RVAL]: start child mapflow");
5543 addrem_flags(RNXT, RVAL);
5544 _handle_annotations_before_blck_val_scalar();
5545 m_evt_handler->begin_map_val_flow();
5546 _set_indentation(m_evt_handler->m_parent->indref);
5547 addrem_flags(RMAP|RKEY, RSEQ|RVAL|RNXT);
5548 _line_progressed(1);
5549 goto seqflow_finish;
5550 }
5551 else if(first == ']') // this happens on cases such as [] or [.., ]
5552 {
5553 _c4dbgp("seqflow[RVAL]: end!");
5554 if(m_pending_anchors.num_entries | m_pending_tags.num_entries)
5555 {
5556 _c4dbgp("seqflow[RVAL]: add pending annotations");
5557 _handle_annotations_before_blck_val_scalar();
5558 m_evt_handler->set_val_scalar_plain_empty();
5559 }
5560 _line_progressed(1);
5561 _end_seq_flow();
5562 goto seqflow_finish;
5563 }
5564 else if(first == '*')
5565 {
5566 csubstr ref = _scan_ref_seq();
5567 _c4dbgpf("seqflow[RVAL]: ref! {}", _prs(ref));
5568 _handle_valref(ref);
5569 addrem_flags(RNXT, RVAL);
5570 }
5571 else if(first == '&')
5572 {
5573 csubstr anchor = _scan_anchor();
5574 _c4dbgpf("seqflow[RVAL]: anchor! {}", _prs(anchor));
5575 _add_annotation(&m_pending_anchors, anchor);
5576 }
5577 else if(first == '!')
5578 {
5579 csubstr tag = _scan_tag();
5580 _c4dbgpf("seqflow[RVAL]: tag! {}", _prs(tag));
5581 _add_annotation(&m_pending_tags, tag);
5582 }
5583 else if(first == ':')
5584 {
5585 _c4dbgpf("seqflow[RVAL]: actually seqimap at node[{}], with empty key", m_evt_handler->m_curr->node_id);
5586 addrem_flags(RNXT, RVAL);
5587 m_evt_handler->begin_map_val_flow();
5588 _set_indentation(m_evt_handler->m_parent->indref);
5589 _handle_annotations_before_blck_key_scalar();
5590 m_evt_handler->set_key_scalar_plain_empty();
5591 addrem_flags(RSEQIMAP|RVAL, RSEQ|RNXT);
5592 _line_progressed(1);
5593 goto seqflow_finish;
5594 }
5595 else if(first == '?')
5596 {
5597 _c4dbgp("seqflow[RVAL]: start child mapflow, explicit key");
5598 addrem_flags(RNXT, RVAL);
5599 m_evt_handler->begin_map_val_flow();
5600 _set_indentation(m_evt_handler->m_parent->indref);
5601 addrem_flags(RSEQIMAP|QMRK, RSEQ|RNXT);
5602 _line_progressed(1);
5603 _maybe_skip_whitespace_tokens();
5604 goto seqflow_finish;
5605 }
5606 else if(first == ',')
5607 {
5608 if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
5609 {
5610 _c4dbgp("seqflow[RVAL]: add pending annotations");
5611 _handle_annotations_before_blck_val_scalar();
5612 m_evt_handler->set_val_scalar_plain_empty();
5613 addrem_flags(RNXT, RVAL);
5614 _mark_seqflow_val_end();
5615 }
5616 else
5617 {
5618 _c4err("parse error");
5619 }
5620 }
5621 else
5622 {
5623 _c4err("parse error");
5624 }
5625 }
5626 else // RNXT
5627 {
5628 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RNXT), m_evt_handler->m_curr->pos);
5629 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5630 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5631 if(first == ',')
5632 {
5633 _c4dbgp("seqflow[RNXT]: expect next val");
5634 addrem_flags(RVAL, RNXT);
5635 m_evt_handler->add_sibling();
5636 _line_progressed(1);
5637 if(m_evt_handler->m_curr->line_contents.rem.begins_with('#'))
5638 {
5639 _c4err("parse error: invalid comment after comma");
5640 }
5641 _mark_seqflow_val_end();
5642 }
5643 else if(first == ']')
5644 {
5645 _c4dbgp("seqflow[RNXT]: end!");
5646 _line_progressed(1);
5647 _end_seq_flow();
5648 goto seqflow_finish;
5649 }
5650 else if(first == ':')
5651 {
5652 _c4dbgpf("seqflow[RNXT]: line@valend={} line@now={}", m_prev_val_end, m_evt_handler->m_curr->pos.line);
5653 if(m_prev_val_end != NONE && m_evt_handler->m_curr->pos.line == m_prev_val_end)
5654 {
5655 _c4dbgpf("seqflow[RNXT]: actually seqimap at node[{}]", m_evt_handler->m_curr->node_id);
5656 m_evt_handler->actually_val_is_first_key_of_new_map_flow();
5657 _set_indentation(m_evt_handler->m_parent->indref);
5658 _line_progressed(1);
5659 addrem_flags(RSEQIMAP|RVAL, RNXT);
5660 goto seqflow_finish;
5661 }
5662 else
5663 {
5664 _c4err("parse error");
5665 }
5666 }
5667 else
5668 {
5669 _c4err("parse error");
5670 }
5671 }
5672
5673 seqflow_again:
5674 _c4dbgt("seqflow: go again", 0);
5675 if(_finished_line())
5676 {
5677 if(C4_LIKELY(!_finished_file()))
5678 {
5679 _line_ended();
5680 _scan_line();
5682 }
5683 else
5684 {
5685 _c4err("missing terminating ]");
5686 }
5687 }
5688 goto seqflow_start;
5689
5690 seqflow_finish:
5691 _c4dbgp("seqflow: finish");
5692}
5693
5694
5695//-----------------------------------------------------------------------------
5696
// Handle the contents of a flow-style map (ie, "{...}").
//
// The function is a goto-driven loop over three labels:
//  - mapflow_start: assert the state invariants, skip leading
//    whitespace, then dispatch on the current state flag;
//  - mapflow_again: when the current line is exhausted, move to the
//    next line (or raise "missing terminating }" at end of file) and
//    jump back to mapflow_start;
//  - mapflow_finish: leave the function.
//
// Exactly one of these state flags must be set on every iteration
// (asserted below), and each one selects a branch:
//  - RKEY: a key is expected
//  - RKCL: a key was read; the key colon is expected
//  - RVAL: a value is expected
//  - RNXT: a ',' or the closing '}' is expected
//  - QMRK: a '?' (explicit key) was read; the key is expected
//
// Every branch inspects the first character of the remaining line
// contents and reports the matching event to m_evt_handler. The
// function is left when the map is closed with '}' or when a child
// flow seq is opened with '['. A child flow map opened with '{' keeps
// RMAP set and is processed by continuing in this same loop.
5697template<class EventHandler>
5698void ParseEngine<EventHandler>::_handle_map_flow()
5699{
5700mapflow_start:
5701 _c4dbgpf("handle_map_flow: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
5702
 // invariants: we are in a flow map, and in exactly one of the five states
5703 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RMAP), m_evt_handler->m_curr->pos);
5704 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RFLOW), m_evt_handler->m_curr->pos);
5705 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RKCL|RVAL|RNXT|QMRK), m_evt_handler->m_curr->pos);
5706 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RKEY) + has_any(RKCL) + has_any(RVAL) + has_any(RNXT) + has_any(QMRK)), m_evt_handler->m_curr->pos);
5707
5708 if(m_evt_handler->m_curr->at_line_beginning())
5709 {
5710 _handle_flow_line_beginning();
5711 }
5712
 // nothing left on this line after skipping whitespace: fetch the next line
5713 _handle_flow_skip_whitespace();
5714 if(!m_evt_handler->m_curr->line_contents.rem.len)
5715 goto mapflow_again;
5716
 // ---- state RKEY: a key is expected ----
5717 if(has_any(RKEY))
5718 {
5719 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
5720 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5721 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5722 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5723 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5724 _c4dbgpf("mapflow[RKEY]: '{}'", first);
5725 ScannedScalar sc;
 // scalar keys (single-quoted, double-quoted, plain): set the key, then expect the colon (RKCL)
5726 if(first == '\'')
5727 {
5728 _c4dbgp("mapflow[RKEY]: scanning single-quoted scalar");
5729 sc = _scan_scalar_squot();
5730 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
5731 _handle_annotations_before_blck_key_scalar();
5732 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
5733 addrem_flags(RKCL, RKEY|QMRK);
5734 }
5735 else if(first == '"')
5736 {
5737 _c4dbgp("mapflow[RKEY]: scanning double-quoted scalar");
5738 sc = _scan_scalar_dquot();
5739 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
5740 _handle_annotations_before_blck_key_scalar();
5741 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
5742 addrem_flags(RKCL, RKEY|QMRK);
5743 }
5744 // block scalars (ie | and >) cannot appear in flow containers
 // the plain-scalar scan is attempted before the single-char tokens below
5745 else if(_scan_scalar_plain_map_flow(&sc))
5746 {
5747 _c4dbgp("mapflow[RKEY]: plain scalar");
5748 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
5749 _handle_annotations_before_blck_key_scalar();
5750 m_evt_handler->set_key_scalar_plain(maybe_filtered);
5751 addrem_flags(RKCL, RKEY|QMRK);
5752 }
 // '?': explicit key indicator; consume it and switch to QMRK
5753 else if(first == '?')
5754 {
5755 _c4dbgp("mapflow[RKEY]: explicit key");
5756 _handle_annotations_before_blck_key_scalar();
5757 addrem_flags(QMRK, RKEY);
5758 _line_progressed(1);
5759 _maybe_skip_whitespace_tokens();
5760 }
 // ':' with no preceding key: the key is empty; consume the colon and expect the val
5761 else if(first == ':')
5762 {
5763 _c4dbgp("mapflow[RKEY]: setting empty key");
5764 _handle_annotations_before_blck_key_scalar();
5765 m_evt_handler->set_key_scalar_plain_empty();
5766 addrem_flags(RVAL, RKEY|QMRK);
5767 _line_progressed(1);
5768 _maybe_skip_whitespace_tokens();
5769 }
 // ',' where a key was expected: an error unless the helper returns true
 // (presumably when there are pending annotations -- TODO confirm).
 // The comma is not consumed here; the RNXT branch consumes it on
 // the next iteration.
5770 else if(first == ',')
5771 {
5772 _c4dbgp("mapflow[RKEY]: comma!");
5773 if(!_handle_annotations_before_unexpected_flow_token_rkey())
5774 _c4err("unexpected comma");
5775 addrem_flags(RNXT, RKEY|QMRK);
5776 // keep going in this function
5777 }
5778 else if(first == '}') // this happens on a trailing comma like ", }"
5779 {
5780 _c4dbgp("mapflow[RKEY]: end!");
5781 (void)_handle_annotations_before_unexpected_flow_token_rkey();
5782 _line_progressed(1);
5783 _end_map_flow();
5784 goto mapflow_finish;
5785 }
 // anchors and tags are buffered as pending annotations; the state does not change
5786 else if(first == '&')
5787 {
5788 csubstr anchor = _scan_anchor();
5789 _c4dbgpf("mapflow[RKEY]: key anchor! {}", _prs(anchor));
5790 _add_annotation(&m_pending_anchors, anchor);
5791 }
5792 else if(first == '!')
5793 {
5794 csubstr tag = _scan_tag();
5795 _c4dbgpf("mapflow[RKEY]: tag! {}", _prs(tag));
5796 _add_annotation(&m_pending_tags, tag);
5797 }
 // '*': the key is a reference; then expect the colon
5798 else if(first == '*')
5799 {
5800 csubstr ref = _scan_ref_map();
5801 _c4dbgpf("mapflow[RKEY]: key ref! {}", _prs(ref));
5802 _handle_keyref(ref);
5803 addrem_flags(RKCL, RKEY);
5804 }
5805 else if(first == '[')
5806 {
5807 // RYML's tree cannot store container keys, but that's
5808 // handled inside the tree event handler. Other handler
5809 // types may be able to handle it.
5810 _c4dbgp("mapflow[RKEY]: start child seqflow (!)");
5811 _handle_annotations_before_blck_key_scalar();
 // RKCL is set before beginning the child container (presumably so
 // that it is the state found when the child ends -- TODO confirm);
 // the flags are then switched to seq state, and the function is left
5812 addrem_flags(RKCL, RKEY);
5813 m_evt_handler->begin_seq_key_flow();
5814 addrem_flags(RSEQ|RVAL, RMAP|RKCL);
5815 _set_indentation(m_evt_handler->m_parent->indref);
5816 _line_progressed(1);
5817 goto mapflow_finish;
5818 }
5819 else if(first == '{')
5820 {
5821 // RYML's tree cannot store container keys, but that's
5822 // handled inside the tree event handler. Other handler
5823 // types may be able to handle it.
5824 _c4dbgp("mapflow[RKEY]: start child mapflow (!)");
5825 _handle_annotations_before_blck_key_scalar();
5826 addrem_flags(RKCL, RKEY);
5827 m_evt_handler->begin_map_key_flow();
5828 addrem_flags(RKEY, RVAL|RKCL);
5829 _set_indentation(m_evt_handler->m_parent->indref);
5830 _line_progressed(1);
5831 // keep going in this function
5832 }
5833 else
5834 {
5835 _c4err("parse error"); // LCOV_EXCL_LINE
5836 }
5837 }
5838 else if(has_any(RKCL)) // read the key colon
5839 {
5840 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
5841 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5842 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5843 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5844 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5845 _c4dbgpf("mapflow[RKCL]: '{}'", first);
5846 if(first == ':')
5847 {
5848 _c4dbgp("mapflow[RKCL]: found the colon");
5849 addrem_flags(RVAL, RKCL);
5850 _line_progressed(1);
5851 }
 // '}' right after a key: the val is set to empty and the map is closed
5852 else if(first == '}')
5853 {
5854 _c4dbgp("mapflow[RKCL]: end with missing val!");
5855 addrem_flags(RVAL, RKCL);
5856 m_evt_handler->set_val_scalar_plain_empty();
5857 _line_progressed(1);
5858 _end_map_flow();
5859 goto mapflow_finish;
5860 }
 // ',' right after a key: the val is set to empty, and a new sibling
 // is started with a key expected. A '#' immediately after the comma
 // is rejected.
5861 else if(first == ',')
5862 {
5863 _c4dbgp("mapflow[RKCL]: got comma. val is missing");
5864 m_evt_handler->set_val_scalar_plain_empty();
5865 m_evt_handler->add_sibling();
5866 addrem_flags(RKEY, RKCL);
5867 _line_progressed(1);
5868 if(m_evt_handler->m_curr->line_contents.rem.begins_with('#'))
5869 {
5870 _c4err("parse error: invalid comment after comma");
5871 }
5872 }
5873 else
5874 {
5875 _c4err("parse error");
5876 }
5877 }
 // ---- state RVAL: a value is expected ----
5878 else if(has_any(RVAL))
5879 {
5880 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
5881 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
5882 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
5883 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5884 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
5885 _c4dbgpf("mapflow[RVAL]: '{}'", first);
5886 ScannedScalar sc;
 // scalar vals (single-quoted, double-quoted, plain): set the val, then expect ',' or '}' (RNXT)
5887 if(first == '\'')
5888 {
5889 _c4dbgp("mapflow[RVAL]: scanning single-quoted scalar");
5890 sc = _scan_scalar_squot();
5891 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
5892 _handle_annotations_before_blck_val_scalar();
5893 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
5894 addrem_flags(RNXT, RVAL);
5895 }
5896 else if(first == '"')
5897 {
5898 _c4dbgp("mapflow[RVAL]: scanning double-quoted scalar");
5899 sc = _scan_scalar_dquot();
5900 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
5901 _handle_annotations_before_blck_val_scalar();
5902 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
5903 addrem_flags(RNXT, RVAL);
5904 }
5905 // block scalars (ie | and >) cannot appear in flow containers
5906 else if(_scan_scalar_plain_map_flow(&sc))
5907 {
5908 _c4dbgp("mapflow[RVAL]: plain scalar.");
5909 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
5910 _handle_annotations_before_blck_val_scalar();
5911 m_evt_handler->set_val_scalar_plain(maybe_filtered);
5912 addrem_flags(RNXT, RVAL);
5913 }
 // '[': the val is a child flow seq. RNXT is set before beginning the
 // child (presumably the state found when the child ends -- TODO
 // confirm); the flags are then switched to seq state and the
 // function is left.
5914 else if(first == '[')
5915 {
5916 _c4dbgp("mapflow[RVAL]: start val seqflow");
5917 addrem_flags(RNXT, RVAL);
5918 _handle_annotations_before_blck_val_scalar();
5919 m_evt_handler->begin_seq_val_flow();
5920 _set_indentation(m_evt_handler->m_parent->indref);
5921 addrem_flags(RSEQ|RVAL, RMAP|RNXT);
5922 _line_progressed(1);
5923 goto mapflow_finish;
5924 }
 // '{': the val is a child flow map; RMAP stays set, so the loop goes on with RKEY
5925 else if(first == '{')
5926 {
5927 _c4dbgp("mapflow[RVAL]: start val mapflow");
5928 addrem_flags(RNXT, RVAL);
5929 _handle_annotations_before_blck_val_scalar();
5930 m_evt_handler->begin_map_val_flow();
5931 _set_indentation(m_evt_handler->m_parent->indref);
5932 addrem_flags(RKEY, RNXT);
5933 _line_progressed(1);
5934 // keep going in this function
5935 }
 // '}' where a val was expected: the val is set to empty and the map is closed
5936 else if(first == '}')
5937 {
5938 _c4dbgp("mapflow[RVAL]: end!");
5939 _handle_annotations_before_blck_val_scalar();
5940 m_evt_handler->set_val_scalar_plain_empty();
5941 _line_progressed(1);
5942 _end_map_flow();
5943 goto mapflow_finish;
5944 }
 // ',' where a val was expected: the val is set to empty. The comma
 // is not consumed here; the RNXT branch consumes it on the next
 // iteration.
5945 else if(first == ',')
5946 {
5947 _c4dbgp("mapflow[RVAL]: empty val!");
5948 _handle_annotations_before_blck_val_scalar();
5949 m_evt_handler->set_val_scalar_plain_empty();
5950 addrem_flags(RNXT, RVAL);
5951 // keep going in this function
5952 }
 // NOTE(review): the two debug messages below say "key ref" and
 // "key anchor", but this is the RVAL branch and the ref is passed
 // to _handle_valref(); the labels look copied from the RKEY branch.
5953 else if(first == '*')
5954 {
5955 csubstr ref = _scan_ref_map();
5956 _c4dbgpf("mapflow[RVAL]: key ref! {}", _prs(ref));
5957 _handle_valref(ref);
5958 addrem_flags(RNXT, RVAL);
5959 }
5960 else if(first == '&')
5961 {
5962 csubstr anchor = _scan_anchor();
5963 _c4dbgpf("mapflow[RVAL]: key anchor! {}", _prs(anchor));
5964 _add_annotation(&m_pending_anchors, anchor);
5965 }
5966 else if(first == '!')
5967 {
5968 csubstr tag = _scan_tag();
5969 _c4dbgpf("mapflow[RVAL]: tag! {}", _prs(tag));
5970 _add_annotation(&m_pending_tags, tag);
5971 }
5972 else
5973 {
5974 _c4err("parse error");
5975 }
5976 }
 // ---- state RNXT: only ',' (next keyval) or '}' (end of map) are accepted ----
5977 else if(has_any(RNXT))
5978 {
5979 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
5980 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
5981 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
5982 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
5983 _c4dbgpf("mapflow[RNXT]: '{}'", m_evt_handler->m_curr->line_contents.rem.str[0]);
5984 if(m_evt_handler->m_curr->line_contents.rem.begins_with(','))
5985 {
5986 _c4dbgp("mapflow[RNXT]: expect next keyval");
5987 m_evt_handler->add_sibling();
5988 addrem_flags(RKEY, RNXT);
5989 _line_progressed(1);
 // a '#' immediately after the comma is rejected
5990 if(m_evt_handler->m_curr->line_contents.rem.begins_with('#'))
5991 {
5992 _c4err("parse error: invalid comment after comma");
5993 }
5994 }
5995 else if(m_evt_handler->m_curr->line_contents.rem.begins_with('}'))
5996 {
5997 _c4dbgp("mapflow[RNXT]: end!");
5998 _line_progressed(1);
5999 _end_map_flow();
6000 goto mapflow_finish;
6001 }
6002 else
6003 {
6004 _c4err("parse error");
6005 }
6006 }
 // ---- state QMRK: a '?' was read, and the explicit key is expected ----
 // This branch mirrors the RKEY branch, except that the flag being
 // removed is QMRK, and that ',' and '}' yield an empty key and val.
6007 else if(has_any(QMRK))
6008 {
6009 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
6010 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
6011 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
6012 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
6013 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
6014 _c4dbgpf("mapflow[QMRK]: '{}'", first);
6015 ScannedScalar sc;
6016 if(first == '\'')
6017 {
6018 _c4dbgp("mapflow[QMRK]: scanning single-quoted scalar");
6019 sc = _scan_scalar_squot();
6020 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
6021 _handle_annotations_before_blck_key_scalar();
6022 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6023 addrem_flags(RKCL, QMRK);
6024 }
6025 else if(first == '"')
6026 {
6027 _c4dbgp("mapflow[QMRK]: scanning double-quoted scalar");
6028 sc = _scan_scalar_dquot();
6029 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
6030 _handle_annotations_before_blck_key_scalar();
6031 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6032 addrem_flags(RKCL, QMRK);
6033 }
6034 // block scalars (ie | and >) cannot appear in flow containers
6035 else if(_scan_scalar_plain_map_flow(&sc))
6036 {
6037 _c4dbgp("mapflow[QMRK]: plain scalar");
6038 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
6039 _handle_annotations_before_blck_key_scalar();
6040 m_evt_handler->set_key_scalar_plain(maybe_filtered);
6041 addrem_flags(RKCL, QMRK);
6042 }
6043 else if(first == ':')
6044 {
6045 _c4dbgp("mapflow[QMRK]: setting empty key");
6046 _handle_annotations_before_blck_key_scalar();
6047 m_evt_handler->set_key_scalar_plain_empty();
6048 addrem_flags(RVAL, QMRK);
6049 _line_progressed(1);
6050 _maybe_skip_whitespace_tokens();
6051 }
 // NOTE(review): the trailing-comma comment below looks copied from
 // the RKEY branch; in QMRK state a '?' was just read, so this is
 // reached with input like "? }" -- TODO confirm.
 // NOTE(review): here _end_map_flow() is called before
 // _line_progressed(1), which is the reverse of the order used in
 // every other '}' branch of this function -- confirm it is
 // intentional.
6052 else if(first == '}') // this happens on a trailing comma like ", }"
6053 {
6054 _c4dbgp("mapflow[QMRK]: end!");
6055 _handle_annotations_before_blck_key_scalar();
6056 m_evt_handler->set_key_scalar_plain_empty();
6057 m_evt_handler->set_val_scalar_plain_empty();
6058 _end_map_flow();
6059 _line_progressed(1);
6060 goto mapflow_finish;
6061 }
 // ',' right after '?': both key and val are set to empty. The comma
 // is not consumed here; the RNXT branch consumes it on the next
 // iteration.
6062 else if(first == ',')
6063 {
6064 _c4dbgp("mapflow[QMRK]: empty key+val!");
6065 _handle_annotations_before_blck_key_scalar();
6066 m_evt_handler->set_key_scalar_plain_empty();
6067 m_evt_handler->set_val_scalar_plain_empty();
6068 addrem_flags(RNXT, QMRK);
6069 }
6070 else if(first == '&')
6071 {
6072 csubstr anchor = _scan_anchor();
6073 _c4dbgpf("mapflow[QMRK]: key anchor! {}", _prs(anchor));
6074 _add_annotation(&m_pending_anchors, anchor);
6075 }
6076 else if(first == '*')
6077 {
6078 csubstr ref = _scan_ref_map();
6079 _c4dbgpf("mapflow[QMRK]: key ref! {}", _prs(ref));
6080 _handle_keyref(ref);
6081 addrem_flags(RKCL, QMRK);
6082 }
6083 else if(first == '[')
6084 {
6085 // RYML's tree cannot store container keys, but that's
6086 // handled inside the tree sink. Other sink types may be
6087 // able to handle it.
6088 _c4dbgp("mapflow[QMRK]: start child seqflow (!)");
6089 addrem_flags(RKCL, QMRK);
6090 _handle_annotations_before_blck_key_scalar();
6091 m_evt_handler->begin_seq_key_flow();
6092 addrem_flags(RSEQ|RVAL, RMAP|RKCL);
6093 _set_indentation(m_evt_handler->m_parent->indref);
6094 _line_progressed(1);
6095 goto mapflow_finish;
6096 }
6097 else if(first == '{')
6098 {
6099 // RYML's tree cannot store container keys, but that's
6100 // handled inside the tree sink. Other sink types may be
6101 // able to handle it.
6102 _c4dbgp("mapflow[QMRK]: start child mapflow (!)");
6103 addrem_flags(RKCL, QMRK);
6104 _handle_annotations_before_blck_key_scalar();
6105 m_evt_handler->begin_map_key_flow();
6106 _set_indentation(m_evt_handler->m_parent->indref);
6107 addrem_flags(RKEY, RKCL);
6108 _line_progressed(1);
6109 // keep going in this function
6110 }
6111 else if(first == '!')
6112 {
6113 csubstr tag = _scan_tag();
6114 _c4dbgpf("mapflow[QMRK]: tag! {}", _prs(tag));
6115 _add_annotation(&m_pending_tags, tag);
6116 }
6117 else
6118 {
6119 _c4err("parse error"); // LCOV_EXCL_LINE
6120 }
6121 }
6122
 // end of one iteration: when the line is exhausted, move on to the
 // next line; reaching the end of the file while still inside the
 // map is an error
6123 mapflow_again:
6124 _c4dbgt("mapflow: go again", 0);
6125 if(_finished_line())
6126 {
6127 if(C4_LIKELY(!_finished_file()))
6128 {
6129 _line_ended();
6130 _scan_line();
6132 }
6133 else
6134 {
6135 _c4err("missing terminating }");
6136 }
6137 }
6138 goto mapflow_start;
6139
 // reached when the map was closed, or when a child flow seq was started
6140 mapflow_finish:
6141 _c4dbgp("mapflow: finish");
6142}
6143
6144
6145//-----------------------------------------------------------------------------
6146
6147template<class EventHandler>
6148void ParseEngine<EventHandler>::_handle_seq_block()
6149{
6150seqblck_start:
6151 _c4dbgpf("handle_seq_block: seq_id={} node_id={} level={} indent={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
6152
6153 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RSEQ), m_evt_handler->m_curr->pos);
6154 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RBLCK), m_evt_handler->m_curr->pos);
6155 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT), m_evt_handler->m_curr->pos);
6156 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RVAL) + has_any(RNXT)), m_evt_handler->m_curr->pos);
6157
6158 _maybe_skip_comment_strict();
6159 if(!m_evt_handler->m_curr->line_contents.rem.len)
6160 goto seqblck_again;
6161
6162 if(has_any(RVAL))
6163 {
6164 _c4dbgpf("seqblck[RVAL]: col={}", m_evt_handler->m_curr->pos.col);
6165 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
6166 if(m_evt_handler->m_curr->at_line_beginning())
6167 {
6168 _c4dbgpf("seqblck[RVAL]: indref={} indentation={}", m_evt_handler->m_curr->indref+1, m_evt_handler->m_curr->line_contents.indentation);
6169 if(m_evt_handler->m_curr->indentation_ge_extra())
6170 {
6171 _c4dbgpf("seqblck[RVAL]: skip {} from indentation", m_evt_handler->m_curr->line_contents.indentation);
6172 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6173 if(!m_evt_handler->m_curr->line_contents.rem.len)
6174 goto seqblck_again;
6175 }
6176 else if(m_evt_handler->m_curr->indentation_lt_extra())
6177 {
6178 _c4dbgp("seqblck[RVAL]: smaller indentation than RVAL!");
6179 if(m_evt_handler->m_curr->indentation_eq())
6180 {
6181 _c4dbgp("seqblck[RVAL]: smaller indentation than RVAL!");
6182 _handle_annotations_before_blck_val_scalar();
6183 m_evt_handler->set_val_scalar_plain_empty();
6184 addrem_flags(RNXT, RVAL);
6185 goto seqblck_again;
6186 }
6187 else
6188 {
6189 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_lt(), m_evt_handler->m_curr->pos);
6190 _c4dbgp("seqblck[RVAL]: smaller indentation!");
6191 _handle_indentation_pop_from_block_seq();
6192 goto seqblck_finish;
6193 }
6194 }
6195 else if(m_evt_handler->m_curr->line_contents.indentation == npos)
6196 {
6197 _c4dbgp("seqblck[RVAL]: empty line!");
6198 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
6199 goto seqblck_again;
6200 }
6201 }
6202 _RYML_ASSERT_PARSE_(callbacks(), m_evt_handler->m_curr->line_contents.rem.len, m_evt_handler->m_curr->pos);
6203 const size_t startmark = _handle_block_skip_leading_whitespace();
6204 _c4dbgpf("seqblck[RVAL]: startmark={}", startmark);
6205 if(startmark == npos)
6206 {
6207 _c4dbgp("seqblck[RVAL]: whitespace only");
6208 goto seqblck_again;
6209 }
6210 const size_t tabmark = _handle_block_get_whitespace_mark();
6211 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
6212 _c4dbgpf("seqblck[RVAL]: first='{}' currcol={}", first, m_evt_handler->m_curr->pos.col - 1);
6213 const size_t startline = m_evt_handler->m_curr->pos.line;
6214 _c4assert(m_evt_handler->m_curr->line_contents.current_col() >= m_bom_len);
6215 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col() - m_bom_len;
6216 ScannedScalar sc;
6217 if(first == '\'')
6218 {
6219 _c4dbgp("seqblck[RVAL]: single-quoted scalar");
6220 sc = _scan_scalar_squot();
6221 if(!_maybe_scan_following_colon())
6222 {
6223 _c4dbgp("seqblck[RVAL]: set as val");
6224 _handle_annotations_before_blck_val_scalar();
6225 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc); // VAL!
6226 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
6227 addrem_flags(RNXT, RVAL);
6228 }
6229 else
6230 {
6231 _c4dbgp("seqblck[RVAL]: start mapblck, set scalar as key");
6232 _handle_block_check_leading_tabs(startmark);
6233 addrem_flags(RNXT, RVAL);
6234 _handle_annotations_before_start_mapblck(startline);
6235 _handle_colon();
6236 m_evt_handler->begin_map_val_block();
6237 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6238 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY!
6239 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6240 addrem_flags(RMAP|RVAL, RSEQ|RNXT);
6241 _maybe_skip_whitespace_tokens();
6242 goto seqblck_finish;
6243 }
6244 }
6245 else if(first == '"')
6246 {
6247 _c4dbgp("seqblck[RVAL]: double-quoted scalar");
6248 sc = _scan_scalar_dquot();
6249 if(!_maybe_scan_following_colon())
6250 {
6251 _c4dbgp("seqblck[RVAL]: set as val");
6252 _handle_annotations_before_blck_val_scalar();
6253 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc); // VAL!
6254 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
6255 addrem_flags(RNXT, RVAL);
6256 }
6257 else
6258 {
6259 _c4dbgp("seqblck[RVAL]: start mapblck, set scalar as key");
6260 addrem_flags(RNXT, RVAL);
6261 _handle_block_check_leading_tabs(startmark);
6262 _handle_annotations_before_start_mapblck(startline);
6263 _handle_colon();
6264 m_evt_handler->begin_map_val_block();
6265 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6266 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY!
6267 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6268 addrem_flags(RMAP|RVAL, RSEQ|RNXT);
6269 _maybe_skip_whitespace_tokens();
6270 goto seqblck_finish;
6271 }
6272 }
6273 // block scalars can only appear as keys when in QMRK scope
6274 // (ie, after ? tokens), so no need to scan following colon in
6275 // here.
6276 else if(first == '|')
6277 {
6278 _c4dbgp("seqblck[RVAL]: block-literal scalar");
6279 ScannedBlock sb;
6280 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6281 _handle_annotations_before_blck_val_scalar();
6282 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
6283 m_evt_handler->set_val_scalar_literal(maybe_filtered);
6284 addrem_flags(RNXT, RVAL);
6285 }
6286 else if(first == '>')
6287 {
6288 _c4dbgp("seqblck[RVAL]: block-folded scalar");
6289 ScannedBlock sb;
6290 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
6291 _handle_annotations_before_blck_val_scalar();
6292 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
6293 m_evt_handler->set_val_scalar_folded(maybe_filtered);
6294 addrem_flags(RNXT, RVAL);
6295 }
6296 else if(_scan_scalar_plain_seq_blck(&sc))
6297 {
6298 _c4dbgp("seqblck[RVAL]: plain scalar.");
6299 if(!_maybe_scan_following_colon())
6300 {
6301 _c4dbgp("seqblck[RVAL]: set as val");
6302 _handle_annotations_before_blck_val_scalar();
6303 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref); // VAL!
6304 m_evt_handler->set_val_scalar_plain(maybe_filtered);
6305 addrem_flags(RNXT, RVAL);
6306 }
6307 else
6308 {
6309 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref != npos, m_evt_handler->m_curr->pos);
6310 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, startindent > m_evt_handler->m_curr->indref, m_evt_handler->m_curr->pos);
6311 _c4dbgp("seqblck[RVAL]: start mapblck, set scalar as key");
6312 _handle_block_check_leading_tabs(startmark, tabmark);
6313 addrem_flags(RNXT, RVAL);
6314 _handle_annotations_before_start_mapblck(startline);
6315 _handle_colon();
6316 m_evt_handler->begin_map_val_block();
6317 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6318 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY!
6319 m_evt_handler->set_key_scalar_plain(maybe_filtered);
6320 addrem_flags(RMAP|RVAL, RSEQ|RNXT);
6321 _maybe_skip_whitespace_tokens();
6322 goto seqblck_finish;
6323 }
6324 }
6325 else if(first == '[')
6326 {
6327 _c4dbgp("seqblck[RVAL]: start child seqflow");
6328 addrem_flags(RNXT, RVAL);
6329 _handle_annotations_before_blck_val_scalar();
6330 m_evt_handler->begin_seq_val_flow();
6331 addrem_flags(RFLOW|RVAL, RBLCK|RNXT);
6332 _line_progressed(1);
6333 _set_indentation(m_evt_handler->m_parent->indref + 1u);
6334 goto seqblck_finish;
6335 }
6336 else if(first == '{')
6337 {
6338 _c4dbgp("seqblck[RVAL]: start child mapflow");
6339 addrem_flags(RNXT, RVAL);
6340 _handle_annotations_before_blck_val_scalar();
6341 m_evt_handler->begin_map_val_flow();
6342 addrem_flags(RMAP|RKEY|RFLOW, RBLCK|RSEQ|RVAL|RNXT);
6343 _line_progressed(1);
6344 _set_indentation(m_evt_handler->m_parent->indref + 1u);
6345 goto seqblck_finish;
6346 }
6347 else if(first == '-')
6348 {
6349 _c4dbgp("seqblck[RVAL]: dash");
6350 _handle_block_check_leading_tabs(startmark);
6351 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref != npos, m_evt_handler->m_curr->pos);
6352 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, startindent > m_evt_handler->m_curr->indref, m_evt_handler->m_curr->pos);
6353 _c4dbgp("seqblck[RVAL]: start child seqblck");
6354 _RYML_ASSERT_PARSE_(this->callbacks(), startindent > m_evt_handler->m_curr->indref, m_evt_handler->m_curr->pos);
6355 addrem_flags(RNXT, RVAL);
6356 _handle_annotations_before_blck_val_scalar();
6357 m_evt_handler->begin_seq_val_block();
6358 addrem_flags(RVAL, RNXT);
6359 _set_indentation(startindent);
6360 // keep going on inside this function
6361 _line_progressed(1);
6362 }
6363 else if(first == ':')
6364 {
6365 _c4dbgp("seqblck[RVAL]: start child mapblck with empty key");
6366 addrem_flags(RNXT, RVAL);
6367 _handle_annotations_before_start_mapblck(startline);
6368 _handle_colon();
6369 m_evt_handler->begin_map_val_block();
6370 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6371 m_evt_handler->set_key_scalar_plain_empty();
6372 addrem_flags(RMAP|RVAL, RSEQ|RNXT);
6373 _line_progressed(1);
6374 _maybe_skip_whitespace_tokens();
6375 goto seqblck_finish;
6376 }
6377 else if(first == '&')
6378 {
6379 const csubstr anchor = _scan_anchor();
6380 _c4dbgpf("seqblck[RVAL]: anchor! {}", _prs(anchor));
6381 // we need to buffer the anchors, as there may be two
6382 // consecutive anchors in here
6383 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
6384 }
6385 else if(first == '*')
6386 {
6387 csubstr ref = _scan_ref_seq();
6388 _c4dbgpf("seqblck[RVAL]: ref! {}", _prs(ref));
6389 if(!_maybe_scan_following_colon())
6390 {
6391 _c4dbgp("seqblck[RVAL]: set ref as val!");
6392 _handle_valref(ref);
6393 addrem_flags(RNXT, RVAL);
6394 }
6395 else
6396 {
6397 _c4dbgp("seqblck[RVAL]: ref is key of map");
6398 addrem_flags(RNXT, RVAL);
6399 _handle_annotations_before_start_mapblck(startline);
6400 m_evt_handler->begin_map_val_block();
6401 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6402 _handle_keyref(ref);
6403 addrem_flags(RMAP|RVAL, RSEQ|RNXT);
6404 _set_indentation(startindent);
6405 _maybe_skip_whitespace_tokens();
6406 goto seqblck_finish;
6407 }
6408 }
6409 else if(first == '!')
6410 {
6411 csubstr tag = _scan_tag();
6412 _c4dbgpf("seqblck[RVAL]: val tag! {}", _prs(tag));
6413 // we need to buffer the tags, as there may be two
6414 // consecutive tags in here
6415 _add_annotation(&m_pending_tags, tag, startindent, startline);
6416 }
6417 else if(first == '?')
6418 {
6419 _c4dbgp("seqblck[RVAL]: start child mapblck, explicit key");
6420 addrem_flags(RNXT, RVAL);
6421 m_evt_handler->begin_map_val_block();
6422 addrem_flags(RMAP|QMRK, RSEQ|RNXT);
6423 _set_indentation(startindent);
6424 _line_progressed(1);
6425 _maybe_skipchars(' ');
6426 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
6427 {
6428 _c4dbgp("seqblck[RVAL]: seqblck starts after ?");
6429 addrem_flags(RKCL, QMRK);
6430 m_evt_handler->begin_seq_key_block();
6431 addrem_flags(RSEQ|RVAL, RMAP|RKCL);
6432 _save_indentation();
6433 _line_progressed(1);
6434 _maybe_skipchars(' ');
6435 }
6436 goto seqblck_finish;
6437 }
6438 else
6439 {
6440 _c4err("parse error");
6441 }
6442 }
6443 else // RNXT
6444 {
6445 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RNXT), m_evt_handler->m_curr->pos);
6446 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
6447 //
6448 // handle indentation
6449 //
6450 _c4dbgpf("seqblck[RNXT]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
6451 if(C4_LIKELY(m_evt_handler->m_curr->at_line_beginning()))
6452 {
6453 _c4dbgp("seqblck[RNXT]: at line begin");
6454 if(m_evt_handler->m_curr->indentation_ge())
6455 {
6456 _c4dbgpf("seqblck[RNXT]: skip {} from indref", m_evt_handler->m_curr->indref);
6457 _line_progressed(m_evt_handler->m_curr->indref);
6458 if(!m_evt_handler->m_curr->line_contents.rem.len)
6459 goto seqblck_again;
6460 }
6461 else if(m_evt_handler->m_curr->indentation_lt())
6462 {
6463 _c4dbgp("seqblck[RNXT]: smaller indentation!");
6464 _handle_indentation_pop_from_block_seq();
6465 if(has_all(RSEQ|RBLCK))
6466 {
6467 _c4dbgp("seqblck[RNXT]: still seqblck!");
6468 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RNXT), m_evt_handler->m_curr->pos);
6469 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6470 if(!m_evt_handler->m_curr->line_contents.rem.len)
6471 goto seqblck_again; // LCOV_EXCL_LINE
6472 }
6473 else
6474 {
6475 _c4dbgp("seqblck[RNXT]: no longer seqblck!");
6476 goto seqblck_finish;
6477 }
6478 }
6479 else if(m_evt_handler->m_curr->line_contents.indentation == npos)
6480 {
6481 _c4dbgpf("seqblck[RNXT]: blank line, len={}", m_evt_handler->m_curr->line_contents.rem);
6482 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
6483 if(!m_evt_handler->m_curr->line_contents.rem.len)
6484 goto seqblck_again; // LCOV_EXCL_LINE
6485 }
6486 }
6487 else
6488 {
6489 _c4dbgp("seqblck[RNXT]: NOT at line begin");
6490 if(!m_evt_handler->m_curr->line_contents.rem.begins_with_any(" \t"))
6491 {
6492 _c4err("parse error");
6493 }
6494 else
6495 {
6496 _skipchars(" \t");
6497 if(!m_evt_handler->m_curr->line_contents.rem.len)
6498 {
6499 _c4dbgp("seqblck[RNXT]: again");
6500 goto seqblck_again; // LCOV_EXCL_LINE
6501 }
6502 }
6503 }
6504 //
6505 // now handle the tokens
6506 //
6507 _c4assert(m_evt_handler->m_curr->line_contents.rem.len > 0);
6508 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
6509 _c4dbgpf("seqblck[RNXT]: '{}' node_id={}", _c4prc(first), m_evt_handler->m_curr->node_id);
6510 if(first == '-')
6511 {
6512 if(m_evt_handler->m_curr->indref > 0
6513 || m_evt_handler->m_curr->line_contents.indentation > 0
6514 || !_is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem))
6515 {
6516 if(C4_LIKELY(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem)))
6517 {
6518 _c4dbgp("seqblck[RNXT]: expect next val");
6519 addrem_flags(RVAL, RNXT);
6520 m_evt_handler->add_sibling();
6521 _line_progressed(1);
6522 }
6523 else
6524 {
6525 _c4err("parse error");
6526 }
6527 }
6528 else
6529 {
6530 _c4dbgp("seqblck[RNXT]: start doc");
6531 _start_doc_suddenly();
6532 _line_progressed(3);
6533 _maybe_skip_whitespace_tokens();
6534 goto seqblck_finish;
6535 }
6536 }
6537 else if(first == ':')
6538 {
6539 // This happens for example in `- [a: b]: c` (after
6540 // terminating the seq, ie, after `]`). All other cases
6541 // (ie colon after scalars) are caught elsewhere (ie, in
6542 // RVAL state).
6543 if(C4_LIKELY(m_evt_handler->m_parent && (m_evt_handler->m_parent->flags & RMAP)))
6544 {
6545 _c4dbgp("seqblck[RNXT]: actually this seq was '?' key of parent map");
6546 m_evt_handler->end_seq_block();
6547 goto seqblck_finish;
6548 }
6549 else
6550 {
6551 _c4err("parse error");
6552 }
6553 }
6554 else if(first == '.')
6555 {
6556 _c4dbgp("seqblck[RNXT]: maybe doc?");
6557 if(_is_doc_end_token(m_evt_handler->m_curr->line_contents.rem))
6558 {
6559 _c4dbgp("seqblck[RNXT]: end doc");
6560 _end_doc_suddenly();
6561 _line_progressed(3);
6562 _maybe_skip_whitespace_tokens();
6563 _check_doc_end_tokens();
6564 goto seqblck_finish;
6565 }
6566 else
6567 {
6568 _c4err("parse error");
6569 }
6570 }
6571 else
6572 {
6573 // may be an indentless sequence nested in a map...
6574 #ifdef RYML_DBG
6575 _print_state_stack();
6576 #endif
6577 if(m_evt_handler->m_parent
6578 && has_all(RMAP|RBLCK, m_evt_handler->m_parent)
6579 && m_evt_handler->m_curr->indref == m_evt_handler->m_parent->indref)
6580 {
6581 _c4dbgpf("seqblck[RNXT]: end indentless seq, go to parent={}. node={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id);
6582 _RYML_ASSERT_PARSE_(this->callbacks(), m_evt_handler->m_curr != m_evt_handler->m_parent, m_evt_handler->m_curr->pos);
6583 _handle_indentation_pop(m_evt_handler->m_parent);
6584 _RYML_ASSERT_PARSE_(this->callbacks(), has_all(RMAP|RBLCK), m_evt_handler->m_curr->pos);
6585 m_evt_handler->add_sibling();
6586 addrem_flags(RKEY, RNXT);
6587 goto seqblck_finish;
6588 }
6589 else if(first == '\t')
6590 {
6591 size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of('\t');
6592 if(pos == npos)
6593 {
6594 _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
6595 goto seqblck_again;
6596 }
6597 }
6598 _c4err("parse error");
6599 }
6600 }
6601
6602 seqblck_again:
6603 _c4dbgt("seqblck: go again", 0);
6604 if(_finished_line())
6605 {
6606 m_bom_len = 0;
6607 _line_ended();
6608 _scan_line();
6609 if(_finished_file())
6610 {
6611 _c4dbgp("seqblck: finish!");
6612 _end_seq_blck();
6613 goto seqblck_finish;
6614 }
6616 }
6617 goto seqblck_start;
6618
6619 seqblck_finish:
6620 _c4dbgp("seqblck: finish");
6621}
6622
6623
6624//-----------------------------------------------------------------------------
6625
6626template<class EventHandler>
6627void ParseEngine<EventHandler>::_handle_map_block()
6628{
6629mapblck_start:
6630 _c4dbgpf("handle_map_block: map_id={} node_id={} level={} indref={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
6631
6632 // states: RKEY -> RVAL -> RNXT
6633 // states: QMRK -> RKCL -> RVAL -> RNXT
6634 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RMAP), m_evt_handler->m_curr->pos);
6635 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RBLCK), m_evt_handler->m_curr->pos);
6636 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RKCL|RVAL|RNXT|QMRK), m_evt_handler->m_curr->pos);
6637 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RKEY) + has_any(RKCL) + has_any(RVAL) + has_any(RNXT) + has_any(QMRK)), m_evt_handler->m_curr->pos);
6638
6639 _maybe_skip_comment();
6640 if(!m_evt_handler->m_curr->line_contents.rem.len)
6641 goto mapblck_again;
6642
6643 if(has_any(RKEY))
6644 {
6645 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
6646 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
6647 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
6648 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
6649 //
6650 // handle indentation
6651 //
6652 if(m_evt_handler->m_curr->at_line_beginning())
6653 {
6654 if(m_evt_handler->m_curr->indentation_eq())
6655 {
6656 _c4dbgpf("mapblck[RKEY]: skip {} from indref", m_evt_handler->m_curr->indref);
6657 _line_progressed(m_evt_handler->m_curr->indref);
6658 if(!m_evt_handler->m_curr->line_contents.rem.len)
6659 goto mapblck_again;
6660 }
6661 else if(m_evt_handler->m_curr->indentation_lt())
6662 {
6663 _c4dbgp("mapblck[RKEY]: smaller indentation!");
6664 _handle_indentation_pop_from_block_map();
6665 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6666 if(has_all(RMAP|RBLCK))
6667 {
6668 _c4dbgp("mapblck[RKEY]: still mapblck!");
6669 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_any(RKEY), m_evt_handler->m_curr->pos);
6670 if(!m_evt_handler->m_curr->line_contents.rem.len)
6671 goto mapblck_again;
6672 }
6673 else
6674 {
6675 _c4dbgp("mapblck[RKEY]: no longer mapblck!");
6676 goto mapblck_finish;
6677 }
6678 }
6679 else
6680 {
6681 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_gt(), m_evt_handler->m_curr->pos);
6682 _c4err("invalid indentation");
6683 }
6684 }
6685 //
6686 // now handle the tokens
6687 //
6688 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
6689 const size_t startline = m_evt_handler->m_curr->pos.line;
6690 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
6691 _c4dbgpf("mapblck[RKEY]: '{}'", _c4prc(first));
6692 ScannedScalar sc;
6693 if(first == '\'')
6694 {
6695 _c4dbgp("mapblck[RKEY]: scanning single-quoted scalar");
6696 sc = _scan_scalar_squot();
6697 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
6698 _handle_annotations_before_blck_key_scalar();
6699 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6700 addrem_flags(RVAL, RKEY);
6701 if(!_maybe_scan_following_colon())
6702 _c4err("could not find ':' colon after key");
6703 _handle_colon();
6704 _maybe_skip_whitespace_tokens();
6705 }
6706 else if(first == '"')
6707 {
6708 _c4dbgp("mapblck[RKEY]: scanning double-quoted scalar");
6709 sc = _scan_scalar_dquot();
6710 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
6711 _handle_annotations_before_blck_key_scalar();
6712 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6713 addrem_flags(RVAL, RKEY);
6714 if(!_maybe_scan_following_colon())
6715 _c4err("could not find ':' colon after key");
6716 _handle_colon();
6717 _maybe_skip_whitespace_tokens();
6718 }
6719 // block scalars (| and >) can not be used as keys unless they
6720 // appear in an explicit QMRK scope (ie, after the ? token),
6721 else if(C4_UNLIKELY(first == '|'))
6722 {
6723 _c4err("block map: literal keys must be enclosed in '?'");
6724 }
6725 else if(C4_UNLIKELY(first == '>'))
6726 {
6727 _c4err("block map: folded keys must be enclosed in '?'");
6728 }
6729 else if(_scan_scalar_plain_map_blck(&sc))
6730 {
6731 _c4dbgp("mapblck[RKEY]: plain scalar");
6732 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
6733 _handle_annotations_before_blck_key_scalar();
6734 m_evt_handler->set_key_scalar_plain(maybe_filtered);
6735 addrem_flags(RVAL, RKEY);
6736 if(!_maybe_scan_following_colon())
6737 _c4err("could not find ':' colon after key");
6738 _handle_colon();
6739 _maybe_skip_whitespace_tokens();
6740 }
6741 else if(first == '?')
6742 {
6743 _c4dbgp("mapblck[RKEY]: key token!");
6744 addrem_flags(QMRK, RKEY);
6745 _line_progressed(1);
6746 _maybe_skipchars(' ');
6747 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
6748 {
6749 _c4dbgp("mapblck[RKEY]: seqblck starts after ?");
6750 addrem_flags(RKCL, QMRK);
6751 m_evt_handler->begin_seq_key_block();
6752 addrem_flags(RSEQ|RVAL, RMAP|RKCL);
6753 _save_indentation();
6754 _line_progressed(1);
6755 _maybe_skipchars(' ');
6756 goto mapblck_finish;
6757 }
6758 goto mapblck_again;
6759 }
6760 else if(first == ':')
6761 {
6762 _c4dbgp("mapblck[RKEY]: setting empty key");
6763 _handle_annotations_before_blck_key_scalar();
6764 m_evt_handler->set_key_scalar_plain_empty();
6765 addrem_flags(RVAL, RKEY);
6766 _line_progressed(1);
6767 _handle_colon();
6768 _maybe_skip_whitespace_tokens();
6769 }
6770 else if(first == '*')
6771 {
6772 csubstr ref = _scan_ref_map();
6773 _c4dbgpf("mapblck[RKEY]: key ref! {}", _prs(ref));
6774 _handle_keyref(ref);
6775 addrem_flags(RVAL, RKEY);
6776 if(!_maybe_scan_following_colon())
6777 _c4err("could not find ':' colon after key");
6778 _handle_colon();
6779 _maybe_skip_whitespace_tokens();
6780 }
6781 else if(first == '&')
6782 {
6783 csubstr anchor = _scan_anchor();
6784 _c4dbgpf("mapblck[RKEY]: key anchor! {}", _prs(anchor));
6785 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
6786 }
6787 else if(first == '!')
6788 {
6789 csubstr tag = _scan_tag();
6790 _c4dbgpf("mapblck[RKEY]: key tag! {}", _prs(tag));
6791 _add_annotation(&m_pending_tags, tag, startindent, startline);
6792 }
6793 else if(first == '[')
6794 {
6795 // RYML's tree cannot store container keys, but that's
6796 // handled inside the tree handler. Other handlers may be
6797 // able to handle it.
6798 _c4dbgp("mapblck[RKEY]: start child seqflow (!)");
6799 _handle_annotations_before_blck_key_scalar();
6800 m_evt_handler->begin_seq_key_flow();
6801 addrem_flags(RSEQ|RFLOW|RVAL, RKEY|RMAP|RBLCK);
6802 _line_progressed(1);
6803 _set_indentation(startindent);
6804 goto mapblck_finish;
6805 }
6806 else if(first == '{')
6807 {
6808 // RYML's tree cannot store container keys, but that's
6809 // handled inside the tree handler. Other handlers may be
6810 // able to handle it.
6811 _c4dbgp("mapblck[RKEY]: start child mapflow (!)");
6812 _handle_annotations_before_blck_key_scalar();
6813 m_evt_handler->begin_map_key_flow();
6814 addrem_flags(RFLOW|RKEY, RBLCK);
6815 _line_progressed(1);
6816 _set_indentation(startindent);
6817 goto mapblck_finish;
6818 }
6819 else if(first == '-')
6820 {
6821 _c4dbgp("mapblck[RKEY]: maybe doc?");
6822 if(m_evt_handler->m_curr->line_contents.indentation == 0 && _is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem))
6823 {
6824 _c4dbgp("mapblck[RKEY]: end+start doc");
6825 _start_doc_suddenly();
6826 _line_progressed(3);
6827 _maybe_skip_whitespace_tokens();
6828 goto mapblck_finish;
6829 }
6830 else
6831 {
6832 _c4err("parse error");
6833 }
6834 }
6835 else if(first == '.')
6836 {
6837 _c4dbgp("mapblck[RKEY]: maybe end doc?");
6838 if(m_evt_handler->m_curr->line_contents.indentation == 0 && _is_doc_end_token(m_evt_handler->m_curr->line_contents.rem))
6839 {
6840 _c4dbgp("mapblck[RKEY]: end doc");
6841 _end_doc_suddenly();
6842 _line_progressed(3);
6843 _maybe_skip_whitespace_tokens();
6844 _check_doc_end_tokens();
6845 goto mapblck_finish;
6846 }
6847 else
6848 {
6849 _c4err("parse error"); // LCOV_EXCL_LINE
6850 }
6851 }
6852 else
6853 {
6854 _c4err("parse error");
6855 }
6856 }
6857 else if(has_any(RVAL))
6858 {
6859 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
6860 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
6861 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
6862 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
6863 //
6864 // handle indentation
6865 //
6866 if(m_evt_handler->m_curr->at_line_beginning())
6867 {
6868 _c4dbgpf("mapblck[RVAL]: indref={} indentation={}", m_evt_handler->m_curr->indref+1, m_evt_handler->m_curr->line_contents.indentation);
6869 m_evt_handler->m_curr->more_indented = false;
6870 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref != npos, m_evt_handler->m_curr->pos);
6871 if(m_evt_handler->m_curr->indentation_eq_extra())
6872 {
6873 _c4dbgp("mapblck[RVAL]: skip indentation!");
6874 _line_progressed(m_evt_handler->m_curr->indref + 1);
6875 if(!m_evt_handler->m_curr->line_contents.rem.len)
6876 goto mapblck_again;
6877 }
6878 else if(m_evt_handler->m_curr->indentation_gt_extra())
6879 {
6880 _c4dbgp("mapblck[RVAL]: more indented!");
6881 m_evt_handler->m_curr->more_indented = true;
6882 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6883 if(!m_evt_handler->m_curr->line_contents.rem.len)
6884 goto mapblck_again; // LCOV_EXCL_LINE
6885 }
6886 else if(m_evt_handler->m_curr->indentation_lt_extra())
6887 {
6888 if(m_evt_handler->m_curr->indentation_eq())
6889 {
6890 _c4dbgp("mapblck[RVAL]: smaller indentation than RVAL!");
6891 // watchout for indentless seqs
6892 if(!_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem.sub(m_evt_handler->m_curr->line_contents.indentation)))
6893 {
6894 _c4dbgp("mapblck[RVAL]: smaller indentation than RVAL!");
6895 _handle_annotations_before_blck_val_scalar();
6896 m_evt_handler->set_val_scalar_plain_empty();
6897 addrem_flags(RNXT, RVAL);
6898 goto mapblck_again;
6899 }
6900 }
6901 else
6902 {
6903 _c4dbgp("mapblck[RVAL]: smaller indentation than RKEY!");
6904 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_lt(), m_evt_handler->m_curr->pos);
6905 _handle_indentation_pop_from_block_map();
6906 if(has_all(RMAP|RBLCK))
6907 {
6908 _c4dbgp("mapblck[RVAL]: still mapblck!");
6909 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
6910 if(has_any(RNXT))
6911 {
6912 _c4dbgp("mapblck[RVAL]: speculatively expect next keyval");
6913 m_evt_handler->add_sibling();
6914 addrem_flags(RKEY, RNXT);
6915 }
6916 goto mapblck_again;
6917 }
6918 else
6919 {
6920 _c4dbgp("mapblck[RVAL]: no longer mapblck!");
6921 goto mapblck_finish;
6922 }
6923 }
6924 }
6925 }
6926 const size_t startcol = _handle_block_skip_leading_whitespace();
6927 if(startcol == npos)
6928 {
6929 _c4dbgp("mapblck[RVAL]: whitespace only");
6930 goto mapblck_again; // LCOV_EXCL_LINE
6931 }
6932 const size_t tabmark = _handle_block_get_whitespace_mark();
6933 //
6934 // now handle the tokens
6935 //
6936 _c4assert(m_evt_handler->m_curr->line_contents.rem.len);
6937 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
6938 const size_t startline = m_evt_handler->m_curr->pos.line;
6939 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
6940 _c4dbgpf("mapblck[RVAL]: '{}'", _c4prc(first));
6941 ScannedScalar sc;
6942 if(first == '\'')
6943 {
6944 _c4dbgp("mapblck[RVAL]: scanning single-quoted scalar");
6945 sc = _scan_scalar_squot();
6946 if(!_maybe_scan_following_colon())
6947 {
6948 _c4dbgp("mapblck[RVAL]: set as val");
6949 _handle_annotations_before_blck_val_scalar();
6950 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc); // VAL!
6951 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
6952 addrem_flags(RNXT, RVAL);
6953 }
6954 else
6955 {
6956 _c4assert(m_evt_handler->m_curr->indref != npos);
6957 _c4assert(startindent > m_evt_handler->m_curr->indref);
6958 _c4dbgp("mapblck[RVAL]: start new block map, set scalar as key");
6959 _handle_block_check_leading_tabs(startcol);
6960 _handle_annotations_before_start_mapblck(startline);
6961 addrem_flags(RNXT, RVAL);
6962 _handle_colon();
6963 m_evt_handler->begin_map_val_block();
6964 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6965 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY!
6966 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
6967 _maybe_skip_whitespace_tokens();
6968 // keep the child state on RVAL
6969 addrem_flags(RVAL, RNXT);
6970 }
6971 }
6972 else if(first == '"')
6973 {
6974 _c4dbgp("mapblck[RVAL]: scanning double-quoted scalar");
6975 sc = _scan_scalar_dquot();
6976 if(!_maybe_scan_following_colon())
6977 {
6978 _c4dbgp("mapblck[RVAL]: set as val");
6979 _handle_annotations_before_blck_val_scalar();
6980 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc); // VAL!
6981 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
6982 addrem_flags(RNXT, RVAL);
6983 }
6984 else
6985 {
6986 _c4assert(m_evt_handler->m_curr->indref != npos);
6987 _c4assert(startindent > m_evt_handler->m_curr->indref);
6988 _c4dbgp("mapblck[RVAL]: start new block map, set scalar as key");
6989 _handle_block_check_leading_tabs(startcol);
6990 _handle_annotations_before_start_mapblck(startline);
6991 addrem_flags(RNXT, RVAL);
6992 _handle_colon();
6993 m_evt_handler->begin_map_val_block();
6994 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
6995 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY!
6996 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
6997 _maybe_skip_whitespace_tokens();
6998 // keep the child state on RVAL
6999 addrem_flags(RVAL, RNXT);
7000 }
7001 }
7002 // block scalars can only appear as keys when in QMRK scope
7003 // (ie, after ? tokens), so no need to scan following colon
7004 else if(first == '|')
7005 {
7006 _c4dbgp("mapblck[RVAL]: scanning block-literal scalar");
7007 ScannedBlock sb;
7008 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
7009 _handle_annotations_before_blck_val_scalar();
7010 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
7011 m_evt_handler->set_val_scalar_literal(maybe_filtered);
7012 addrem_flags(RNXT, RVAL);
7013 }
7014 else if(first == '>')
7015 {
7016 _c4dbgp("mapblck[RVAL]: scanning block-folded scalar");
7017 ScannedBlock sb;
7018 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
7019 _handle_annotations_before_blck_val_scalar();
7020 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
7021 m_evt_handler->set_val_scalar_folded(maybe_filtered);
7022 addrem_flags(RNXT, RVAL);
7023 }
7024 else if(_scan_scalar_plain_map_blck(&sc))
7025 {
7026 _c4dbgp("mapblck[RVAL]: plain scalar.");
7027 if(!_maybe_scan_following_colon())
7028 {
7029 _c4dbgp("mapblck[RVAL]: set as val");
7030 _handle_annotations_before_blck_val_scalar();
7031 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref); // VAL!
7032 m_evt_handler->set_val_scalar_plain(maybe_filtered);
7033 addrem_flags(RNXT, RVAL);
7034 }
7035 else
7036 {
7037 _c4assert(m_evt_handler->m_curr->indref != npos);
7038 _c4assert(startindent > m_evt_handler->m_curr->indref);
7039 _c4dbgpf("mapblck[RVAL]: start new block map, set scalar as key {}", m_evt_handler->m_curr->indref);
7040 _handle_block_check_leading_tabs(startcol, tabmark);
7041 addrem_flags(RNXT, RVAL);
7042 _handle_annotations_before_start_mapblck(startline);
7043 _handle_colon();
7044 m_evt_handler->begin_map_val_block();
7045 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7046 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY!
7047 m_evt_handler->set_key_scalar_plain(maybe_filtered);
7048 _maybe_skip_whitespace_tokens();
7049 // keep the child state on RVAL
7050 addrem_flags(RVAL, RNXT);
7051 }
7052 }
7053 else if(first == '-' && _is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7054 {
7055 if(C4_UNLIKELY(!m_evt_handler->m_curr->at_first_token()))
7056 _c4err("parse error");
7057 _c4dbgp("mapblck[RVAL]: start val seqblck");
7058 _handle_block_check_leading_tabs(startcol);
7059 addrem_flags(RNXT, RVAL);
7060 _handle_annotations_before_blck_val_scalar();
7061 m_evt_handler->begin_seq_val_block();
7062 addrem_flags(RSEQ|RVAL, RMAP|RNXT);
7063 _set_indentation(startindent);
7064 _line_progressed(1);
7065 _maybe_skip_whitespace_tokens();
7066 goto mapblck_finish;
7067 }
7068 else if(first == '[')
7069 {
7070 _c4dbgp("mapblck[RVAL]: start val seqflow");
7071 addrem_flags(RNXT, RVAL);
7072 _handle_annotations_before_blck_val_scalar();
7073 m_evt_handler->begin_seq_val_flow();
7074 addrem_flags(RSEQ|RFLOW|RVAL, RMAP|RBLCK|RNXT);
7075 _set_indentation(m_evt_handler->m_parent->indref + 1u);
7076 _line_progressed(1);
7077 goto mapblck_finish;
7078 }
7079 else if(first == '{')
7080 {
7081 _c4dbgp("mapblck[RVAL]: start val mapflow");
7082 addrem_flags(RNXT, RVAL);
7083 _handle_annotations_before_blck_val_scalar();
7084 m_evt_handler->begin_map_val_flow();
7085 addrem_flags(RKEY|RFLOW, RBLCK|RVAL|RNXT);
7086 m_evt_handler->m_curr->scalar_col = m_evt_handler->m_curr->line_contents.indentation;
7087 _set_indentation(m_evt_handler->m_parent->indref + 1u);
7088 _line_progressed(1);
7089 goto mapblck_finish;
7090 }
7091 else if(first == '*')
7092 {
7093 csubstr ref = _scan_ref_map();
7094 _c4dbgpf("mapblck[RVAL]: ref! {}", _prs(ref));
7095 if(_maybe_scan_following_colon())
7096 {
7097 _c4dbgp("mapblck[RVAL]: start child map, block");
7098 addrem_flags(RNXT, RVAL);
7099 _handle_annotations_before_blck_val_scalar();
7100 m_evt_handler->begin_map_val_block();
7101 _handle_keyref(ref);
7102 _set_indentation(startindent);
7103 // keep going in RVAL
7104 addrem_flags(RVAL, RNXT);
7105 }
7106 else
7107 {
7108 _c4dbgp("mapblck[RVAL]: was val ref");
7109 _handle_valref(ref);
7110 addrem_flags(RNXT, RVAL);
7111 }
7112 _maybe_skip_whitespace_tokens();
7113 }
7114 else if(first == '&')
7115 {
7116 csubstr anchor = _scan_anchor();
7117 _c4dbgpf("mapblck[RVAL]: anchor! {}", _prs(anchor));
7118 // we need to buffer the anchors, as there may be two
7119 // consecutive anchors in here
7120 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
7121 }
7122 else if(first == '!')
7123 {
7124 csubstr tag = _scan_tag();
7125 _c4dbgpf("mapblck[RVAL]: tag! {}", _prs(tag));
7126 // we need to buffer the tags, as there may be two
7127 // consecutive tags in here
7128 _add_annotation(&m_pending_tags, tag, startindent, startline);
7129 }
7130 else if(first == '?')
7131 {
7132 if(C4_UNLIKELY(!m_evt_handler->m_curr->at_first_token()))
7133 _c4err("parse error");
7134 _c4dbgp("mapblck[RVAL]: start val mapblck");
7135 addrem_flags(RNXT, RVAL);
7136 _handle_annotations_before_blck_val_scalar();
7137 m_evt_handler->begin_map_val_block();
7138 addrem_flags(QMRK, RNXT);
7139 _set_indentation(startindent);
7140 _line_progressed(1);
7141 _maybe_skipchars(' ');
7142 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7143 {
7144 _c4dbgp("mapblck[RVAL]: seqblck starts after ?");
7145 addrem_flags(RKCL, QMRK);
7146 m_evt_handler->begin_seq_key_block();
7147 addrem_flags(RSEQ|RVAL, RMAP|RKCL);
7148 _save_indentation();
7149 _line_progressed(1);
7150 _maybe_skipchars(' ');
7151 goto mapblck_finish;
7152 }
7153 goto mapblck_again;
7154 }
7155 else if(first == ':')
7156 {
7157 _c4dbgp("mapblck[RVAL]: start val mapblck");
7158 addrem_flags(RNXT, RVAL);
7159 _handle_annotations_before_start_mapblck(startline);
7160 _handle_colon();
7161 m_evt_handler->begin_map_val_block();
7162 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7163 m_evt_handler->set_key_scalar_plain_empty();
7164 // keep the child state on RVAL
7165 addrem_flags(RVAL, RNXT);
7166 _line_progressed(1);
7167 _maybe_skip_whitespace_tokens();
7168 goto mapblck_again;
7169 }
7170 else
7171 {
7172 _c4err("parse error"); // LCOV_EXCL_LINE
7173 }
7174 }
7175 else if(has_any(RNXT))
7176 {
7177 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
7178 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
7179 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
7180 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
7181 //
7182 // handle indentation
7183 //
7184 if(m_evt_handler->m_curr->at_line_beginning())
7185 {
7186 _c4dbgpf("mapblck[RNXT]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
7187 if(m_evt_handler->m_curr->indentation_eq())
7188 {
7189 _c4dbgpf("mapblck[RNXT]: skip {} from indref", m_evt_handler->m_curr->indref);
7190 _line_progressed(m_evt_handler->m_curr->indref);
7191 _c4dbgp("mapblck[RNXT]: speculatively expect next keyval");
7192 m_evt_handler->add_sibling();
7193 addrem_flags(RKEY, RNXT);
7194 goto mapblck_again;
7195 }
7196 else if(m_evt_handler->m_curr->indentation_lt())
7197 {
7198 _c4dbgp("mapblck[RNXT]: smaller indentation!");
7199 _handle_indentation_pop_from_block_map();
7200 if(has_all(RMAP|RBLCK))
7201 {
7202 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
7203 if(!has_any(RKCL))
7204 {
7205 _c4dbgp("mapblck[RNXT]: speculatively expect next keyval");
7206 m_evt_handler->add_sibling();
7207 addrem_flags(RKEY, RNXT);
7208 }
7209 goto mapblck_again;
7210 }
7211 else
7212 {
7213 goto mapblck_finish;
7214 }
7215 }
7216 }
7217 else
7218 {
7219 _c4dbgp("mapblck[RNXT]: NOT at line begin");
7220 if(!m_evt_handler->m_curr->line_contents.rem.begins_with_any(" \t"))
7221 {
7222 _c4err("parse error");
7223 }
7224 else
7225 {
7226 _skipchars(" \t");
7227 if(!m_evt_handler->m_curr->line_contents.rem.len)
7228 {
7229 _c4dbgp("seqblck[RNXT]: again");
7230 goto mapblck_again; // LCOV_EXCL_LINE
7231 }
7232 }
7233 }
7234 //
7235 // handle tokens
7236 //
7237 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.len > 0, m_evt_handler->m_curr->pos);
7238 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
7239 _c4dbgpf("mapblck[RNXT]: '{}'", _c4prc(first));
7240 if(first == ' ')
7241 {
7242 _c4dbgp("mapblck[RNXT]: skip spaces");
7243 _maybe_skip_whitespace_tokens();
7244 }
7245 else
7246 {
7247 _c4err("parse error");
7248 }
7249 }
7250 else if(has_any(QMRK))
7251 {
7252 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
7253 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKCL), m_evt_handler->m_curr->pos);
7254 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
7255 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
7256 if(_handle_map_block_qmrk())
7257 goto mapblck_again;
7258 else
7259 goto mapblck_finish;
7260 }
7261 else if(has_any(RKCL)) // read the key colon (after QMRK)
7262 {
7263 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RKEY), m_evt_handler->m_curr->pos);
7264 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RVAL), m_evt_handler->m_curr->pos);
7265 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT), m_evt_handler->m_curr->pos);
7266 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(QMRK), m_evt_handler->m_curr->pos);
7267 if(_handle_map_block_rkcl())
7268 goto mapblck_again;
7269 else
7270 goto mapblck_finish;
7271 }
7272
7273 mapblck_again:
7274 _c4dbgt("mapblck: again", 0);
7275 if(_finished_line())
7276 {
7277 _line_ended();
7278 _scan_line();
7279 if(_finished_file())
7280 {
7281 _c4dbgp("mapblck: file finished!");
7282 _end_map_blck();
7283 goto mapblck_finish;
7284 }
7286 }
7287 goto mapblck_start;
7288
7289 mapblck_finish:
7290 _c4dbgp("mapblck: finish");
7291}
7292
7293
7294//-----------------------------------------------------------------------------
7295
7296// return true if we should remain in map_block
7297template<class EventHandler>
7298bool ParseEngine<EventHandler>::_handle_map_block_qmrk()
7299{
7300 //
7301 // handle indentation
7302 //
7303 if(m_evt_handler->m_curr->at_line_beginning())
7304 {
7305 _c4dbgpf("mapblck[QMRK]: at line beginning. ind={} indref={}", m_evt_handler->m_curr->line_contents.indentation, m_evt_handler->m_curr->indref);
7306 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.indentation != npos, m_evt_handler->m_curr->pos);
7307 if(m_evt_handler->m_curr->indentation_eq_extra())
7308 {
7309 _c4dbgpf("mapblck[QMRK]: skip {} from indref", m_evt_handler->m_curr->indref + 1);
7310 _line_progressed(m_evt_handler->m_curr->indref + 1);
7311 if(!m_evt_handler->m_curr->line_contents.rem.len)
7312 return true; // go again
7313 }
7314 // indentation can be larger in QMRK state
7315 else if(m_evt_handler->m_curr->indentation_gt_extra())
7316 {
7317 _c4dbgp("mapblck[QMRK]: larger indentation !");
7318 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
7319 if(!m_evt_handler->m_curr->line_contents.rem.len)
7320 return true; // go again
7321 }
7322 else
7323 {
7324 _c4dbgp("mapblck[QMRK]: smaller indentation!");
7325 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_lt_extra(), m_evt_handler->m_curr->pos);
7326 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.len > 0, m_evt_handler->m_curr->pos);
7327 if(m_evt_handler->m_curr->indentation_eq()
7328 // defend against docs or indentless seqs
7329 && m_evt_handler->m_curr->line_contents.rem.str[0] != '-')
7330 {
7331 _c4dbgp("mapblck[QMRK]: QMRK finished!");
7332 _handle_annotations_before_blck_key_scalar();
7333 m_evt_handler->set_key_scalar_plain_empty();
7334 addrem_flags(RKCL, QMRK);
7335 return true; // go again
7336 }
7337 else if(m_evt_handler->m_curr->indentation_lt())
7338 {
7339 _c4dbgp("mapblck[QMRK]: indentation pop!");
7340 _handle_indentation_pop_from_block_map();
7341 _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
7342 if(has_all(RMAP|RBLCK))
7343 {
7344 _c4dbgp("mapblck[QMRK]: still mapblck!");
7345 return true; // go again
7346 }
7347 else
7348 {
7349 _c4dbgp("mapblck[QMRK]: no longer mapblck!");
7350 return false; // finish mapblck
7351 }
7352 }
7353 }
7354 }
7355 //
7356 // now handle the tokens
7357 //
7358 _c4assert(m_evt_handler->m_curr->line_contents.rem.len);
7359 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
7360 const size_t startline = m_evt_handler->m_curr->pos.line;
7361 const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
7362 _c4dbgpf("mapblck[QMRK]: '{}'", first);
7363 ScannedScalar sc;
7364 if(first == '\'')
7365 {
7366 _c4dbgp("mapblck[QMRK]: scanning single-quoted scalar");
7367 sc = _scan_scalar_squot();
7368 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY!
7369 addrem_flags(RKCL, QMRK);
7370 if(!_maybe_scan_following_colon())
7371 {
7372 _c4dbgp("mapblck[QMRK]: set as key");
7373 _handle_annotations_before_blck_key_scalar();
7374 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7375 }
7376 else
7377 {
7378 _c4dbgp("mapblck[QMRK]: start new block map as key (!), set scalar as key");
7379 _handle_annotations_before_start_mapblck_as_key();
7380 m_evt_handler->begin_map_key_block();
7381 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7382 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
7383 _maybe_skip_whitespace_tokens();
7384 _set_indentation(startindent);
7385 // keep the child state on RVAL
7386 addrem_flags(RVAL, RKCL);
7387 }
7388 }
7389 else if(first == '"')
7390 {
7391 _c4dbgp("mapblck[QMRK]: scanning double-quoted scalar");
7392 sc = _scan_scalar_dquot();
7393 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY!
7394 addrem_flags(RKCL, QMRK);
7395 if(!_maybe_scan_following_colon())
7396 {
7397 _c4dbgp("mapblck[QMRK]: set as key");
7398 _handle_annotations_before_blck_key_scalar();
7399 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7400 }
7401 else
7402 {
7403 _c4dbgp("mapblck[QMRK]: start new block map as key (!), set scalar as key");
7404 _handle_annotations_before_start_mapblck_as_key();
7405 m_evt_handler->begin_map_key_block();
7406 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7407 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
7408 _maybe_skip_whitespace_tokens();
7409 _set_indentation(startindent);
7410 // keep the child state on RVAL
7411 addrem_flags(RVAL, RKCL);
7412 }
7413 }
7414 else if(first == '|')
7415 {
7416 _c4dbgp("mapblck[QMRK]: scanning block-literal scalar");
7417 ScannedBlock sb;
7418 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
7419 csubstr maybe_filtered = _maybe_filter_key_scalar_literal(sb); // KEY!
7420 _handle_annotations_before_blck_key_scalar();
7421 m_evt_handler->set_key_scalar_literal(maybe_filtered);
7422 addrem_flags(RKCL, QMRK);
7423 }
7424 else if(first == '>')
7425 {
7426 _c4dbgp("mapblck[QMRK]: scanning block-literal scalar");
7427 ScannedBlock sb;
7428 _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
7429 csubstr maybe_filtered = _maybe_filter_key_scalar_folded(sb); // KEY!
7430 _handle_annotations_before_blck_key_scalar();
7431 m_evt_handler->set_key_scalar_folded(maybe_filtered);
7432 addrem_flags(RKCL, QMRK);
7433 }
7434 else if(_scan_scalar_plain_map_blck(&sc))
7435 {
7436 _c4dbgp("mapblck[QMRK]: plain scalar");
7437 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY!
7438 addrem_flags(RKCL, QMRK);
7439 if(!_maybe_scan_following_colon())
7440 {
7441 _c4dbgp("mapblck[QMRK]: set as key");
7442 _handle_annotations_before_blck_key_scalar();
7443 m_evt_handler->set_key_scalar_plain(maybe_filtered);
7444 }
7445 else
7446 {
7447 _c4dbgp("mapblck[QMRK]: start new block map as key (!), set scalar as key");
7448 _handle_annotations_before_start_mapblck_as_key();
7449 m_evt_handler->begin_map_key_block();
7450 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7451 m_evt_handler->set_key_scalar_plain(maybe_filtered);
7452 _maybe_skip_whitespace_tokens();
7453 _set_indentation(startindent);
7454 // keep the child state on RVAL
7455 addrem_flags(RVAL, RKCL);
7456 }
7457 }
7458 else if(first == ':')
7459 {
7460 _c4dbgp("mapblck[QMRK]: start new block map as key (!), empty key");
7461 addrem_flags(RKCL, QMRK);
7462 _handle_annotations_before_start_mapblck_as_key();
7463 m_evt_handler->begin_map_key_block();
7464 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7465 m_evt_handler->set_key_scalar_plain_empty();
7466 _line_progressed(1);
7467 _maybe_skip_whitespace_tokens();
7468 _set_indentation(startindent);
7469 // keep the child state on RVAL
7470 addrem_flags(RVAL, RKCL);
7471 }
7472 else if(first == '*')
7473 {
7474 csubstr ref = _scan_ref_map();
7475 _c4dbgpf("mapblck[QMRK]: key ref! {}", _prs(ref));
7476 addrem_flags(RKCL, QMRK);
7477 if(!_maybe_scan_following_colon())
7478 {
7479 _c4dbgp("mapblck[QMRK]: set ref as key");
7480 _handle_keyref(ref);
7481 }
7482 else
7483 {
7484 _c4dbgp("mapblck[QMRK]: start new block map as key (!), set ref as key");
7485 _handle_annotations_before_start_mapblck_as_key();
7486 m_evt_handler->begin_map_key_block();
7487 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
7488 _handle_keyref(ref);
7489 _set_indentation(startindent);
7490 // keep the child state on RVAL
7491 addrem_flags(RVAL, RKCL|QMRK);
7492 }
7493 _maybe_skip_whitespace_tokens();
7494 }
7495 else if(first == '&')
7496 {
7497 csubstr anchor = _scan_anchor();
7498 _c4dbgpf("mapblck[QMRK]: key anchor! {}", _prs(anchor));
7499 _add_annotation(&m_pending_anchors, anchor, startindent, startline);
7500 }
7501 else if(first == '!')
7502 {
7503 csubstr tag = _scan_tag();
7504 _c4dbgpf("mapblck[QMRK]: key tag! {}", _prs(tag));
7505 _add_annotation(&m_pending_tags, tag, startindent, startline);
7506 }
7507 else if(first == '-')
7508 {
7509 _c4dbgp("mapblck[QMRK]: maybe seq or doc?");
7510 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7511 {
7512 _c4dbgp("mapblck[QMRK]: start child seqblck (!)");
7513 addrem_flags(RKCL, QMRK);
7514 _handle_annotations_before_blck_key_scalar();
7515 m_evt_handler->begin_seq_key_block();
7516 addrem_flags(RVAL|RSEQ, RMAP|RKCL);
7517 _set_indentation(startindent);
7518 _line_progressed(1);
7519 }
7520 else
7521 {
7522 _c4dbgp("mapblck[QMRK]: end+start doc");
7523 _c4assert(_is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem));
7524 _start_doc_suddenly();
7525 _line_progressed(3);
7526 }
7527 _maybe_skip_whitespace_tokens();
7528 return false; // finish mapblck
7529 }
7530 else if(first == '[')
7531 {
7532 _c4dbgp("mapblck[QMRK]: start child seqflow (!)");
7533 addrem_flags(RKCL, QMRK);
7534 _handle_annotations_before_blck_key_scalar();
7535 m_evt_handler->begin_seq_key_flow();
7536 addrem_flags(RVAL|RSEQ|RFLOW, RMAP|RKCL|RBLCK);
7537 _set_indentation(m_evt_handler->m_parent->indref + 1);
7538 _line_progressed(1);
7539 return false; // finish mapblck
7540 }
7541 else if(first == '{')
7542 {
7543 _c4dbgp("mapblck[QMRK]: start child mapflow (!)");
7544 addrem_flags(RKCL, QMRK);
7545 _handle_annotations_before_blck_key_scalar();
7546 m_evt_handler->begin_map_key_flow();
7547 addrem_flags(RKEY|RFLOW, RVAL|RKCL|RBLCK);
7548 _set_indentation(m_evt_handler->m_parent->indref + 1);
7549 _line_progressed(1);
7550 return false; // finish mapblck
7551 }
7552 else if(first == '?')
7553 {
7554 _c4dbgpf("mapblck[QMRK]: another QMRK '?'. ind={} indref={}", startindent, m_evt_handler->m_curr->indref);
7555 _RYML_ASSERT_PARSE_(callbacks(), startindent > m_evt_handler->m_curr->indref, m_evt_handler->m_curr->pos);
7556 _c4dbgp("mapblck[QMRK]: ? indent gt - start child mapblck (!)");
7557 addrem_flags(RKCL, QMRK);
7558 _handle_annotations_before_blck_key_scalar();
7559 m_evt_handler->begin_map_key_block();
7560 addrem_flags(QMRK, RKCL);
7561 _set_indentation(startindent);
7562 // indentation_lt() should be handled elsewhere
7563 _line_progressed(1);
7564 _maybe_skipchars(' ');
7565 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7566 {
7567 _c4dbgp("mapblck[RVAL]: seqblck starts after ?");
7568 addrem_flags(RKCL, QMRK);
7569 m_evt_handler->begin_seq_key_block();
7570 addrem_flags(RSEQ|RVAL, RMAP|RKCL);
7571 _save_indentation();
7572 _line_progressed(1);
7573 _maybe_skipchars(' ');
7574 return false;
7575 }
7576 }
7577 else
7578 {
7579 _c4err("parse error");
7580 }
7581 return true; // continue in mapblck
7582}
7583
7584
7585//-----------------------------------------------------------------------------
7586
7587// return true if we should remain in map_block
/** Handle the RKCL state of a block map. Judging by the debug messages
 * ("found the colon", "missing :"), this is the state in which the ':'
 * following a key is looked for.
 *
 * When at the beginning of a line the indentation is consumed first. Then
 * the first remaining character of the line selects the action:
 *  - ':'  consume it, then either switch RKCL -> RVAL or, when a block
 *         sequence token follows, start a block sequence as the value.
 *  - '?'  set an empty plain value, add a sibling and switch to QMRK
 *         (optionally starting a block sequence as the key).
 *  - '-'  must be a document-begin token, otherwise it is an error.
 *  - '.'  must be "..." (document end), otherwise it is an error.
 *  - anything else: the ':' is missing; set an empty plain value, add a
 *         sibling and switch RKCL -> RKEY.
 *
 * @return true to continue in the block map handler, false when the block
 *         map handling is finished for now (a child sequence was started or
 *         a document was started/ended).
 */
7588template<class EventHandler>
7589bool ParseEngine<EventHandler>::_handle_map_block_rkcl()
7590{
7591 //
7592 // handle indentation
7593 //
7594 if(m_evt_handler->m_curr->at_line_beginning())
7595 {
7596 if(m_evt_handler->m_curr->indentation_eq())
7597 {
 // same indentation as the reference: consume indref characters
7598 _c4dbgpf("mapblck[RKCL]: skip {} from indref", m_evt_handler->m_curr->indref);
7599 _line_progressed(m_evt_handler->m_curr->indref);
 // nothing left on this line after the indentation
7600 if(!m_evt_handler->m_curr->line_contents.rem.len)
7601 return true; // continue in mapblck
7602 }
7603 else if(C4_UNLIKELY(m_evt_handler->m_curr->indentation_lt()))
7604 {
7605 _c4err("invalid indentation");
7606 }
 // a larger indentation is not consumed here
7607 }
 // NOTE(review): rem.str[0] is read without checking rem.len on this path;
 // presumably the caller guarantees a non-empty remainder - confirm.
7608 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
7609 _c4dbgpf("mapblck[RKCL]: '{}'", first);
7610 if(first == ':')
7611 {
7612 _c4dbgp("mapblck[RKCL]: found the colon");
 // skip the ':' and any spaces following it
7613 _line_progressed(1);
7614 _maybe_skipchars(' ');
 // C4_DONT_OPTIMIZE is applied only for the GCC version/target combinations
 // listed below; presumably a workaround for a miscompilation - TODO confirm.
7615 #if defined(__GNUC__) && ( \
7616 ((__GNUC__ >= 12) && ((C4_WORDSIZE == 4) || defined(C4_CPU_S390_X) || defined(C4_CPU_PPC64))) \
7617 || \
7618 (__GNUC__ == 16 && defined(C4_CPU_X86_64)))
7619 C4_DONT_OPTIMIZE(m_evt_handler->m_curr->line_contents.rem);
7620 #endif
7621 // sequence is valid after the RKCL ':'
7622 if(!_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7623 {
 // no sequence token: the value is handled on the next iteration
7624 addrem_flags(RVAL, RKCL);
7625 return true; // continue in mapblck
7626 }
7627 else
7628 {
7629 _c4dbgp("mapblck[RKCL]: start val seqblck");
 // flags are changed both before and after begin_seq_val_block();
 // the statement order is significant here
7630 addrem_flags(RNXT, RKCL);
7631 m_evt_handler->begin_seq_val_block();
7632 addrem_flags(RSEQ|RVAL, RMAP|RNXT);
7633 _save_indentation();
 // skip the sequence token and the spaces after it
7634 _line_progressed(1);
7635 _maybe_skipchars(' ');
7636 return false; // finish mapblck
7637 }
7638 }
7639 else if(first == '?')
7640 {
7641 _c4dbgp("mapblck[RKCL]: got '?'. val was empty");
 // close the current key with an empty value and open the next entry
7642 m_evt_handler->set_val_scalar_plain_empty();
7643 m_evt_handler->add_sibling();
7644 addrem_flags(QMRK, RKCL);
 // skip the '?' and the spaces after it
7645 _line_progressed(1);
7646 _maybe_skipchars(' ');
7647 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7648 {
7649 _c4dbgp("mapblck[RKCL]: seqblck starts after ?");
7650 addrem_flags(RKCL, QMRK);
7651 m_evt_handler->begin_seq_key_block();
 // NOTE(review): the other "seqblck after ?" sites in this file remove
 // RMAP|RKCL at this point, whereas this one removes RMAP|QMRK - confirm
 // that leaving RKCL set is intended.
7652 addrem_flags(RSEQ|RVAL, RMAP|QMRK);
7653 _save_indentation();
7654 _line_progressed(1);
7655 _maybe_skipchars(' ');
7656 return false;
7657 }
7658 }
7659 else if(first == '-')
7660 {
 // only a document-begin token is accepted here; the check inside the
 // branch raises an error when the '-' is not part of such a token
7661 if(m_evt_handler->m_curr->indref == 0 || m_evt_handler->m_curr->line_contents.indentation == 0 || _is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem))
7662 {
7663 _c4dbgp("mapblck[RKCL]: end+start doc");
7664 _RYML_CHECK_PARSE_(m_evt_handler->m_stack.m_callbacks, _is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem), m_evt_handler->m_curr->pos);
7665 _start_doc_suddenly();
 // skip the 3 characters of the document token
7666 _line_progressed(3);
7667 _maybe_skip_whitespace_tokens();
7668 return false; // finish mapblck
7669 }
7670 else
7671 {
7672 _c4err("parse error"); // LCOV_EXCL_LINE
7673 }
7674 }
7675 else if(first == '.')
7676 {
7677 _c4dbgp("mapblck[RKCL]: maybe end doc?");
 // rs is the remainder after the first '.', so ".." here means the line
 // starts with "..."
7678 csubstr rs = m_evt_handler->m_curr->line_contents.rem.sub(1);
7679 if(rs == ".." || rs.begins_with(".. "))
7680 {
7681 _c4dbgp("mapblck[RKCL]: end+start doc");
7682 _end_doc_suddenly();
 // skip the 3 characters of the document token
7683 _line_progressed(3);
7684 _maybe_skip_whitespace_tokens();
7685 _check_doc_end_tokens();
7686 return false; // finish mapblck
7687 }
7688 else
7689 {
7690 _c4err("parse error"); // LCOV_EXCL_LINE
7691 }
7692 }
7693 else/* if(m_was_inside_qmrk) */
7694 {
7695 _c4dbgp("mapblck[RKCL]: missing :");
 // a missing ':' is accepted only at the same indentation
7696 if(C4_UNLIKELY(!m_evt_handler->m_curr->indentation_eq()))
7697 _c4err("parse error"); // LCOV_EXCL_LINE
 // the key gets an empty value; the current token is then handled as the
 // next key (RKEY) on the following iteration
7698 m_evt_handler->set_val_scalar_plain_empty();
7699 m_evt_handler->add_sibling();
7700 addrem_flags(RKEY, RKCL);
7701 }
7702 return true;
7703}
7704
7705
7706//-----------------------------------------------------------------------------
7707
/** Handle the current line while in the unknown top-level state, JSON
 * variant (see the "runk_json" debug messages).
 *
 * Asserts that RTOP is set and that none of RNXT|RSEQ|RMAP is set. After
 * skipping a comment and leading spaces/tabs, the first character selects:
 *  - '['  begin a flow sequence as the value
 *  - '{'  begin a flow map as the value
 *  - a byte order mark (as reported by _handle_bom()): nothing else is done
 *  - '"'  a double-quoted scalar, set as the value
 *  - otherwise a plain scalar, set as the value
 * A scalar followed by a colon is reported as a parse error here, as is
 * anything that cannot be scanned as a scalar.
 */
7708template<class EventHandler>
7709void ParseEngine<EventHandler>::_handle_unk_json()
7710{
7711 _c4dbgpf("handle_unk_json indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
7712
7713 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT|RSEQ|RMAP), m_evt_handler->m_curr->pos);
7714 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RTOP), m_evt_handler->m_curr->pos);
7715
7716 _maybe_skip_comment();
7717 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
7718 if(!rem.len)
7719 return;
7720
 // skip leading spaces and tabs; when the line holds nothing else (npos)
 // the whole remainder is consumed
7721 size_t pos = rem.first_not_of(" \t");
7722 if(pos)
7723 {
7724 pos = pos != npos ? pos : rem.len;
7725 _c4dbgpf("skipping indentation of {}", pos);
7726 _line_progressed(pos);
7727 rem = m_evt_handler->m_curr->line_contents.rem;
7728 if(!rem.len)
7729 return;
7730 _c4dbgpf("rem is now {}", _prs(rem));
7731 }
7732
7733 if(rem.begins_with('['))
7734 {
7735 _c4dbgp("it's a seq");
7736 _check_trailing_doc_token();
7737 _maybe_begin_doc();
7738 m_evt_handler->begin_seq_val_flow();
7739 addrem_flags(RSEQ|RFLOW|RVAL, RUNK|RTOP|RDOC);
 // the indentation is set to the column of the '['
7740 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
7741 m_doc_empty = false;
 // skip the '['
7742 _line_progressed(1);
7743 }
7744 else if(rem.begins_with('{'))
7745 {
7746 _c4dbgp("it's a map");
7747 _check_trailing_doc_token();
7748 _maybe_begin_doc();
7749 m_evt_handler->begin_map_val_flow();
7750 addrem_flags(RMAP|RFLOW|RKEY, RVAL|RTOP|RUNK|RDOC);
7751 m_doc_empty = false;
 // the indentation is set to the column of the '{'
7752 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
 // skip the '{'
7753 _line_progressed(1);
7754 }
7755 else if(_handle_bom())
7756 {
7757 _c4dbgp("byte order mark");
7758 }
7759 else
7760 {
 // top-level scalar
7761 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL), m_evt_handler->m_curr->pos);
7762 _maybe_skip_whitespace_tokens();
7763 csubstr s = m_evt_handler->m_curr->line_contents.rem;
7764 if(!s.len)
7765 return;
7766 const size_t startindent = m_evt_handler->m_curr->line_contents.indentation; // save
7767 const char first = s.str[0];
7768 ScannedScalar sc;
7769 if(first == '"')
7770 {
7771 _c4dbgp("runk_json: scanning double-quoted scalar");
7772 _check_trailing_doc_token();
7773 _maybe_begin_doc();
7774 add_flags(RDOC);
7775 m_doc_empty = false;
7776 sc = _scan_scalar_dquot();
7777 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
 // a colon after the scalar is not accepted in this handler
7778 if(!_maybe_scan_following_colon())
7779 {
7780 _c4dbgp("runk_json: set as val");
7781 _handle_annotations_before_blck_val_scalar();
7782 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
7783 }
7784 else
7785 {
7786 _c4err("parse error");
7787 }
7788 }
7789 else if(_scan_scalar_plain_unk(&sc))
7790 {
7791 _c4dbgp("runk_json: got a plain scalar");
7792 _check_trailing_doc_token();
7793 _maybe_begin_doc();
7794 add_flags(RDOC);
7795 m_doc_empty = false;
 // a colon after the scalar is not accepted in this handler
7796 if(!_maybe_scan_following_colon())
7797 {
7798 _c4dbgp("runk_json: set as val");
7799 _handle_annotations_before_blck_val_scalar();
7800 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
7801 m_evt_handler->set_val_scalar_plain(maybe_filtered);
7802 }
7803 else
7804 {
7805 _c4err("parse error"); // LCOV_EXCL_LINE
7806 }
7807 }
7808 else
7809 {
7810 _c4err("parse error"); // LCOV_EXCL_LINE
7811 }
7812 }
7813}
7814
7815
7816//-----------------------------------------------------------------------------
7817
/** Handle the current line while in the unknown top-level state.
 *
 * Asserts that RTOP is set and that none of RNXT|RSEQ|RMAP is set. The
 * steps are, in order:
 *  1. skip leading spaces and a comment; return if nothing is left.
 *  2. when the line has zero indentation and the parser is at the line
 *     beginning (or on the line where a byte order mark was found): handle
 *     a byte order mark, a document-begin token, a document-end token or a
 *     '%' directive. Each of these returns immediately when recognized.
 *  3. otherwise dispatch on the first character: '[' and '{' (flow
 *     containers), '-' (block sequence), '?' (block map with explicit key),
 *     ':' (block map with empty key), '&' (anchor), '*' (reference),
 *     '!' (tag), or a scalar (block literal/folded, single/double quoted,
 *     plain). A quoted or plain scalar or a reference followed by a colon
 *     starts a block map with that token as its first key.
 *  4. finally, reset m_bom_len once the parser has left RUNK and is either
 *     past the BOM line or at the end of it.
 */
7818template<class EventHandler>
7819void ParseEngine<EventHandler>::_handle_unk()
7820{
7821 _c4dbgpf("handle_unk indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
7822
7823 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RNXT|RSEQ|RMAP), m_evt_handler->m_curr->pos);
7824 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RTOP), m_evt_handler->m_curr->pos);
7825
7826 _maybe_skipchars(' ');
7827 _maybe_skip_comment();
7828
7829 if(!m_evt_handler->m_curr->line_contents.rem.len)
7830 return;
7831
7832 _c4dbgpf("runk: rem is now {}", _prs(m_evt_handler->m_curr->line_contents.rem));
7833
 // BOM, document tokens and directives are looked for only on lines with
 // zero indentation, either at the line beginning or on the line where a
 // BOM was previously found
7834 if(m_evt_handler->m_curr->line_contents.indentation == 0u && (m_evt_handler->m_curr->at_line_beginning() || (m_bom_len && (m_evt_handler->m_curr->pos.line == m_bom_line))))
7835 {
7836 _c4dbgpf("runk: rtop: zero indent + at line begin. offset={}", m_evt_handler->m_curr->pos.offset);
7837 _c4dbgp("runk: check BOM");
7838 if(_handle_bom())
7839 {
 // remember the BOM line; the rest of the line is handled on the next call
7840 m_bom_line = m_evt_handler->m_curr->pos.line;
7841 _c4dbgpf("runk: byte order mark! line={} offset={}", m_bom_line, m_evt_handler->m_curr->pos.offset);
7842 return;
7843 }
7844 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
7845 _c4dbgpf("runk: rtop: first={}", _c4prc(first));
7846 if(first == '-')
7847 {
7848 _c4dbgp("runk: rtop: suspecting doc");
7849 if(_is_doc_begin_token(m_evt_handler->m_curr->line_contents.rem))
7850 {
7851 _c4dbgp("runk: rtop: begin doc");
7852 _maybe_end_doc();
7853 _begin2_doc_expl();
7854 _set_indentation(0);
7855 addrem_flags(RDOC|RUNK, NDOC);
 // skip the 3 characters of the document token
7856 _line_progressed(3u);
7857 _maybe_skip_whitespace_tokens();
7858 return;
7859 }
7860 }
7861 else if(first == '.')
7862 {
7863 _c4dbgp("runk: rtop: suspecting doc end");
7864 if(_is_doc_end_token(m_evt_handler->m_curr->line_contents.rem))
7865 {
7866 _c4dbgp("runk: rtop: end doc");
 // the document is ended only when RDOC is set
7867 if(has_any(RDOC))
7868 {
7869 _end2_doc_expl();
7870 }
7871 else
7872 {
7873 _c4dbgp("runk: rtop: ignore end doc");
7874 }
7875 addrem_flags(NDOC|RUNK, RDOC);
 // skip the 3 characters of the document token
7876 _line_progressed(3u);
7877 _maybe_skip_whitespace_tokens();
7878 _check_doc_end_tokens();
7879 return;
7880 }
7881 }
7882 else if(first == '%')
7883 {
7884 _c4dbgpf("directive: {}", m_evt_handler->m_curr->line_contents.rem);
 // a directive is rejected while RDOC is set, or when the document is
 // not empty and NDOC is not set
7885 if(C4_UNLIKELY(has_any(RDOC) || (!m_doc_empty && has_none(NDOC))))
7886 _c4err("need document footer before directives");
7887 _handle_directive(m_evt_handler->m_curr->line_contents.rem);
7888 return;
7889 }
7890 }
7891
 // a '-' or '.' which is not a document token falls through to the generic
 // token handling below
7892 /* no else-if! */
7893
7894 size_t startindent = m_evt_handler->m_curr->line_contents.indentation;
7895 size_t remindent = m_evt_handler->m_curr->line_contents.current_col(m_evt_handler->m_curr->line_contents.rem);
7896 if(m_bom_len)
7897 {
7898 _c4dbgpf("runk: prev BOMlen={}", m_bom_len);
7899 if(m_evt_handler->m_curr->pos.line == m_bom_line)
7900 {
 // still on the BOM line: do not count the BOM in the column
7901 _c4dbgpf("runk: BOM remindent={} offset={}", remindent, m_evt_handler->m_curr->pos.offset);
7902 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, remindent >= m_bom_len, m_evt_handler->m_curr->pos);
7903 remindent -= m_bom_len;
7904 }
7905 else
7906 {
 // already past the BOM line: forget the BOM
7907 m_bom_len = 0;
7908 }
7909 }
7910
7911 size_t startcol = _handle_block_skip_leading_whitespace();
 // NOTE(review): rem.str[0] is read without re-checking rem.len after the
 // whitespace skip - presumably something is always left; confirm.
7912 const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
7913
7914 if(first == '[')
7915 {
7916 _c4dbgp("runk: flow seq?");
7917 _handle_unk_begin_doc();
7918 if(C4_LIKELY( ! _annotations_require_key_container()))
7919 {
7920 _c4dbgp("runk: it's a seq, flow");
7921 _handle_annotations_before_blck_val_scalar();
7922 m_evt_handler->begin_seq_val_flow();
7923 addrem_flags(RSEQ|RFLOW|RVAL, RUNK|RTOP|RDOC);
7924 _set_indentation(0);
7925 }
7926 else
7927 {
 // a block map is started first, and the flow seq becomes its key
7928 _c4dbgp("runk: start new block map, set flow seq as key (!)");
7929 _handle_annotations_before_start_mapblck(m_evt_handler->m_curr->pos.line);
7930 m_evt_handler->begin_map_val_block();
7931 addrem_flags(RMAP|RBLCK|RKEY, RUNK|RTOP|RDOC);
7932 _handle_annotations_and_indentation_after_start_mapblck(remindent, m_evt_handler->m_curr->pos.line);
7933 m_evt_handler->begin_seq_key_flow();
7934 addrem_flags(RSEQ|RFLOW|RVAL, RMAP|RBLCK|RKEY);
7935 _set_indentation(0);
7936 }
 // skip the '['
7937 _line_progressed(1);
7938 }
7939 else if(first == '{')
7940 {
7941 _c4dbgp("runk: flow map?");
7942 _handle_unk_begin_doc();
7943 if(C4_LIKELY( ! _annotations_require_key_container()))
7944 {
7945 _c4dbgp("runk: it's a map, flow");
7946 _handle_annotations_before_blck_val_scalar();
7947 m_evt_handler->begin_map_val_flow();
7948 addrem_flags(RMAP|RFLOW|RKEY, RVAL|RTOP|RUNK|RDOC);
7949 _set_indentation(0);
7950 }
7951 else
7952 {
 // a block map is started first, and the flow map becomes its key
7953 _c4dbgp("runk: start new block map, set flow map as key (!)");
7954 _handle_annotations_before_start_mapblck(m_evt_handler->m_curr->pos.line);
7955 m_evt_handler->begin_map_val_block();
7956 addrem_flags(RMAP|RBLCK|RKEY, RUNK|RTOP|RDOC);
7957 _handle_annotations_and_indentation_after_start_mapblck(remindent, m_evt_handler->m_curr->pos.line);
7958 m_evt_handler->begin_map_key_flow();
7959 addrem_flags(RMAP|RFLOW, RBLCK);
7960 _set_indentation(0);
7961 }
 // skip the '{'
7962 _line_progressed(1);
7963 }
7964 else if(first == '-' && _is_blck_token(m_evt_handler->m_curr->line_contents.rem))
7965 {
7966 _c4dbgp("runk: it's a seq, block");
 // when other tokens precede this one on the line, they are checked and
 // the indentation is recomputed from them
7967 if(C4_UNLIKELY(!m_evt_handler->m_curr->at_first_token()))
7968 startindent = _handle_unk_check_left_tokens(startindent, m_evt_handler->m_curr->pos.col, /*skip_annotations*/false);
7969 _handle_unk_begin_doc();
7970 _handle_annotations_before_blck_val_scalar();
7971 m_evt_handler->begin_seq_val_block();
7972 addrem_flags(RSEQ|RBLCK|RVAL, RNXT|RTOP|RUNK|RDOC);
7973 _set_indentation(startindent);
 // skip the '-' and the spaces after it
7974 _line_progressed(1);
7975 _maybe_skipchars(' ');
7976 }
7977 else if(first == '?' && _is_blck_token(m_evt_handler->m_curr->line_contents.rem))
7978 {
7979 _c4dbgp("runk: it's a map + this key is complex");
7980 if(C4_UNLIKELY(!m_evt_handler->m_curr->at_first_token()))
7981 startindent = _handle_unk_check_left_tokens(startindent, m_evt_handler->m_curr->pos.col, /*skip_annotations*/false);
7982 _handle_block_check_leading_tabs(startcol);
7983 _handle_unk_begin_doc();
7984 _handle_annotations_before_blck_val_scalar();
7985 m_evt_handler->begin_map_val_block();
7986 addrem_flags(RMAP|RBLCK|QMRK, RKEY|RVAL|RTOP|RUNK|RDOC);
7987 _set_indentation(startindent);
 // skip the '?' and the spaces after it
7988 _line_progressed(1);
7989 _maybe_skipchars(' ');
 // a block sequence may start right after the '?', as the key
7990 if(_is_blck_seq_token_maybe(m_evt_handler->m_curr->line_contents.rem))
7991 {
7992 _c4dbgp("runk: seqblck key starts after ?");
7993 addrem_flags(RKCL, QMRK);
7994 m_evt_handler->begin_seq_key_block();
7995 addrem_flags(RSEQ|RVAL, RMAP|RKCL);
7996 _save_indentation();
7997 _line_progressed(1);
7998 _maybe_skipchars(' ');
7999 }
8000 }
8001 else if(first == ':' && _is_blck_token(m_evt_handler->m_curr->line_contents.rem))
8002 {
 // a leading ':' is accepted only when the document is still empty or
 // when there are pending anchors/tags
8003 if(m_doc_empty || (m_pending_anchors.num_entries | m_pending_tags.num_entries))
8004 {
8005 _c4dbgp("runk: it's a map with an empty key");
8006 if(C4_UNLIKELY(!m_evt_handler->m_curr->at_first_token()))
8007 startindent = _handle_unk_check_left_tokens(startindent, m_evt_handler->m_curr->pos.col);
8008 _handle_block_check_leading_tabs(startcol);
8009 const size_t startline = m_evt_handler->m_curr->pos.line; // save
8010 _handle_unk_begin_doc();
8011 _handle_annotations_before_start_mapblck(startline);
8012 _handle_colon();
8013 m_evt_handler->begin_map_val_block();
8014 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8015 m_evt_handler->set_key_scalar_plain_empty();
8016 _set_indentation(startindent);
8017 }
8018 else
8019 {
8020 _c4err("block colon cannot occur on a new line unless ? is used");
8021 }
8022 addrem_flags(RMAP|RBLCK|RVAL, RTOP|RUNK|RDOC);
 // skip the ':'
8023 _line_progressed(1);
8024 _maybe_skip_whitespace_tokens();
8025 }
8026 else if(first == '&')
8027 {
 // the anchor is buffered as a pending annotation
8028 csubstr anchor = _scan_anchor();
8029 _c4dbgpf("anchor! {}", _prs(anchor));
8030 const size_t line = m_evt_handler->m_curr->pos.line;
8031 _handle_unk_begin_doc();
8032 _add_annotation(&m_pending_anchors, anchor, remindent, line);
8033 _set_indentation(0);
8034 }
8035 else if(first == '*')
8036 {
8037 csubstr ref = _scan_ref_map();
8038 _c4dbgpf("runk: ref! {}", _prs(ref));
8039 _handle_unk_begin_doc();
 // without a following colon the reference is the value; with a colon
 // it becomes the first key of a new block map
8040 if(!_maybe_scan_following_colon())
8041 {
8042 _c4dbgp("runk: set val ref");
8043 _handle_valref(ref);
8044 }
8045 else
8046 {
8047 _c4dbgp("runk: start new block map, set ref as key");
8048 _handle_block_check_leading_tabs(startcol);
8049 const size_t startline = m_evt_handler->m_curr->pos.line; // save
8050 _handle_annotations_before_start_mapblck(startline);
8051 m_evt_handler->begin_map_val_block();
8052 _handle_keyref(ref);
8053 _maybe_skip_whitespace_tokens();
8054 _set_indentation(0);
8055 addrem_flags(RMAP|RBLCK|RVAL, RTOP|RUNK|RDOC);
8056 }
8057 }
8058 else if(first == '!')
8059 {
8060 csubstr tag_orig;
8061 csubstr tag = _scan_tag(&tag_orig);
8062 _c4dbgpf("runk: val tag! {}", _prs(tag));
8063 // we need to buffer the tags, as there may be two
8064 // consecutive tags in here
 // NOTE(review): unlike the anchor branch, the column is sampled after the
 // tag was scanned and _handle_unk_begin_doc() is not called here -
 // confirm that this asymmetry is intended.
8065 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(m_evt_handler->m_curr->line_contents.rem);
8066 const size_t line = m_evt_handler->m_curr->pos.line;
8067 _add_annotation(&m_pending_tags, tag, indentation, line, tag_orig);
8068 }
8069 else
8070 {
 // scalars
8071 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL), m_evt_handler->m_curr->pos);
8072 const size_t startscalar = _handle_block_get_whitespace_mark();
8073 const size_t startline = m_evt_handler->m_curr->pos.line; // save
 // helper: begin a block map whose first key is the scalar just scanned;
 // raises an error when the scalar spanned more than one line
8074 auto beginmap = [&](size_t startindent_){
8075 if(C4_UNLIKELY(m_evt_handler->m_curr->pos.line > startline))
8076 _c4err("multiline scalars cannot be used as implicit keys");
8077 _handle_block_check_leading_tabs(startcol, startscalar);
8078 _handle_annotations_before_start_mapblck(startline);
8079 _handle_colon();
8080 m_evt_handler->begin_map_val_block();
8081 _handle_annotations_and_indentation_after_start_mapblck(startindent_, startline);
8082 };
 // helper: to be called after the key was set; skips whitespace tokens,
 // sets the indentation and switches the flags to RMAP|RBLCK|RVAL
8083 auto after_beginmap = [&](size_t startindent_){
8084 _maybe_skip_whitespace_tokens();
8085 _set_indentation(startindent_);
8086 addrem_flags(RMAP|RBLCK|RVAL, RTOP|RUNK|RDOC);
8087 };
8088 if(first == '|')
8089 {
8090 _c4dbgp("runk: block-literal scalar");
8091 _handle_unk_begin_doc();
8092 ScannedBlock sb;
8093 _scan_block(&sb, startindent);
8094 _handle_annotations_before_blck_val_scalar();
8095 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
8096 m_evt_handler->set_val_scalar_literal(maybe_filtered);
8097 }
8098 else if(first == '>')
8099 {
8100 _c4dbgp("runk: block-folded scalar");
8101 _handle_unk_begin_doc();
8102 ScannedBlock sb;
8103 _scan_block(&sb, startindent);
8104 _handle_annotations_before_blck_val_scalar();
8105 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
8106 m_evt_handler->set_val_scalar_folded(maybe_filtered);
8107 }
8108 else if(first == '\'')
8109 {
8110 _c4dbgp("runk: single-quoted scalar");
8111 _handle_unk_begin_doc();
 // sample the token position before scanning moves the parser forward
8112 bool firsttoken = m_evt_handler->m_curr->at_first_token();
8113 size_t col = m_evt_handler->m_curr->pos.col;
8114 ScannedScalar sc = _scan_scalar_squot();
8115 if(!_maybe_scan_following_colon())
8116 {
8117 _c4dbgp("runk: set as val");
8118 _handle_annotations_before_blck_val_scalar();
8119 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
8120 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
8121 }
8122 else
8123 {
8124 _c4dbgp("runk: start new block map, set single-quoted scalar as key");
8125 if(C4_UNLIKELY(m_evt_handler->m_curr->pos.line > startline))
8126 _c4err("multiline key");
8127 if(!firsttoken)
8128 startindent = _handle_unk_check_left_tokens(startindent, col);
8129 beginmap(startindent);
8130 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
8131 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
8132 after_beginmap(startindent);
8133 }
8134 }
8135 else if(first == '"')
8136 {
8137 _c4dbgp("runk: double-quoted scalar");
8138 _handle_unk_begin_doc();
 // sample the token position before scanning moves the parser forward
8139 bool firsttoken = m_evt_handler->m_curr->at_first_token();
8140 size_t col = m_evt_handler->m_curr->pos.col;
8141 ScannedScalar sc = _scan_scalar_dquot();
8142 if(!_maybe_scan_following_colon())
8143 {
8144 _c4dbgp("runk: set as val");
8145 _handle_annotations_before_blck_val_scalar();
8146 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
8147 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
8148 }
8149 else
8150 {
8151 _c4dbgp("runk: start new block map, set double-quoted scalar as key");
8152 if(C4_UNLIKELY(m_evt_handler->m_curr->pos.line > startline))
8153 _c4err("multiline key");
8154 if(!firsttoken)
8155 startindent = _handle_unk_check_left_tokens(startindent, col);
8156 beginmap(startindent);
8157 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
8158 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
8159 after_beginmap(startindent);
8160 }
8161 }
8162 else
8163 {
 // sample the token position before scanning moves the parser forward
8164 bool firsttoken = m_evt_handler->m_curr->at_first_token();
8165 size_t col = m_evt_handler->m_curr->pos.col;
8166 ScannedScalar sc;
8167 if(_scan_scalar_plain_unk(&sc))
8168 {
8169 _c4dbgp("runk: plain scalar");
8170 _handle_unk_begin_doc();
8171 if(!_maybe_scan_following_colon())
8172 {
8173 _c4dbgp("runk: set as val");
8174 _handle_annotations_before_blck_val_scalar();
8175 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
8176 m_evt_handler->set_val_scalar_plain(maybe_filtered);
8177 }
8178 else
8179 {
8180 _c4dbgp("runk: start new block map, set plain scalar as key");
8181 // there is already a check to multiline inside
8182 // _scan_scalar_plain_unk(), so we don't need to
8183 // throw an error here. but let's be safe by
8184 // asserting the assumption:
8185 _c4assert(m_evt_handler->m_curr->pos.line == startline);
8186 if(!firsttoken)
8187 startindent = _handle_unk_check_left_tokens(startindent, col);
8188 beginmap(startindent);
8189 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
8190 m_evt_handler->set_key_scalar_plain(maybe_filtered);
8191 after_beginmap(startindent);
8192 }
8193 }
8194 else
8195 {
8196 _c4err("parse error"); // LCOV_EXCL_LINE
8197 }
8198 }
8199 }
8200
 // once the parser has left the RUNK state, the BOM length is reset when
 // the parser is past the BOM line or at the end of it
8201 if(m_bom_len && has_none(RUNK))
8202 {
8203 _c4dbgpf("runk: BOMlen={} BOMline={} now={} at_end={}", m_bom_len, m_bom_line, m_evt_handler->m_curr->pos.line, !m_evt_handler->m_curr->line_contents.rem.len);
8204 if(m_evt_handler->m_curr->pos.line != m_bom_line || !m_evt_handler->m_curr->line_contents.rem.len)
8205 {
8206 _c4dbgp("runk: clear BOMlen");
8207 m_bom_len = 0;
8208 }
8209 }
8210}
8211
8212template<class EventHandler>
8213void ParseEngine<EventHandler>::_handle_unk_begin_doc()
8214{
8215 _c4dbgp("runk: begin doc");
8216 _check_trailing_doc_token();
8217 _maybe_begin_doc();
8218 add_flags(RDOC);
8219 m_doc_empty = false;
8220}
8221
8222template<class EventHandler>
8223size_t ParseEngine<EventHandler>::_handle_unk_check_left_tokens(size_t realindent, size_t col, bool skip_annotations)
8224{
8225 _c4assert(col >= 1);
8226 col -= 1;
8227 _c4assert(col >= m_bom_len);
8228 csubstr s = m_evt_handler->m_curr->line_contents.full.range(m_bom_len, col);
8229 size_t pos = 0;
8230 _c4dbgpf("runk: check left tokens: s={}", _prs(s, /*escape*/true));
8231 if(skip_annotations)
8232 {
8233 _handle_unk_get_first_non_pending_token_pos(s, &realindent, &pos);
8234 _c4dbgpf("runk: skip annotations: realindent={} pos={}", realindent, pos);
8235 }
8236 size_t firstns = s.first_not_of(' ', pos);
8237 if(firstns == npos)
8238 firstns = s.len;
8239 _c4dbgpf("runk: check left tokens:\n"
8240 " tokens={} skipped={}\n"
8241 " bomlen={} first={} col={}\n"
8242 " (bomlen+first)={} vs {}=col\n"
8243 " startindent={} lineindent={}"
8244 , _prs(s, /*escape*/true), _prs(s.sub(firstns), /*escape*/true)
8245 , m_bom_len, firstns, col
8246 , m_bom_len+firstns, col,
8247 realindent, m_evt_handler->m_curr->line_contents.indentation);
8248 if(m_bom_len + firstns != col)
8249 _c4err("parse error");
8250 if(!skip_annotations)
8251 realindent = firstns;
8252 _c4dbgpf("runk: pos={} firstns={} -> realindent={}", pos, firstns, realindent);
8253 return realindent;
8254}
8255
8256
8257/** skip annotations which are pending on the same line */
8258template<class EventHandler>
8259void ParseEngine<EventHandler>::_handle_unk_get_first_non_pending_token_pos(csubstr s, size_t *indent, size_t *first_non_token_pos)
8260{
8261 csubstr first, second;
8262 uint32_t total = _get_annotations_same_line(s, &first, &second);
8263 _c4dbgpf("runk: before skip: {}", _prs(s, true));
8264 size_t pos = s.first_not_of(" \t");
8265 if(pos == npos)
8266 pos = s.len;
8267 if(!total)
8268 {
8269 *indent = *first_non_token_pos = pos;
8270 return;
8271 }
8272 _c4assert(!s.sub(pos).begins_with_any(" \t"));
8273 _c4dbgpf("runk: after skip leading {} whitespace: {}", pos, _prs(s.sub(pos), true));
8274 _c4dbgpf("runk: first annotation: {}", first);
8275 _c4assert(first.len);
8276 _c4assert(first.is_sub(s));
8277 _c4assert(first.is_sub(s.sub(pos)));
8278 _c4assert(s.sub(pos).begins_with(first));
8279 *indent = pos;
8280 pos += first.len;
8281 _c4dbgpf("runk: after skip first annotation: pos={} {}", pos, _prs(s.sub(pos), true));
8282 if(total > 1)
8283 {
8284 _c4dbgpf("runk: second annotation: {}", second);
8285 _c4assert(total == 2);
8286 _c4assert(second.len);
8287 _c4assert(second.is_sub(s));
8288 _c4assert(second.is_sub(s.sub(pos)));
8289 csubstr spos = s.sub(pos);
8290 size_t more = spos.first_not_of(" \t");
8291 _c4assert(more != npos); // because the annotations are on the same line
8292 _c4dbgpf("runk: next nonspace: {}", pos + more);
8293 pos += more;
8294 _c4dbgpf("runk: after skip annotation whitespace: pos={} {}", pos, _prs(s.sub(pos), true));
8295 _c4assert(s.sub(pos).begins_with(second));
8296 pos += second.len;
8297 _c4dbgpf("runk: after skip annotation 2: pos={} {}", pos, _prs(s.sub(pos), true));
8298 }
8299 *first_non_token_pos = pos;
8300}
8301
8302
8303template<class EventHandler>
8304uint32_t ParseEngine<EventHandler>::_get_annotations_same_line(csubstr token_soup, csubstr *first_, csubstr *second_) const
8305{
8306 _c4assert(!m_evt_handler->m_curr->at_first_token());
8307 (void)token_soup;
8308 using EntryPtr = typename Annotation::Entry const* C4_RESTRICT;
8309 EntryPtr first = nullptr;
8310 EntryPtr second = nullptr;
8311 uint32_t total = (uint32_t)(m_pending_anchors.num_entries + m_pending_tags.num_entries);
8312 if(total)
8313 {
8314 _c4dbgpf("there are {} pending annotations: {} anchors + {} tags", total, m_pending_anchors.num_entries, m_pending_tags.num_entries);
8315 auto valid_if_same_line = [this](EntryPtr entry){
8316 _c4dbgpf("pending: {} indent={} line={} vs currline={}", _maybe_null_str(entry->str), entry->indentation, entry->line, m_evt_handler->m_curr->pos.line);
8317 return (entry->line == m_evt_handler->m_curr->pos.line) ? entry : nullptr;
8318 };
8319 // now select annotations only on the same line
8320 total = 0;
8321 for(size_t i = 0; i < m_pending_anchors.num_entries; ++i)
8322 total += !!valid_if_same_line(&m_pending_anchors.annotations[i]);
8323 for(size_t i = 0; i < m_pending_tags.num_entries; ++i)
8324 total += !!valid_if_same_line(&m_pending_tags.annotations[i]);
8325 _c4dbgpf("{} annotations on same line", total);
8326 _c4assert(total > 0); // because this function is only called
8327 // while not at the first token. That
8328 // means we must have same-line
8329 // annotations.
8330 auto get_first_on_same_line = [this](EntryPtr not_this_one){
8331 for(size_t i = 0; i < m_pending_anchors.num_entries; ++i)
8332 if(&m_pending_anchors.annotations[i] != not_this_one
8333 && m_pending_anchors.annotations[i].line == m_evt_handler->m_curr->pos.line)
8334 return &m_pending_anchors.annotations[i];
8335 for(size_t i = 0; i < m_pending_tags.num_entries; ++i)
8336 if(&m_pending_tags.annotations[i] != not_this_one
8337 && m_pending_tags.annotations[i].line == m_evt_handler->m_curr->pos.line)
8338 return &m_pending_tags.annotations[i];
8339 C4_UNREACHABLE();
8340 return (EntryPtr)nullptr; // LCOV_EXCL_LINE
8341 };
8342 _c4assert(total >= 1);
8343 // assign to first
8344 first = get_first_on_same_line(nullptr);
8345 _c4assert(first);
8346 _c4dbgpf("first annotation: {} indent={} line={}", _maybe_null_str(first->str), first->indentation, first->line);
8347 if(total > 1)
8348 {
8349 _c4assert(total == 2);
8350 // assign to second
8351 second = get_first_on_same_line(first);
8352 _c4assert(second);
8353 _c4dbgpf("second annotation: {} indent={} line={}", _maybe_null_str(second->str), second->indentation, second->line);
8354 }
8355 auto extract_string = [&](EntryPtr e){
8356 // tags can be null when the arena ran out of space
8357 if(!e->str.str || e->str.begins_with_any("!<"))
8358 {
8359 csubstr tag = e->orig;
8360 _c4assert(tag.str);
8361 _c4assert(tag.len);
8362 _c4assert(tag.is_sub(token_soup));
8363 _c4dbgpf("tag: {} -> {}", _maybe_null_str(e->str), tag);
8364 return tag;
8365 }
8366 csubstr anchor = e->str;
8367 _c4assert(anchor.len);
8368 _c4assert(anchor.str);
8369 _c4assert(anchor.is_sub(token_soup));
8370 _c4assert(!anchor.begins_with('&'));
8371 _c4assert(anchor.str - token_soup.str > 0);
8372 // add back the anchor's &
8373 --anchor.str;
8374 ++anchor.len;
8375 _c4assert(anchor.begins_with('&'));
8376 _c4dbgpf("anchor: {} -> {}", e->str, anchor);
8377 return anchor;
8378 };
8379 *first_ = first ? extract_string(first) : nullptr;
8380 *second_ = second ? extract_string(second) : nullptr;
8381 if(total > 1 && (first_->str > second_->str))
8382 {
8383 csubstr tmp = *first_;
8384 *first_ = *second_;
8385 *second_ = tmp;
8386 _c4dbgpf("swap first and second: {} -> {}", *first_, *second_);
8387 }
8388 }
8389 return total;
8390}
8391
8392
8393//-----------------------------------------------------------------------------
8394
8395template<class EventHandler>
8396C4_COLD void ParseEngine<EventHandler>::_handle_usty()
8397{
8398 _c4dbgpf("handle_usty target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
8399
8400 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_none(RBLCK|RFLOW), m_evt_handler->m_curr->pos);
8401
8402 #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
8403 if(has_any(RNXT))
8404 {
8405 _c4dbgp("usty[RNXT]: finishing!");
8406 _end_stream();
8407 }
8408 #endif
8409
8410 _maybe_skip_comment();
8411 csubstr rem = m_evt_handler->m_curr->line_contents.rem;
8412 if(!rem.len)
8413 return;
8414
8415 size_t pos = rem.first_not_of(" \t");
8416 if(pos)
8417 {
8418 pos = pos != npos ? pos : rem.len;
8419 _c4dbgpf("skipping indentation of {}", pos);
8420 _line_progressed(pos);
8421 rem = m_evt_handler->m_curr->line_contents.rem;
8422 if(!rem.len)
8423 return;
8424 _c4dbgpf("rem is now {}", _prs(rem));
8425 }
8426
8427 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, rem.len > 0, m_evt_handler->m_curr->pos);
8428 size_t startindent = m_evt_handler->m_curr->line_contents.indentation; // save
8429 char first = rem.str[0];
8430 if(has_any(RSEQ)) // destination is a sequence
8431 {
8432 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(RMAP), m_evt_handler->m_curr->pos);
8433 _c4dbgpf("usty[RSEQ]: first='{}'", _c4prc(first));
8434 if(first == '[')
8435 {
8436 _c4dbgp("usty[RSEQ]: it's a flow seq. merging it");
8437 add_flags(RNXT);
8438 m_evt_handler->_push();
8439 addrem_flags(RFLOW|RVAL, RNXT|USTY);
8440 _set_indentation(startindent);
8441 _line_progressed(1);
8442 _maybe_skip_whitespace_tokens();
8443 }
8444 else if(first == '-' && _is_blck_token(rem))
8445 {
8446 _c4dbgp("usty[RSEQ]: it's a block seq. merging it");
8447 add_flags(RNXT);
8448 m_evt_handler->_push();
8449 addrem_flags(RBLCK|RVAL, RNXT|USTY);
8450 _set_indentation(startindent);
8451 _line_progressed(1);
8452 _maybe_skip_whitespace_tokens();
8453 }
8454 else
8455 {
8456 _c4err("can only parse a seq into an existing seq");
8457 }
8458 }
8459 else if(has_any(RMAP)) // destination is a map
8460 {
8461 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(RSEQ), m_evt_handler->m_curr->pos);
8462 _c4dbgpf("usty[RMAP]: first='{}'", _c4prc(first));
8463 if(first == '{')
8464 {
8465 _c4dbgp("usty[RMAP]: it's a flow map. merging it");
8466 add_flags(RNXT);
8467 _handle_annotations_before_blck_val_scalar();
8468 m_evt_handler->_push();
8469 addrem_flags(RMAP|RFLOW|RKEY, RNXT|USTY);
8470 _set_indentation(startindent);
8471 _line_progressed(1);
8472 _maybe_skip_whitespace_tokens();
8473 }
8474 else if(first == '?' && _is_blck_token(rem))
8475 {
8476 _c4dbgp("usty[RMAP]: it's a block map + this key is complex");
8477 add_flags(RNXT);
8478 _handle_annotations_before_blck_val_scalar();
8479 m_evt_handler->_push();
8480 addrem_flags(RMAP|RBLCK|QMRK, RNXT|USTY);
8481 _save_indentation();
8482 _line_progressed(1);
8483 _maybe_skip_whitespace_tokens();
8484 }
8485 else if(first == ':' && _is_blck_token(rem))
8486 {
8487 _c4dbgp("usty[RMAP]: it's a map with an empty key");
8488 add_flags(RNXT);
8489 _handle_annotations_before_blck_val_scalar();
8490 m_evt_handler->_push();
8491 m_evt_handler->set_key_scalar_plain_empty();
8492 addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8493 _save_indentation();
8494 _line_progressed(1);
8495 _maybe_skip_whitespace_tokens();
8496 }
8497 else if(rem.begins_with('&'))
8498 {
8499 csubstr anchor = _scan_anchor();
8500 _c4dbgpf("usty[RMAP]: anchor! {}", _prs(anchor));
8501 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8502 const size_t line = m_evt_handler->m_curr->pos.line;
8503 _add_annotation(&m_pending_anchors, anchor, indentation, line);
8504 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
8505 }
8506 else if(first == '*')
8507 {
8508 csubstr ref = _scan_ref_map();
8509 _c4dbgpf("usty[RMAP]: ref! {}", _prs(ref));
8510 if(!_maybe_scan_following_colon())
8511 {
8512 _c4err("cannot read a VAL to a map");
8513 }
8514 else
8515 {
8516 _c4dbgp("usty[RMAP]: start new block map, set ref as key");
8517 const size_t startline = m_evt_handler->m_curr->pos.line; // save
8518 add_flags(RNXT);
8519 _handle_annotations_before_start_mapblck(startline);
8520 m_evt_handler->_push();
8521 _handle_keyref(ref);
8522 _maybe_skip_whitespace_tokens();
8523 _set_indentation(startindent);
8524 addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8525 }
8526 }
8527 else if(first == '!')
8528 {
8529 csubstr tag = _scan_tag();
8530 _c4dbgpf("usty[RMAP]: val tag! {}", _prs(tag));
8531 // we need to buffer the tags, as there may be two
8532 // consecutive tags in here
8533 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8534 const size_t line = m_evt_handler->m_curr->pos.line;
8535 _add_annotation(&m_pending_tags, tag, indentation, line);
8536 }
8537 else if(first == '[' || (first == '-' && _is_blck_token(rem)))
8538 {
8539 _c4err("cannot parse a seq into an existing map");
8540 }
8541 else
8542 {
8543 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL), m_evt_handler->m_curr->pos);
8544 startindent = m_evt_handler->m_curr->line_contents.indentation; // save
8545 const size_t startline = m_evt_handler->m_curr->pos.line; // save
8546 ScannedScalar sc;
8547 _c4dbgpf("usty[RMAP]: maybe scalar. first='{}'", _c4prc(first));
8548 if(first == '\'')
8549 {
8550 _c4dbgp("usty[RMAP]: scanning single-quoted scalar");
8551 sc = _scan_scalar_squot();
8552 if(!_maybe_scan_following_colon())
8553 {
8554 _c4err("cannot read a VAL to a map");
8555 }
8556 else
8557 {
8558 _c4dbgp("usty[RMAP]: start new block map, set scalar as key");
8559 add_flags(RNXT);
8560 _handle_annotations_before_start_mapblck(startline);
8561 m_evt_handler->_push();
8562 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8563 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
8564 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
8565 _set_indentation(startindent);
8566 addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8567 _maybe_skip_whitespace_tokens();
8568 }
8569 }
8570 else if(first == '"')
8571 {
8572 _c4dbgp("usty[RMAP]: scanning double-quoted scalar");
8573 sc = _scan_scalar_dquot();
8574 if(!_maybe_scan_following_colon())
8575 {
8576 _c4err("cannot read a VAL to a map");
8577 }
8578 else
8579 {
8580 _c4dbgp("usty[RMAP]: start new block map, set double-quoted scalar as key");
8581 add_flags(RNXT);
8582 _handle_annotations_before_start_mapblck(startline);
8583 m_evt_handler->_push();
8584 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8585 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
8586 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
8587 _set_indentation(startindent);
8588 addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8589 _maybe_skip_whitespace_tokens();
8590 }
8591 }
8592 else if(first == '|')
8593 {
8594 _c4err("block literal keys must be enclosed in '?'");
8595 }
8596 else if(first == '>')
8597 {
8598 _c4err("block literal keys must be enclosed in '?'");
8599 }
8600 else if(_scan_scalar_plain_unk(&sc))
8601 {
8602 _c4dbgp("usty[RMAP]: got a plain scalar");
8603 if(!_maybe_scan_following_colon())
8604 {
8605 _c4err("cannot read a VAL to a map");
8606 }
8607 else
8608 {
8609 _c4dbgp("usty[RMAP]: start new block map, set scalar as key");
8610 add_flags(RNXT);
8611 _handle_annotations_before_start_mapblck(startline);
8612 m_evt_handler->_push();
8613 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8614 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
8615 m_evt_handler->set_key_scalar_plain(maybe_filtered);
8616 _set_indentation(startindent);
8617 addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8618 _maybe_skip_whitespace_tokens();
8619 }
8620 }
8621 else
8622 {
8623 _c4err("parse error"); // LCOV_EXCL_LINE
8624 }
8625 }
8626 }
8627 else // destination is unknown
8628 {
8629 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(RSEQ), m_evt_handler->m_curr->pos);
8630 _c4dbgpf("usty[UNK]: first='{}'", _c4prc(first));
8631 if(first == '[')
8632 {
8633 _c4dbgp("usty[UNK]: it's a flow seq");
8634 add_flags(RNXT);
8635 _handle_annotations_before_blck_val_scalar();
8636 m_evt_handler->begin_seq_val_flow();
8637 addrem_flags(RSEQ|RFLOW|RVAL, RNXT|USTY);
8638 _set_indentation(startindent);
8639 _line_progressed(1);
8640 _maybe_skip_whitespace_tokens();
8641 }
8642 else if(first == '-' && _is_blck_token(rem))
8643 {
8644 _c4dbgp("usty[UNK]: it's a block seq");
8645 add_flags(RNXT);
8646 _handle_annotations_before_blck_val_scalar();
8647 m_evt_handler->begin_seq_val_block();
8648 addrem_flags(RSEQ|RBLCK|RVAL, RNXT|USTY);
8649 _set_indentation(startindent);
8650 _line_progressed(1);
8651 _maybe_skip_whitespace_tokens();
8652 }
8653 else if(first == '{')
8654 {
8655 _c4dbgp("usty[UNK]: it's a flow map");
8656 add_flags(RNXT);
8657 _handle_annotations_before_blck_val_scalar();
8658 m_evt_handler->begin_map_val_flow();
8659 addrem_flags(RMAP|RFLOW|RKEY, RNXT|USTY);
8660 _set_indentation(startindent);
8661 _line_progressed(1);
8662 _maybe_skip_whitespace_tokens();
8663 }
8664 else if(first == '?' && _is_blck_token(rem))
8665 {
8666 _c4dbgp("usty[UNK]: it's a map + this key is complex");
8667 add_flags(RNXT);
8668 _handle_annotations_before_blck_val_scalar();
8669 m_evt_handler->begin_map_val_block();
8670 addrem_flags(RMAP|RBLCK|QMRK, RNXT|USTY);
8671 _save_indentation();
8672 _line_progressed(1);
8673 _maybe_skip_whitespace_tokens();
8674 }
8675 else if(first == ':' && _is_blck_token(rem))
8676 {
8677 _c4dbgp("usty[UNK]: it's a map with an empty key");
8678 add_flags(RNXT);
8679 _handle_annotations_before_blck_val_scalar();
8680 m_evt_handler->begin_map_val_block();
8681 m_evt_handler->set_key_scalar_plain_empty();
8682 addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8683 _save_indentation();
8684 _line_progressed(1);
8685 _maybe_skip_whitespace_tokens();
8686 }
8687 else if(first == '&')
8688 {
8689 csubstr anchor = _scan_anchor();
8690 _c4dbgpf("usty[UNK]: anchor! {}", _prs(anchor));
8691 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8692 const size_t line = m_evt_handler->m_curr->pos.line;
8693 _add_annotation(&m_pending_anchors, anchor, indentation, line);
8694 _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
8695 }
8696 else if(first == '*')
8697 {
8698 csubstr ref = _scan_ref_map();
8699 _c4dbgpf("usty[UNK]: ref! {}", _prs(ref));
8700 if(!_maybe_scan_following_colon())
8701 {
8702 _c4dbgp("usty[UNK]: set val ref");
8703 _handle_valref(ref);
8704 }
8705 else
8706 {
8707 _c4dbgp("usty[UNK]: start new block map, set ref as key");
8708 const size_t startline = m_evt_handler->m_curr->pos.line; // save
8709 add_flags(RNXT);
8710 _handle_annotations_before_start_mapblck(startline);
8711 m_evt_handler->begin_map_val_block();
8712 _handle_keyref(ref);
8713 _maybe_skip_whitespace_tokens();
8714 _set_indentation(startindent);
8715 addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8716 }
8717 }
8718 else if(first == '!')
8719 {
8720 csubstr tag = _scan_tag();
8721 _c4dbgpf("usty[UNK]: val tag! {}", _prs(tag));
8722 // we need to buffer the tags, as there may be two
8723 // consecutive tags in here
8724 const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
8725 const size_t line = m_evt_handler->m_curr->pos.line;
8726 _add_annotation(&m_pending_tags, tag, indentation, line);
8727 }
8728 else
8729 {
8730 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! has_any(SSCL), m_evt_handler->m_curr->pos);
8731 startindent = m_evt_handler->m_curr->line_contents.indentation; // save
8732 const size_t startline = m_evt_handler->m_curr->pos.line; // save
8733 first = rem.str[0];
8734 ScannedScalar sc;
8735 _c4dbgpf("usty[UNK]: maybe scalar. first='{}'", _c4prc(first));
8736 if(first == '\'')
8737 {
8738 _c4dbgp("usty[UNK]: scanning single-quoted scalar");
8739 sc = _scan_scalar_squot();
8740 if(!_maybe_scan_following_colon())
8741 {
8742 _c4dbgp("usty[UNK]: set as val");
8743 _handle_annotations_before_blck_val_scalar();
8744 csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
8745 m_evt_handler->set_val_scalar_squoted(maybe_filtered);
8746 _end_stream();
8747 }
8748 else
8749 {
8750 _c4dbgp("usty[UNK]: start new block map, set scalar as key");
8751 add_flags(RNXT);
8752 _handle_annotations_before_start_mapblck(startline);
8753 m_evt_handler->begin_map_val_block();
8754 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8755 csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
8756 m_evt_handler->set_key_scalar_squoted(maybe_filtered);
8757 _set_indentation(startindent);
8758 addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8759 _maybe_skip_whitespace_tokens();
8760 }
8761 }
8762 else if(first == '"')
8763 {
8764 _c4dbgp("usty[UNK]: scanning double-quoted scalar");
8765 sc = _scan_scalar_dquot();
8766 if(!_maybe_scan_following_colon())
8767 {
8768 _c4dbgp("usty[UNK]: set as val");
8769 _handle_annotations_before_blck_val_scalar();
8770 csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
8771 m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
8772 _end_stream();
8773 }
8774 else
8775 {
8776 _c4dbgp("usty[UNK]: start new block map, set double-quoted scalar as key");
8777 add_flags(RNXT);
8778 _handle_annotations_before_start_mapblck(startline);
8779 m_evt_handler->begin_map_val_block();
8780 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8781 csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
8782 m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
8783 _set_indentation(startindent);
8784 addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8785 _maybe_skip_whitespace_tokens();
8786 }
8787 }
8788 else if(first == '|')
8789 {
8790 _c4dbgp("usty[UNK]: scanning block-literal scalar");
8791 ScannedBlock sb;
8792 _scan_block(&sb, startindent);
8793 _c4dbgp("usty[UNK]: set as val");
8794 _handle_annotations_before_blck_val_scalar();
8795 csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
8796 m_evt_handler->set_val_scalar_literal(maybe_filtered);
8797 _end_stream();
8798 }
8799 else if(first == '>')
8800 {
8801 _c4dbgp("usty[UNK]: scanning block-folded scalar");
8802 ScannedBlock sb;
8803 _scan_block(&sb, startindent);
8804 _c4dbgp("usty[UNK]: set as val");
8805 _handle_annotations_before_blck_val_scalar();
8806 csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
8807 m_evt_handler->set_val_scalar_folded(maybe_filtered);
8808 _end_stream();
8809 }
8810 else if(_scan_scalar_plain_unk(&sc))
8811 {
8812 _c4dbgp("usty[UNK]: got a plain scalar");
8813 if(!_maybe_scan_following_colon())
8814 {
8815 _c4dbgp("usty[UNK]: set as val");
8816 _handle_annotations_before_blck_val_scalar();
8817 csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
8818 m_evt_handler->set_val_scalar_plain(maybe_filtered);
8819 _end_stream();
8820 }
8821 else
8822 {
8823 _c4dbgp("usty[UNK]: start new block map, set scalar as key");
8824 add_flags(RNXT);
8825 _handle_annotations_before_start_mapblck(startline);
8826 m_evt_handler->begin_map_val_block();
8827 _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
8828 csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
8829 m_evt_handler->set_key_scalar_plain(maybe_filtered);
8830 _set_indentation(startindent);
8831 addrem_flags(RMAP|RBLCK|RVAL, RNXT|USTY);
8832 _maybe_skip_whitespace_tokens();
8833 }
8834 }
8835 else
8836 {
8837 _c4err("parse error"); // LCOV_EXCL_LINE
8838 }
8839 }
8840 }
8841}
8842
8843
8844//-----------------------------------------------------------------------------
8845
8846template<class EventHandler>
8848{
8849 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
8850 _RYML_SAVE_TEST_JSON(filename, src);
8851 m_evt_handler->start_parse(filename.str, src);
8852 m_evt_handler->begin_stream();
8853 _reset();
8854 while( ! _finished_file())
8855 {
8856 _scan_line();
8857 while( ! _finished_line())
8858 {
8860 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! m_evt_handler->m_curr->line_contents.rem.empty(), m_evt_handler->m_curr->pos);
8861 if(has_any(RSEQ))
8862 {
8863 _handle_seq_json();
8864 }
8865 else if(has_any(RMAP))
8866 {
8867 _handle_map_json();
8868 }
8869 else if(has_any(RUNK))
8870 {
8871 _handle_unk_json();
8872 }
8873 else
8874 {
8875 _c4err("internal error"); // LCOV_EXCL_LINE
8876 }
8877 }
8878 if(_finished_file())
8879 break; // it may have finished because of multiline blocks
8880 _line_ended();
8881 }
8882 _end_stream();
8883 m_evt_handler->finish_parse();
8884}
8885
8886
8887//-----------------------------------------------------------------------------
8888
8889template<class EventHandler>
8891{
8892 _RYML_ASSERT_BASIC_(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
8893 _RYML_SAVE_TEST_YAML(filename, src);
8894 m_evt_handler->start_parse(filename.str, src);
8895 m_evt_handler->begin_stream();
8896 _reset();
8897 while( ! _finished_file())
8898 {
8899 _scan_line();
8900 while( ! _finished_line())
8901 {
8903 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, ! m_evt_handler->m_curr->line_contents.rem.empty(), m_evt_handler->m_curr->pos);
8904 if(has_any(RFLOW))
8905 {
8906 if(has_none(RSEQIMAP))
8907 {
8908 if(has_any(RSEQ))
8909 {
8910 _handle_seq_flow();
8911 }
8912 else
8913 {
8914 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RMAP), m_evt_handler->m_curr->pos);
8915 _handle_map_flow();
8916 }
8917 }
8918 else
8919 {
8920 _handle_seq_imap();
8921 }
8922 }
8923 else if(has_any(RBLCK))
8924 {
8925 if(has_any(RSEQ))
8926 {
8927 _handle_seq_block();
8928 }
8929 else
8930 {
8931 _RYML_ASSERT_PARSE_(m_evt_handler->m_stack.m_callbacks, has_all(RMAP), m_evt_handler->m_curr->pos);
8932 _handle_map_block();
8933 }
8934 }
8935 else if(has_any(RUNK))
8936 {
8937 _handle_unk();
8938 }
8939 else if(has_any(USTY))
8940 {
8941 _handle_usty();
8942 }
8943 else
8944 {
8945 _c4err("internal error"); // LCOV_EXCL_LINE
8946 }
8947 }
8948 if(_finished_file())
8949 break; // it may have finished because of multiline blocks
8950 _line_ended();
8951 }
8952 _end_stream();
8953 m_evt_handler->finish_parse();
8954}
8955/** @endcond */
8956
8957} // namespace yml
8958} // namespace c4
8959
8960// NOLINTEND(hicpp-signed-bitwise,cppcoreguidelines-avoid-goto,hicpp-avoid-goto,hicpp-multiway-paths-covered,modernize-avoid-c-style-cast)
8961
8962#undef _c4dbgnextline
8963#undef _c4assert
8964#undef _c4err
8965
8966C4_SUPPRESS_WARNING_MSVC_POP
8967C4_SUPPRESS_WARNING_GCC_CLANG_POP
8968
8969#endif // _C4_YML_PARSE_ENGINE_DEF_HPP_
Lightweight generic type-safe wrappers for converting individual values to/from strings.
This is the main driver of parsing logic: it scans the YAML or JSON source for tokens,...
FilterResult filter_scalar_plain(csubstr scalar, substr dst, size_t indentation)
filter a plain scalar
csubstr location_contents(Location const &loc) const
Get the string starting at a particular location, to the end of the parsed source buffer.
FilterResult filter_scalar_squoted(csubstr scalar, substr dst)
filter a single-quoted scalar
FilterResult filter_scalar_dquoted(csubstr scalar, substr dst)
filter a double-quoted scalar
void parse_json_in_place_ev(csubstr filename, substr src)
parse JSON in place, emitting events to the current handler
Location val_location(const char *val) const
Given a pointer to a buffer position, get the location.
FilterResult filter_scalar_plain_in_place(substr scalar, size_t cap, size_t indentation)
filter a plain scalar in place
FilterResult filter_scalar_squoted_in_place(substr scalar, size_t cap)
filter a single-quoted scalar in place
FilterResultExtending filter_scalar_dquoted_in_place(substr scalar, size_t cap)
filter a double-quoted scalar in place
void parse_in_place_ev(csubstr filename, substr src)
parse YAML in place, emitting events to the current handler
FilterResult filter_scalar_block_literal_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
filter a block-literal scalar in place
ParseEngine(EventHandler *evt_handler, ParserOptions const &opts={})
FilterResult filter_scalar_block_literal(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
filter a block-literal scalar
FilterResult filter_scalar_block_folded_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
filter a block-folded scalar in place
ParseEngine & operator=(ParseEngine &&) noexcept
FilterResult filter_scalar_block_folded(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
filter a block-folded scalar
#define RYML_LOCATIONS_SMALL_THRESHOLD
threshold at which a location search will revert from linear to binary search.
Definition common.hpp:41
#define RYML_NOEXCEPT
Conditionally expands to noexcept when RYML_USE_ASSERT is 0 and is empty otherwise.
Definition common.hpp:253
bool atou(csubstr str, T *v) noexcept
Convert a trimmed string to an unsigned integral value.
void err_parse(ErrorDataParse const &errdata, const char *msg)
trigger a parse error to its respective handler, with a non-formatted error message.
Definition common.cpp:210
bool read_hex(csubstr s, I *v) noexcept
read an hexadecimal integer from a string.
Definition charconv.hpp:903
basic_substring< char > substr
a mutable string view
Definition substr.hpp:2356
basic_substring< const char > csubstr
an immutable string view
Definition substr.hpp:2357
bool is_valid_tag_handle(csubstr handle)
Definition tag.cpp:210
bool is_custom_tag(csubstr tag)
is a tag of the form !handle!tag?
Definition tag.cpp:9
substr decode_code_point(substr out, csubstr code_point)
decode the given code_point, writing into the output string in out.
@ npos
a null string position
Definition common.hpp:319
int ParserFlag_t
data type for ParserState_e
@ RTOP
reading at top level
@ RSET
the (implicit) map being read is a !!set.
@ RSEQ
reading a seq
@ RNXT
read next sibling
@ RUNK
reading unknown state (when starting): must determine whether scalar, map or seq
@ RKEY
reading a key
@ RKCL
reading the key colon (ie the : after the key in the map)
@ NDOC
no document mode. a document has ended and another has not started yet.
@ RDOC
reading a document
@ QSCL
stored scalar was quoted
@ RBLCK
reading in block mode
@ RMAP
reading a map
@ USTY
reading in unknown style mode - must determine FLOW or BLCK reading an implicit map nested in an expl...
@ QMRK
reading an explicit key (? key)
@ SSCL
there's a stored scalar
@ RVAL
reading a val
@ RFLOW
reading is inside explicit flow chars: [] or {}
size_t adjust_pos_with_escapes(csubstr scalar, size_t pos, bool keep_newlines=false)
Adjust a position in a scalar, increasing it to account for any escaped characters.
size_t to_chars(substr buf, escaped_scalar e)
formatting implementation to escape a scalar with escape_scalar()
RYML_ID_TYPE id_type
The type of a node id in the YAML tree; to override the default type, define the macro RYML_ID_TYPE t...
Definition common.hpp:305
@ UTF16BE
UTF16, Big-Endian.
Definition common.hpp:327
@ UTF8
UTF8.
Definition common.hpp:325
@ UTF16LE
UTF16, Little-Endian.
Definition common.hpp:326
@ NOBOM
No Byte Order Mark was found.
Definition common.hpp:324
@ UTF32BE
UTF32, Big-Endian.
Definition common.hpp:329
@ UTF32LE
UTF32, Little-Endian.
Definition common.hpp:328
enum c4::yml::Encoding_ Encoding_e
csubstr version()
Definition version.cpp:6
@ NONE
an index to none
Definition common.hpp:312
(Undefined by default) Use shorter error message from checks/asserts: do not show the check condition...
Definition common.cpp:14
#define _prflag(fl, txt)
#define _c4dbgnextline()
#define _ryml_relocate(s)
#define _c4err(...)
#define _RYML_SAVE_TEST_YAML(filename, src)
#define _c4assert(...)
#define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without)
#define _RYML_WITH_TAB_TOKENS(...)
#define _RYML_SAVE_TEST_JSON(filename, src)
basic_substring range(size_t first, size_t last=npos) const noexcept
return [first,last[.
Definition substr.hpp:520
size_t first_not_of(const C c) const
Definition substr.hpp:994
basic_substring triml(const C c) const
trim left
Definition substr.hpp:630
size_t first_of(const C c, size_t start=0) const
Definition substr.hpp:935
basic_substring first(size_t num) const noexcept
return the first num elements: [0,num[
Definition substr.hpp:530
basic_substring sub(size_t first) const noexcept
return [first,len[
Definition substr.hpp:503
basic_substring trimr(const C c) const
trim the character c from the right
Definition substr.hpp:654
C * str
a restricted pointer to the first character of the substring
Definition substr.hpp:216
Data for a parse error.
Definition common.hpp:385
Filters an input string into a different output string.
Abstracts the fact that a scalar filter result may not fit in the intended memory.
Abstracts the fact that a scalar filter result may not fit in the intended memory.
Helper to control the line contents while parsing a buffer.
holds a source or yaml file position, for example when an error is detected; See also location_format...
Definition common.hpp:345
csubstr name
name of the file
Definition common.hpp:349
Options to give to the ParseEngine to control its behavior.
Accelerator structure to reduce memory requirements by enabling reuse of resolved tags.
Definition tag.hpp:71
formatting helper to escape a scalar with escape_scalar_fn()
utilities for UTF and Byte Order Mark