rapidyaml 0.14.0
parse and emit YAML, and do it fast
Loading...
Searching...
No Matches
event_handler_ints.hpp
Go to the documentation of this file.
1#ifndef _C4_YML_EXTRA_EVENT_HANDLER_INTS_HPP_
2#define _C4_YML_EXTRA_EVENT_HANDLER_INTS_HPP_
3
4/** @file event_handler_ints.hpp An event handler that creates an
5 * integer buffer with a very compact representation of the YAML tree
6 * in a source buffer. This is not part of the main rapidyaml library.
7 *
8 * @see c4::yml::extra::ievt::EventFlags
9 * @see c4::yml::extra::EventHandlerInts
10 * */
11
12#ifndef RYML_SINGLE_HEADER
13#ifndef _C4_YML_NODE_TYPE_HPP_
14#include <c4/yml/node_type.hpp>
15#endif
16#ifndef _C4_YML_EVENT_HANDLER_STACK_HPP_
18#endif
19#ifndef _C4_YML_TAG_HPP_
20#include <c4/yml/tag.hpp>
21#endif
22#ifndef _C4_YML_DETAIL_DBGPRINT_HPP_
23#include <c4/yml/detail/dbgprint.hpp>
24#endif
25#endif
26
27// NOLINTBEGIN(hicpp-signed-bitwise)
28
29namespace c4 {
30namespace yml {
31namespace extra {
32
33/** @addtogroup doc_event_handlers
34 * @{ */
35
36namespace ievt {
37
38/** data type for integer events. This is set to a 32 bit signed
39 * integer to allow compatibility with a wide range of processing
40 * languages. */
41using DataType = int32_t;
42
43/** enumeration of integer event bits. */
44typedef enum : DataType {
45
46 // Structure flags
47 KEY_ = (1 << 0), ///< as key
48 VAL_ = (1 << 1), ///< as value
49 /// special flag to enable look-back in the event array. it
50 /// signifies that the previous event has a string, meaning that
51 /// the jump back to that event is 3 positions. without this flag it
52 /// would be impossible to jump to the previous event.
53 /// see also @ref WSTR
54 PSTR = (1 << 2),
55 /// IMPORTANT. Marks events whose string was placed in the
56 /// arena. This happens when the filtered string is larger than the
57 /// original string in the YAML code (eg from tags that resolve to
58 /// a larger string, or from "\L" or "\P" in double quotes, which
59 /// expand from two to three bytes). Because of this size
60 /// expansion, the filtered string cannot be placed in the original
61 /// source and needs to be placed in the arena.
62 AREN = (1 << 3),
63
64 // Event scopes
65 BEG_ = (1 << 5), ///< scope: begin
66 END_ = (1 << 6), ///< scope: end
67 SEQ_ = (1 << 7), ///< scope: seq
68 MAP_ = (1 << 8), ///< scope: map
69 DOC_ = (1 << 9), ///< scope: doc
70 EXPL = (1 << 10), ///< `---` (with BDOC) or `...` (with EDOC)
71 STRM = (1 << 11), ///< scope: stream
72 BSEQ = BEG_|SEQ_, ///< begin seq (+SEQ in test suite events)
73 ESEQ = END_|SEQ_, ///< end seq (-SEQ in test suite events)
74 BMAP = BEG_|MAP_, ///< begin map (+MAP in test suite events)
75 EMAP = END_|MAP_, ///< end map (-MAP in test suite events)
76 BSTR = BEG_|STRM, ///< begin stream (+STR in test suite events)
77 ESTR = END_|STRM, ///< end stream (-STR in test suite events)
78 BDOC = BEG_|DOC_, ///< begin doc (+DOC in test suite events)
79 EDOC = END_|DOC_, ///< end doc (-DOC in test suite events)
80
81 // Single events
82 SCLR = (1 << 12), ///< scalar (=VAL in test suite events)
83 ALIA = (1 << 13), ///< *ref (reference)
84 ANCH = (1 << 14), ///< &anchor
85 TAG_ = (1 << 15), ///< !tag
86
87 // Style flags
88 PLAI = (1 << 16), ///< scalar: plain
89 SQUO = (1 << 17), ///< scalar: single-quoted (')
90 DQUO = (1 << 18), ///< scalar: double-quoted ("")
91 LITL = (1 << 19), ///< scalar: block literal (|)
92 FOLD = (1 << 20), ///< scalar: block folded (>)
93 FLOW = (1 << 21), ///< container: flow: [] for seqs or {} for maps
94 BLCK = (1 << 22), ///< container: block
95
96 // Directive flags
97 YAML = (1 << 23), ///< yaml directive: `\%YAML <version>`
98 TAGH = (1 << 24), ///< tag directive, handle: `\%TAG <handle> ........`
99 TAGP = (1 << 25), ///< tag directive, prefix: `\%TAG ........ <prefix>`
100
101 /// special flag to mark a scalar as unfiltered (when the parser
102 /// is set not to filter).
103 UNFILT = (1 << 26),
104
105 // Utility flags/masks
106 /// the last flag defined above
108 /// a mask of all bits in this enumeration
109 MASK = (LAST << 1) - 1,
110
111 /// WithSTRing: mask of all the events that encode a string
112 /// following the event. For such events, the next two integers
113 /// will provide respectively the string's offset and length. See
114 /// also @ref PSTR.
116
117} EventFlags;
118
119} // namespace ievt
120
121/** @} */
122
123} // namespace extra
124} // namespace yml
125} // namespace c4
126
127
128//-----------------------------------------------------------------------------
129//-----------------------------------------------------------------------------
130//-----------------------------------------------------------------------------
131
132namespace c4 {
133namespace yml {
134namespace extra {
135
136/** @addtogroup doc_event_handlers
137 * @{ */
138
139/** Read YAML source and, without undergoing a full parse, estimate
140 * the size of the integer buffer required for @ref
141 * EventHandlerInts. This estimation is meant to exceed the actual
142 * number of required events.
143 *
144 * @note This function must overpredict. It does so for every case in
145 * the hundreds/thousands of extensive tests of rapidyaml -- both for
146 * the YAML test suite and the internal cases. If you find a case
147 * where that does not hold, it is a bug. Please report it at
148 * https://github.com/biojppm/rapidyaml/issues! */
150
151/** @} */
152
153} // namespace extra
154} // namespace yml
155} // namespace c4
156
157
158//-----------------------------------------------------------------------------
159//-----------------------------------------------------------------------------
160//-----------------------------------------------------------------------------
161
162C4_SUPPRESS_WARNING_GCC_CLANG_PUSH
163C4_SUPPRESS_WARNING_GCC_CLANG("-Wold-style-cast")
164C4_SUPPRESS_WARNING_GCC("-Wuseless-cast")
165
166namespace c4 {
167namespace yml {
168namespace extra {
169
170
171/** @addtogroup doc_event_handlers
172 * @{ */
173
174/** @cond dev */
175struct EventHandlerIntsState : public c4::yml::ParserState
176{
177 c4::yml::type_bits evt_type;
178 int32_t evt_id;
179};
180/** @endcond */
181
182
183/** A parser event handler that creates a compact representation of
184 * the YAML tree in a contiguous buffer of integers. The integers are
185 * @ref ievt::EventFlags containing masks (to represent events),
186 * interleaved with offset+length (to represent strings in the source
187 * buffer).
188 *
189 * This is meant for use by other programming languages, and supports
190 * container keys (unlike the ryml tree). It parses faster than the ryml
191 * tree parser, because the resulting data structure is much simpler.
192 *
193 * The resulting integer buffer is a linear array of integers containing
194 * events (as a mask of @ref ievt::EventFlags), which in some cases (see
195 * @ref ievt::WSTR) are followed by an encoded string (encoded as an
196 * offset and length to the parsed source buffer).
197 *
198 * For example, parsing `[a, bb, ccc]` results in the following event
199 * buffer (grouped to highlight the event sequence structure):
200 *
201 * ```c++
202 * using namespace c4::yml::extra::ievt;
203 * const DataType arr[] = { // result of parsing: [a, bb, ccc]
204 * BSTR, // begin stream
205 * BDOC, // begin doc
206 * VAL_|BSEQ|FLOW, // begin seq as val, flow
207 * VAL_|SCLR|PLAI, 1, 1, // val scalar, plain style: "a" starts at offset 1 and has length 1
208 * VAL_|SCLR|PLAI|PSTR, 4, 2, // val scalar, plain style: "bb" starts at offset 4 and has length 2; preceded by a string event (PSTR)
209 * VAL_|SCLR|PLAI|PSTR, 8, 3, // val scalar, plain style: "ccc" starts at offset 8 and has length 3; preceded by a string event (PSTR)
210 * ESEQ|PSTR, // end seq; preceded by a string event (PSTR)
211 * EDOC, // end doc
212 * ESTR, // end stream
213 * };
214 * ```
215 *
216 * Here is a sketch clarifying the meaning of this event sequence:
217 *
218@code
219source : [a, bb, ccc]
220 has a string........
221 | offset "a"
222 | | length "a"
223 | | |
224 event0 event1 event2 [ event3 "a"......|..|
225 | | | | | |
226(start) +--------+-------+------------------+---------------+--+-----> (continued)
227arr[i] : BSTR BDOC VAL_|BSEQ|FLOW VAL_|SCLR|PLAI..1..1
228i : 0 1 2 3 4 5
229
230
231 has a string............. has a string.............
232 | offset "bb" | offset "ccc"
233 | | length "bb" | | length "ccc"
234 | | | | | |
235 event4 "bb"..........|..| event5 "ccc".........|..|
236 | | | | | |
237 (cont)--> -----+--------------------+--+--------------+--------------------+--+-----> (continued)
238arr[i] : VAL_|SCLR|PLAI|PSTR..4..2 VAL_|SCLR|PLAI|PSTR..8..3
239i : 6 | 7 8 9 | 10 11
240 | |
241 prev event has string prev event has string
242 (to get to prev, jump (to get to prev, jump
243 back 3 slots: ie 6->3) back 3 slots: ie 9->6)
244
245
246
247 event6 ] event7 event8
248 | | |
249 (cont)--> -----+-------------+--------+-----| (end)
250arr[i] : ESEQ|PSTR EDOC ESTR
251i : 12 | 13 14
252 |
253 prev event has string
254 (to get to it, jump
255 back 3 slots: ie 12->9)
256@endcode
257 *
258 * Note that the buffer contains both events and strings encoded as
259 * integer pairs. That is, events that have an associated string are
260 * immediately followed by two integers providing the offset and length
261 * of that string in the source buffer. (In the example above, this
262 * happens in the events for the strings `a`, `bb`, and `ccc` at
263 * positions 3, 6 and 9, respectively).
264 *
265 * The flag @ref ievt::PSTR and the mask @ref ievt::WSTR are provided to
266 * enable easier iteration over the array: you can use them to test for
267 * presence of a string when iterating over the array.
268 *
269 * The flag @ref ievt::PSTR announces that an event is *preceded* by a
270 * string. That is, the previous event has a string, so that when this
271 * flag is found while iterating right-to-left, a jump of -3 should be
272 * used to get at the bitmask of the previous event. (In the example
273 * above, this flag is present for the events for `bb` and `ccc`, but not
274 * `a` because it is not preceded by a string).
275 *
276 * Likewise, to signify that the current event is *followed* by a string,
277 * there is the mask @ref ievt::WSTR, which is a mask of all the flags of
278 * events that have a string: @ref ievt::SCLR, @ref ievt::ALIA, @ref
279 * ievt::ANCH and @ref ievt::TAG_. While iterating left-to-right in the
280 * array, presence of any of the bits in the mask @ref ievt::WSTR means
281 * that a jump of +3 should be employed to get at the bitmask of the next
282 * event.
283 *
284 * Here's another example with the result of parsing `a: bb`
285 * ```c++
286 * const DataType arr[] = { // result of parsing: `a: bb`
287 * BSTR, // begin stream
288 * BDOC, // begin doc
289 * VAL_|BMAP|BLCK, // begin map as val, block
290 * KEY_|SCLR|PLAI, 0, 1, // key scalar, plain style: "a" starts at offset 0 and has length 1
291 * VAL_|SCLR|PLAI|PSTR, 3, 2, // val scalar, plain style: "bb" starts at offset 3 and has length 2
292 * EMAP|PSTR, // end map
293 * EDOC, // end doc
294 * ESTR, // end stream
295 * };
296 * ```
297 *
298 * Typical code to iterate left-to-right over the array will look like
299 * this:
300 *
301 * ```c++
302 * // source buffer, modified in place during parsing (IMPORTANT!)
303 * substr src = ...;
304 * substr arena = ...; // arena used for scalars/tags that are extended during filtering
305 * // events resulting from parsing
306 * const int events[] = {...};
307 * int events_size = ...;
308 * for(int i = 0; i < events_size; ++i)
309 * {
310 * if(events[i] & ievt::WSTR) // this event has a string following it
311 * {
312 * size_t offset = (size_t)events[i+1];
313 * size_t length = (size_t)events[i+2];
314 * csubstr region = (events[i] & ievt::AREN) ? arena : src; // is the string in the arena?
315 * csubstr str = region.sub(offset, length); // get the string
316 * ...
317 * i += 2; // skip the two ints of the string
318 * // (the jump is three places; the loop adds the other place)
319 * }
320 * else // this is a single-int event
321 * {
322 * ...
323 * }
324 * }
325 * ```
326 *
327 * This handler must be initialized with the input source buffer, the
328 * output arena, and the output event buffer. This handler will not take
329 * ownership nor attempt to resize the output buffer. If the size
330 * required for the output buffer or arena are larger than their actual
331 * size, parsing goes all the way to the end, determining the required buffer
332 * sizes without writing anything past the end of the respective
333 * buffer. After parsing is finished, the user must ensure that the
334 * buffer size was enough to accommodate all the data that needs to be
335 * written into it, or react accordingly (eg, throw an error, or resize
336 * the buffer then retry the parse).
337 *
338 * A couple of functions will be helpful to do this. After parsing, @ref
339 * EventHandlerInts::fits_buffers() must be used to verify that the
340 * output buffers were enough to accommodate the results. Then, @ref
341 * EventHandlerInts::required_size_events() and @ref
342 * EventHandlerInts::required_size_arena() can be used to retrieve the
343 * necessary information. To get an estimation of the number of events
344 * before parsing, see @ref estimate_events_ints_size().
345 *
346 * Typical code to parse YAML with this handler will look like this:
347 *
348 * ```c++
349 * csubstr filename = ...;
350 * substr src = ...;
351 * // estimate the size required for the events buffer,
352 * // overpredicting it to be safe.
353 * int estimated_size = extra::estimate_events_ints_size(src);
354 * extra::EventHandlerInts handler;
355 * ParseEngine<extra::EventHandlerInts> parser(&handler);
356 * // example with a vector
357 * std::vector<int> evts;
358 * // ensure we have a fighting chance to accommodate the events
359 * evts.resize((size_t)estimated_size);
360 * // arena to place scalars/tags that may have been extended after filtering
361 * std::vector<char> arena;
362 * arena.resize(src.len); // this is generally enough
363 * // initialize the handler
364 * handler.reset(src, arena, evts.data(), (int)evts.size());
365 * // parse the YAML
366 * parser.parse_in_place_ev(filename, src);
367 * if(handler.fits_buffers()) // were the buffer sizes enough?
368 * {
369 * evts.resize((size_t)handler.required_size_events()); // trim the vector
370 * ...
371 * }
372 * else
373 * {
374 * // event size estimation underpredicted, or arena is too small!
375 * // for the first case, open an issue at
376 * // https://github.com/biojppm/rapidyaml/issues
377 * error("buffer could not accommodate all the events");
378 * // NOTE: see below for notes on doing a parse retry.
379 * }
380 * ```
381 *
382 * The result of @ref estimate_events_ints_size() (click to see more
383 * info) must be an overprediction: it overpredicts for every single
384 * case among the many hundreds covered in the unit tests. This is
385 * deliberate, and aims at ensuring that a retry parse is not
386 * needed. But conceivably, it may underpredict in some instances not
387 * found in our tests. What to do then?
388 *
389 * First, [open an issue](https://github.com/biojppm/rapidyaml/issues) to
390 * allow the estimation to be improved! Second, there are two ways to
391 * handle this situation in code:
392 *
393 * 1) throw an error (as sketched above)
394 *
395 * 2) grow the buffer to the required size (see @ref
396 * EventHandlerInts::required_size_events()), and then parse
397 * again
398 *
399 * If your code must be able to handle any case including where the
400 * prediction undershoots before the estimate function is fixed (after
401 * you open the issue), that is, if you are considering a parse retry,
402 * there is something important that needs attention. The YAML source
403 * buffer is mutated in-place during the parse, and cannot be used to
404 * parse again. So if you want to retry, you need to keep a pristine
405 * copy of the source, and use it for the retry:
406 *
407 * ```c++
408 * const std::string src = ...; // the YAML code to be parsed
409 * std::string parsed_src = src; // this is where we will parse (filter during parsing)
410 * std::vector<int> evts((size_t)estimated_size); // ensure we have a fighting chance to accommodate the events
411 * std::vector<char> arena(src.size()); // ensure we have a fighting chance to accommodate the arena strings
412 * ParseEngine<extra::EventHandlerInts> parser(&handler);
413 * handler.reset(to_substr(parsed_src), to_substr(arena), evts.data(), (int)evts.size());
414 * parser.parse_in_place_ev(filename, to_substr(parsed_src));
415 * if(handler.fits_buffers()) // were the buffer sizes enough?
416 * {
417 * evts.resize((size_t)handler.required_size_events()); // trim the vector
418 * ...
419 * }
420 * else
421 * {
422 * evts.resize((size_t)handler.required_size_events()); // buffer size was not enough.
423 * arena.resize(handler.required_size_arena()); // buffer size was not enough.
424 * // copy again
425 * parsed_src = src;
426 * // retry parse
427 * handler.reset(to_substr(parsed_src), to_substr(arena), evts.data(), (int)evts.size());
428 * parser.parse_in_place_ev(filename, to_substr(parsed_src));
429 * assert((size_t)handler.fits_buffers()); // must always be true
430 * }
431 * ```
432 *
433 * When bringing this to other programming languages, the semantics
434 * will be very similar to this.
435 */
436struct EventHandlerInts : public c4::yml::EventHandlerStack<EventHandlerInts, EventHandlerIntsState>
437{
438
439 /** @name types
440 * @{ */
441
443 using state = EventHandlerIntsState; // our internal state must inherit from parser state
445
446 /** @} */
447
448public:
449
450 /** @cond dev */
451 ievt::DataType * m_evt;
452 int32_t m_evt_pos;
453 int32_t m_evt_prev;
454 int32_t m_evt_size;
455 substr m_arena;
456 size_t m_arena_pos;
457 id_type m_curr_doc;
458 TagDirectives m_tag_directives;
459 TagCache m_tag_cache;
460
461 // undefined at the end
462 #define _enable_(bits) _enable__<bits>()
463 #define _disable_(bits) _disable__<bits>()
464 #define _has_any_(bits) _has_any__<bits>()
465 /** @endcond */
466
467public:
468
469 /** @name construction and resetting
470 * @{ */
471
474 {
475 reset(substr{}, substr{}, nullptr, 0);
476 }
481
482 void reset(substr str, substr arena, ievt::DataType *dst, int32_t dst_size)
483 {
486 m_curr->evt_type = {};
487 m_curr->evt_id = 0;
488 m_arena = arena;
489 m_arena_pos = 0;
490 m_src = str;
491 m_evt = dst;
492 m_evt_size = dst_size;
493 m_evt_pos = 0;
494 m_evt_prev = 0;
495 m_curr_doc = 0;
496 m_tag_directives.clear();
497 m_tag_cache.clear();
498 }
499
500 /** get the size needed for the event buffer from the previous parse
501 * @warning this is valid only until the next parse */
503 {
504 return m_evt_pos;
505 }
506
507 /** get the size needed for the arena from the previous parse
508 * @warning this is valid only until the next parse */
509 size_t required_size_arena() const
510 {
511 return m_arena_pos;
512 }
513
514 /** Predicate to test if the event and arena buffers successfully
515 * accomodated all the parse events.
516 *
517 * @warning this is valid only until the next parse */
518 bool fits_buffers() const
519 {
520 return m_evt_pos <= m_evt_size && m_arena_pos <= m_arena.len;
521 }
522
/** Intentionally a no-op: reserving the arena does not apply to this
 * handler. The argument is ignored. */
void reserve_arena(int /*arena_size*/) {}
527
528 C4_ALWAYS_INLINE TagDirectives &tag_directives() { return m_tag_directives; }
529 C4_ALWAYS_INLINE TagCache &tag_cache() { return m_tag_cache; }
530
531 /** @} */
532
533public:
534
535 /** @name parse events
536 * @{ */
537
538 void start_parse(const char* filename, substr src)
539 {
540 _RYML_ASSERT_BASIC_(m_stack.m_callbacks, src.str == m_src.str);
541 _RYML_ASSERT_BASIC_(m_stack.m_callbacks, src.len == m_src.len);
542 this->_stack_start_parse(filename, src);
543 }
544
546 {
547 this->_stack_finish_parse();
548 }
549
551 {
552 while(m_stack.size() > 1)
553 _pop();
554 }
555
556 /** @} */
557
558public:
559
560 /** @name YAML stream events */
561 /** @{ */
562
564 {
566 }
567
569 {
571 }
572
573 /** @} */
574
575public:
576
577 /** @name YAML document events */
578 /** @{ */
579
580 /** implicit doc start (without ---) */
582 {
583 _c4dbgpf("{}/{}: begin_doc", m_evt_pos, m_evt_size);
586 {
587 _c4dbgp("push!");
588 _push();
589 _enable_(DOC);
590 }
591 }
592 /** implicit doc end (without ...) */
593 void end_doc()
594 {
595 _c4dbgpf("{}/{}: end_doc", m_evt_pos, m_evt_size);
598 {
599 _c4dbgp("pop!");
600 _pop();
601 }
602 ++m_curr_doc;
603 }
604
605 /** explicit doc start, with --- */
607 {
608 _c4dbgpf("{}/{}: begin_doc_expl", m_evt_pos, m_evt_size);
610 _c4dbgp("push!");
611 _push();
612 _enable_(DOC);
613 }
614 /** explicit doc end, with ... */
616 {
617 _c4dbgpf("{}/{}: end_doc_expl", m_evt_pos, m_evt_size);
620 {
621 _c4dbgp("pop!");
622 _pop();
623 }
624 ++m_curr_doc;
625 }
626
627 /** @} */
628
629public:
630
631 /** @name YAML map functions */
632 /** @{ */
633
635 {
636 _c4dbgpf("{}/{}: bmap key flow", m_evt_pos, m_evt_size);
640 _push();
641 }
643 {
644 _c4dbgpf("{}/{}: bmap key block", m_evt_pos, m_evt_size);
648 _push();
649 }
650
652 {
653 _c4dbgpf("{}/{}: bmap flow", m_evt_pos, m_evt_size);
657 _push();
658 }
660 {
661 _c4dbgpf("{}/{}: bmap block", m_evt_pos, m_evt_size);
665 _push();
666 }
667
669 {
670 _pop();
672 }
673
674 void end_map_flow(bool /*multiline*/)
675 {
676 _pop();
678 }
679
680 /** @} */
681
682public:
683
684 /** @name YAML seq events */
685 /** @{ */
686
688 {
689 _c4dbgpf("{}/{}: bseq key flow", m_evt_pos, m_evt_size);
693 _push();
694 }
696 {
697 _c4dbgpf("{}/{}: bseq key block", m_evt_pos, m_evt_size);
701 _push();
702 }
703
705 {
706 _c4dbgpf("{}/{}: bseq flow", m_evt_pos, m_evt_size);
710 _push();
711 }
713 {
714 _c4dbgpf("{}/{}: bseq block", m_evt_pos, m_evt_size);
718 _push();
719 }
720
722 {
723 _pop();
725 }
726
727 void end_seq_flow(bool /*multiline*/)
728 {
729 _pop();
731 }
732
733 /** @} */
734
735public:
736
737 /** @name YAML structure events */
738 /** @{ */
739
741 {
742 _RYML_ASSERT_BASIC_(m_stack.m_callbacks, m_parent);
743 m_curr->evt_type = {};
744 }
745
746 /** @} */
747
748public:
749
750 /** @name YAML scalar events */
751 /** @{ */
752
753
754 C4_ALWAYS_INLINE void set_key_scalar_plain_empty()
755 {
756 _c4dbgpf("{}/{}: set_key_scalar_plain_empty", m_evt_pos, m_evt_size);
759 }
760 C4_ALWAYS_INLINE void set_val_scalar_plain_empty()
761 {
762 _c4dbgpf("{}/{}: set_val_scalar_plain_empty", m_evt_pos, m_evt_size);
765 }
766
767
768 C4_ALWAYS_INLINE void set_key_scalar_plain(csubstr scalar)
769 {
770 _c4dbgpf("{}/{}: set_key_scalar_plain: @{} [{}]~~~{}~~~", m_evt_pos, m_evt_size, scalar.str-m_src.str, scalar.len, scalar);
773 }
774 C4_ALWAYS_INLINE void set_val_scalar_plain(csubstr scalar)
775 {
776 _c4dbgpf("{}/{}: set_val_scalar_plain: @{} [{}]~~~{}~~~", m_evt_pos, m_evt_size, scalar.str-m_src.str, scalar.len, scalar);
779 }
780
781
782 C4_ALWAYS_INLINE void set_key_scalar_dquoted(csubstr scalar)
783 {
784 _c4dbgpf("{}/{}: set_key_scalar_dquo: @{} [{}]~~~{}~~~", m_evt_pos, m_evt_size, scalar.str?size_t(scalar.str-m_src.str):m_src.len, scalar.len, scalar.str?scalar:csubstr{});
787 }
788 C4_ALWAYS_INLINE void set_val_scalar_dquoted(csubstr scalar)
789 {
790 _c4dbgpf("{}/{}: set_val_scalar_dquo: @{} [{}]~~~{}~~~", m_evt_pos, m_evt_size, scalar.str?size_t(scalar.str-m_src.str):m_src.len, scalar.len, scalar.str?scalar:csubstr{});
793 }
794
795
796 C4_ALWAYS_INLINE void set_key_scalar_squoted(csubstr scalar)
797 {
798 _c4dbgpf("{}/{}: set_key_scalar_squo: @{} [{}]~~~{}~~~", m_evt_pos, m_evt_size, scalar.str-m_src.str, scalar.len, scalar);
801 }
802 C4_ALWAYS_INLINE void set_val_scalar_squoted(csubstr scalar)
803 {
804 _c4dbgpf("{}/{}: set_val_scalar_squo: @{} [{}]~~~{}~~~", m_evt_pos, m_evt_size, scalar.str-m_src.str, scalar.len, scalar);
807 }
808
809
810 C4_ALWAYS_INLINE void set_key_scalar_literal(csubstr scalar)
811 {
812 _c4dbgpf("{}/{}: set_key_scalar_literal: @{} [{}]~~~{}~~~", m_evt_pos, m_evt_size, scalar.str?size_t(scalar.str-m_src.str):m_src.len, scalar.len, scalar.str?scalar:csubstr{});
815 }
816 C4_ALWAYS_INLINE void set_val_scalar_literal(csubstr scalar)
817 {
818 _c4dbgpf("{}/{}: set_val_scalar_literal: @{} [{}]~~~{}~~~", m_evt_pos, m_evt_size, scalar.str?size_t(scalar.str-m_src.str):m_src.len, scalar.len, scalar.str?scalar:csubstr{});
821 }
822
823
824 C4_ALWAYS_INLINE void set_key_scalar_folded(csubstr scalar)
825 {
826 _c4dbgpf("{}/{}: set_key_scalar_folded: @{} [{}]~~~{}~~~", m_evt_pos, m_evt_size, scalar.str?size_t(scalar.str-m_src.str):m_src.len, scalar.len, scalar.str?scalar:csubstr{});
829 }
830 C4_ALWAYS_INLINE void set_val_scalar_folded(csubstr scalar)
831 {
832 _c4dbgpf("{}/{}: set_val_scalar_folded: @{} [{}]~~~{}~~~", m_evt_pos, m_evt_size, scalar.str?size_t(scalar.str-m_src.str):m_src.len, scalar.len, scalar.str?scalar:csubstr{});
835 }
836
837
838 C4_ALWAYS_INLINE void mark_key_scalar_unfiltered()
839 {
840 _c4dbgpf("{}/{}: mark_key_scalar_unfiltered", m_evt_pos, m_evt_size);
841 if(m_evt_pos < m_evt_size)
842 m_evt[m_evt_pos] |= ievt::UNFILT;
843 }
844 C4_ALWAYS_INLINE void mark_val_scalar_unfiltered()
845 {
846 _c4dbgpf("{}/{}: mark_val_scalar_unfiltered", m_evt_pos, m_evt_size);
847 if(m_evt_pos < m_evt_size)
848 m_evt[m_evt_pos] |= ievt::UNFILT;
849 }
850
851 /** @} */
852
853private:
854
855 /** @cond dev*/
/* Write the string payload of the event stored at index `i`.
 *
 *  - m_evt[i+1] receives the string offset: relative to m_src when the
 *    string lies inside the source buffer; otherwise the event is
 *    flagged with ievt::AREN and the offset is relative to m_arena.
 *  - m_evt[i+2] receives the string length.
 *  - m_evt[i+3] (the slot following this event) is initialized to
 *    ievt::PSTR, marking it as preceded by a string.
 *
 * The caller must ensure that i + 3 < m_evt_size (asserted below).
 *
 * The body is wrapped in do { } while(0) and the arguments are
 * parenthesized so that the macro expands to a single statement and is
 * safe under an unbraced if/else. Both arguments are evaluated more
 * than once: pass only side-effect-free expressions. */
#define _add_scalar_(i, scalar)                                         \
    do {                                                                \
        _c4dbgpf("{}/{}: scalar!", (i), m_evt_size);                    \
        _RYML_ASSERT_BASIC_(m_stack.m_callbacks, _is_sub_(scalar));     \
        _RYML_ASSERT_BASIC_(m_stack.m_callbacks, m_evt[(i)] & ievt::WSTR); \
        _RYML_ASSERT_BASIC_(m_stack.m_callbacks, (i) + 3 < m_evt_size); \
        if(C4_LIKELY((scalar).is_sub(m_src)))                           \
        {                                                               \
            m_evt[(i) + 1] = (ievt::DataType)((scalar).str - m_src.str); \
        }                                                               \
        else                                                            \
        {                                                               \
            m_evt[(i)] |= ievt::AREN;                                   \
            m_evt[(i) + 1] = (ievt::DataType)((scalar).str - m_arena.str); \
            _c4dbgpf("{}/{}: arena! ->{}", (i), m_evt_size, m_evt[(i) + 1]); \
        }                                                               \
        m_evt[(i) + 2] = (ievt::DataType)(scalar).len;                  \
        m_evt[(i) + 3] = ievt::PSTR;                                    \
    } while(0)
873 /** @endcond */
874
875public:
876
877 /** @name YAML anchor/reference events */
878 /** @{ */
879
881 {
882 _c4dbgpf("{}/{}: set_key_anchor: {}", m_evt_pos, m_evt_size, anchor);
883 _RYML_ASSERT_BASIC_(m_stack.m_callbacks, !_has_any_(KEYREF));
884 _enable_(c4::yml::KEYANCH);
885 if(m_evt_pos + 3 < m_evt_size)
886 {
887 m_evt[m_evt_pos] |= ievt::KEY_|ievt::ANCH;
888 _add_scalar_(m_evt_pos, anchor);
889 }
890 m_evt_prev = m_evt_pos;
891 m_evt_pos += 3;
892 }
894 {
895 _c4dbgpf("{}/{}: set_val_anchor: {}", m_evt_pos, m_evt_size, anchor);
896 _RYML_ASSERT_BASIC_(m_stack.m_callbacks, !_has_any_(VALREF));
897 _enable_(c4::yml::VALANCH);
898 if(m_evt_pos + 3 < m_evt_size)
899 {
900 m_evt[m_evt_pos] |= ievt::VAL_|ievt::ANCH;
901 _add_scalar_(m_evt_pos, anchor);
902 }
903 m_evt_prev = m_evt_pos;
904 m_evt_pos += 3;
905 }
906
908 {
909 _c4dbgpf("{}/{}: set_key_ref: {}", m_evt_pos, m_evt_size, ref);
910 _RYML_ASSERT_BASIC_(m_stack.m_callbacks, ref.begins_with('*'));
911 if(C4_UNLIKELY(_has_any_(KEYANCH)))
912 _RYML_ERR_PARSE_(m_stack.m_callbacks, m_curr->pos, "key cannot have both anchor and ref");
914 _send_str_(ref.sub(1), ievt::KEY_|ievt::ALIA); // skip the leading *
915 }
917 {
918 _c4dbgpf("{}/{}: set_val_ref: {}", m_evt_pos, m_evt_size, ref);
919 _RYML_ASSERT_BASIC_(m_stack.m_callbacks, ref.begins_with('*'));
920 if(C4_UNLIKELY(_has_any_(VALANCH)))
921 _RYML_ERR_PARSE_(m_stack.m_callbacks, m_curr->pos, "val cannot have both anchor and ref");
923 _send_str_(ref.sub(1), ievt::VAL_|ievt::ALIA); // skip the leading *
924 }
925
926 /** @} */
927
928public:
929
930 /** @name YAML tag events */
931 /** @{ */
932
934 {
935 _c4dbgpf("{}/{}: set key tag [{}]~~~{}~~~", m_evt_pos, m_evt_size, tag.len, tag.str ? tag : csubstr("(arena full)"));
936 _RYML_ASSERT_BASIC_(m_stack.m_callbacks, _is_sub_(tag));
937 _enable_(c4::yml::KEYTAG);
939 }
941 {
942 _c4dbgpf("{}/{}: set val tag [{}]~~~{}~~~", m_evt_pos, m_evt_size, tag.len, tag.str ? tag : csubstr("(arena full)"));
943 _RYML_ASSERT_BASIC_(m_stack.m_callbacks, _is_sub_(tag));
944 _enable_(c4::yml::VALTAG);
946 }
947
948 /** @} */
949
950public:
951
952 /** @name YAML directive events */
953 /** @{ */
954
955 void add_directive_yaml(csubstr yaml_version)
956 {
957 _c4dbgpf("{}/{}: %YAML directive! version={}", m_evt_pos, m_evt_size, yaml_version);
958 _send_str_(yaml_version, ievt::YAML);
959 }
960
961 void add_directive_tag(csubstr handle, csubstr prefix)
962 {
963 _c4dbgpf("{}/{}: %TAG directive! handle={} prefix={} doc_id={}", m_evt_pos, m_evt_size, handle, prefix, m_curr_doc);
964 if(C4_UNLIKELY(!m_tag_directives.add(handle, prefix, m_curr_doc)))
965 _RYML_ERR_PARSE_(m_stack.m_callbacks, m_curr->pos, "too many %TAG directives");
966 _send_str_(handle, ievt::TAGH);
967 _send_str_(prefix, ievt::TAGP);
968 }
969
970 /** @} */
971
972public:
973
974 /** @name YAML structure events */
975 /** @{ */
976
977 /** set the previous val as the first key of a new map, with flow style.
978 *
979 * See the documentation for @ref doc_event_handlers, which has
980 * important notes about this event.
981 */
983 {
984 _c4dbgpf("{}/{}: prev={} actually_val_is_first_key_of_new_map_flow", m_evt_pos, m_evt_size, m_evt_prev);
985 _RYML_ASSERT_BASIC_(m_stack.m_callbacks, m_evt_pos > 2);
986 _RYML_ASSERT_BASIC_(m_stack.m_callbacks, m_evt_prev > 0);
987 // BEFORE
988 // ... flag start len (free)
989 // | |
990 // prev curr
991 // AFTER
992 // ... BMAP flag start len (free)
993 // | |
994 // prev curr
995 if(m_evt_pos < m_evt_size)
996 {
997 if(m_evt[m_evt_prev] & ievt::WSTR)
998 {
999 _c4dbgpf("{}/{}: WSTR", m_evt_pos, m_evt_size);
1000 _RYML_ASSERT_BASIC_(m_stack.m_callbacks, m_evt_prev > 0);
1001 int32_t pos = _extend_left_to_include_tag_and_or_anchor(m_evt_prev);
1002 if(m_evt_pos + 1 < m_evt_size)
1003 {
1004 for(int32_t i = pos; i <= m_evt_prev; i = _next(i))
1005 {
1006 m_evt[i] |= ievt::KEY_;
1007 m_evt[i] &= ~ievt::VAL_;
1008 }
1009 int32_t num_move = m_evt_pos + 1 - pos;
1010 _RYML_ASSERT_BASIC_(m_stack.m_callbacks, num_move > 0);
1011 memmove(m_evt + pos + 1, m_evt + pos, (size_t)num_move * sizeof(ievt::DataType));
1012 }
1013 m_evt[pos] = ievt::BMAP|ievt::FLOW|ievt::VAL_;
1014 // move PSTR to prev
1015 if(m_evt[pos + 1] & ievt::PSTR)
1016 {
1017 m_evt[pos ] |= ievt::PSTR;
1018 m_evt[pos + 1] &= ~ievt::PSTR;
1019 }
1020 }
1021 else
1022 {
1023 _c4dbgpf("{}/{}: container key", m_evt_pos, m_evt_size);
1024 _RYML_ASSERT_BASIC_(m_stack.m_callbacks, (m_evt[m_evt_prev] & (ievt::EMAP|ievt::ESEQ)));
1025 int32_t pos;
1026 _c4dbgpf("{}/{}: find matching open for {}", m_evt_pos, m_evt_size, m_evt_prev);
1027 if((m_evt[m_evt_prev] & ievt::EMAP) == ievt::EMAP)
1028 {
1029 pos = _find_matching_open(ievt::BMAP, ievt::EMAP, m_evt_prev);
1030 }
1031 else
1032 {
1033 _RYML_ASSERT_BASIC_(m_stack.m_callbacks, (m_evt[m_evt_prev] & ievt::ESEQ));
1034 pos = _find_matching_open(ievt::BSEQ, ievt::ESEQ, m_evt_prev);
1035 }
1036 _c4dbgpf("{}/{}: matching open for {}={}", m_evt_pos, m_evt_size, m_evt_prev, pos);
1037 _RYML_CHECK_BASIC_(m_stack.m_callbacks, pos >= 0); // internal error
1038 _RYML_CHECK_BASIC_(m_stack.m_callbacks, pos < m_evt_prev); // internal error
1039 _RYML_ASSERT_BASIC_(m_stack.m_callbacks, (m_evt[pos] & ievt::ESEQ) == (m_evt[m_evt_prev] & ievt::BSEQ));
1040 _RYML_ASSERT_BASIC_(m_stack.m_callbacks, (m_evt[pos] & ievt::EMAP) == (m_evt[m_evt_prev] & ievt::BMAP));
1041 // shift the array one position to the right, starting at pos
1042 int32_t posp1 = pos + 1;
1043 if(m_evt_pos + 1 < m_evt_size)
1044 {
1045 int32_t num_move = m_evt_pos + 1 - pos;
1046 _RYML_ASSERT_BASIC_(m_stack.m_callbacks, num_move > 0);
1047 memmove(m_evt + posp1, m_evt + pos, (size_t)num_move * sizeof(ievt::DataType));
1048 }
1049 _RYML_ASSERT_BASIC_(m_stack.m_callbacks, posp1 < m_evt_pos);
1050 // start the map
1051 m_evt[pos] = ievt::BMAP|ievt::FLOW|ievt::VAL_;
1052 // set next as key, not val
1053 m_evt[posp1] |= ievt::KEY_;
1054 m_evt[posp1] &= ~ievt::VAL_;
1055 // move PSTR to pos
1056 if(m_evt[posp1] & ievt::PSTR)
1057 {
1058 m_evt[pos] |= ievt::PSTR;
1059 m_evt[posp1] &= ~ievt::PSTR;
1060 }
1061 }
1062 }
1063 m_curr->evt_id = m_evt_pos - 2;
1064 ++m_evt_prev;
1065 ++m_evt_pos;
1067 _push();
1068 }
1069
1070 /** like its flow counterpart, but this function can only be
1071 * called after the end of a flow-val at root or doc level.
1072 *
1073 * See the documentation for @ref doc_event_handlers, which has
1074 * important notes about this event.
1075 */
1077 {
1078 _c4dbgpf("{}/{}: prev={} actually_val_is_first_key_of_new_map_block", m_evt_pos, m_evt_size, m_evt_prev);
1079 if(m_evt_pos < m_evt_size)
1080 {
1081 // interpolate BMAP|VAL|BLCK after the last BDOC
1082 int32_t pos = _find_last_bdoc(m_evt_pos);
1083 if(pos >= 0)
1084 {
1085 _RYML_ASSERT_BASIC_(m_stack.m_callbacks, pos < m_evt_size);
1086 _RYML_ASSERT_BASIC_(m_stack.m_callbacks, pos < m_evt_pos);
1087 _RYML_ASSERT_BASIC_(m_stack.m_callbacks, (m_evt[pos] & ievt::BDOC) == ievt::BDOC);
1088 if(m_evt_pos < m_evt_size)
1089 {
1090 ++pos; // add 1 to write after BDOC
1091 int32_t num_move = m_evt_pos - pos;
1092 int32_t posp1 = pos + 1;
1093 _RYML_ASSERT_BASIC_(m_stack.m_callbacks, ((m_evt[pos] & ievt::BSEQ) == ievt::BSEQ) || ((m_evt[pos] & ievt::BMAP) == ievt::BMAP));
1094 _RYML_ASSERT_BASIC_(m_stack.m_callbacks, num_move > 0);
1095 _RYML_ASSERT_BASIC_(m_stack.m_callbacks, 0 == (m_evt[posp1] & ievt::PSTR));
1096 memmove(m_evt + posp1, m_evt + pos, (size_t)num_move * sizeof(ievt::DataType));
1097 m_evt[pos] = ievt::VAL_|ievt::BMAP|ievt::BLCK;
1098 m_evt[posp1] &= ~ievt::VAL_;
1099 m_evt[posp1] |= ievt::KEY_;
1100 }
1101 }
1102 }
1103 ++m_curr->evt_id;
1104 ++m_evt_prev;
1105 ++m_evt_pos;
1106 _push();
1107 }
1108
1109 /** @} */
1110
1111public:
1112
1113 /** @name arena events */
1114 /** @{ */
1115
1117 {
1118 return m_arena.first(m_arena_pos < m_arena.len ? m_arena_pos : m_arena.len);
1119 }
1120 substr arena_rem() // NOLINT
1121 {
1122 return C4_LIKELY(m_arena_pos <= m_arena.len) ? m_arena.sub(m_arena_pos) : m_arena.last(0);
1123 }
1124 /** this may fail, in which case an empty string is returned */
1126 {
1127 substr s = arena_rem();
1128 if(C4_LIKELY(len <= s.len))
1129 s.len = len;
1130 else
1131 s.str = nullptr;
1132 m_arena_pos += len;
1133 return s;
1134 }
1135
1136 /** @} */
1137
1138public:
1139
1140 /** @name implementation helpers */
1141 /** @{ */
1142
1143 /** push a new parent, add a child to the new parent, and set the
1144 * child as the current node */
1145 void _push()
1146 {
1147 _stack_push();
1148 m_curr->evt_type = {};
1149 }
1150
1151 /** end the current scope */
1152 void _pop()
1153 {
1154 _stack_pop();
1155 }
1156
1157 template<c4::yml::type_bits bits> C4_ALWAYS_INLINE void _enable__() noexcept
1158 {
1159 m_curr->evt_type |= bits;
1160 }
1161 template<c4::yml::type_bits bits> C4_ALWAYS_INLINE void _disable__() noexcept
1162 {
1163 m_curr->evt_type &= ~bits;
1164 }
1165 template<c4::yml::type_bits bits> C4_ALWAYS_INLINE bool _has_any__() const noexcept
1166 {
1167 return (m_curr->evt_type & bits) != c4::yml::type_bits(0);
1168 }
1169
1170 C4_ALWAYS_INLINE int32_t _next(int32_t pos) const noexcept
1171 {
1172 _RYML_ASSERT_BASIC_(m_stack.m_callbacks, pos < m_evt_size);
1173 return pos + ((m_evt[pos] & ievt::WSTR) ? 3 : 1);
1174 }
1175
1176 C4_ALWAYS_INLINE int32_t _prev(int32_t pos) const noexcept
1177 {
1178 _RYML_ASSERT_BASIC_(m_stack.m_callbacks, pos < m_evt_size);
1179 return pos - ((m_evt[pos] & ievt::PSTR) ? 3 : 1);
1180 }
1181
1182 C4_ALWAYS_INLINE bool _is_sub_(csubstr str) const noexcept
1183 {
1184 return (!str.str || str.is_sub(m_src) || str.is_sub(m_arena));
1185 }
1186
1187 C4_ALWAYS_INLINE void _send_flag_only_(ievt::DataType flags)
1188 {
1189 _c4dbgpf("{}/{}: flag only", m_evt_pos, m_evt_size);
1190 if(m_evt_pos < m_evt_size)
1191 m_evt[m_evt_pos] |= flags;
1192 m_curr->evt_id = m_evt_pos;
1193 m_evt_prev = m_evt_pos;
1194 ++m_evt_pos;
1195 if(m_evt_pos < m_evt_size)
1196 m_evt[m_evt_pos] = {};
1197 }
1198
1199 C4_ALWAYS_INLINE void _send_str_(csubstr scalar, ievt::DataType flags)
1200 {
1201 _c4dbgpf("{}/{}: send str", m_evt_pos, m_evt_size);
1202 if(m_evt_pos + 3 < m_evt_size)
1203 {
1204 m_evt[m_evt_pos] |= flags;
1205 _add_scalar_(m_evt_pos, scalar);
1206 }
1207 m_curr->evt_id = m_evt_pos;
1208 m_evt_prev = m_evt_pos;
1209 m_evt_pos += 3;
1210 }
1211
1213 {
1214 if(m_parent)
1215 m_parent->has_children = true;
1216 }
1217
1218 C4_ALWAYS_INLINE csubstr _get_latest_empty_scalar() const
1219 {
1220 // ideally we should search back in the latest event that has
1221 // a scalar, then select a zero-length scalar immediately
1222 // after that scalar. But this also works for now:
1223 return m_src.first(0);
1224 }
1225
1226 int32_t _find_last_bdoc(int32_t pos) const
1227 {
1228 _RYML_ASSERT_BASIC_(m_stack.m_callbacks, pos < m_evt_size); // it's safe to read from the array
1229 while(pos >= 0)
1230 {
1231 ievt::DataType e = m_evt[pos];
1232 if((e & ievt::BDOC) == ievt::BDOC)
1233 return pos;
1234 pos -= (e & ievt::PSTR) ? 3 : 1;
1235 }
1236 return -1; // LCOV_EXCL_LINE
1237 }
1238
1239 int32_t _find_matching_open(ievt::DataType open, ievt::DataType close, int32_t pos) const
1240 {
1241 _c4dbgpf("find_matching: start at {}", pos);
1242 _RYML_ASSERT_BASIC_(m_stack.m_callbacks, pos < m_evt_size);
1243 _RYML_ASSERT_BASIC_(m_stack.m_callbacks, (m_evt[pos] & close) == close);
1244 _RYML_ASSERT_BASIC_(m_stack.m_callbacks, (m_evt[pos] & open) == (close & ~ievt::END_));
1245 pos = _prev(pos); // don't count the starting close token
1246 uint32_t count = 0;
1247 while(pos >= 0)
1248 {
1249 ievt::DataType e = m_evt[pos];
1250 _c4dbgpf("find_matching: pos={} count={} e={}", pos, count, m_evt[pos]);
1251 if((e & close) == close)
1252 {
1253 _c4dbgpf(".............: pos={} close! count={} e={}", pos, count, m_evt[pos]);
1254 ++count;
1255 }
1256 else if((e & open) == open)
1257 {
1258 _c4dbgpf(".............: pos={} open! count={} e={}", pos, count, m_evt[pos]);
1259 if(!count)
1260 return pos;
1261 else
1262 --count;
1263 }
1264 pos = _prev(pos);
1265 }
1266 _c4dbgpf("find_matching: not found!", 0); // LCOV_EXCL_LINE
1267 return -1; // LCOV_EXCL_LINE
1268 }
1269
1271 {
1272 _RYML_ASSERT_BASIC_(m_stack.m_callbacks, pos < m_evt_size);
1273 int32_t prev = _prev(pos);
1274 while((prev > 0) && (m_evt[prev] & (ievt::TAG_|ievt::ANCH)))
1275 {
1276 _c4dbgpf("{}/{}: {} is anchor/tag. extend to {}", m_evt_pos, m_evt_size, prev, prev);
1277 pos = prev;
1278 prev = _prev(prev);
1279 }
1280 return pos;
1281 }
1282
1283 /** @} */
1284
1285#undef _enable_
1286#undef _disable_
1287#undef _has_any_
1288#undef _add_scalar_
1289
1290};
1291
1292/** @} */
1293
1294} // namespace extra
1295} // namespace yml
1296} // namespace c4
1297
1298
1299// NOLINTEND(hicpp-signed-bitwise)
1300C4_SUPPRESS_WARNING_GCC_CLANG_POP
1301
1302#endif /* _C4_YML_EXTRA_EVENT_HANDLER_INTS_HPP_ */
#define _has_any_(bits)
#define RYML_EXPORT
Definition export.hpp:18
Callbacks const & get_callbacks()
get the global callbacks
Definition common.cpp:94
int32_t estimate_events_ints_size(csubstr src)
Read YAML source and, without undergoing a full parse, estimate the size of the integer buffer requir...
uint32_t type_bits
the integral type necessary to cover all the bits for NodeType_e
Definition node_type.hpp:30
@ VALANCH
the val has an &anchor
Definition node_type.hpp:46
@ KEY_DQUO
mark key scalar as double quoted "
Definition node_type.hpp:69
@ VALREF
a *reference: the val references an &anchor
Definition node_type.hpp:44
@ VALNIL
the val is null (eg {a : } results in a null val)
Definition node_type.hpp:50
@ MAP
a map: a parent of KEYVAL/KEYSEQ/KEYMAP nodes
Definition node_type.hpp:39
@ KEY
is member of a map
Definition node_type.hpp:37
@ VAL_FOLDED
mark val scalar as multiline, block folded >
Definition node_type.hpp:66
@ KEYTAG
the key has a tag
Definition node_type.hpp:47
@ FLOW_SL
mark container with single-line flow style (seqs as '[val1,val2], maps as '{key: val,...
Definition node_type.hpp:60
@ VAL
a scalar: has a scalar (ie string) value, possibly empty. must be a leaf node, and cannot be MAP or S...
Definition node_type.hpp:38
@ VALTAG
the val has a tag
Definition node_type.hpp:48
@ SEQ
a seq: a parent of VAL/SEQ/MAP nodes
Definition node_type.hpp:40
@ VAL_SQUO
mark val scalar as single quoted '
Definition node_type.hpp:68
@ VAL_PLAIN
mark val scalar as plain scalar (unquoted, even when multiline)
Definition node_type.hpp:72
@ KEYREF
a *reference: the key references an &anchor
Definition node_type.hpp:43
@ BLOCK
mark container with block style (seqs as '- val ', maps as 'key: val')
Definition node_type.hpp:62
@ KEYANCH
the key has an &anchor
Definition node_type.hpp:45
@ VAL_DQUO
mark val scalar as double quoted "
Definition node_type.hpp:70
@ KEY_SQUO
mark key scalar as single quoted '
Definition node_type.hpp:67
@ VAL_LITERAL
mark val scalar as multiline, block literal |
Definition node_type.hpp:64
@ KEY_LITERAL
mark key scalar as multiline, block literal |
Definition node_type.hpp:63
@ KEY_PLAIN
mark key scalar as plain scalar (unquoted, even when multiline)
Definition node_type.hpp:71
@ KEY_FOLDED
mark key scalar as multiline, block folded >
Definition node_type.hpp:65
@ KEYNIL
the key is null (eg { : b} results in a null key)
Definition node_type.hpp:49
@ DOC
a document
Definition node_type.hpp:41
basic_substring< char > substr
a mutable string view
Definition substr.hpp:2356
basic_substring< const char > csubstr
an immutable string view
Definition substr.hpp:2357
int32_t DataType
data type for integer events.
EventFlags
enumeration of integer event bits.
@ SCLR
scalar (=VAL in test suite events)
@ LITL
scalar: block literal (|)
@ UNFILT
special flag to mark a scalar as unfiltered (when the parser is set not to filter).
@ EMAP
end map (-MAP in test suite events)
@ DQUO
scalar: double-quoted ("")
@ FOLD
scalar: block folded (>)
@ BMAP
begin map (+MAP in test suite events)
@ TAGH
tag directive, handle: \TAG <handle> ........
@ MASK
a mask of all bits in this enumeration
@ ESTR
end stream (-STR in test suite events)
@ BSTR
begin stream (+STR in test suite events)
@ BSEQ
begin seq (+SEQ in test suite events)
@ ESEQ
end seq (-SEQ in test suite events)
@ WSTR
WithSTRing: mask of all the events that encode a string following the event. For such events,...
@ FLOW
container: flow: [] for seqs or {} for maps
@ TAGP
tag directive, prefix: \TAG ........ <prefix>
@ VAL_
as value special flag to enable look-back in the event array. it signifies that the previous event ha...
@ BDOC
begin doc (+DOC in test suite events)
@ AREN
IMPORTANT. Marks events whose string was placed in the arena. This happens when the filtered string i...
@ YAML
yaml directive: \YAML <version>
@ EDOC
end doc (-DOC in test suite events)
@ LAST
the last flag defined above
@ EXPL
--- (with BDOC) or ... (with EDOC)
@ SQUO
scalar: single-quoted (')
@ RTOP
reading at top level
@ RUNK
reading unknown state (when starting): must determine whether scalar, map or seq
RYML_ID_TYPE id_type
The type of a node id in the YAML tree; to override the default type, define the macro RYML_ID_TYPE t...
Definition common.hpp:249
(Undefined by default) Use shorter error message from checks/asserts: do not show the check condition...
Definition common.cpp:14
bool begins_with(const C c) const noexcept
true if the first character of the string is c
Definition substr.hpp:851
size_t len
the length of the substring
Definition substr.hpp:218
basic_substring last(size_t num) const noexcept
return the last num elements: [len-num,len[
Definition substr.hpp:537
basic_substring first(size_t num) const noexcept
return the first num elements: [0,num[
Definition substr.hpp:530
basic_substring sub(size_t first) const noexcept
return [first,len[
Definition substr.hpp:503
C * str
a restricted pointer to the first character of the substring
Definition substr.hpp:216
A c-style callbacks class to customize behavior on errors or allocation.
Definition common.hpp:546
Use this class as a base for event handler implementations, to simplify the stack logic.
Accelerator structure to reduce memory requirements by enabling reuse of resolved tags.
Definition tag.hpp:71
void clear() noexcept
Definition tag.hpp:93
void clear() noexcept
Definition tag.cpp:373
TagDirective const * add(csubstr handle, csubstr prefix, id_type doc_id) noexcept
Definition tag.cpp:358
void begin_doc_expl()
explicit doc start, with ---
void end_doc_expl()
explicit doc end, with ...
void add_directive_tag(csubstr handle, csubstr prefix)
void start_parse(const char *filename, substr src)
int32_t _next(int32_t pos) const noexcept
bool fits_buffers() const
Predicate to test if the event and arena buffers successfully accommodated all the parse events.
int32_t _find_matching_open(ievt::DataType open, ievt::DataType close, int32_t pos) const
void begin_doc()
implicit doc start (without ---)
void _send_flag_only_(ievt::DataType flags)
substr alloc_arena(size_t len)
this may fail, in which case an empty string is returned
void actually_val_is_first_key_of_new_map_flow()
set the previous val as the first key of a new map, with flow style.
EventHandlerInts(c4::yml::Callbacks const &cb)
void actually_val_is_first_key_of_new_map_block()
like its flow counterpart, but this function can only be called after the end of a flow-val at root o...
int required_size_events() const
get the size needed for the event buffer from the previous parse
int32_t _extend_left_to_include_tag_and_or_anchor(int32_t pos) const
void _send_str_(csubstr scalar, ievt::DataType flags)
size_t required_size_arena() const
get the size needed for the arena from the previous parse
int32_t _prev(int32_t pos) const noexcept
int32_t _find_last_bdoc(int32_t pos) const
bool _is_sub_(csubstr str) const noexcept
void end_doc()
implicit doc end (without ...)
void add_directive_yaml(csubstr yaml_version)
void _push()
push a new parent, add a child to the new parent, and set the child as the current node
void reset(substr str, substr arena, ievt::DataType *dst, int32_t dst_size)