rapidyaml  0.12.1
parse and emit YAML, and do it fast
event_handler_ints.hpp
Go to the documentation of this file.
1 #ifndef _C4_YML_EXTRA_EVENT_HANDLER_INTS_HPP_
2 #define _C4_YML_EXTRA_EVENT_HANDLER_INTS_HPP_
3 
4 /** @file event_handler_ints.hpp An event handler that creates an
5  * integer buffer with a very compact representation of the YAML tree
6  * in a source buffer. This is not part of the main rapidyaml library.
7  *
8  * @see c4::yml::extra::ievt::EventFlags
9  * @see c4::yml::extra::EventHandlerInts
10  * */
11 
12 #ifndef RYML_SINGLE_HEADER
13 #ifndef _C4_YML_NODE_TYPE_HPP_
14 #include <c4/yml/node_type.hpp>
15 #endif
16 #ifndef _C4_YML_EVENT_HANDLER_STACK_HPP_
18 #endif
19 #ifndef _C4_YML_TAG_HPP_
20 #include <c4/yml/tag.hpp>
21 #endif
22 #ifndef _C4_YML_DETAIL_DBGPRINT_HPP_
23 #include <c4/yml/detail/dbgprint.hpp>
24 #endif
25 #endif
26 
27 // NOLINTBEGIN(hicpp-signed-bitwise)
28 
29 namespace c4 {
30 namespace yml {
31 namespace extra {
32 
33 /** @addtogroup doc_event_handlers
34  * @{ */
35 
36 namespace ievt {
37 
38 /** data type for integer events. This is set to a 32 bit signed
39  * integer to allow compatibility with a wide range of processing
40  * languages. */
41 using DataType = int32_t;
42 
43 /** enumeration of integer event bits. */
44 typedef enum : DataType {
45 
46  // Structure flags
47  KEY_ = (1 << 0), ///< as key
48  VAL_ = (1 << 1), ///< as value
49  /// special flag to enable look-back in the event array. it
50  /// signifies that the previous event has a string, meaning that
51  /// the jump back to that event is 3 positions. without this flag it
52  /// would be impossible to jump to the previous event.
53  /// see also @ref WSTR
54  PSTR = (1 << 2),
55  /// IMPORTANT. Marks events whose string was placed in the
56  /// arena. This happens when the filtered string is larger than the
57  /// original string in the YAML code (eg from tags that resolve to
58  /// a larger string, or from "\L" or "\P" in double quotes, which
59  /// expand from two to three bytes). Because of this size
60  /// expansion, the filtered string cannot be placed in the original
61  /// source and needs to be placed in the arena.
62  AREN = (1 << 3),
63 
64  // Event scopes
65  BEG_ = (1 << 5), ///< scope: begin
66  END_ = (1 << 6), ///< scope: end
67  SEQ_ = (1 << 7), ///< scope: seq
68  MAP_ = (1 << 8), ///< scope: map
69  DOC_ = (1 << 9), ///< scope: doc
70  EXPL = (1 << 10), ///< `---` (with BDOC) or `...` (with EDOC)
71  STRM = (1 << 11), ///< scope: stream
72  BSEQ = BEG_|SEQ_, ///< begin seq (+SEQ in test suite events)
73  ESEQ = END_|SEQ_, ///< end seq (-SEQ in test suite events)
74  BMAP = BEG_|MAP_, ///< begin map (+MAP in test suite events)
75  EMAP = END_|MAP_, ///< end map (-MAP in test suite events)
76  BSTR = BEG_|STRM, ///< begin stream (+STR in test suite events)
77  ESTR = END_|STRM, ///< end stream (-STR in test suite events)
78  BDOC = BEG_|DOC_, ///< begin doc (+DOC in test suite events)
79  EDOC = END_|DOC_, ///< end doc (-DOC in test suite events)
80 
81  // Single events
82  SCLR = (1 << 12), ///< scalar (=VAL in test suite events)
83  ALIA = (1 << 13), ///< *ref (reference)
84  ANCH = (1 << 14), ///< &anchor
85  TAG_ = (1 << 15), ///< !tag
86 
87  // Style flags
88  PLAI = (1 << 16), ///< scalar: plain
89  SQUO = (1 << 17), ///< scalar: single-quoted (')
90  DQUO = (1 << 18), ///< scalar: double-quoted ("")
91  LITL = (1 << 19), ///< scalar: block literal (|)
92  FOLD = (1 << 20), ///< scalar: block folded (>)
93  FLOW = (1 << 21), ///< container: flow: [] for seqs or {} for maps
94  BLCK = (1 << 22), ///< container: block
95 
96  // Directive flags
97  YAML = (1 << 23), ///< yaml directive: `\%YAML <version>`
98  TAGH = (1 << 24), ///< tag directive, handle: `\%TAG <handle> ........`
99  TAGP = (1 << 25), ///< tag directive, prefix: `\%TAG ........ <prefix>`
100 
101  /// special flag to mark a scalar as unfiltered (when the parser
102  /// is set not to filter).
103  UNFILT = (1 << 26),
104 
105  // Utility flags/masks
106  /// the last flag defined above
108  /// a mask of all bits in this enumeration
109  MASK = (LAST << 1) - 1,
110 
111  /// WithSTRing: mask of all the events that encode a string
112  /// following the event. For such events, the next two integers
113  /// will provide respectively the string's offset and length. See
114  /// also @ref PSTR.
116 
117 } EventFlags;
118 
119 } // namespace ievt
120 
121 /** @} */
122 
123 } // namespace extra
124 } // namespace yml
125 } // namespace c4
126 
127 
128 //-----------------------------------------------------------------------------
129 //-----------------------------------------------------------------------------
130 //-----------------------------------------------------------------------------
131 
132 namespace c4 {
133 namespace yml {
134 namespace extra {
135 
136 /** @addtogroup doc_event_handlers
137  * @{ */
138 
139 /** Read YAML source and, without undergoing a full parse, estimate
140  * the size of the integer buffer required for @ref
141  * EventHandlerInts. This estimation is meant to exceed the actual
142  * number of required events.
143  *
144  * @note This function must overpredict. It does so for every case in
145  * the hundreds/thousands of extensive tests of rapidyaml -- both for
146  * the YAML test suite and the internal cases. If you find a case
147  * where that does not hold, it is a bug. Please report it at
148  * https://github.com/biojppm/rapidyaml/issues! */
149 RYML_EXPORT int32_t estimate_events_ints_size(csubstr src);
150 
151 /** @} */
152 
153 } // namespace extra
154 } // namespace yml
155 } // namespace c4
156 
157 
158 //-----------------------------------------------------------------------------
159 //-----------------------------------------------------------------------------
160 //-----------------------------------------------------------------------------
161 
162 C4_SUPPRESS_WARNING_GCC_CLANG_PUSH
163 C4_SUPPRESS_WARNING_GCC_CLANG("-Wold-style-cast")
164 C4_SUPPRESS_WARNING_GCC("-Wuseless-cast")
165 
166 namespace c4 {
167 namespace yml {
168 namespace extra {
169 
170 
171 /** @addtogroup doc_event_handlers
172  * @{ */
173 
174 /** @cond dev */
/// Per-level state used by EventHandlerInts: everything in
/// c4::yml::ParserState plus two handler-specific fields.
175 struct EventHandlerIntsState : public c4::yml::ParserState
176 {
/// cleared to {} by EventHandlerInts::reset() and add_sibling().
/// NOTE(review): presumably the type bits of the event at this level -- confirm against the code that sets it.
177  c4::yml::type_bits evt_type;
/// set to 0 by EventHandlerInts::reset().
/// NOTE(review): presumably an index into the event buffer for this level -- confirm against the code that sets it.
178  int32_t evt_id;
179 };
180 /** @endcond */
181 
182 
183 /** A parser event handler that creates a compact representation of
184  * the YAML tree in a contiguous buffer of integers. The integers are
185  * @ref ievt::EventFlags containing masks (to represent events),
186  * interleaved with offset+length (to represent strings in the source
187  * buffer).
188  *
189  * This is meant for use by other programming languages, and supports
190  * container keys (unlike the ryml tree). It parses faster than the ryml
191  * tree parser, because the resulting data structure is much simpler.
192  *
193  * The resulting integer buffer is a linear array of integers containing
194  * events (as a mask of @ref ievt::EventFlags), which in some cases (see
195  * @ref ievt::WSTR) are followed by an encoded string (encoded as an
196  * offset and length to the parsed source buffer).
197  *
198  * For example, parsing `[a, bb, ccc]` results in the following event
199  * buffer (grouped to highlight the event sequence structure):
200  *
201  * ```c++
202  * using namespace c4::yml::extra::ievt;
203  * const DataType arr[] = { // result of parsing: [a, bb, ccc]
204  * BSTR, // begin stream
205  * BDOC, // begin doc
206  * VAL_|BSEQ|FLOW, // begin seq as val, flow
207  * VAL_|SCLR|PLAI, 1, 1, // val scalar, plain style: "a" starts at offset 1 and has length 1
208  * VAL_|SCLR|PLAI|PSTR, 4, 2, // val scalar, plain style: "bb" starts at offset 4 and has length 2; preceded by a string event (PSTR)
209  * VAL_|SCLR|PLAI|PSTR, 8, 3, // val scalar, plain style: "ccc" starts at offset 8 and has length 3; preceded by a string event (PSTR)
210  * ESEQ|PSTR, // end seq; preceded by a string event (PSTR)
211  * EDOC, // end doc
212  * ESTR, // end stream
213  * };
214  * ```
215  *
216  * Here is a sketch clarifying the meaning of this event sequence:
217  *
218 @code
219 source : [a, bb, ccc]
220  has a string........
221  | offset "a"
222  | | length "a"
223  | | |
224  event0 event1 event2 [ event3 "a"......|..|
225  | | | | | |
226 (start) +--------+-------+------------------+---------------+--+-----> (continued)
227 arr[i] : BSTR BDOC VAL_|BSEQ|FLOW VAL_|SCLR|PLAI..1..1
228 i : 0 1 2 3 4 5
229 
230 
231  has a string............. has a string.............
232  | offset "bb" | offset "ccc"
233  | | length "bb" | | length "ccc"
234  | | | | | |
235  event4 "bb"..........|..| event5 "ccc".........|..|
236  | | | | | |
237  (cont)--> -----+--------------------+--+--------------+--------------------+--+-----> (continued)
238 arr[i] : VAL_|SCLR|PLAI|PSTR..4..2 VAL_|SCLR|PLAI|PSTR..8..3
239 i : 6 | 7 8 9 | 10 11
240  | |
241  prev event has string prev event has string
242  (to get to prev, jump (to get to prev, jump
243  back 3 slots: ie 6->3) back 3 slots: ie 9->6)
244 
245 
246 
247  event6 ] event7 event8
248  | | |
249  (cont)--> -----+-------------+--------+-----| (end)
250 arr[i] : ESEQ|PSTR EDOC ESTR
251 i : 12 | 13 14
252  |
253  prev event has string
254  (to get to it, jump
255  back 3 slots: ie 12->9)
256 @endcode
257  *
258  * Note that the buffer contains both events and strings encoded as
259  * integer pairs. That is, events that have an associated string are
260  * immediately followed by two integers providing the offset and length
261  * of that string in the source buffer. (In the example above, this
262  * happens in the events for the strings `a`, `bb`, and `ccc` at
263  * positions 3, 6 and 9, respectively).
264  *
265  * The flag @ref ievt::PSTR and the mask @ref ievt::WSTR are provided to
266  * enable easier iteration over the array: you can use them to test for
267  * presence of a string when iterating over the array.
268  *
269  * The flag @ref ievt::PSTR announces that an event is *preceded* by a
270  * string. That is, the previous event has a string, so that when this
271  * flag is found while iterating right-to-left, a jump of -3 should be
272  * used to get at the bitmask of the previous event. (In the example
273  * above, this flag is present for the events for `bb` and `ccc`, but not
274  * `a` because it is not preceded by a string).
275  *
276  * Likewise, to signify that the current event is *followed* by a string,
277  * there is the mask @ref ievt::WSTR, which is a mask of all the flags of
278  * events that have a string: @ref ievt::SCLR, @ref ievt::ALIA, @ref
279  * ievt::ANCH and @ref ievt::TAG_. While iterating left-to-right in the
280  * array, presence of any of the bits in the mask @ref ievt::WSTR means
281  * that a jump of +3 should be employed to get at the bitmask of the next
282  * event.
283  *
284  * Here's another example with the result of parsing `a: bb`
285  * ```c++
286  * const DataType arr[] = { // result of parsing: `a: bb`
287  * BSTR, // begin stream
288  * BDOC, // begin doc
289  * VAL_|BMAP|BLCK, // begin map as val, block
290  * KEY_|SCLR|PLAI, 0, 1, // key scalar, plain style: "a" starts at offset 0 and has length 1
291  * VAL_|SCLR|PLAI|PSTR, 3, 2, // val scalar, plain style: "bb" starts at offset 3 and has length 2
292  * EMAP|PSTR, // end map
293  * EDOC, // end doc
294  * ESTR, // end stream
295  * };
296  * ```
297  *
298  * Typical code to iterate left-to-right over the array will look like
299  * this:
300  *
301  * ```c++
302  * // source buffer, modified in place during parsing (IMPORTANT!)
303  * substr src = ...;
304  * substr arena = ...; // arena used for scalars/tags that are extended during filtering
305  * // events resulting from parsing
306  * const int events[] = {...};
307  * int events_size = ...;
308  * for(int i = 0; i < events_size; ++i)
309  * {
310  * if(events[i] & ievt::WSTR) // this event has a string following it
311  * {
312  * size_t offset = (size_t)events[i+1];
313  * size_t length = (size_t)events[i+2];
314  * csubstr region = (events[i] & ievt::AREN) ? arena : src; // is the string in the arena?
315  * csubstr str = region.sub(offset, length); // get the string
316  * ...
317  * i += 2; // skip the two ints of the string
318  * // (the jump is three places; the loop adds the other place)
319  * }
320  * else // this is a single-int event
321  * {
322  * ...
323  * }
324  * }
325  * ```
326  *
327  * This handler must be initialized with the input source buffer, the
328  * output arena, and the output event buffer. This handler will not take
329  * ownership nor attempt to resize the output buffer. If the size
330  * required for the output buffer or arena is larger than its actual
331  * size, parsing goes all the way to the end, determining the required buffer
332  * sizes without writing anything past the end of the respective
333  * buffer. After parsing is finished, the user must ensure that the
334  * buffer size was enough to accommodate all the data that needs to be
335  * written into it, or react accordingly (eg, throw an error, or resize
336  * the buffer then retry the parse).
337  *
338  * A couple of functions will be helpful to do this. After parsing, @ref
339  * EventHandlerInts::fits_buffers() must be used to verify that the
340  * output buffers were enough to accommodate the results. Then, @ref
341  * EventHandlerInts::required_size_events() and @ref
342  * EventHandlerInts::required_size_arena() can be used to retrieve the
343  * necessary information. To get an estimation of the number of events
344  * before parsing, see @ref estimate_events_ints_size().
345  *
346  * Typical code to parse YAML with this handler will look like this:
347  *
348  * ```c++
349  * csubstr filename = ...;
350  * substr src = ...;
351  * // estimate the size required for the events buffer,
352  * // overpredicting it to be safe.
353  * int estimated_size = extra::estimate_events_ints_size(src);
354  * extra::EventHandlerInts handler;
355  * ParseEngine<extra::EventHandlerInts> parser(&handler);
356  * // example with a vector
357  * std::vector<int> evts;
358  * // ensure we have a fighting chance to accommodate the events
359  * evts.resize((size_t)estimated_size);
360  * // arena to place scalars/tags that may have been extended after filtering
361  * std::vector<char> arena;
362  * arena.resize(src.len); // this is generally enough
363  * // initialize the handler
364  * handler.reset(src, arena, evts.data(), (int)evts.size());
365  * // parse the YAML
366  * parser.parse_in_place_ev(filename, src);
367  * if(handler.fits_buffers()) // were the buffer sizes enough?
368  * {
369  * evts.resize((size_t)handler.required_size_events()); // trim the vector
370  * ...
371  * }
372  * else
373  * {
374  * // event size estimation underpredicted, or arena is too small!
375  * // for the first case, open an issue at
376  * // https://github.com/biojppm/rapidyaml/issues
377  * error("buffer could not accommodate all the events");
378  * // NOTE: see below for notes on doing a parse retry.
379  * }
380  * ```
381  *
382  * The result of @ref estimate_events_ints_size() (click to see more
383  * info) must be an overprediction: it overpredicts for every single
384  * case among the many hundreds covered in the unit tests. This is
385  * deliberate, and aims at ensuring that a retry parse is not
386  * needed. But conceivably, it may underpredict in some instances not
387  * found in our tests. What to do then?
388  *
389  * First, [open an issue](https://github.com/biojppm/rapidyaml/issues) to
390  * allow the estimation to be improved! Second, there are two ways to
391  * handle this situation in code:
392  *
393  * 1) throw an error (as sketched above)
394  *
395  * 2) grow the buffer to the required size (see @ref
396  * EventHandlerInts::required_size_events()), and then parse
397  * again
398  *
399  * If your code must be able to handle any case including where the
400  * prediction undershoots before the estimate function is fixed (after
401  * you open the issue), that is, if you are considering a parse retry,
402  * there is something important that needs attention. The YAML source
403  * buffer is mutated in-place during the parse, and cannot be used to
404  * parse again. So if you want to retry, you need to keep a pristine
405  * copy of the source, and use it for the retry:
406  *
407  * ```c++
408  * const std::string src = ...; // the YAML code to be parsed
409  * std::string parsed_src = src; // this is where we will parse (filter during parsing)
410  * std::vector<int> evts((size_t)estimated_size); // ensure we have a fighting chance to accommodate the events
411  * std::vector<char> arena(src.size()); // arena for scalars/tags that may have been extended after filtering
412  * ParseEngine<extra::EventHandlerInts> parser(&handler);
413  * handler.reset(to_substr(parsed_src), to_substr(arena), evts.data(), (int)evts.size());
414  * parser.parse_in_place_ev(filename, to_substr(parsed_src));
415  * if(handler.fits_buffers()) // were the buffer sizes enough?
416  * {
417  * evts.resize((size_t)handler.required_size_events()); // trim the vector
418  * ...
419  * }
420  * else
421  * {
422  * evts.resize((size_t)handler.required_size_events()); // buffer size was not enough.
423  * arena.resize(handler.required_size_arena()); // buffer size was not enough.
424  * // copy again
425  * parsed_src = src;
426  * // retry parse
427  * handler.reset(to_substr(parsed_src), to_substr(arena), evts.data(), (int)evts.size());
428  * parser.parse_in_place_ev(filename, to_substr(parsed_src));
429  * assert((size_t)handler.fits_buffers()); // must always be true
430  * }
431  * ```
432  *
433  * When bringing this to other programming languages, the semantics
434  * will be very similar to this.
435  */
436 struct EventHandlerInts : public c4::yml::EventHandlerStack<EventHandlerInts, EventHandlerIntsState>
437 {
438 
439  /** @name types
440  * @{ */
441 
443  using state = EventHandlerIntsState; // our internal state must inherit from parser state
444  enum { requires_strings_on_buffers = true };
445 
446  /** @} */
447 
448 public:
449 
450  /** @cond dev */
451  ievt::DataType * m_evt;
452  int32_t m_evt_pos;
453  int32_t m_evt_prev;
454  int32_t m_evt_size;
455  substr m_arena;
456  size_t m_arena_pos;
457  id_type m_curr_doc;
458  TagDirectives m_tag_directives;
459  TagCache m_tag_cache;
460 
461  // undefined at the end
462  #define _enable_(bits) _enable__<bits>()
463  #define _disable_(bits) _disable__<bits>()
464  #define _has_any_(bits) _has_any__<bits>()
465  /** @endcond */
466 
467 public:
468 
469  /** @name construction and resetting
470  * @{ */
471 
473  : EventHandlerStack(cb)
474  {
475  reset(substr{}, substr{}, nullptr, 0);
476  }
479  {
480  }
481 
/** (Re)initialize the handler so that it is ready for a new parse.
 *
 * Resets the state stack to its root, marks the root state with
 * RUNK|RTOP, rewinds the event and arena write positions to zero and
 * clears the tag directives and the tag cache.
 *
 * @param str the source buffer; start_parse() asserts that it is
 *        later called with this same pointer and length
 * @param arena buffer for strings that are not located in @p str
 * @param dst output buffer for the events
 * @param dst_size capacity of @p dst, in number of ievt::DataType elements
 */
482  void reset(substr str, substr arena, ievt::DataType *dst, int32_t dst_size)
483  {
484  _stack_reset_root();
485  m_curr->flags |= c4::yml::RUNK|c4::yml::RTOP;
486  m_curr->evt_type = {};
487  m_curr->evt_id = 0;
488  m_arena = arena;
489  m_arena_pos = 0;
490  m_src = str;
491  m_evt = dst;
492  m_evt_size = dst_size;
493  m_evt_pos = 0;
494  m_evt_prev = 0;
495  m_curr_doc = 0;
496  m_tag_directives.clear();
497  m_tag_cache.clear();
498  }
499 
500  /** get the size needed for the event buffer from the previous parse
501  * @warning this is valid only until the next parse */
503  {
504  return m_evt_pos;
505  }
506 
507  /** get the size needed for the arena from the previous parse
508  * @warning this is valid only until the next parse */
509  size_t required_size_arena() const
510  {
511  return m_arena_pos; // arena write position; fits_buffers() compares it against m_arena.len
512  }
513 
514  /** Predicate to test if the event and arena buffers successfully
515  * accommodated all the parse events.
516  *
517  * @warning this is valid only until the next parse */
518  bool fits_buffers() const
519  {
520  return m_evt_pos <= m_evt_size && m_arena_pos <= m_arena.len; // both write positions must be within the capacities given to reset()
521  }
522 
/** Intentionally does nothing; the argument is ignored.
 * NOTE(review): presumably present only because the parser expects
 * every event handler to provide it -- confirm against the parser. */
523  void reserve_arena(int /*arena_size*/)
524  {
525  // does not apply here
526  }
527 
/** mutable access to the tag directives member (cleared by reset(), filled by add_directive_tag()) */
528  C4_ALWAYS_INLINE TagDirectives &tag_directives() { return m_tag_directives; }
/** mutable access to the tag cache member (cleared by reset()) */
529  C4_ALWAYS_INLINE TagCache &tag_cache() { return m_tag_cache; }
530 
531  /** @} */
532 
533 public:
534 
535  /** @name parse events
536  * @{ */
537 
/** Called when a parse starts.
 *
 * Asserts that @p src is the very same buffer (same pointer and
 * length) that was given to reset(), then forwards to
 * _stack_start_parse().
 *
 * @param filename forwarded unchanged to _stack_start_parse()
 * @param src the source buffer being parsed
 */
538  void start_parse(const char* filename, substr src)
539  {
540  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, src.str == m_src.str);
541  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, src.len == m_src.len);
542  this->_stack_start_parse(filename, src);
543  }
544 
546  {
547  this->_stack_finish_parse();
548  }
549 
551  {
552  while(m_stack.size() > 1)
553  _pop();
554  }
555 
556  /** @} */
557 
558 public:
559 
560  /** @name YAML stream events */
561  /** @{ */
562 
564  {
565  _send_flag_only_(ievt::BSTR);
566  }
567 
/** end of the YAML stream: sends a single ievt::ESTR flag-only event */
568  void end_stream()
569  {
570  _send_flag_only_(ievt::ESTR);
571  }
572 
573  /** @} */
574 
575 public:
576 
577  /** @name YAML document events */
578  /** @{ */
579 
580  /** implicit doc start (without ---): sends a ievt::BDOC flag-only event */
581  void begin_doc()
582  {
583  _c4dbgpf("{}/{}: begin_doc", m_evt_pos, m_evt_size);
584  _send_flag_only_(ievt::BDOC);
585  if(_stack_should_push_on_begin_doc()) // the stack helper decides whether this doc opens a new level
586  {
587  _c4dbgp("push!");
588  _push();
589  _enable_(DOC); // set the DOC flag only after the push
590  }
591  }
592  /** implicit doc end (without ...): sends a ievt::EDOC flag-only event */
593  void end_doc()
594  {
595  _c4dbgpf("{}/{}: end_doc", m_evt_pos, m_evt_size);
596  _send_flag_only_(ievt::EDOC);
597  if(_stack_should_pop_on_end_doc()) // the stack helper decides whether a level must be closed
598  {
599  _c4dbgp("pop!");
600  _pop();
601  }
602  ++m_curr_doc; // document counter; passed to m_tag_directives.add() in add_directive_tag()
603  }
604 
605  /** explicit doc start, with --- */
607  {
608  _c4dbgpf("{}/{}: begin_doc_expl", m_evt_pos, m_evt_size);
609  _send_flag_only_(ievt::BDOC|ievt::EXPL);
610  _c4dbgp("push!");
611  _push();
612  _enable_(DOC);
613  }
614  /** explicit doc end, with ... */
616  {
617  _c4dbgpf("{}/{}: end_doc_expl", m_evt_pos, m_evt_size);
618  _send_flag_only_(ievt::EDOC|ievt::EXPL);
619  if(_stack_should_pop_on_end_doc())
620  {
621  _c4dbgp("pop!");
622  _pop();
623  }
624  ++m_curr_doc;
625  }
626 
627  /** @} */
628 
629 public:
630 
631  /** @name YAML map functions */
632  /** @{ */
633 
635  {
636  _c4dbgpf("{}/{}: bmap key flow", m_evt_pos, m_evt_size);
637  _send_flag_only_(ievt::KEY_|ievt::BMAP|ievt::FLOW);
638  _mark_parent_with_children_();
640  _push();
641  }
643  {
644  _c4dbgpf("{}/{}: bmap key block", m_evt_pos, m_evt_size);
645  _send_flag_only_(ievt::KEY_|ievt::BMAP|ievt::BLCK);
646  _mark_parent_with_children_();
648  _push();
649  }
650 
652  {
653  _c4dbgpf("{}/{}: bmap flow", m_evt_pos, m_evt_size);
654  _send_flag_only_(ievt::VAL_|ievt::BMAP|ievt::FLOW);
655  _mark_parent_with_children_();
656  _enable_(c4::yml::MAP|c4::yml::FLOW_SL);
657  _push();
658  }
660  {
661  _c4dbgpf("{}/{}: bmap block", m_evt_pos, m_evt_size);
662  _send_flag_only_(ievt::VAL_|ievt::BMAP|ievt::BLCK);
663  _mark_parent_with_children_();
664  _enable_(c4::yml::MAP|c4::yml::BLOCK);
665  _push();
666  }
667 
669  {
670  _pop();
671  _send_flag_only_(ievt::EMAP);
672  }
673 
/** end of a flow-style map: pops the current state level and then
 * sends a ievt::EMAP flag-only event. The (unnamed) multiline
 * argument is ignored. */
674  void end_map_flow(bool /*multiline*/)
675  {
676  _pop();
677  _send_flag_only_(ievt::EMAP);
678  }
679 
680  /** @} */
681 
682 public:
683 
684  /** @name YAML seq events */
685  /** @{ */
686 
688  {
689  _c4dbgpf("{}/{}: bseq key flow", m_evt_pos, m_evt_size);
690  _send_flag_only_(ievt::KEY_|ievt::BSEQ|ievt::FLOW);
691  _mark_parent_with_children_();
693  _push();
694  }
696  {
697  _c4dbgpf("{}/{}: bseq key block", m_evt_pos, m_evt_size);
698  _send_flag_only_(ievt::KEY_|ievt::BSEQ|ievt::BLCK);
699  _mark_parent_with_children_();
701  _push();
702  }
703 
705  {
706  _c4dbgpf("{}/{}: bseq flow", m_evt_pos, m_evt_size);
707  _send_flag_only_(ievt::VAL_|ievt::BSEQ|ievt::FLOW);
708  _mark_parent_with_children_();
709  _enable_(c4::yml::SEQ|c4::yml::FLOW_SL);
710  _push();
711  }
713  {
714  _c4dbgpf("{}/{}: bseq block", m_evt_pos, m_evt_size);
715  _send_flag_only_(ievt::VAL_|ievt::BSEQ|ievt::BLCK);
716  _mark_parent_with_children_();
717  _enable_(c4::yml::SEQ|c4::yml::BLOCK);
718  _push();
719  }
720 
722  {
723  _pop();
724  _send_flag_only_(ievt::ESEQ);
725  }
726 
/** end of a flow-style seq: pops the current state level and then
 * sends a ievt::ESEQ flag-only event. The (unnamed) multiline
 * argument is ignored. */
727  void end_seq_flow(bool /*multiline*/)
728  {
729  _pop();
730  _send_flag_only_(ievt::ESEQ);
731  }
732 
733  /** @} */
734 
735 public:
736 
737  /** @name YAML structure events */
738  /** @{ */
739 
/** Prepare the current state for a sibling entry. No event is
 * written: this only asserts that there is a parent state and clears
 * the evt_type of the current state. */
740  void add_sibling()
741  {
742  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, m_parent);
743  m_curr->evt_type = {};
744  }
745 
746  /** @} */
747 
748 public:
749 
750  /** @name YAML scalar events */
751  /** @{ */
752 
753 
754  C4_ALWAYS_INLINE void set_key_scalar_plain_empty()
755  {
756  _c4dbgpf("{}/{}: set_key_scalar_plain_empty", m_evt_pos, m_evt_size);
757  _send_str_(_get_latest_empty_scalar(), ievt::KEY_|ievt::SCLR|ievt::PLAI);
759  }
760  C4_ALWAYS_INLINE void set_val_scalar_plain_empty()
761  {
762  _c4dbgpf("{}/{}: set_val_scalar_plain_empty", m_evt_pos, m_evt_size);
763  _send_str_(_get_latest_empty_scalar(), ievt::VAL_|ievt::SCLR|ievt::PLAI);
765  }
766 
767 
768  C4_ALWAYS_INLINE void set_key_scalar_plain(csubstr scalar)
769  {
770  _c4dbgpf("{}/{}: set_key_scalar_plain: @{} [{}]~~~{}~~~", m_evt_pos, m_evt_size, scalar.str-m_src.str, scalar.len, scalar);
771  _send_str_(scalar, ievt::KEY_|ievt::SCLR|ievt::PLAI);
773  }
774  C4_ALWAYS_INLINE void set_val_scalar_plain(csubstr scalar)
775  {
776  _c4dbgpf("{}/{}: set_val_scalar_plain: @{} [{}]~~~{}~~~", m_evt_pos, m_evt_size, scalar.str-m_src.str, scalar.len, scalar);
777  _send_str_(scalar, ievt::VAL_|ievt::SCLR|ievt::PLAI);
779  }
780 
781 
782  C4_ALWAYS_INLINE void set_key_scalar_dquoted(csubstr scalar)
783  {
784  _c4dbgpf("{}/{}: set_key_scalar_dquo: @{} [{}]~~~{}~~~", m_evt_pos, m_evt_size, scalar.str?size_t(scalar.str-m_src.str):m_src.len, scalar.len, scalar.str?scalar:csubstr{});
785  _send_str_(scalar, ievt::KEY_|ievt::SCLR|ievt::DQUO);
787  }
788  C4_ALWAYS_INLINE void set_val_scalar_dquoted(csubstr scalar)
789  {
790  _c4dbgpf("{}/{}: set_val_scalar_dquo: @{} [{}]~~~{}~~~", m_evt_pos, m_evt_size, scalar.str?size_t(scalar.str-m_src.str):m_src.len, scalar.len, scalar.str?scalar:csubstr{});
791  _send_str_(scalar, ievt::VAL_|ievt::SCLR|ievt::DQUO);
793  }
794 
795 
796  C4_ALWAYS_INLINE void set_key_scalar_squoted(csubstr scalar)
797  {
798  _c4dbgpf("{}/{}: set_key_scalar_squo: @{} [{}]~~~{}~~~", m_evt_pos, m_evt_size, scalar.str-m_src.str, scalar.len, scalar);
799  _send_str_(scalar, ievt::KEY_|ievt::SCLR|ievt::SQUO);
801  }
802  C4_ALWAYS_INLINE void set_val_scalar_squoted(csubstr scalar)
803  {
804  _c4dbgpf("{}/{}: set_val_scalar_squo: @{} [{}]~~~{}~~~", m_evt_pos, m_evt_size, scalar.str-m_src.str, scalar.len, scalar);
805  _send_str_(scalar, ievt::VAL_|ievt::SCLR|ievt::SQUO);
807  }
808 
809 
810  C4_ALWAYS_INLINE void set_key_scalar_literal(csubstr scalar)
811  {
812  _c4dbgpf("{}/{}: set_key_scalar_literal: @{} [{}]~~~{}~~~", m_evt_pos, m_evt_size, scalar.str?size_t(scalar.str-m_src.str):m_src.len, scalar.len, scalar.str?scalar:csubstr{});
813  _send_str_(scalar, ievt::KEY_|ievt::SCLR|ievt::LITL);
815  }
816  C4_ALWAYS_INLINE void set_val_scalar_literal(csubstr scalar)
817  {
818  _c4dbgpf("{}/{}: set_val_scalar_literal: @{} [{}]~~~{}~~~", m_evt_pos, m_evt_size, scalar.str?size_t(scalar.str-m_src.str):m_src.len, scalar.len, scalar.str?scalar:csubstr{});
819  _send_str_(scalar, ievt::VAL_|ievt::SCLR|ievt::LITL);
821  }
822 
823 
824  C4_ALWAYS_INLINE void set_key_scalar_folded(csubstr scalar)
825  {
826  _c4dbgpf("{}/{}: set_key_scalar_folded: @{} [{}]~~~{}~~~", m_evt_pos, m_evt_size, scalar.str?size_t(scalar.str-m_src.str):m_src.len, scalar.len, scalar.str?scalar:csubstr{});
827  _send_str_(scalar, ievt::KEY_|ievt::SCLR|ievt::FOLD);
829  }
830  C4_ALWAYS_INLINE void set_val_scalar_folded(csubstr scalar)
831  {
832  _c4dbgpf("{}/{}: set_val_scalar_folded: @{} [{}]~~~{}~~~", m_evt_pos, m_evt_size, scalar.str?size_t(scalar.str-m_src.str):m_src.len, scalar.len, scalar.str?scalar:csubstr{});
833  _send_str_(scalar, ievt::VAL_|ievt::SCLR|ievt::FOLD);
835  }
836 
837 
/** Set ievt::UNFILT on the event slot at the current write position
 * (m_evt_pos), when that slot is inside the buffer. The write
 * position is not advanced.
 * NOTE(review): since the slot is the one at the write position,
 * this presumably must be called before the key scalar event is
 * sent -- confirm the call order in the parser. */
838  C4_ALWAYS_INLINE void mark_key_scalar_unfiltered()
839  {
840  _c4dbgpf("{}/{}: mark_key_scalar_unfiltered", m_evt_pos, m_evt_size);
841  if(m_evt_pos < m_evt_size) // never write past the end of the buffer
842  m_evt[m_evt_pos] |= ievt::UNFILT;
843  }
/** Set ievt::UNFILT on the event slot at the current write position
 * (m_evt_pos), when that slot is inside the buffer. The write
 * position is not advanced.
 * NOTE(review): since the slot is the one at the write position,
 * this presumably must be called before the val scalar event is
 * sent -- confirm the call order in the parser. */
844  C4_ALWAYS_INLINE void mark_val_scalar_unfiltered()
845  {
846  _c4dbgpf("{}/{}: mark_val_scalar_unfiltered", m_evt_pos, m_evt_size);
847  if(m_evt_pos < m_evt_size) // never write past the end of the buffer
848  m_evt[m_evt_pos] |= ievt::UNFILT;
849  }
850 
851  /** @} */
852 
853 private:
854 
855  /** @cond dev*/
// _add_scalar_(i, scalar): store the string of the event at index i.
//
// Preconditions (asserted): scalar satisfies _is_sub_(); m_evt[i]
// already has one of the ievt::WSTR bits; and slots i..i+3 are inside
// the event buffer.
//
// Writes:
//   m_evt[i+1] = offset of scalar, relative to m_src when scalar is
//                inside m_src; otherwise relative to m_arena, and
//                ievt::AREN is added to m_evt[i]
//   m_evt[i+2] = length of scalar
//   m_evt[i+3] = ievt::PSTR, ie the slot of the following event starts
//                out flagged as "preceded by a string"
//
// CAUTION: this expands to several statements and has no trailing
// semicolon, so use it only as a full statement inside a braced block
// (as done in set_key_anchor()/set_val_anchor()). The arguments are
// evaluated more than once.
856  #define _add_scalar_(i, scalar) \
857  _c4dbgpf("{}/{}: scalar!", i, m_evt_size); \
858  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, _is_sub_(scalar)); \
859  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, m_evt[i] & ievt::WSTR); \
860  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, i + 3 < m_evt_size); \
861  if(C4_LIKELY(scalar.is_sub(m_src))) \
862  { \
863  m_evt[i + 1] = (ievt::DataType)(scalar.str - m_src.str); \
864  } \
865  else \
866  { \
867  m_evt[i] |= ievt::AREN; \
868  m_evt[i + 1] = (ievt::DataType)(scalar.str - m_arena.str); \
869  _c4dbgpf("{}/{}: arena! ->{}", i, m_evt_size, m_evt[i+1]); \
870  } \
871  m_evt[i + 2] = (ievt::DataType)scalar.len; \
872  m_evt[i + 3] = ievt::PSTR
873  /** @endcond */
874 
875 public:
876 
877  /** @name YAML anchor/reference events */
878  /** @{ */
879 
/** Send an anchor event for a key: ievt::KEY_|ievt::ANCH followed by
 * the offset and length of the anchor string.
 *
 * Asserts that the current state has no KEYREF and enables KEYANCH.
 * The three event slots are counted even when they do not fit in the
 * buffer, so that the write position keeps tracking the required
 * size.
 *
 * @param anchor the anchor string
 */
880  void set_key_anchor(csubstr anchor)
881  {
882  _c4dbgpf("{}/{}: set_key_anchor: {}", m_evt_pos, m_evt_size, anchor);
883  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, !_has_any_(KEYREF));
884  _enable_(c4::yml::KEYANCH);
885  if(m_evt_pos + 3 < m_evt_size) // need the 3 slots of this event plus the slot seeded with PSTR
886  {
887  m_evt[m_evt_pos] |= ievt::KEY_|ievt::ANCH; // |= keeps bits already present in the slot
888  _add_scalar_(m_evt_pos, anchor);
889  }
890  m_evt_prev = m_evt_pos;
891  m_evt_pos += 3; // advance unconditionally
892  }
/** Send an anchor event for a val: ievt::VAL_|ievt::ANCH followed by
 * the offset and length of the anchor string.
 *
 * Asserts that the current state has no VALREF and enables VALANCH.
 * The three event slots are counted even when they do not fit in the
 * buffer, so that the write position keeps tracking the required
 * size.
 *
 * @param anchor the anchor string
 */
893  void set_val_anchor(csubstr anchor)
894  {
895  _c4dbgpf("{}/{}: set_val_anchor: {}", m_evt_pos, m_evt_size, anchor);
896  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, !_has_any_(VALREF));
897  _enable_(c4::yml::VALANCH);
898  if(m_evt_pos + 3 < m_evt_size) // need the 3 slots of this event plus the slot seeded with PSTR
899  {
900  m_evt[m_evt_pos] |= ievt::VAL_|ievt::ANCH; // |= keeps bits already present in the slot
901  _add_scalar_(m_evt_pos, anchor);
902  }
903  m_evt_prev = m_evt_pos;
904  m_evt_pos += 3; // advance unconditionally
905  }
906 
/** Send an alias (reference) event for a key: ievt::KEY_|ievt::ALIA
 * with the reference name as its string.
 *
 * Raises a parse error when the current state already has a key
 * anchor (KEYANCH). Enables KEY|KEYREF.
 *
 * @param ref the reference including its leading `*` (asserted); the
 *        `*` is not part of the string that is sent
 */
907  void set_key_ref(csubstr ref)
908  {
909  _c4dbgpf("{}/{}: set_key_ref: {}", m_evt_pos, m_evt_size, ref);
910  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, ref.begins_with('*'));
911  if(C4_UNLIKELY(_has_any_(KEYANCH)))
912  _RYML_ERR_PARSE_(m_stack.m_callbacks, m_curr->pos, "key cannot have both anchor and ref");
913  _enable_(c4::yml::KEY|c4::yml::KEYREF);
914  _send_str_(ref.sub(1), ievt::KEY_|ievt::ALIA); // skip the leading *
915  }
/** Send an alias (reference) event for a val: ievt::VAL_|ievt::ALIA
 * with the reference name as its string.
 *
 * Raises a parse error when the current state already has a val
 * anchor (VALANCH). Enables VAL|VALREF.
 *
 * @param ref the reference including its leading `*` (asserted); the
 *        `*` is not part of the string that is sent
 */
916  void set_val_ref(csubstr ref)
917  {
918  _c4dbgpf("{}/{}: set_val_ref: {}", m_evt_pos, m_evt_size, ref);
919  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, ref.begins_with('*'));
920  if(C4_UNLIKELY(_has_any_(VALANCH)))
921  _RYML_ERR_PARSE_(m_stack.m_callbacks, m_curr->pos, "val cannot have both anchor and ref");
922  _enable_(c4::yml::VAL|c4::yml::VALREF);
923  _send_str_(ref.sub(1), ievt::VAL_|ievt::ALIA); // skip the leading *
924  }
925 
926  /** @} */
927 
928 public:
929 
930  /** @name YAML tag events */
931  /** @{ */
932 
/** Send a tag event for a key: ievt::KEY_|ievt::TAG_ with the tag as
 * its string. Asserts that the tag satisfies _is_sub_() and enables
 * KEYTAG.
 *
 * @param tag the tag string; the debug print treats a null tag.str
 *        as "(arena full)"
 */
933  void set_key_tag(csubstr tag)
934  {
935  _c4dbgpf("{}/{}: set key tag [{}]~~~{}~~~", m_evt_pos, m_evt_size, tag.len, tag.str ? tag : csubstr("(arena full)"));
936  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, _is_sub_(tag));
937  _enable_(c4::yml::KEYTAG);
938  _send_str_(tag, ievt::KEY_|ievt::TAG_);
939  }
/** Send a tag event for a val: ievt::VAL_|ievt::TAG_ with the tag as
 * its string. Asserts that the tag satisfies _is_sub_() and enables
 * VALTAG.
 *
 * @param tag the tag string; the debug print treats a null tag.str
 *        as "(arena full)"
 */
940  void set_val_tag(csubstr tag)
941  {
942  _c4dbgpf("{}/{}: set val tag [{}]~~~{}~~~", m_evt_pos, m_evt_size, tag.len, tag.str ? tag : csubstr("(arena full)"));
943  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, _is_sub_(tag));
944  _enable_(c4::yml::VALTAG);
945  _send_str_(tag, ievt::VAL_|ievt::TAG_);
946  }
947 
948  /** @} */
949 
950 public:
951 
952  /** @name YAML directive events */
953  /** @{ */
954 
/** Send a `%YAML` directive event: ievt::YAML with the version as
 * its string.
 *
 * @param yaml_version the version string of the directive
 */
955  void add_directive_yaml(csubstr yaml_version)
956  {
957  _c4dbgpf("{}/{}: %YAML directive! version={}", m_evt_pos, m_evt_size, yaml_version);
958  _send_str_(yaml_version, ievt::YAML);
959  }
960 
961  void add_directive_tag(csubstr handle, csubstr prefix)
962  {
963  _c4dbgpf("{}/{}: %TAG directive! handle={} prefix={} doc_id={}", m_evt_pos, m_evt_size, handle, prefix, m_curr_doc);
964  if(C4_UNLIKELY(!m_tag_directives.add(handle, prefix, m_curr_doc)))
965  _RYML_ERR_PARSE_(m_stack.m_callbacks, m_curr->pos, "too many %TAG directives");
966  _send_str_(handle, ievt::TAGH);
967  _send_str_(prefix, ievt::TAGP);
968  }
969 
970  /** @} */
971 
972 public:
973 
974  /** @name YAML structure events */
975  /** @{ */
976 
977  /** set the previous val as the first key of a new map, with flow style.
978  *
979  * See the documentation for @ref doc_event_handlers, which has
980  * important notes about this event.
981  */
983  {
984  _c4dbgpf("{}/{}: prev={} actually_val_is_first_key_of_new_map_flow", m_evt_pos, m_evt_size, m_evt_prev);
985  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, m_evt_pos > 2);
986  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, m_evt_prev > 0);
987  // BEFORE
988  // ... flag start len (free)
989  // | |
990  // prev curr
991  // AFTER
992  // ... BMAP flag start len (free)
993  // | |
994  // prev curr
995  if(m_evt_pos < m_evt_size)
996  {
997  if(m_evt[m_evt_prev] & ievt::WSTR)
998  {
999  _c4dbgpf("{}/{}: WSTR", m_evt_pos, m_evt_size);
1000  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, m_evt_prev > 0);
1001  int32_t pos = _extend_left_to_include_tag_and_or_anchor(m_evt_prev);
1002  if(m_evt_pos + 1 < m_evt_size)
1003  {
1004  for(int32_t i = pos; i <= m_evt_prev; i = _next(i))
1005  {
1006  m_evt[i] |= ievt::KEY_;
1007  m_evt[i] &= ~ievt::VAL_;
1008  }
1009  int32_t num_move = m_evt_pos + 1 - pos;
1010  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, num_move > 0);
1011  memmove(m_evt + pos + 1, m_evt + pos, (size_t)num_move * sizeof(ievt::DataType));
1012  }
1013  m_evt[pos] = ievt::BMAP|ievt::FLOW|ievt::VAL_;
1014  // move PSTR to prev
1015  if(m_evt[pos + 1] & ievt::PSTR)
1016  {
1017  m_evt[pos ] |= ievt::PSTR;
1018  m_evt[pos + 1] &= ~ievt::PSTR;
1019  }
1020  }
1021  else
1022  {
1023  _c4dbgpf("{}/{}: container key", m_evt_pos, m_evt_size);
1024  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, (m_evt[m_evt_prev] & (ievt::EMAP|ievt::ESEQ)));
1025  int32_t pos;
1026  _c4dbgpf("{}/{}: find matching open for {}", m_evt_pos, m_evt_size, m_evt_prev);
1027  if((m_evt[m_evt_prev] & ievt::EMAP) == ievt::EMAP)
1028  {
1029  pos = _find_matching_open(ievt::BMAP, ievt::EMAP, m_evt_prev);
1030  }
1031  else
1032  {
1033  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, (m_evt[m_evt_prev] & ievt::ESEQ));
1034  pos = _find_matching_open(ievt::BSEQ, ievt::ESEQ, m_evt_prev);
1035  }
1036  _c4dbgpf("{}/{}: matching open for {}={}", m_evt_pos, m_evt_size, m_evt_prev, pos);
1037  _RYML_CHECK_BASIC_(m_stack.m_callbacks, pos >= 0); // internal error
1038  _RYML_CHECK_BASIC_(m_stack.m_callbacks, pos < m_evt_prev); // internal error
1039  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, (m_evt[pos] & ievt::ESEQ) == (m_evt[m_evt_prev] & ievt::BSEQ));
1040  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, (m_evt[pos] & ievt::EMAP) == (m_evt[m_evt_prev] & ievt::BMAP));
1041  // shift the array one position to the right, starting at pos
1042  int32_t posp1 = pos + 1;
1043  if(m_evt_pos + 1 < m_evt_size)
1044  {
1045  int32_t num_move = m_evt_pos + 1 - pos;
1046  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, num_move > 0);
1047  memmove(m_evt + posp1, m_evt + pos, (size_t)num_move * sizeof(ievt::DataType));
1048  }
1049  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, posp1 < m_evt_pos);
1050  // start the map
1051  m_evt[pos] = ievt::BMAP|ievt::FLOW|ievt::VAL_;
1052  // set next as key, not val
1053  m_evt[posp1] |= ievt::KEY_;
1054  m_evt[posp1] &= ~ievt::VAL_;
1055  // move PSTR to pos
1056  if(m_evt[posp1] & ievt::PSTR)
1057  {
1058  m_evt[pos] |= ievt::PSTR;
1059  m_evt[posp1] &= ~ievt::PSTR;
1060  }
1061  }
1062  }
1063  m_curr->evt_id = m_evt_pos - 2;
1064  ++m_evt_prev;
1065  ++m_evt_pos;
1066  _enable_(c4::yml::MAP|c4::yml::FLOW_SL);
1067  _push();
1068  }
1069 
1070  /** like its flow counterpart, but this function can only be
1071  * called after the end of a flow-val at root or doc level.
1072  *
1073  * See the documentation for @ref doc_event_handlers, which has
1074  * important notes about this event.
1075  */
1077  {
1078  _c4dbgpf("{}/{}: prev={} actually_val_is_first_key_of_new_map_block", m_evt_pos, m_evt_size, m_evt_prev);
1079  if(m_evt_pos < m_evt_size)
1080  {
1081  // interpolate BMAP|VAL|BLCK after the last BDOC
1082  int32_t pos = _find_last_bdoc(m_evt_pos);
1083  if(pos >= 0)
1084  {
1085  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, pos < m_evt_size);
1086  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, pos < m_evt_pos);
1087  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, (m_evt[pos] & ievt::BDOC) == ievt::BDOC);
1088  if(m_evt_pos < m_evt_size)
1089  {
1090  ++pos; // add 1 to write after BDOC
1091  int32_t num_move = m_evt_pos - pos;
1092  int32_t posp1 = pos + 1;
1093  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, ((m_evt[pos] & ievt::BSEQ) == ievt::BSEQ) || ((m_evt[pos] & ievt::BMAP) == ievt::BMAP));
1094  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, num_move > 0);
1095  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, 0 == (m_evt[posp1] & ievt::PSTR));
1096  memmove(m_evt + posp1, m_evt + pos, (size_t)num_move * sizeof(ievt::DataType));
1097  m_evt[pos] = ievt::VAL_|ievt::BMAP|ievt::BLCK;
1098  m_evt[posp1] &= ~ievt::VAL_;
1099  m_evt[posp1] |= ievt::KEY_;
1100  }
1101  }
1102  }
1103  ++m_curr->evt_id;
1104  ++m_evt_prev;
1105  ++m_evt_pos;
1106  _push();
1107  }
1108 
1109  /** @} */
1110 
1111 public:
1112 
1113  /** @name arena events */
1114  /** @{ */
1115 
1116  substr arena()
1117  {
1118  return m_arena.first(m_arena_pos < m_arena.len ? m_arena_pos : m_arena.len);
1119  }
1120  substr arena_rem() // NOLINT
1121  {
1122  return C4_LIKELY(m_arena_pos <= m_arena.len) ? m_arena.sub(m_arena_pos) : m_arena.last(0);
1123  }
1124  /** this may fail, in which case an empty string is returned */
1125  substr alloc_arena(size_t len)
1126  {
1127  substr s = arena_rem();
1128  if(C4_LIKELY(len <= s.len))
1129  s.len = len;
1130  else
1131  s.str = nullptr;
1132  m_arena_pos += len;
1133  return s;
1134  }
1135 
1136  /** @} */
1137 
1138 public:
1139 
1140  /** @name implementation helpers */
1141  /** @{ */
1142 
1143  /** push a new parent, add a child to the new parent, and set the
1144  * child as the current node */
1145  void _push()
1146  {
1147  _stack_push();
1148  m_curr->evt_type = {};
1149  }
1150 
1151  /** end the current scope */
1152  void _pop()
1153  {
1154  _stack_pop();
1155  }
1156 
1157  template<c4::yml::type_bits bits> C4_ALWAYS_INLINE void _enable__() noexcept
1158  {
1159  m_curr->evt_type |= bits;
1160  }
1161  template<c4::yml::type_bits bits> C4_ALWAYS_INLINE void _disable__() noexcept
1162  {
1163  m_curr->evt_type &= ~bits;
1164  }
1165  template<c4::yml::type_bits bits> C4_ALWAYS_INLINE bool _has_any__() const noexcept
1166  {
1167  return (m_curr->evt_type & bits) != c4::yml::type_bits(0);
1168  }
1169 
1170  C4_ALWAYS_INLINE int32_t _next(int32_t pos) const noexcept
1171  {
1172  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, pos < m_evt_size);
1173  return pos + ((m_evt[pos] & ievt::WSTR) ? 3 : 1);
1174  }
1175 
1176  C4_ALWAYS_INLINE int32_t _prev(int32_t pos) const noexcept
1177  {
1178  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, pos < m_evt_size);
1179  return pos - ((m_evt[pos] & ievt::PSTR) ? 3 : 1);
1180  }
1181 
1182  C4_ALWAYS_INLINE bool _is_sub_(csubstr str) const noexcept
1183  {
1184  return (!str.str || str.is_sub(m_src) || str.is_sub(m_arena));
1185  }
1186 
1187  C4_ALWAYS_INLINE void _send_flag_only_(ievt::DataType flags)
1188  {
1189  _c4dbgpf("{}/{}: flag only", m_evt_pos, m_evt_size);
1190  if(m_evt_pos < m_evt_size)
1191  m_evt[m_evt_pos] |= flags;
1192  m_curr->evt_id = m_evt_pos;
1193  m_evt_prev = m_evt_pos;
1194  ++m_evt_pos;
1195  if(m_evt_pos < m_evt_size)
1196  m_evt[m_evt_pos] = {};
1197  }
1198 
1199  C4_ALWAYS_INLINE void _send_str_(csubstr scalar, ievt::DataType flags)
1200  {
1201  _c4dbgpf("{}/{}: send str", m_evt_pos, m_evt_size);
1202  if(m_evt_pos + 3 < m_evt_size)
1203  {
1204  m_evt[m_evt_pos] |= flags;
1205  _add_scalar_(m_evt_pos, scalar);
1206  }
1207  m_curr->evt_id = m_evt_pos;
1208  m_evt_prev = m_evt_pos;
1209  m_evt_pos += 3;
1210  }
1211 
// NOTE(review): the declaration line of this helper is absent from
// this listing (the numbering jumps from 1210 to 1213), so its name
// and signature cannot be read here -- confirm against the header.
// The body sets has_children on m_parent; it does nothing when
// m_parent is null.
1213  {
1214  if(m_parent)
1215  m_parent->has_children = true;
1216  }
1217 
1218  C4_ALWAYS_INLINE csubstr _get_latest_empty_scalar() const
1219  {
1220  // ideally we should search back in the latest event that has
1221  // a scalar, then select a zero-length scalar immediately
1222  // after that scalar. But this also works for now:
1223  return m_src.first(0);
1224  }
1225 
1226  int32_t _find_last_bdoc(int32_t pos) const
1227  {
1228  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, pos < m_evt_size); // it's safe to read from the array
1229  while(pos >= 0)
1230  {
1231  ievt::DataType e = m_evt[pos];
1232  if((e & ievt::BDOC) == ievt::BDOC)
1233  return pos;
1234  pos -= (e & ievt::PSTR) ? 3 : 1;
1235  }
1236  return -1; // LCOV_EXCL_LINE
1237  }
1238 
1239  int32_t _find_matching_open(ievt::DataType open, ievt::DataType close, int32_t pos) const
1240  {
1241  _c4dbgpf("find_matching: start at {}", pos);
1242  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, pos < m_evt_size);
1243  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, (m_evt[pos] & close) == close);
1244  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, (m_evt[pos] & open) == (close & ~ievt::END_));
1245  pos = _prev(pos); // don't count the starting close token
1246  uint32_t count = 0;
1247  while(pos >= 0)
1248  {
1249  ievt::DataType e = m_evt[pos];
1250  _c4dbgpf("find_matching: pos={} count={} e={}", pos, count, m_evt[pos]);
1251  if((e & close) == close)
1252  {
1253  _c4dbgpf(".............: pos={} close! count={} e={}", pos, count, m_evt[pos]);
1254  ++count;
1255  }
1256  else if((e & open) == open)
1257  {
1258  _c4dbgpf(".............: pos={} open! count={} e={}", pos, count, m_evt[pos]);
1259  if(!count)
1260  return pos;
1261  else
1262  --count;
1263  }
1264  pos = _prev(pos);
1265  }
1266  _c4dbgpf("find_matching: not found!", 0); // LCOV_EXCL_LINE
1267  return -1; // LCOV_EXCL_LINE
1268  }
1269 
1271  {
1272  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, pos < m_evt_size);
1273  int32_t prev = _prev(pos);
1274  while((prev > 0) && (m_evt[prev] & (ievt::TAG_|ievt::ANCH)))
1275  {
1276  _c4dbgpf("{}/{}: {} is anchor/tag. extend to {}", m_evt_pos, m_evt_size, prev, prev);
1277  pos = prev;
1278  prev = _prev(prev);
1279  }
1280  return pos;
1281  }
1282 
1283  /** @} */
1284 
1285 #undef _enable_
1286 #undef _disable_
1287 #undef _has_any_
1288 #undef _add_scalar_
1289 
1290 };
1291 
1292 /** @} */
1293 
1294 } // namespace extra
1295 } // namespace yml
1296 } // namespace c4
1297 
1298 
1299 // NOLINTEND(hicpp-signed-bitwise)
1300 C4_SUPPRESS_WARNING_GCC_CLANG_POP
1301 
1302 #endif /* _C4_YML_EXTRA_EVENT_HANDLER_INTS_HPP_ */
#define _has_any_(bits)
#define RYML_EXPORT
Definition: export.hpp:15
Callbacks const & get_callbacks()
get the global callbacks
Definition: common.cpp:94
int32_t estimate_events_ints_size(csubstr src)
Read YAML source and, without undergoing a full parse, estimate the size of the integer buffer requir...
uint32_t type_bits
the integral type necessary to cover all the bits for NodeType_e
Definition: node_type.hpp:30
@ VALANCH
the val has an &anchor
Definition: node_type.hpp:46
@ KEY_DQUO
mark key scalar as double quoted "
Definition: node_type.hpp:69
@ VALREF
a *reference: the val references an &anchor
Definition: node_type.hpp:44
@ VALNIL
the val is null (eg {a : } results in a null val)
Definition: node_type.hpp:50
@ MAP
a map: a parent of KEYVAL/KEYSEQ/KEYMAP nodes
Definition: node_type.hpp:39
@ KEY
is member of a map
Definition: node_type.hpp:37
@ VAL_FOLDED
mark val scalar as multiline, block folded >
Definition: node_type.hpp:66
@ KEYTAG
the key has a tag
Definition: node_type.hpp:47
@ FLOW_SL
mark container with single-line flow style (seqs as '[val1,val2], maps as '{key: val,...
Definition: node_type.hpp:60
@ VAL
a scalar: has a scalar (ie string) value, possibly empty. must be a leaf node, and cannot be MAP or S...
Definition: node_type.hpp:38
@ VALTAG
the val has a tag
Definition: node_type.hpp:48
@ SEQ
a seq: a parent of VAL/SEQ/MAP nodes
Definition: node_type.hpp:40
@ VAL_SQUO
mark val scalar as single quoted '
Definition: node_type.hpp:68
@ VAL_PLAIN
mark val scalar as plain scalar (unquoted, even when multiline)
Definition: node_type.hpp:72
@ KEYREF
a *reference: the key references an &anchor
Definition: node_type.hpp:43
@ BLOCK
mark container with block style (seqs as '- val ', maps as 'key: val')
Definition: node_type.hpp:62
@ KEYANCH
the key has an &anchor
Definition: node_type.hpp:45
@ VAL_DQUO
mark val scalar as double quoted "
Definition: node_type.hpp:70
@ KEY_SQUO
mark key scalar as single quoted '
Definition: node_type.hpp:67
@ VAL_LITERAL
mark val scalar as multiline, block literal |
Definition: node_type.hpp:64
@ KEY_LITERAL
mark key scalar as multiline, block literal |
Definition: node_type.hpp:63
@ KEY_PLAIN
mark key scalar as plain scalar (unquoted, even when multiline)
Definition: node_type.hpp:71
@ KEY_FOLDED
mark key scalar as multiline, block folded >
Definition: node_type.hpp:65
@ KEYNIL
the key is null (eg { : b} results in a null key)
Definition: node_type.hpp:49
@ DOC
a document
Definition: node_type.hpp:41
EventFlags
enumeration of integer event bits.
@ SCLR
scalar (=VAL in test suite events)
@ LITL
scalar: block literal (|)
@ UNFILT
special flag to mark a scalar as unfiltered (when the parser is set not to filter).
@ EMAP
end map (-MAP in test suite events)
@ DQUO
scalar: double-quoted ("")
@ FOLD
scalar: block folded (>)
@ BMAP
begin map (+MAP in test suite events)
@ TAGH
tag directive, handle: \TAG <handle> ........
@ MASK
a mask of all bits in this enumeration
@ ESTR
end stream (-STR in test suite events)
@ BSTR
begin stream (+STR in test suite events)
@ BSEQ
begin seq (+SEQ in test suite events)
@ ESEQ
end seq (-SEQ in test suite events)
@ WSTR
WithSTRing: mask of all the events that encode a string following the event. For such events,...
@ FLOW
container: flow: [] for seqs or {} for maps
@ TAGP
tag directive, prefix: \TAG ........ <prefix>
@ VAL_
as value special flag to enable look-back in the event array. it signifies that the previous event ha...
@ BDOC
begin doc (+DOC in test suite events)
@ BLCK
container: block
@ AREN
IMPORTANT. Marks events whose string was placed in the arena. This happens when the filtered string i...
@ YAML
yaml directive: \YAML <version>
@ ALIA
*ref (reference)
@ EDOC
end doc (-DOC in test suite events)
@ LAST
the last flag defined above
@ EXPL
--- (with BDOC) or ... (with EDOC)
@ SQUO
scalar: single-quoted (')
int32_t DataType
data type for integer events.
RYML_ID_TYPE id_type
The type of a node id in the YAML tree; to override the default type, define the macro RYML_ID_TYPE t...
Definition: common.hpp:244
@ RTOP
reading at top level
@ RUNK
reading unknown state (when starting): must determine whether scalar, map or seq
(Undefined by default) Use shorter error message from checks/asserts: do not show the check condition...
Definition: common.cpp:14
A c-style callbacks class to customize behavior on errors or allocation.
Definition: common.hpp:538
Use this class a base of implementations of event handler to simplify the stack logic.
Accelerator structure to reduce memory requirements by enabling reuse of resolved tags.
Definition: tag.hpp:69
void clear() noexcept
Definition: tag.hpp:91
void clear() noexcept
Definition: tag.cpp:373
TagDirective const * add(csubstr handle, csubstr prefix, id_type doc_id) noexcept
Definition: tag.cpp:358
A parser event handler that creates a compact representation of the YAML tree in a contiguous buffer ...
void begin_doc_expl()
explicit doc start, with —
void end_doc_expl()
explicit doc end, with ...
void add_directive_tag(csubstr handle, csubstr prefix)
void set_key_scalar_literal(csubstr scalar)
void start_parse(const char *filename, substr src)
int32_t _next(int32_t pos) const noexcept
bool fits_buffers() const
Predicate to test if the event and arena buffers successfully accomodated all the parse events.
int32_t _find_matching_open(ievt::DataType open, ievt::DataType close, int32_t pos) const
void begin_doc()
implicit doc start (without —)
void _send_flag_only_(ievt::DataType flags)
substr alloc_arena(size_t len)
this may fail, in which case an empty string is returned
void actually_val_is_first_key_of_new_map_flow()
set the previous val as the first key of a new map, with flow style.
void set_key_scalar_squoted(csubstr scalar)
void set_key_scalar_folded(csubstr scalar)
EventHandlerInts(c4::yml::Callbacks const &cb)
void actually_val_is_first_key_of_new_map_block()
like its flow counterpart, but this function can only be called after the end of a flow-val at root o...
void set_val_scalar_literal(csubstr scalar)
int required_size_events() const
get the size needed for the event buffer from the previous parse
void set_val_scalar_folded(csubstr scalar)
int32_t _extend_left_to_include_tag_and_or_anchor(int32_t pos) const
void set_val_scalar_dquoted(csubstr scalar)
void _pop()
end the current scope
void _send_str_(csubstr scalar, ievt::DataType flags)
void set_key_scalar_dquoted(csubstr scalar)
size_t required_size_arena() const
get the size needed for the arena from the previous parse
int32_t _prev(int32_t pos) const noexcept
int32_t _find_last_bdoc(int32_t pos) const
bool _is_sub_(csubstr str) const noexcept
void end_doc()
implicit doc end (without ...)
void add_directive_yaml(csubstr yaml_version)
void _push()
push a new parent, add a child to the new parent, and set the child as the current node
void set_val_scalar_squoted(csubstr scalar)
void reset(substr str, substr arena, ievt::DataType *dst, int32_t dst_size)