rapidyaml  0.11.1
parse and emit YAML, and do it fast
event_handler_ints.hpp
Go to the documentation of this file.
1 #ifndef _C4_YML_EXTRA_EVENT_HANDLER_INTS_HPP_
2 #define _C4_YML_EXTRA_EVENT_HANDLER_INTS_HPP_
3 
4 /** @file event_handler_ints.hpp An event handler that creates an
5  * integer buffer with a very compact representation of the YAML tree
6  * in a source buffer. This is not part of the main rapidyaml library.
7  *
8  * @see c4::yml::extra::ievt::EventFlags
9  * @see c4::yml::extra::EventHandlerInts
10  * */
11 
12 #ifndef RYML_SINGLE_HEADER
13 #ifndef _C4_YML_NODE_TYPE_HPP_
14 #include <c4/yml/node_type.hpp>
15 #endif
16 #ifndef _C4_YML_EVENT_HANDLER_STACK_HPP_
18 #endif
19 #ifndef _C4_YML_TAG_HPP_
20 #include <c4/yml/tag.hpp>
21 #endif
22 #ifndef _C4_YML_DETAIL_DBGPRINT_HPP_
23 #include <c4/yml/detail/dbgprint.hpp>
24 #endif
25 #endif
26 
27 // NOLINTBEGIN(hicpp-signed-bitwise)
28 
29 namespace c4 {
30 namespace yml {
31 namespace extra {
32 
33 /** @addtogroup doc_event_handlers
34  * @{ */
35 
36 namespace ievt {
37 
38 /** data type for integer events. This is set to a 32 bit signed
39  * integer to allow compatibility with a wide range of processing
40  * languages. */
41 using DataType = int32_t;
42 
43 /** enumeration of integer event bits. */
44 typedef enum : DataType {
45 
46  // Structure flags
47  KEY_ = (1 << 0), ///< as key
48  VAL_ = (1 << 1), ///< as value
49  /// special flag to enable look-back in the event array. it
50  /// signifies that the previous event has a string, meaning that
51  /// the jump back to that event is 3 positions. without this flag it
52  /// would be impossible to jump to the previous event.
53  /// see also @ref WSTR
54  PSTR = (1 << 2),
55  /// IMPORTANT. Marks events whose string was placed in the
56  /// arena. This happens when the filtered string is larger than the
57  /// original string in the YAML code (eg from tags that resolve to
58  /// a larger string, or from "\L" or "\P" in double quotes, which
59  /// expand from two to three bytes). Because of this size
60  /// expansion, the filtered string cannot be placed in the original
61  /// source and needs to be placed in the arena.
62  AREN = (1 << 3),
63 
64  // Event scopes
65  BEG_ = (1 << 5), ///< scope: begin
66  END_ = (1 << 6), ///< scope: end
67  SEQ_ = (1 << 7), ///< scope: seq
68  MAP_ = (1 << 8), ///< scope: map
69  DOC_ = (1 << 9), ///< scope: doc
70  EXPL = (1 << 10), ///< `---` (with BDOC) or `...` (with EDOC)
71  STRM = (1 << 11), ///< scope: stream
72  BSEQ = BEG_|SEQ_, ///< begin seq (+SEQ in test suite events)
73  ESEQ = END_|SEQ_, ///< end seq (-SEQ in test suite events)
74  BMAP = BEG_|MAP_, ///< begin map (+MAP in test suite events)
75  EMAP = END_|MAP_, ///< end map (-MAP in test suite events)
76  BSTR = BEG_|STRM, ///< begin stream (+STR in test suite events)
77  ESTR = END_|STRM, ///< end stream (-STR in test suite events)
78  BDOC = BEG_|DOC_, ///< begin doc (+DOC in test suite events)
79  EDOC = END_|DOC_, ///< end doc (-DOC in test suite events)
80 
81  // Single events
82  SCLR = (1 << 12), ///< scalar (=VAL in test suite events)
83  ALIA = (1 << 13), ///< *ref (reference)
84  ANCH = (1 << 14), ///< &anchor
85  TAG_ = (1 << 15), ///< !tag
86 
87  // Style flags
88  PLAI = (1 << 16), ///< scalar: plain
89  SQUO = (1 << 17), ///< scalar: single-quoted (')
90  DQUO = (1 << 18), ///< scalar: double-quoted ("")
91  LITL = (1 << 19), ///< scalar: block literal (|)
92  FOLD = (1 << 20), ///< scalar: block folded (>)
93  FLOW = (1 << 21), ///< container: flow: [] for seqs or {} for maps
94  BLCK = (1 << 22), ///< container: block
95 
96  // Directive flags
97  YAML = (1 << 23), ///< yaml directive: `\%YAML <version>`
98  TAGD = (1 << 24), ///< tag directive, name : `\%TAG <name> .......`
99  TAGV = (1 << 25), ///< tag directive, value: `\%TAG ...... <value>`
100 
101  /// special flag to mark a scalar as unfiltered (when the parser
102  /// is set not to filter).
103  UNFILT = (1 << 26),
104 
105  // Utility flags/masks
106  /// the last flag defined above
108  /// a mask of all bits in this enumeration
109  MASK = (LAST << 1) - 1,
110 
111  /// WithSTRing: mask of all the events that encode a string
112  /// following the event. For such events, the next two integers
113  /// will provide respectively the string's offset and length. See
114  /// also @ref PSTR.
116 
117 } EventFlags;
118 
119 } // namespace ievt
120 
121 /** @} */
122 
123 } // namespace extra
124 } // namespace yml
125 } // namespace c4
126 
127 
128 //-----------------------------------------------------------------------------
129 //-----------------------------------------------------------------------------
130 //-----------------------------------------------------------------------------
131 
132 namespace c4 {
133 namespace yml {
134 namespace extra {
135 
136 /** @addtogroup doc_event_handlers
137  * @{ */
138 
139 /** Read YAML source and, without undergoing a full parse, estimate
140  * the size of the integer buffer required for @ref
141  * EventHandlerInts. This estimation is meant to exceed the actual
142  * number of required events.
143  *
144  * @note This function must overpredict. It does so for every case in
145  * the hundreds/thousands of extensive tests of rapidyaml -- both for
146  * the YAML test suite and the internal cases. If you find a case
147  * where that does not hold, it is a bug. Please report it at
148  * https://github.com/biojppm/rapidyaml/issues! */
149 RYML_EXPORT int32_t estimate_events_ints_size(csubstr src);
150 
151 /** @} */
152 
153 } // namespace extra
154 } // namespace yml
155 } // namespace c4
156 
157 
158 //-----------------------------------------------------------------------------
159 //-----------------------------------------------------------------------------
160 //-----------------------------------------------------------------------------
161 
162 C4_SUPPRESS_WARNING_GCC_CLANG_PUSH
163 C4_SUPPRESS_WARNING_GCC_CLANG("-Wold-style-cast")
164 C4_SUPPRESS_WARNING_GCC("-Wuseless-cast")
165 
166 namespace c4 {
167 namespace yml {
168 namespace extra {
169 
170 
171 /** @addtogroup doc_event_handlers
172  * @{ */
173 
174 /** @cond dev */
/** State type used by EventHandlerInts as the state parameter of
 * EventHandlerStack: the base parser state plus two fields written by
 * the handler. */
175 struct EventHandlerIntsState : public c4::yml::ParserState
176 {
177  c4::yml::type_bits evt_type; ///< cleared by reset() and add_sibling(). NOTE(review): presumably the type bits of the current node -- confirm
178  int32_t evt_id; ///< zeroed by reset(). NOTE(review): presumably a position in the event buffer -- confirm
179 };
180 /** @endcond */
181 
182 
183 /** A parser event handler that creates a compact representation of
184  * the YAML tree in a buffer of integers (see @ref ievt::EventFlags)
185  * containing masks (to represent events) and offset+length (to
186  * represent strings in the source buffer).
187  *
188  * This is meant for use by other programming languages, and supports
189  * container keys (unlike the ryml tree). It parses faster than the ryml
190  * tree parser, because the resulting data structure is much simpler.
191  *
192  * The resulting integer buffer is a linear array of integers containing
193  * events (as a mask of @ref ievt::EventFlags), which in some cases (see
194  * @ref ievt::WSTR) are followed by an encoded string (encoded as an
195  * offset and length to the parsed source buffer).
196  *
197  * For example, parsing `[a, bb, ccc]` results in the following event
198  * buffer (grouped to highlight the event sequence structure):
199  *
200  * ```c++
201  * using namespace c4::yml::extra::ievt;
202  * const DataType arr[] = { // result of parsing: [a, bb, ccc]
203  * BSTR, // begin stream
204  * BDOC, // begin doc
205  * VAL_|BSEQ|FLOW, // begin seq as val, flow
206  * VAL_|SCLR|PLAI, 1, 1, // val scalar, plain style: "a" starts at offset 1 and has length 1
207  * VAL_|SCLR|PLAI|PSTR, 4, 2, // val scalar, plain style: "bb" starts at offset 4 and has length 2; preceded by a string event (PSTR)
208  * VAL_|SCLR|PLAI|PSTR, 8, 3, // val scalar, plain style: "ccc" starts at offset 8 and has length 3; preceded by a string event (PSTR)
209  * ESEQ|PSTR, // end seq; preceded by a string event (PSTR)
210  * EDOC, // end doc
211  * ESTR, // end stream
212  * };
213  * ```
214  *
215  * Here is a sketch clarifying the meaning of this event sequence:
216  *
217 @code
218 source : [a, bb, ccc]
219  has a string........
220  | offset "a"
221  | | length "a"
222  | | |
223  event0 event1 event2 [ event3 "a"......|..|
224  | | | | | |
225 (start) +--------+-------+------------------+---------------+--+-----> (continued)
226 arr[i] : BSTR BDOC VAL_|BSEQ|FLOW VAL_|SCLR|PLAI..1..1
227 i : 0 1 2 3 4 5
228 
229 
230  has a string............. has a string.............
231  | offset "bb" | offset "ccc"
232  | | length "bb" | | length "ccc"
233  | | | | | |
234  event4 "bb"..........|..| event5 "ccc".........|..|
235  | | | | | |
236  (cont)--> -----+--------------------+--+--------------+--------------------+--+-----> (continued)
237 arr[i] : VAL_|SCLR|PLAI|PSTR..4..2 VAL_|SCLR|PLAI|PSTR..8..3
238 i : 6 | 7 8 9 | 10 11
239  | |
240  prev event has string prev event has string
241  (to get to prev, jump (to get to prev, jump
242  back 3 slots: ie 6->3) back 3 slots: ie 9->6)
243 
244 
245 
246  event6 ] event7 event8
247  | | |
248  (cont)--> -----+-------------+--------+-----| (end)
249 arr[i] : ESEQ|PSTR EDOC ESTR
250 i : 12 | 13 14
251  |
252  prev event has string
253  (to get to it, jump
254  back 3 slots: ie 12->9)
255 @endcode
256  *
257  * Note that the buffer contains both events and strings encoded as
258  * integer pairs. That is, events that have an associated string are
259  * immediately followed by two integers providing the offset and length
260  * of that string in the source buffer. (In the example above, this
261  * happens in the events for the strings `a`, `bb`, and `ccc` at
262  * positions 3, 6 and 9, respectively).
263  *
264  * The flag @ref ievt::PSTR and the mask @ref ievt::WSTR are provided to
265  * enable easier iteration over the array: you can use them to test for
266  * presence of a string when iterating over the array.
267  *
268  * The flag @ref ievt::PSTR announces that an event is *preceded* by a
269  * string. That is, the previous event has a string, so that when this
270  * flag is found while iterating right-to-left, a jump of -3 should be
271  * used to get at the bitmask of the previous event. (In the example
272  * above, this flag is present for the events for `bb` and `ccc`, but not
273  * `a` because it is not preceded by a string).
274  *
275  * Likewise, to signify that the current event is *followed* by a string,
276  * there is the mask @ref ievt::WSTR, which is a mask of all the flags of
277  * events that have a string: @ref ievt::SCLR, @ref ievt::ALIA, @ref
278  * ievt::ANCH and @ref ievt::TAG_. While iterating left-to-right in the
279  * array, presence of any of the bits in the mask @ref ievt::WSTR means
280  * that a jump of +3 should be employed to get at the bitmask of the next
281  * event.
282  *
283  * Here's another example with the result of parsing `a: bb`
284  * ```c++
285  * const DataType arr[] = { // result of parsing: `a: bb`
286  * BSTR, // begin stream
287  * BDOC, // begin doc
288  * VAL_|BMAP|BLCK, // begin map as val, block
289  * KEY_|SCLR|PLAI, 0, 1, // key scalar, plain style: "a" starts at offset 0 and has length 1
290  * VAL_|SCLR|PLAI|PSTR, 3, 2, // val scalar, plain style: "bb" starts at offset 3 and has length 2
291  * EMAP|PSTR, // end map
292  * EDOC, // end doc
293  * ESTR, // end stream
294  * };
295  * ```
296  *
297  * Typical code to iterate left-to-right over the array will look like
298  * this:
299  *
300  * ```c++
301  * // source buffer, modified in place during parsing (IMPORTANT!)
302  * substr src = ...;
303  * substr arena = ...; // arena used for scalars/tags that are extended during filtering
304  * // events resulting from parsing
305  * const int events[] = {...};
306  * int events_size = ...;
307  * for(int i = 0; i < events_size; ++i)
308  * {
309  * if(events[i] & ievt::WSTR) // this event has a string following it
310  * {
311  * size_t offset = (size_t)events[i+1];
312  * size_t length = (size_t)events[i+2];
313  * csubstr region = (events[i] & ievt::AREN) ? arena : src; // is the string in the arena?
314  * csubstr str = region.sub(offset, length); // get the string
315  * ...
316  * i += 2; // skip the two ints of the string
317  * // (the jump is three places; the loop adds the other place)
318  * }
319  * else // this is a single-int event
320  * {
321  * ...
322  * }
323  * }
324  * ```
325  *
326  * This handler must be initialized with the input source buffer, the
327  * output arena, and the output event buffer. This handler will not take
328  * ownership nor attempt to resize the output buffer. If the size
329  * required for the output buffer or arena are larger than their actual
330  * size, parsing goes all the way to the end, determining the required buffer
331  * sizes without writing anything past the end of the respective
332  * buffer. After parsing is finished, the user must ensure that the
333  * buffer size was enough to accommodate all the data that needs to be
334  * written into it, or react accordingly (eg, throw an error, or resize
335  * the buffer then retry the parse).
336  *
337  * A couple of functions will be helpful to do this. After parsing, @ref
338  * EventHandlerInts::fits_buffers() must be used to verify that the
339  * output buffers were enough to accommodate the results. Then, @ref
340  * EventHandlerInts::required_size_events() and @ref
341  * EventHandlerInts::required_size_arena() can be used to retrieve the
342  * necessary information. To get an estimation of the number of events
343  * before parsing, see @ref estimate_events_ints_size().
344  *
345  * Typical code to parse YAML with this handler will look like this:
346  *
347  * ```c++
348  * csubstr filename = ...;
349  * substr src = ...;
350  * // estimate the size required for the events buffer,
351  * // overpredicting it to be safe.
352  * int estimated_size = extra::estimate_events_ints_size(src);
353  * extra::EventHandlerInts handler;
354  * ParseEngine<extra::EventHandlerInts> parser(&handler);
355  * // example with a vector
356  * std::vector<int> evts;
357  * // ensure we have a fighting chance to accommodate the events
358  * evts.resize((size_t)estimated_size);
359  * // arena to place scalars/tags that may have been extended after filtering
360  * std::vector<char> arena;
361  * arena.resize(src.len); // this is generally enough
362  * // initialize the handler
363  * handler.reset(src, arena, evts.data(), (int)evts.size());
364  * // parse the YAML
365  * parser.parse_in_place_ev(filename, src);
366  * if(handler.fits_buffers()) // were the buffer sizes enough?
367  * {
368  * evts.resize((size_t)handler.required_size_events()); // trim the vector
369  * ...
370  * }
371  * else
372  * {
373  * // event size estimation underpredicted, or arena is too small!
374  * // for the first case, open an issue at
375  * // https://github.com/biojppm/rapidyaml/issues
376  * error("buffer could not accommodate all the events");
377  * // NOTE: see below for notes on doing a parse retry.
378  * }
379  * ```
380  *
381  * The result of @ref estimate_events_ints_size() (click to see more
382  * info) must be an overprediction: it overpredicts for every single
383  * case among the many hundreds covered in the unit tests. This is
384  * deliberate, and aims at ensuring that a retry parse is not
385  * needed. But conceivably, it may underpredict in some instances not
386  * found in the our tests. What to do then?
387  *
388  * First, [open an issue](https://github.com/biojppm/rapidyaml/issues) to
389  * allow the estimation to be improved! Second, there are two ways to
390  * handle this situation in code:
391  *
392  * 1) throw an error (as sketched above)
393  *
394  * 2) grow the buffer to the required size (see @ref
395  * EventHandlerInts::required_size_events()), and then parse
396  * again
397  *
398  * If your code must be able to handle any case including where the
399  * prediction undershoots before the estimate function is fixed (after
400  * you open the issue), that is, if you are considering a parse retry,
401  * there is something important that needs attention. The YAML source
402  * buffer is mutated in-place during the parse, and cannot be used to
403  * parse again. So if you want to retry, you need to keep a pristine
404  * copy of the source, and use it for the retry:
405  *
406  * ```c++
407  * const std::string src = ...; // the YAML code to be parsed
408  * std::string parsed_src = src; // this is where we will parse (filter during parsing)
409  * std::vector<int> evts((size_t)estimated_size); // ensure we have a fighting chance to accommodate the events
410  * std::vector<char> arena(src.size()); // ensure we have a fighting chance to accommodate the scalars/tags extended during filtering
411  * ParseEngine<extra::EventHandlerInts> parser(&handler);
412  * handler.reset(to_substr(parsed_src), to_substr(arena), evts.data(), (int)evts.size());
413  * parser.parse_in_place_ev(filename, to_substr(parsed_src));
414  * if(handler.fits_buffers()) // were the buffer sizes enough?
415  * {
416  * evts.resize((size_t)handler.required_size_events()); // trim the vector
417  * ...
418  * }
419  * else
420  * {
421  * evts.resize((size_t)handler.required_size_events()); // buffer size was not enough.
422  * arena.resize(handler.required_size_arena()); // buffer size was not enough.
423  * // copy again
424  * parsed_src = src;
425  * // retry parse
426  * handler.reset(to_substr(parsed_src), to_substr(arena), evts.data(), (int)evts.size());
427  * parser.parse_in_place_ev(filename, to_substr(parsed_src));
428  * assert((size_t)handler.fits_buffers()); // must always be true
429  * }
430  * ```
431  *
432  * When bringing this to other programming languages, the semantics
433  * will be very similar to this.
434  */
435 struct EventHandlerInts : public c4::yml::EventHandlerStack<EventHandlerInts, EventHandlerIntsState>
436 {
437 
438  /** @name types
439  * @{ */
440 
442  using state = EventHandlerIntsState; // our internal state must inherit from parser state
443 
444  /** @} */
445 
446 public:
447 
448  /** @cond dev */
449  ievt::DataType * m_evt;
450  int32_t m_evt_pos;
451  int32_t m_evt_prev;
452  int32_t m_evt_size;
453  substr m_arena;
454  size_t m_arena_pos;
455  TagDirective m_tag_directives[RYML_MAX_TAG_DIRECTIVES];
456  bool m_has_yaml_directive;
457  bool m_has_docs;
458 
459  // undefined at the end
460  #define _enable_(bits) _enable__<bits>()
461  #define _disable_(bits) _disable__<bits>()
462  #define _has_any_(bits) _has_any__<bits>()
463  /** @endcond */
464 
465 public:
466 
467  /** @name construction and resetting
468  * @{ */
469 
471  : EventHandlerStack(cb)
472  {
473  reset(csubstr{}, substr{}, nullptr, 0);
474  }
477  {
478  }
479 
/** Re-arm the handler for a new parse, discarding the state left by
 * any previous parse.
 *
 * @param str the source buffer that will be parsed; start_parse()
 *        asserts that it receives this same buffer (pointer and length)
 * @param arena output arena, for strings that cannot be placed in the
 *        source buffer (see ievt::AREN)
 * @param dst output event buffer; only the pointer is stored
 * @param dst_size capacity of @p dst, in number of ievt::DataType
 *        elements */
480  void reset(csubstr str, substr arena, ievt::DataType *dst, int32_t dst_size)
481  {
482  _stack_reset_root();
483  m_curr->flags |= c4::yml::RUNK|c4::yml::RTOP;
484  m_curr->evt_type = {};
485  m_curr->evt_id = 0;
486  m_arena = arena;
487  m_arena_pos = 0; // the arena cursor restarts at zero
488  m_src = str;
489  m_evt = dst;
490  m_evt_size = dst_size;
491  m_evt_pos = 0; // the event cursor restarts at zero
492  m_evt_prev = 0;
493  m_has_docs = false;
494  m_has_yaml_directive = false;
495  for(TagDirective &td : m_tag_directives) // forget every tag directive
496  td = {};
497  }
498 
499  /** get the size needed for the event buffer from the previous parse
500  * @warning this is valid only until the next parse */
502  {
503  return m_evt_pos;
504  }
505 
506  /** get the size needed for the arena from the previous parse
 * @return the value of m_arena_pos; fits_buffers() compares it
 * against the length of the arena given to reset()
507  * @warning this is valid only until the next parse */
508  size_t required_size_arena() const
509  {
510  return m_arena_pos;
511  }
512 
513  /** Predicate to test if the event buffer successfully accommodated
514  * all the parse events, and the arena all the data placed in it.
515  * @warning this is valid only until the next parse */
516  bool fits_buffers() const
517  {
518  return m_evt_pos <= m_evt_size && m_arena_pos <= m_arena.len; // required sizes vs the capacities given to reset()
519  }
520 
/** No-op: the argument is ignored. NOTE(review): presumably required
 * by the handler interface expected by the parser -- confirm. */
521  void reserve_arena(int /*arena_size*/)
522  {
523  // does not apply here
524  }
525 
526  /** @} */
527 
528 public:
529 
530  /** @name parse events
531  * @{ */
532 
/** Begin a parse. The handler must have been reset() beforehand with
 * the very same source buffer: both the pointer and the length of
 * @p src are asserted to match the buffer stored by reset(). All the
 * arguments are then forwarded to _stack_start_parse(). */
533  void start_parse(const char* filename, csubstr src, c4::yml::detail::pfn_relocate_arena relocate_arena, void *relocate_arena_data)
534  {
535  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, src.str == m_src.str);
536  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, src.len == m_src.len);
537  this->_stack_start_parse(filename, src, relocate_arena, relocate_arena_data);
538  }
539 
541  {
542  if((_num_tag_directives() || m_has_yaml_directive) && !m_has_docs)
543  _RYML_ERR_PARSE_(m_stack.m_callbacks, m_curr->pos, "directives cannot be used without a document");
544  this->_stack_finish_parse();
545  }
546 
548  {
549  while(m_stack.size() > 1)
550  _pop();
551  }
552 
553  /** @} */
554 
555 public:
556 
557  /** @name YAML stream events */
558  /** @{ */
559 
561  {
562  _send_flag_only_(ievt::BSTR);
563  }
564 
/** end of the YAML stream: sends ievt::ESTR through _send_flag_only_() */
565  void end_stream()
566  {
567  _send_flag_only_(ievt::ESTR);
568  }
569 
570  /** @} */
571 
572 public:
573 
574  /** @name YAML document events */
575  /** @{ */
576 
577  /** implicit doc start (without ---): sends ievt::BDOC, then calls
 * _push() and enables DOC when _stack_should_push_on_begin_doc()
 * returns true, and finally records that a document was seen */
578  void begin_doc()
579  {
580  _c4dbgpf("{}/{}: begin_doc", m_evt_pos, m_evt_size);
581  _send_flag_only_(ievt::BDOC);
582  if(_stack_should_push_on_begin_doc())
583  {
584  _c4dbgp("push!");
585  _push();
586  _enable_(DOC);
587  }
588  m_has_docs = true; // read by the check that rejects directives used without a document
589  }
590  /** implicit doc end (without ...): sends ievt::EDOC, and then calls
 * _pop() when _stack_should_pop_on_end_doc() returns true */
591  void end_doc()
592  {
593  _c4dbgpf("{}/{}: end_doc", m_evt_pos, m_evt_size);
594  _send_flag_only_(ievt::EDOC);
595  if(_stack_should_pop_on_end_doc())
596  {
597  _c4dbgp("pop!");
598  _pop();
599  }
600  }
601 
602  /** explicit doc start, with --- */
604  {
605  _c4dbgpf("{}/{}: begin_doc_expl", m_evt_pos, m_evt_size);
606  _send_flag_only_(ievt::BDOC|ievt::EXPL);
607  _c4dbgp("push!");
608  _push();
609  _enable_(DOC);
610  m_has_docs = true;
611  }
612  /** explicit doc end, with ... */
614  {
615  _c4dbgpf("{}/{}: end_doc_expl", m_evt_pos, m_evt_size);
616  _send_flag_only_(ievt::EDOC|ievt::EXPL);
617  if(_stack_should_pop_on_end_doc())
618  {
619  _c4dbgp("pop!");
620  _pop();
621  }
622  m_has_yaml_directive = false;
623  }
624 
625  /** @} */
626 
627 public:
628 
629  /** @name YAML map functions */
630  /** @{ */
631 
633  {
634  _c4dbgpf("{}/{}: bmap key flow", m_evt_pos, m_evt_size);
635  _send_flag_only_(ievt::KEY_|ievt::BMAP|ievt::FLOW);
636  _mark_parent_with_children_();
638  _push();
639  }
641  {
642  _c4dbgpf("{}/{}: bmap key block", m_evt_pos, m_evt_size);
643  _send_flag_only_(ievt::KEY_|ievt::BMAP|ievt::BLCK);
644  _mark_parent_with_children_();
646  _push();
647  }
648 
650  {
651  _c4dbgpf("{}/{}: bmap flow", m_evt_pos, m_evt_size);
652  _send_flag_only_(ievt::VAL_|ievt::BMAP|ievt::FLOW);
653  _mark_parent_with_children_();
654  _enable_(c4::yml::MAP|c4::yml::FLOW_SL);
655  _push();
656  }
658  {
659  _c4dbgpf("{}/{}: bmap block", m_evt_pos, m_evt_size);
660  _send_flag_only_(ievt::VAL_|ievt::BMAP|ievt::BLCK);
661  _mark_parent_with_children_();
662  _enable_(c4::yml::MAP|c4::yml::BLOCK);
663  _push();
664  }
665 
667  {
668  _pop();
669  _send_flag_only_(ievt::EMAP);
670  }
671 
/** end of a flow map: calls _pop() and then sends ievt::EMAP. The
 * multiline argument is not used. */
672  void end_map_flow(bool /*multiline*/)
673  {
674  _pop();
675  _send_flag_only_(ievt::EMAP);
676  }
677 
678  /** @} */
679 
680 public:
681 
682  /** @name YAML seq events */
683  /** @{ */
684 
686  {
687  _c4dbgpf("{}/{}: bseq key flow", m_evt_pos, m_evt_size);
688  _send_flag_only_(ievt::KEY_|ievt::BSEQ|ievt::FLOW);
689  _mark_parent_with_children_();
691  _push();
692  }
694  {
695  _c4dbgpf("{}/{}: bseq key block", m_evt_pos, m_evt_size);
696  _send_flag_only_(ievt::KEY_|ievt::BSEQ|ievt::BLCK);
697  _mark_parent_with_children_();
699  _push();
700  }
701 
703  {
704  _c4dbgpf("{}/{}: bseq flow", m_evt_pos, m_evt_size);
705  _send_flag_only_(ievt::VAL_|ievt::BSEQ|ievt::FLOW);
706  _mark_parent_with_children_();
707  _enable_(c4::yml::SEQ|c4::yml::FLOW_SL);
708  _push();
709  }
711  {
712  _c4dbgpf("{}/{}: bseq block", m_evt_pos, m_evt_size);
713  _send_flag_only_(ievt::VAL_|ievt::BSEQ|ievt::BLCK);
714  _mark_parent_with_children_();
715  _enable_(c4::yml::SEQ|c4::yml::BLOCK);
716  _push();
717  }
718 
720  {
721  _pop();
722  _send_flag_only_(ievt::ESEQ);
723  }
724 
/** end of a flow seq: calls _pop() and then sends ievt::ESEQ. The
 * multiline argument is not used. */
725  void end_seq_flow(bool /*multiline*/)
726  {
727  _pop();
728  _send_flag_only_(ievt::ESEQ);
729  }
730 
731  /** @} */
732 
733 public:
734 
735  /** @name YAML structure events */
736  /** @{ */
737 
/** sibling event: asserts that there is a parent state, and clears
 * the event type of the current state. NOTE(review): presumably
 * called by the parser between consecutive children of a container
 * -- confirm. */
738  void add_sibling()
739  {
740  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, m_parent);
741  m_curr->evt_type = {};
742  }
743 
744  /** set the previous val as the first key of a new map, with flow style.
745  *
746  * See the documentation for @ref doc_event_handlers, which has
747  * important notes about this event.
748  */
750  {
751  _c4dbgpf("{}/{}: prev={} actually_val_is_first_key_of_new_map_flow", m_evt_pos, m_evt_size, m_evt_prev);
752  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, m_evt_pos > 2);
753  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, m_evt_prev > 0);
754  // BEFORE
755  // ... flag start len (free)
756  // | |
757  // prev curr
758  // AFTER
759  // ... BMAP flag start len (free)
760  // | |
761  // prev curr
762  if(m_evt_prev + 1 < m_evt_size)
763  {
764  if(m_evt[m_evt_prev] & ievt::WSTR)
765  {
766  _c4dbgpf("{}/{}: WSTR", m_evt_pos, m_evt_size);
767  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, m_evt_prev > 0);
768  int32_t pos = _extend_left_to_include_tag_and_or_anchor(m_evt_prev);
769  if(m_evt_pos + 1 < m_evt_size)
770  {
771  for(int32_t i = pos; i <= m_evt_prev; i = _next(i))
772  {
773  m_evt[i] |= ievt::KEY_;
774  m_evt[i] &= ~ievt::VAL_;
775  }
776  int32_t num_move = m_evt_pos + 1 - pos;
777  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, num_move > 0);
778  memmove(m_evt + pos + 1, m_evt + pos, (size_t)num_move * sizeof(ievt::DataType));
779  }
780  m_evt[pos] = ievt::BMAP|ievt::FLOW|ievt::VAL_;
781  // move PSTR to prev
782  if(m_evt[pos + 1] & ievt::PSTR)
783  {
784  m_evt[pos ] |= ievt::PSTR;
785  m_evt[pos + 1] &= ~ievt::PSTR;
786  }
787  }
788  else
789  {
790  _c4dbgpf("{}/{}: container key", m_evt_pos, m_evt_size);
791  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, (m_evt[m_evt_prev] & (ievt::EMAP|ievt::ESEQ)));
792  int32_t pos;
793  _c4dbgpf("{}/{}: find matching open for {}", m_evt_pos, m_evt_size, m_evt_prev);
794  if((m_evt[m_evt_prev] & ievt::EMAP) == ievt::EMAP)
795  {
796  pos = _find_matching_open(ievt::BMAP, ievt::EMAP, m_evt_prev);
797  }
798  else
799  {
800  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, (m_evt[m_evt_prev] & ievt::ESEQ));
801  pos = _find_matching_open(ievt::BSEQ, ievt::ESEQ, m_evt_prev);
802  }
803  _c4dbgpf("{}/{}: matching open for {}={}", m_evt_pos, m_evt_size, m_evt_prev, pos);
804  _RYML_CHECK_BASIC_(m_stack.m_callbacks, pos >= 0); // internal error
805  _RYML_CHECK_BASIC_(m_stack.m_callbacks, pos < m_evt_prev); // internal error
806  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, (m_evt[pos] & ievt::ESEQ) == (m_evt[m_evt_prev] & ievt::BSEQ));
807  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, (m_evt[pos] & ievt::EMAP) == (m_evt[m_evt_prev] & ievt::BMAP));
808  // shift the array one position to the right, starting at pos
809  int32_t posp1 = pos + 1;
810  if(m_evt_pos + 1 < m_evt_size)
811  {
812  int32_t num_move = m_evt_pos + 1 - pos;
813  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, num_move > 0);
814  memmove(m_evt + posp1, m_evt + pos, (size_t)num_move * sizeof(ievt::DataType));
815  }
816  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, posp1 < m_evt_pos);
817  // start the map
818  m_evt[pos] = ievt::BMAP|ievt::FLOW|ievt::VAL_;
819  // set next as key, not val
820  m_evt[posp1] |= ievt::KEY_;
821  m_evt[posp1] &= ~ievt::VAL_;
822  // move PSTR to pos
823  if(m_evt[posp1] & ievt::PSTR)
824  {
825  m_evt[pos] |= ievt::PSTR;
826  m_evt[posp1] &= ~ievt::PSTR;
827  }
828  }
829  }
830  m_curr->evt_id = m_evt_pos - 2;
831  ++m_evt_prev;
832  ++m_evt_pos;
833  _enable_(c4::yml::MAP|c4::yml::FLOW_SL);
834  _push();
835  }
836 
837  /** like its flow counterpart, but this function can only be
838  * called after the end of a flow-val at root or doc level.
839  *
840  * See the documentation for @ref doc_event_handlers, which has
841  * important notes about this event.
842  */
844  {
845  _c4dbgpf("{}/{}: prev={} actually_val_is_first_key_of_new_map_block", m_evt_pos, m_evt_size, m_evt_prev);
846  if(m_evt_prev < m_evt_size)
847  {
848  // interpolate BMAP|VAL|BLCK after the last BDOC
849  int32_t pos = _find_last_bdoc(m_evt_pos);
850  if(pos >= 0)
851  {
852  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, pos < m_evt_size);
853  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, pos < m_evt_pos);
854  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, (m_evt[pos] & ievt::BDOC) == ievt::BDOC);
855  if(m_evt_pos < m_evt_size)
856  {
857  ++pos; // add 1 to write after BDOC
858  int32_t num_move = m_evt_pos - pos;
859  int32_t posp1 = pos + 1;
860  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, ((m_evt[pos] & ievt::BSEQ) == ievt::BSEQ) || ((m_evt[pos] & ievt::BMAP) == ievt::BMAP));
861  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, num_move > 0);
862  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, 0 == (m_evt[posp1] & ievt::PSTR));
863  memmove(m_evt + posp1, m_evt + pos, (size_t)num_move * sizeof(ievt::DataType));
864  m_evt[pos] = ievt::VAL_|ievt::BMAP|ievt::BLCK;
865  m_evt[posp1] &= ~ievt::VAL_;
866  m_evt[posp1] |= ievt::KEY_;
867  }
868  }
869  }
870  ++m_curr->evt_id;
871  ++m_evt_prev;
872  ++m_evt_pos;
873  _push();
874  }
875 
876  /** @} */
877 
878 public:
879 
880  /** @cond dev */
/** Search backwards in the event buffer, starting at (and including)
 * @p pos, for the closest event having all the bits of ievt::BDOC.
 * @param pos index where the search starts; asserted to be smaller
 *        than the size of the event buffer
 * @return the index of that event, or -1 when none is found */
881  int32_t _find_last_bdoc(int32_t pos) const
882  {
883  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, pos < m_evt_size); // it's safe to read from the array
884  while(pos >= 0)
885  {
886  ievt::DataType e = m_evt[pos];
887  if((e & ievt::BDOC) == ievt::BDOC)
888  return pos;
889  pos -= (e & ievt::PSTR) ? 3 : 1; // PSTR: the previous event has a string, so jump over its offset and length as well
890  }
891  return -1; // LCOV_EXCL_LINE
892  }
/** Find the open event that matches the close event located at
 * @p pos, walking backwards with _prev() and skipping over nested
 * open/close pairs of the same kind.
 * @param open the flags of the open event (eg ievt::BMAP)
 * @param close the flags of the close event (eg ievt::EMAP)
 * @param pos index of the close event; asserted to have all the bits
 *        of @p close
 * @return the index of the matching open event, or -1 when it is
 *         not found */
893  int32_t _find_matching_open(ievt::DataType open, ievt::DataType close, int32_t pos) const
894  {
895  _c4dbgpf("find_matching: start at {}", pos);
896  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, pos < m_evt_size);
897  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, (m_evt[pos] & close) == close);
898  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, (m_evt[pos] & open) == (close & ~ievt::END_));
899  pos = _prev(pos); // don't count the starting close token
900  uint32_t count = 0; // number of nested close events still waiting for their open event
901  while(pos >= 0)
902  {
903  ievt::DataType e = m_evt[pos];
904  _c4dbgpf("find_matching: pos={} count={} e={}", pos, count, m_evt[pos]);
905  if((e & close) == close)
906  {
907  _c4dbgpf(".............: pos={} close! count={} e={}", pos, count, m_evt[pos]);
908  ++count;
909  }
910  else if((e & open) == open)
911  {
912  _c4dbgpf(".............: pos={} open! count={} e={}", pos, count, m_evt[pos]);
913  if(!count) // no nested pair pending: this is the matching open
914  return pos;
915  else
916  --count;
917  }
918  pos = _prev(pos);
919  }
920  _c4dbgpf("find_matching: not found!", 0); // LCOV_EXCL_LINE
921  return -1; // LCOV_EXCL_LINE
922  }
/** Starting at the event at @p pos, move left over the consecutive
 * events immediately before it that have any of the bits ievt::TAG_
 * or ievt::ANCH. The event at index 0 is never included.
 * @return the index of the leftmost such event, or @p pos unchanged
 *         when the preceding event is neither a tag nor an anchor */
923  int32_t _extend_left_to_include_tag_and_or_anchor(int32_t pos) const
924  {
925  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, pos < m_evt_size);
926  int32_t prev = _prev(pos);
927  while((prev > 0) && (m_evt[prev] & (ievt::TAG_|ievt::ANCH)))
928  {
929  _c4dbgpf("{}/{}: {} is anchor/tag. extend to {}", m_evt_pos, m_evt_size, prev, prev);
930  pos = prev;
931  prev = _prev(prev);
932  }
933  return pos;
934  }
/** get the index of the event after the event at @p pos: 3 slots
 * ahead when the event at @p pos has any of the bits in ievt::WSTR
 * (ie, it is followed by the offset and length of its string), or 1
 * slot ahead otherwise */
935  C4_ALWAYS_INLINE int32_t _next(int32_t pos) const noexcept
936  {
937  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, pos < m_evt_size);
938  return pos + ((m_evt[pos] & ievt::WSTR) ? 3 : 1);
939  }
/** get the index of the event before the event at @p pos: 3 slots
 * back when the event at @p pos has the flag ievt::PSTR (ie, the
 * previous event has a string), or 1 slot back otherwise. The result
 * is negative when there is no previous event. */
940  C4_ALWAYS_INLINE int32_t _prev(int32_t pos) const noexcept
941  {
942  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, pos < m_evt_size);
943  return pos - ((m_evt[pos] & ievt::PSTR) ? 3 : 1);
944  }
945  /** @endcond */
946 
947 public:
948 
949  /** @name YAML scalar events */
950  /** @{ */
951 
952 
953  C4_ALWAYS_INLINE void set_key_scalar_plain_empty()
954  {
955  _c4dbgpf("{}/{}: set_key_scalar_plain_empty", m_evt_pos, m_evt_size);
956  _send_key_scalar_(_get_latest_empty_scalar(), ievt::PLAI);
958  }
959  C4_ALWAYS_INLINE void set_val_scalar_plain_empty()
960  {
961  _c4dbgpf("{}/{}: set_val_scalar_plain_empty", m_evt_pos, m_evt_size);
962  _send_val_scalar_(_get_latest_empty_scalar(), ievt::PLAI);
964  }
965  C4_ALWAYS_INLINE csubstr _get_latest_empty_scalar() const
966  {
967  // ideally we should search back in the latest event that has
968  // a scalar, then select a zero-length scalar immediately
969  // after that scalar. But this also works for now:
970  return m_src.first(0);
971  }
972 
973 
974  C4_ALWAYS_INLINE void set_key_scalar_plain(csubstr scalar)
975  {
976  _c4dbgpf("{}/{}: set_key_scalar_plain: @{} [{}]~~~{}~~~", m_evt_pos, m_evt_size, scalar.str-m_src.str, scalar.len, scalar);
977  _send_key_scalar_(scalar, ievt::PLAI);
979  }
980  C4_ALWAYS_INLINE void set_val_scalar_plain(csubstr scalar)
981  {
982  _c4dbgpf("{}/{}: set_val_scalar_plain: @{} [{}]~~~{}~~~", m_evt_pos, m_evt_size, scalar.str-m_src.str, scalar.len, scalar);
983  _send_val_scalar_(scalar, ievt::PLAI);
985  }
986 
987 
988  C4_ALWAYS_INLINE void set_key_scalar_dquoted(csubstr scalar)
989  {
990  _c4dbgpf("{}/{}: set_key_scalar_dquo: @{} [{}]~~~{}~~~", m_evt_pos, m_evt_size, scalar.str?size_t(scalar.str-m_src.str):m_src.len, scalar.len, scalar.str?scalar:csubstr{});
991  _send_key_scalar_(scalar, ievt::DQUO);
993  }
994  C4_ALWAYS_INLINE void set_val_scalar_dquoted(csubstr scalar)
995  {
996  _c4dbgpf("{}/{}: set_val_scalar_dquo: @{} [{}]~~~{}~~~", m_evt_pos, m_evt_size, scalar.str?size_t(scalar.str-m_src.str):m_src.len, scalar.len, scalar.str?scalar:csubstr{});
997  _send_val_scalar_(scalar, ievt::DQUO);
999  }
1000 
1001 
1002  C4_ALWAYS_INLINE void set_key_scalar_squoted(csubstr scalar)
1003  {
1004  _c4dbgpf("{}/{}: set_key_scalar_squo: @{} [{}]~~~{}~~~", m_evt_pos, m_evt_size, scalar.str-m_src.str, scalar.len, scalar);
1005  _send_key_scalar_(scalar, ievt::SQUO);
1006  _enable_(c4::yml::KEY|c4::yml::KEY_SQUO);
1007  }
1008  C4_ALWAYS_INLINE void set_val_scalar_squoted(csubstr scalar)
1009  {
1010  _c4dbgpf("{}/{}: set_val_scalar_squo: @{} [{}]~~~{}~~~", m_evt_pos, m_evt_size, scalar.str-m_src.str, scalar.len, scalar);
1011  _send_val_scalar_(scalar, ievt::SQUO);
1012  _enable_(c4::yml::VAL|c4::yml::VAL_SQUO);
1013  }
1014 
1015 
1016  C4_ALWAYS_INLINE void set_key_scalar_literal(csubstr scalar)
1017  {
1018  _c4dbgpf("{}/{}: set_key_scalar_literal: @{} [{}]~~~{}~~~", m_evt_pos, m_evt_size, scalar.str?size_t(scalar.str-m_src.str):m_src.len, scalar.len, scalar.str?scalar:csubstr{});
1019  _send_key_scalar_(scalar, ievt::LITL);
1021  }
1022  C4_ALWAYS_INLINE void set_val_scalar_literal(csubstr scalar)
1023  {
1024  _c4dbgpf("{}/{}: set_val_scalar_literal: @{} [{}]~~~{}~~~", m_evt_pos, m_evt_size, scalar.str?size_t(scalar.str-m_src.str):m_src.len, scalar.len, scalar.str?scalar:csubstr{});
1025  _send_val_scalar_(scalar, ievt::LITL);
1027  }
1028 
1029 
1030  C4_ALWAYS_INLINE void set_key_scalar_folded(csubstr scalar)
1031  {
1032  _c4dbgpf("{}/{}: set_key_scalar_folded: @{} [{}]~~~{}~~~", m_evt_pos, m_evt_size, scalar.str?size_t(scalar.str-m_src.str):m_src.len, scalar.len, scalar.str?scalar:csubstr{});
1033  _send_key_scalar_(scalar, ievt::FOLD);
1035  }
1036  C4_ALWAYS_INLINE void set_val_scalar_folded(csubstr scalar)
1037  {
1038  _c4dbgpf("{}/{}: set_val_scalar_folded: @{} [{}]~~~{}~~~", m_evt_pos, m_evt_size, scalar.str?size_t(scalar.str-m_src.str):m_src.len, scalar.len, scalar.str?scalar:csubstr{});
1039  _send_val_scalar_(scalar, ievt::FOLD);
1041  }
1042 
1043 
1044  C4_ALWAYS_INLINE void mark_key_scalar_unfiltered()
1045  {
1046  _c4dbgpf("{}/{}: mark_key_scalar_unfiltered", m_evt_pos, m_evt_size);
1047  if(m_evt_pos < m_evt_size)
1048  m_evt[m_evt_pos] |= ievt::UNFILT;
1049  }
1050  C4_ALWAYS_INLINE void mark_val_scalar_unfiltered()
1051  {
1052  _c4dbgpf("{}/{}: mark_val_scalar_unfiltered", m_evt_pos, m_evt_size);
1053  if(m_evt_pos < m_evt_size)
1054  m_evt[m_evt_pos] |= ievt::UNFILT;
1055  }
1056 
1057  /** @} */
1058 
1059 public:
1060 
1061  /** @cond dev*/
/* Write the string payload of the event at index `i`:
 *   - m_evt[i+1] receives the offset of the string, relative to the
 *     source buffer when the string is inside it, or otherwise
 *     relative to the arena (in which case ievt::AREN is also set on
 *     the event)
 *   - m_evt[i+2] receives the length of the string
 *   - m_evt[i+3] is initialized to ievt::PSTR, so that the following
 *     event can jump back over the string
 *
 * The caller must ensure that i + 3 < m_evt_size, and must have set
 * one of the ievt::WSTR bits on m_evt[i].
 *
 * The body is wrapped in do { } while(0) so that the macro expands to
 * a single statement (safe inside an unbraced if/else), and the
 * parameters are parenthesized so that arbitrary expressions can be
 * passed. As before, the invocation must be followed by a semicolon. */
#define _add_scalar_(i, scalar) \
    do { \
        _c4dbgpf("{}/{}: scalar!", (i), m_evt_size); \
        _RYML_ASSERT_BASIC_(m_stack.m_callbacks, (scalar).is_sub(m_src) || (scalar).is_sub(m_arena) || ((scalar).str == nullptr)); \
        _RYML_ASSERT_BASIC_(m_stack.m_callbacks, m_evt[(i)] & ievt::WSTR); \
        _RYML_ASSERT_BASIC_(m_stack.m_callbacks, (i) + 3 < m_evt_size); \
        if(C4_LIKELY((scalar).is_sub(m_src))) \
        { \
            m_evt[(i) + 1] = (ievt::DataType)((scalar).str - m_src.str); \
        } \
        else \
        { \
            m_evt[(i)] |= ievt::AREN; \
            m_evt[(i) + 1] = (ievt::DataType)((scalar).str - m_arena.str); \
            _c4dbgpf("{}/{}: arena! ->{}", (i), m_evt_size, m_evt[(i)+1]); \
        } \
        m_evt[(i) + 2] = (ievt::DataType)(scalar).len; \
        m_evt[(i) + 3] = ievt::PSTR; \
    } while(0)
1079  /** @endcond */
1080 
1081  /** @name YAML anchor/reference events */
1082  /** @{ */
1083 
1084  void set_key_anchor(csubstr anchor)
1085  {
1086  _c4dbgpf("{}/{}: set_key_anchor", m_evt_pos, m_evt_size);
1087  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, !_has_any_(KEYREF));
1088  _enable_(c4::yml::KEYANCH);
1089  if(m_evt_pos + 3 < m_evt_size)
1090  {
1091  m_evt[m_evt_pos] |= ievt::KEY_|ievt::ANCH;
1092  _add_scalar_(m_evt_pos, anchor);
1093  }
1094  m_evt_prev = m_evt_pos;
1095  m_evt_pos += 3;
1096  }
1097  void set_val_anchor(csubstr anchor)
1098  {
1099  _c4dbgpf("{}/{}: set_val_anchor", m_evt_pos, m_evt_size);
1100  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, !_has_any_(VALREF));
1101  _enable_(c4::yml::VALANCH);
1102  if(m_evt_pos + 3 < m_evt_size)
1103  {
1104  m_evt[m_evt_pos] |= ievt::VAL_|ievt::ANCH;
1105  _add_scalar_(m_evt_pos, anchor);
1106  }
1107  m_evt_prev = m_evt_pos;
1108  m_evt_pos += 3;
1109  }
1110 
1111  void set_key_ref(csubstr ref)
1112  {
1113  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, ref.begins_with('*'));
1114  if(C4_UNLIKELY(_has_any_(KEYANCH)))
1115  _RYML_ERR_PARSE_(m_stack.m_callbacks, m_curr->pos, "key cannot have both anchor and ref");
1116  _enable_(c4::yml::KEY|c4::yml::KEYREF);
1117  _send_str_(ref.sub(1), ievt::KEY_|ievt::ALIA); // skip the leading *
1118  }
1119  void set_val_ref(csubstr ref)
1120  {
1121  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, ref.begins_with('*'));
1122  if(C4_UNLIKELY(_has_any_(VALANCH)))
1123  _RYML_ERR_PARSE_(m_stack.m_callbacks, m_curr->pos, "val cannot have both anchor and ref");
1124  _enable_(c4::yml::VAL|c4::yml::VALREF);
1125  _send_str_(ref.sub(1), ievt::VAL_|ievt::ALIA); // skip the leading *
1126  }
1127 
1128  /** @} */
1129 
1130 public:
1131 
1132  /** @name YAML tag events */
1133  /** @{ */
1134 
1135  void set_key_tag(csubstr tag)
1136  {
1137  _c4dbgpf("{}/{}: set key tag ~~~{}~~~", m_evt_pos, m_evt_size, tag);
1138  _enable_(c4::yml::KEYTAG);
1139  _set_tag(tag, ievt::KEY_);
1140  }
1141  void set_val_tag(csubstr tag)
1142  {
1143  _c4dbgpf("{}/{}: set val tag [{}]~~~{}~~~", m_evt_pos, m_evt_size, tag.len, tag);
1144  _enable_(c4::yml::VALTAG);
1145  _set_tag(tag, ievt::VAL_);
1146  }
1147  void _set_tag(csubstr tag, ievt::DataType which)
1148  {
1149  csubstr ttag = _transform_directive(tag);
1150  _c4dbgpf("{}/{}: transformed_tag [{}]~~~{}~~~", m_evt_pos, m_evt_size, ttag.len, ttag);
1151  if(m_evt_pos + 3 < m_evt_size)
1152  {
1153  m_evt[m_evt_pos] |= which|ievt::TAG_;
1154  _add_scalar_(m_evt_pos, ttag);
1155  }
1156  m_evt_prev = m_evt_pos;
1157  m_evt_pos += 3;
1158  }
1159 
1160  /** @} */
1161 
1162 public:
1163 
1164  /** @name YAML directive events */
1165  /** @{ */
1166 
1167  void add_directive(csubstr directive)
1168  {
1169  _c4dbgpf("{}/{}: add directive ~~~{}~~~", m_evt_pos, m_evt_size, directive);
1170  _RYML_ASSERT_BASIC_(m_stack.m_callbacks, directive.begins_with('%'));
1171  if(directive.begins_with("%TAG"))
1172  {
1173  const id_type pos = _num_tag_directives();
1174  if(C4_UNLIKELY(pos >= RYML_MAX_TAG_DIRECTIVES))
1175  _RYML_ERR_PARSE_(m_stack.m_callbacks, m_curr->pos, "too many directives");
1176  TagDirective &td = m_tag_directives[pos];
1177  if(C4_UNLIKELY(!td.create_from_str(directive)))
1178  _RYML_ERR_PARSE_(m_stack.m_callbacks, m_curr->pos, "failed to add directive");
1179  td.next_node_id = (id_type)m_evt_pos;
1180  _send_str_(td.handle, ievt::TAGD);
1181  _send_str_(td.prefix, ievt::TAGV);
1182  }
1183  else if(directive.begins_with("%YAML"))
1184  {
1185  _c4dbgpf("%YAML directive! ignoring...: {}", directive);
1186  if(C4_UNLIKELY(m_has_yaml_directive))
1187  _RYML_ERR_PARSE_(m_stack.m_callbacks, m_curr->pos, "multiple yaml directives");
1188  m_has_yaml_directive = true;
1189  csubstr rest = directive.sub(5).triml(' ');
1190  _send_str_(rest, ievt::YAML);
1191  }
1192  else
1193  {
1194  _c4dbgpf("unknown directive! ignoring... {}", directive);
1195  }
1196  }
1197 
1198  /** @} */
1199 
1200 public:
1201 
1202  /** @name YAML arena events */
1203  /** @{ */
1204 
1205  substr arena_rem()
1206  {
1207  return C4_LIKELY(m_arena_pos <= m_arena.len) ? m_arena.sub(m_arena_pos) : m_arena.last(0);
1208  }
1209 
1210  /** this may fail, in which case a an empty string is returned */
1211  substr alloc_arena(size_t len)
1212  {
1213  substr s = arena_rem();
1214  if(C4_LIKELY(len <= s.len))
1215  s = s.first(len);
1216  else
1217  s.str = nullptr;
1218  m_arena_pos += len;
1219  return s;
1220  }
1221 
1222  /** this may fail, in which case an empty string is returned */
1223  C4_ALWAYS_INLINE substr alloc_arena(size_t len, substr *relocated)
1224  {
1225  (void)relocated;
1226  return alloc_arena(len);
1227  }
1228 
1229  /** @} */
1230 
1231 public:
1232 
1233  /** push a new parent, add a child to the new parent, and set the
1234  * child as the current node */
1235  void _push()
1236  {
1237  _stack_push();
1238  m_curr->evt_type = {};
1239  }
1240 
1241  /** end the current scope */
1242  void _pop()
1243  {
1244  _stack_pop();
1245  }
1246 
1247  template<c4::yml::type_bits bits> C4_ALWAYS_INLINE void _enable__() noexcept
1248  {
1249  m_curr->evt_type |= bits;
1250  }
1251  template<c4::yml::type_bits bits> C4_ALWAYS_INLINE void _disable__() noexcept
1252  {
1253  m_curr->evt_type &= ~bits;
1254  }
1255  template<c4::yml::type_bits bits> C4_ALWAYS_INLINE bool _has_any__() const noexcept
1256  {
1257  return (m_curr->evt_type & bits) != c4::yml::type_bits(0);
1258  }
1259 
1261  {
1262  if(m_parent)
1263  m_parent->has_children = true;
1264  }
1265 
1266  C4_ALWAYS_INLINE void _send_flag_only_(ievt::DataType flags)
1267  {
1268  _c4dbgpf("{}/{}: flag only", m_evt_pos, m_evt_size);
1269  if(m_evt_pos < m_evt_size)
1270  m_evt[m_evt_pos] |= flags;
1271  m_curr->evt_id = m_evt_pos;
1272  m_evt_prev = m_evt_pos;
1273  ++m_evt_pos;
1274  if(m_evt_pos < m_evt_size)
1275  m_evt[m_evt_pos] = {};
1276  }
1277 
1278  C4_ALWAYS_INLINE void _send_key_scalar_(csubstr scalar, ievt::DataType flags)
1279  {
1280  _c4dbgpf("{}/{}: key scalar", m_evt_pos, m_evt_size);
1281  if(m_evt_pos + 3 < m_evt_size)
1282  {
1283  m_evt[m_evt_pos] |= ievt::SCLR|ievt::KEY_|flags;
1284  _add_scalar_(m_evt_pos, scalar);
1285  }
1286  m_curr->evt_id = m_evt_pos;
1287  m_evt_prev = m_evt_pos;
1288  m_evt_pos += 3;
1289  }
1290 
1291  C4_ALWAYS_INLINE void _send_val_scalar_(csubstr scalar, ievt::DataType flags)
1292  {
1293  _c4dbgpf("{}/{}: val scalar", m_evt_pos, m_evt_size);
1294  if(m_evt_pos + 3 < m_evt_size)
1295  {
1296  m_evt[m_evt_pos] |= ievt::SCLR|ievt::VAL_|flags;
1297  _add_scalar_(m_evt_pos, scalar);
1298  }
1299  m_curr->evt_id = m_evt_pos;
1300  m_evt_prev = m_evt_pos;
1301  m_evt_pos += 3;
1302  }
1303 
1304  C4_ALWAYS_INLINE void _send_str_(csubstr scalar, ievt::DataType flags)
1305  {
1306  _c4dbgpf("{}/{}: send str", m_evt_pos, m_evt_size);
1307  if(m_evt_pos + 3 < m_evt_size)
1308  {
1309  m_evt[m_evt_pos] |= flags;
1310  _add_scalar_(m_evt_pos, scalar);
1311  }
1312  m_curr->evt_id = m_evt_pos;
1313  m_evt_prev = m_evt_pos;
1314  m_evt_pos += 3;
1315  }
1316 
1318  {
1319  for(TagDirective &td : m_tag_directives)
1320  td = {};
1321  }
1322  C4_NODISCARD id_type _num_tag_directives() const
1323  {
1324  // this assumes we have a very small number of tag directives
1325  id_type i = 0;
1326  for(; i < RYML_MAX_TAG_DIRECTIVES; ++i)
1327  if(m_tag_directives[i].handle.empty())
1328  break;
1329  return i;
1330  }
1331  csubstr _transform_directive(csubstr tag)
1332  {
1333  // lookup from the end. We want to find the first directive that
1334  // matches the tag and has a target node id leq than the given
1335  // node_id.
1336  for(id_type i = RYML_MAX_TAG_DIRECTIVES-1; i != NONE; --i)
1337  {
1338  TagDirective const& td = m_tag_directives[i];
1339  if(td.handle.empty())
1340  continue;
1341  if(tag.begins_with(td.handle) && (td.handle != td.prefix))
1342  {
1343  substr rem = arena_rem();
1344  size_t len = td.transform(tag, rem, m_stack.m_callbacks, /*with_brackets*/false);
1345  if(len == 0)
1346  return tag;
1347  alloc_arena(len);
1348  return rem.first(len <= rem.len ? len : 0);
1349  }
1350  }
1351  if(tag.begins_with('!'))
1352  {
1353  if(is_custom_tag(tag))
1354  {
1355  _RYML_ERR_PARSE_(m_stack.m_callbacks, m_curr->pos, "tag not found");
1356  }
1357  }
1358  return tag;
1359  }
1360 #undef _enable_
1361 #undef _disable_
1362 #undef _has_any_
1363 #undef _add_scalar_
1364 
1365 };
1366 
1367 /** @} */
1368 
1369 } // namespace extra
1370 } // namespace yml
1371 } // namespace c4
1372 
1373 
1374 // NOLINTEND(hicpp-signed-bitwise)
1375 C4_SUPPRESS_WARNING_GCC_CLANG_POP
1376 
1377 #endif /* _C4_YML_EXTRA_EVENT_HANDLER_INTS_HPP_ */
#define _has_any_(bits)
#define RYML_EXPORT
Definition: export.hpp:15
Callbacks const & get_callbacks()
get the global callbacks
Definition: common.cpp:94
int32_t estimate_events_ints_size(csubstr src)
Read YAML source and, without undergoing a full parse, estimate the size of the integer buffer requir...
uint32_t type_bits
the integral type necessary to cover all the bits for NodeType_e
Definition: node_type.hpp:29
@ VALANCH
the val has an &anchor
Definition: node_type.hpp:45
@ KEY_DQUO
mark key scalar as double quoted "
Definition: node_type.hpp:68
@ VALREF
a *reference: the val references an &anchor
Definition: node_type.hpp:43
@ VALNIL
the val is null (eg {a : } results in a null val)
Definition: node_type.hpp:49
@ MAP
a map: a parent of KEYVAL/KEYSEQ/KEYMAP nodes
Definition: node_type.hpp:38
@ KEY
is member of a map
Definition: node_type.hpp:36
@ VAL_FOLDED
mark val scalar as multiline, block folded >
Definition: node_type.hpp:65
@ KEYTAG
the key has a tag
Definition: node_type.hpp:46
@ FLOW_SL
mark container with single-line flow style (seqs as '[val1,val2], maps as '{key: val,...
Definition: node_type.hpp:59
@ VAL
a scalar: has a scalar (ie string) value, possibly empty. must be a leaf node, and cannot be MAP or S...
Definition: node_type.hpp:37
@ VALTAG
the val has a tag
Definition: node_type.hpp:47
@ SEQ
a seq: a parent of VAL/SEQ/MAP nodes
Definition: node_type.hpp:39
@ VAL_SQUO
mark val scalar as single quoted '
Definition: node_type.hpp:67
@ VAL_PLAIN
mark val scalar as plain scalar (unquoted, even when multiline)
Definition: node_type.hpp:71
@ KEYREF
a *reference: the key references an &anchor
Definition: node_type.hpp:42
@ BLOCK
mark container with block style (seqs as '- val ', maps as 'key: val')
Definition: node_type.hpp:61
@ KEYANCH
the key has an &anchor
Definition: node_type.hpp:44
@ VAL_DQUO
mark val scalar as double quoted "
Definition: node_type.hpp:69
@ KEY_SQUO
mark key scalar as single quoted '
Definition: node_type.hpp:66
@ VAL_LITERAL
mark val scalar as multiline, block literal |
Definition: node_type.hpp:63
@ KEY_LITERAL
mark key scalar as multiline, block literal |
Definition: node_type.hpp:62
@ KEY_PLAIN
mark key scalar as plain scalar (unquoted, even when multiline)
Definition: node_type.hpp:70
@ KEY_FOLDED
mark key scalar as multiline, block folded >
Definition: node_type.hpp:64
@ KEYNIL
the key is null (eg { : b} results in a null key)
Definition: node_type.hpp:48
@ DOC
a document
Definition: node_type.hpp:40
bool is_custom_tag(csubstr tag)
Definition: tag.cpp:9
#define RYML_MAX_TAG_DIRECTIVES
the maximum number of tag directives in a Tree
Definition: tag.hpp:19
void(*)(void *, csubstr prev_arena, substr next_arena) pfn_relocate_arena
EventFlags
enumeration of integer event bits.
@ TAGD
tag directive, name : %TAG <name> .......
@ SCLR
scalar (=VAL in test suite events)
@ LITL
scalar: block literal (|)
@ UNFILT
special flag to mark a scalar as unfiltered (when the parser is set not to filter).
@ EMAP
end map (-MAP in test suite events)
@ DQUO
scalar: double-quoted ("")
@ FOLD
scalar: block folded (>)
@ BMAP
begin map (+MAP in test suite events)
@ MASK
a mask of all bits in this enumeration
@ ESTR
end stream (-STR in test suite events)
@ BSTR
begin stream (+STR in test suite events)
@ BSEQ
begin seq (+SEQ in test suite events)
@ ESEQ
end seq (-SEQ in test suite events)
@ WSTR
WithSTRing: mask of all the events that encode a string following the event. For such events,...
@ FLOW
container: flow: [] for seqs or {} for maps
@ VAL_
as value special flag to enable look-back in the event array. it signifies that the previous event ha...
@ BDOC
begin doc (+DOC in test suite events)
@ BLCK
container: block
@ AREN
IMPORTANT. Marks events whose string was placed in the arena. This happens when the filtered string i...
@ TAGV
tag directive, value: %TAG ...... <value>
@ YAML
yaml directive: %YAML <version>
@ ALIA
*ref (reference)
@ EDOC
end doc (-DOC in test suite events)
@ LAST
the last flag defined above
@ EXPL
--- (with BDOC) or ... (with EDOC)
@ SQUO
scalar: single-quoted (')
int32_t DataType
data type for integer events.
RYML_ID_TYPE id_type
The type of a node id in the YAML tree; to override the default type, define the macro RYML_ID_TYPE t...
Definition: common.hpp:244
@ RTOP
reading at top level
@ RUNK
reading unknown state (when starting): must determine whether scalar, map or seq
@ NONE
an index to none
Definition: common.hpp:251
(Undefined by default) Use shorter error message from checks/asserts: do not show the check condition...
Definition: common.cpp:14
A c-style callbacks class to customize behavior on errors or allocation.
Definition: common.hpp:511
Use this class a base of implementations of event handler to simplify the stack logic.
bool create_from_str(csubstr directive_)
leaves next_node_id unfilled
Definition: tag.cpp:209
csubstr handle
Eg.
Definition: tag.hpp:60
id_type next_node_id
The next node to which this tag directive applies.
Definition: tag.hpp:64
csubstr prefix
Eg.
Definition: tag.hpp:62
size_t transform(csubstr tag, substr output, Callbacks const &callbacks, bool with_brackets=true) const
Definition: tag.cpp:230
A parser event handler that creates a compact representation of the YAML tree in a buffer of integers...
void begin_doc_expl()
explicit doc start, with ---
void end_doc_expl()
explicit doc end, with ...
void _send_key_scalar_(csubstr scalar, ievt::DataType flags)
substr alloc_arena(size_t len, substr *relocated)
this may fail, in which case an empty string is returned
bool fits_buffers() const
Predicate to test if the event buffer successfully accommodated all the parse events.
void begin_doc()
implicit doc start (without ---)
void _send_flag_only_(ievt::DataType flags)
substr alloc_arena(size_t len)
this may fail, in which case an empty string is returned
void actually_val_is_first_key_of_new_map_flow()
set the previous val as the first key of a new map, with flow style.
void _set_tag(csubstr tag, ievt::DataType which)
void start_parse(const char *filename, csubstr src, c4::yml::detail::pfn_relocate_arena relocate_arena, void *relocate_arena_data)
EventHandlerInts(c4::yml::Callbacks const &cb)
void actually_val_is_first_key_of_new_map_block()
like its flow counterpart, but this function can only be called after the end of a flow-val at root o...
int required_size_events() const
get the size needed for the event buffer from the previous parse
void set_val_scalar_dquoted(csubstr scalar)
void _pop()
end the current scope
void _send_str_(csubstr scalar, ievt::DataType flags)
void set_key_scalar_dquoted(csubstr scalar)
void _send_val_scalar_(csubstr scalar, ievt::DataType flags)
size_t required_size_arena() const
get the size needed for the arena from the previous parse
void end_doc()
implicit doc end (without ...)
void _push()
push a new parent, add a child to the new parent, and set the child as the current node
void reset(csubstr str, substr arena, ievt::DataType *dst, int32_t dst_size)