rapidyaml  0.10.0
parse and emit YAML, and do it fast
event_handler_ints.hpp
Go to the documentation of this file.
1 #ifndef _C4_YML_EXTRA_EVENT_HANDLER_INTS_HPP_
2 #define _C4_YML_EXTRA_EVENT_HANDLER_INTS_HPP_
3 
4 /** @file event_handler_ints.hpp An event handler that creates an
5  * integer buffer with a very compact representation of the YAML tree
6  * in a source buffer. This is not part of the main rapidyaml library.
7  *
8  * @see c4::yml::extra::ievt::EventFlags
9  * @see c4::yml::extra::EventHandlerInts
10  * */
11 
#ifndef RYML_SINGLE_HEADER
#ifndef _C4_YML_NODE_TYPE_HPP_
#include <c4/yml/node_type.hpp>
#endif
#ifndef _C4_YML_EVENT_HANDLER_STACK_HPP_
#include <c4/yml/event_handler_stack.hpp>
#endif
#ifndef _C4_YML_TAG_HPP_
#include <c4/yml/tag.hpp>
#endif
#ifndef _C4_YML_DETAIL_DBGPRINT_HPP_
#include <c4/yml/detail/dbgprint.hpp>
#endif
#endif
26 
27 // NOLINTBEGIN(hicpp-signed-bitwise)
28 
29 namespace c4 {
30 namespace yml {
31 namespace extra {
32 
33 /** @addtogroup doc_event_handlers
34  * @{ */
35 
namespace ievt {

/** data type for integer events. This is set to a 32 bit signed
 * integer to allow compatibility with a wide range of processing
 * languages. */
using DataType = int32_t;

/** enumeration of integer event bits. Each enumerator is a single
 * bit, so an event is encoded as the bitwise OR of one event type
 * with any applicable style/structure/buffer flags. */
typedef enum : DataType {

    // Event types
    BSTR = (1 <<  0),   ///< +STR begin stream
    ESTR = (1 <<  1),   ///< -STR end stream
    BDOC = (1 <<  2),   ///< +DOC begin doc
    EDOC = (1 <<  3),   ///< -DOC end doc
    BMAP = (1 <<  4),   ///< +MAP begin map
    EMAP = (1 <<  5),   ///< -MAP end map
    BSEQ = (1 <<  6),   ///< +SEQ begin seq
    ESEQ = (1 <<  7),   ///< -SEQ end seq
    SCLR = (1 <<  8),   ///< =VAL scalar
    ANCH = (1 <<  9),   ///< &anchor
    ALIA = (1 << 10),   ///< *ref =ALI alias (reference)
    TAG_ = (1 << 11),   ///< !tag

    // Style flags
    PLAI = (1 << 12),   ///< plain scalar
    SQUO = (1 << 13),   ///< single-quoted scalar (')
    DQUO = (1 << 14),   ///< double-quoted scalar ("")
    LITL = (1 << 15),   ///< block literal scalar (|)
    FOLD = (1 << 16),   ///< block folded scalar (>)
    FLOW = (1 << 17),   ///< flow container: [] for seqs or {} for maps
    BLCK = (1 << 18),   ///< block container

    // Structure flags
    KEY_ = (1 << 19),   ///< as key
    VAL_ = (1 << 20),   ///< as value
    EXPL = (1 << 21),   ///< `---` (with BDOC) or
                        ///< `...` (with EDOC)

    // Directive flags
    YAML = (1 << 22),   ///< `%YAML <version>`
    TAGD = (1 << 23),   ///< tag directive name : `%TAG <name> .......`
    TAGV = (1 << 24),   ///< tag directive value: `%TAG ...... <value>`

    // Buffer flags
    /// IMPORTANT. Marks events whose string was placed in the
    /// arena. This happens when the filtered string is larger than the
    /// original string in the YAML code (eg from tags that resolve to
    /// a larger string, or from "\L" or "\P" in double quotes, which
    /// expand from two to three bytes). Because of this size
    /// expansion, the filtered string cannot be placed in the original
    /// source and needs to be placed in the arena.
    AREN = (1 << 25),
    /// special flag to enable look-back in the event array. it
    /// signifies that the previous event has a string, meaning that
    /// the jump back to that event is 3 positions. without this flag it
    /// would be impossible to jump to the previous event.
    PSTR = (1 << 26),
    /// special flag to mark a scalar as unfiltered (when the parser
    /// is set not to filter).
    UNFILT = (1 << 27),

    // Utility flags/masks
    LAST = UNFILT,          ///< the last flag defined above
    MASK = (LAST << 1) - 1, ///< a mask of all bits in this enumeration
    /// with string: mask of all the events that encode a string
    /// following the event. If the event has a string, the next two
    /// integers provide respectively the string's offset and
    /// length. See also @ref PSTR.
    ///
    /// NOTE(review): the definition line of this enumerator is absent
    /// from the listing this file was recovered from. It is
    /// reconstructed here as every flag whose description above says
    /// the event carries a string (scalars, aliases, anchors, tags and
    /// the three directive flags) -- confirm against upstream.
    WSTR = SCLR|ALIA|ANCH|TAG_|TAGD|TAGV|YAML,
} EventFlags;

} // namespace ievt
109 
110 /** @} */
111 
112 } // namespace extra
113 } // namespace yml
114 } // namespace c4
115 
116 
117 //-----------------------------------------------------------------------------
118 //-----------------------------------------------------------------------------
119 //-----------------------------------------------------------------------------
120 
121 namespace c4 {
122 namespace yml {
123 namespace extra {
124 
125 /** @addtogroup doc_event_handlers
126  * @{ */
127 
128 /** Read YAML source and, without undergoing a full parse, estimate
129  * the size of the integer buffer required for @ref
130  * EventHandlerInts. This estimation is meant to exceed the actual
131  * number of required events.
132  *
133  * @note This function must overpredict. It does so for every case in
134  * the hundreds/thousands of extensive tests of rapidyaml -- both for
135  * the YAML test suite and the internal cases. If you find a case
136  * where that does not hold, it is a bug. Please report it at
137  * https://github.com/biojppm/rapidyaml/issues! */
138 RYML_EXPORT int32_t estimate_events_ints_size(csubstr src);
139 
140 /** @} */
141 
142 } // namespace extra
143 } // namespace yml
144 } // namespace c4
145 
146 
147 //-----------------------------------------------------------------------------
148 //-----------------------------------------------------------------------------
149 //-----------------------------------------------------------------------------
150 
151 C4_SUPPRESS_WARNING_GCC_CLANG_PUSH
152 C4_SUPPRESS_WARNING_GCC_CLANG("-Wold-style-cast")
153 C4_SUPPRESS_WARNING_GCC("-Wuseless-cast")
154 
155 namespace c4 {
156 namespace yml {
157 namespace extra {
158 
159 
160 /** @addtogroup doc_event_handlers
161  * @{ */
162 
163 /** @cond dev */
164 struct EventHandlerIntsState : public c4::yml::ParserState
165 {
166  c4::yml::type_bits evt_type;
167  int32_t evt_id;
168 };
169 /** @endcond */
170 
171 
172 /** A parser event handler that creates a compact representation of
173  * the YAML tree in a buffer of integers (see @ref ievt::EventFlags)
174  * containing masks (to represent events) and offset+length (to
175  * represent strings in the source buffer).
176  *
177  * This is meant for use by other programming languages, and supports
178  * container keys (unlike the ryml tree). It parses faster than the ryml
179  * tree parser, because the resulting data structure is much simpler.
180  *
181  * The resulting integer buffer is a linear array of integers containing
182  * events (as a mask of @ref ievt::EventFlags), which in some cases (see
183  * @ref ievt::WSTR) are followed by an encoded string (encoded as an
184  * offset and length to the parsed source buffer).
185  *
186  * For example, parsing `[a, bb, ccc]` results in the following event
187  * buffer:
188  *
189  * ```c++
190  * using namespace c4::yml::extra::ievt;
191  * const DataType arr[] = { // result of parsing: [a, bb, ccc]
192  * BSTR, // begin stream
193  * BDOC, // begin doc
194  * VAL_|BSEQ|FLOW, // begin seq as val, flow
195  * VAL_|SCLR|PLAI, 1, 1, // val scalar, plain style: "a" starts at offset 1 and has length 1
196  * VAL_|SCLR|PLAI|PSTR, 4, 2, // val scalar, plain style: "bb" starts at offset 4 and has length 2
197  * VAL_|SCLR|PLAI|PSTR, 8, 3, // val scalar, plain style: "ccc" starts at offset 8 and has length 3
198  * ESEQ|PSTR, // end seq
199  * EDOC, // end doc
200  * ESTR, // end stream
201  * };
202  * ```
203  *
204  * Here is a sketch clarifying the meaning of this event sequence:
205  *
206 @code
207 source : [a, bb, ccc]
208  string offset "a" string offset "bb" string offset "ccc"
209  | string length "a" | string length "bb" | string length "ccc"
210  | | | | | |
211  event event event [ event "a".......|..| event "bb"...........|..| event "ccc"..........|..| event ] event event
212  | | | | | | | | | | | | | | |
213  +--------+-------+------------------+---------------+--+------+--------------------+--+------+--------------------+--+------+-------------+--------+-----|
214 value : BSTR BDOC VAL_|BSEQ|FLOW VAL_|SCLR|PLAI..1..1 VAL_|SCLR|PLAI|PSTR..4..2 VAL_|SCLR|PLAI|PSTR..8..3 ESEQ|PSTR EDOC ESTR (array)
215 event # : 0 1 2 3 . . 4 | . . 5 | . . 6 7 8 (event #)
216 index/pos: 0 1 2 3 4 5 6 | 7 8 9 | 10 11 12 13 14 (index/pos)
217  \ | | \ | | | \ | | |
218  has a string--+--+ has a string-+-----+--+ has a string-+-----+--+
219  | |
220  prev event has string prev event has string
221  (jump back 3 to get to it) (jump back 3 to get to it)
222 @endcode
223  *
224  *
225  * Note that the buffer contains both events and strings encoded as
226  * integer pairs. That is, events that have an associated string are
227  * immediately followed by two integers providing the offset and length
228  * of that string in the source buffer. (In the example above, this
229  * happens in the events for the strings `a`, `bb`, and `ccc` at
230  * positions 3, 6 and 9, respectively).
231  *
232  * The flag @ref ievt::PSTR and the mask @ref ievt::WSTR are provided to
233  * enable easier iteration over the array: you can use them to test for
234  * presence of a string when iterating over the array.
235  *
236  * The flag @ref ievt::PSTR announces that an event is *preceded* by a
237  * string. That is, the previous event has a string, so that when this
238  * flag is found while iterating right-to-left, a jump of -3 should be
239  * used to get at the bitmask of the previous event. (In the example
240  * above, this flag is present for the events for `bb` and `ccc`, but not
241  * `a` because it is not preceded by a string).
242  *
243  * Likewise, to signify that the current event is *followed* by a string,
244  * there is the mask @ref ievt::WSTR, which is a mask of all the flags of
245  * events that have a string: @ref ievt::SCLR, @ref ievt::ALIA, @ref
246  * ievt::ANCH and @ref ievt::TAG_. While iterating left-to-right in the
247  * array, presence of any of the bits in the mask @ref ievt::WSTR means
248  * that a jump of +3 should be employed to get at the bitmask of the next
249  * event.
250  *
251  * Typical code to iterate left-to-right over the array will look like
252  * this:
253  *
254  * ```c++
255  * // source buffer, modified in place during parsing (IMPORTANT!)
256  * substr src = ...;
257  * substr arena = ...; // arena used for scalars/tags that are extended during filtering
258  * // events resulting from parsing
259  * const int events[] = {...};
260  * int events_size = ...;
261  * for(int i = 0; i < events_size; ++i)
262  * {
263  * if(events[i] & ievt::WSTR) // this event has a string following it
264  * {
265  * size_t offset = (size_t)events[i+1];
266  * size_t length = (size_t)events[i+2];
267  * csubstr region = (events[i] & ievt::AREN) ? arena : src; // is the string in the arena?
268  * csubstr str = region.sub(offset, length); // get the string
269  * ...
270  * i += 2; // skip the two ints of the string
271  * // (the jump is three places; the loop adds the other place)
272  * }
273  * else // this is a single-int event
274  * {
275  * ...
276  * }
277  * }
278  * ```
279  *
 * This handler must be initialized with the input source buffer, the
 * output arena, and the output event buffer. This handler will not take
 * ownership nor attempt to resize the output buffer. If the size
 * required for the output buffer or arena is larger than its actual
 * size, parsing goes all the way to the end, determining the required
 * buffer sizes without writing anything past the end of the respective
 * buffer. After parsing is finished, the user must ensure that the
 * buffer size was enough to accommodate all the data that needs to be
 * written into it, or react accordingly (eg, throw an error, or resize
 * the buffer then retry the parse).
 *
 * A couple of functions will be helpful to do this. After parsing, @ref
 * EventHandlerInts::fits_buffers() must be used to verify that the
 * output buffers were enough to accommodate the results. Then, @ref
 * EventHandlerInts::required_size_events() and @ref
 * EventHandlerInts::required_size_arena() can be used to retrieve the
 * necessary information. To get an estimation of the number of events
 * before parsing, see @ref estimate_events_ints_size().
298  *
299  * Typical code to parse YAML with this handler will look like this:
300  *
301  * ```c++
302  * csubstr filename = ...;
303  * substr src = ...;
304  * // estimate the size required for the events buffer,
305  * // overpredicting it to be safe.
306  * int estimated_size = extra::estimate_events_ints_size(src);
307  * extra::EventHandlerInts handler;
308  * ParseEngine<extra::EventHandlerInts> parser(&handler);
309  * // example with a vector
310  * std::vector<int> evts;
 * // ensure we have a fighting chance to accommodate the events
312  * evts.resize((size_t)estimated_size);
313  * // arena to place scalars/tags that may have been extended after filtering
314  * std::vector<char> arena;
315  * arena.resize(src.len); // this is generally enough
316  * // initialize the handler
317  * handler.reset(src, arena, evts.data(), (int)evts.size());
318  * // parse the YAML
319  * parser.parse_in_place_ev(filename, src);
320  * if(handler.fits_buffers()) // were the buffer sizes enough?
321  * {
322  * evts.resize((size_t)handler.required_size_events()); // trim the vector
323  * ...
324  * }
325  * else
326  * {
327  * // event size estimation underpredicted, or arena is too small!
328  * // for the first case, open an issue at
329  * // https://github.com/biojppm/rapidyaml/issues
 *     error("buffer could not accommodate all the events");
331  * // NOTE: see below for notes on doing a parse retry.
332  * }
333  * ```
334  *
 * The result of @ref estimate_events_ints_size() (click to see more
 * info) must be an overprediction: it overpredicts for every single case
 * among the many hundreds of cases covered in the unit tests. This is
 * deliberate, and aims at ensuring that a retry parse is not needed. But
 * conceivably, it may underpredict in some instances not covered by our
 * tests. What to do then?
341  *
342  * First, [open an issue](https://github.com/biojppm/rapidyaml/issues) to
343  * allow the estimation to be improved! Second, there are two ways to
344  * handle this situation in code:
345  *
346  * 1) throw an error (as sketched above)
347  *
348  * 2) grow the buffer to the required size (see @ref
349  * EventHandlerInts::required_size_events()), and then parse
350  * again
351  *
352  * If your code must be able to handle any case including where the
353  * prediction undershoots before the estimate function is fixed (after
354  * you open the issue), that is, if you are considering a parse retry,
355  * there is something important that needs attention. The YAML source
356  * buffer is mutated in-place during the parse, and cannot be used to
357  * parse again. So if you want to retry, you need to keep a pristine copy
358  * of the source, and use it for the retry:
359  *
360  * ```c++
 * const std::string src = ...; // the YAML code to be parsed
 * std::string parsed_src = src; // this is where we will parse (filtered in place during parsing)
 * std::vector<int> evts((size_t)estimated_size); // ensure we have a fighting chance to accommodate the events
 * std::vector<char> arena(src.size()); // ensure we have a fighting chance to accommodate the extended scalars/tags
365  * ParseEngine<extra::EventHandlerInts> parser(&handler);
366  * handler.reset(to_substr(parsed_src), to_substr(arena), evts.data(), (int)evts.size());
367  * parser.parse_in_place_ev(filename, to_substr(parsed_src));
368  * if(handler.fits_buffers()) // were the buffer sizes enough?
369  * {
 *     evts.resize((size_t)handler.required_size_events()); // trim the vector
371  * ...
372  * }
373  * else
374  * {
375  * evts.resize((size_t)handler.required_size_events()); // buffer size was not enough.
376  * arena.resize(handler.required_size_arena()); // buffer size was not enough.
377  * // copy again
378  * parsed_src = src;
379  * // retry parse
380  * handler.reset(to_substr(parsed_src), to_substr(arena), evts.data(), (int)evts.size());
381  * parser.parse_in_place_ev(filename, to_substr(parsed_src));
 *     assert(handler.fits_buffers()); // must always be true
383  * }
384  * ```
385  *
386  * When bringing this to other programming languages, the semantics
387  * will be very similar to this.
388  */
389 struct EventHandlerInts : public c4::yml::EventHandlerStack<EventHandlerInts, EventHandlerIntsState>
390 {
391 
392  /** @name types
393  * @{ */
394 
396  using state = EventHandlerIntsState; // our internal state must inherit from parser state
397 
398  /** @} */
399 
400 public:
401 
402  /** @cond dev */
403  csubstr m_str;
404  ievt::DataType * m_evt;
405  int32_t m_evt_pos;
406  int32_t m_evt_prev;
407  int32_t m_evt_size;
408  substr m_arena;
409  size_t m_arena_pos;
410  TagDirective m_tag_directives[RYML_MAX_TAG_DIRECTIVES];
411  bool m_has_yaml_directive;
412  bool m_has_docs;
413 
414  // undefined at the end
415  #define _enable_(bits) _enable__<bits>()
416  #define _disable_(bits) _disable__<bits>()
417  #define _has_any_(bits) _has_any__<bits>()
418  /** @endcond */
419 
420 public:
421 
422  /** @name construction and resetting
423  * @{ */
424 
426  : EventHandlerStack(cb)
427  {
428  reset(csubstr{}, substr{}, nullptr, 0);
429  }
432  {
433  }
434 
435  void reset(csubstr str, substr arena, ievt::DataType *dst, int32_t dst_size)
436  {
437  _stack_reset_root();
438  m_curr->flags |= c4::yml::RUNK|c4::yml::RTOP;
439  m_curr->evt_type = {};
440  m_curr->evt_id = 0;
441  m_arena = arena;
442  m_arena_pos = 0;
443  m_str = str;
444  m_evt = dst;
445  m_evt_size = dst_size;
446  m_evt_pos = 0;
447  m_evt_prev = 0;
448  m_has_docs = false;
449  m_has_yaml_directive = false;
450  for(TagDirective &td : m_tag_directives)
451  td = {};
452  }
453 
454  /** get the size needed for the event buffer from the previous parse
455  * @warning this is valid only until the next parse */
457  {
458  return m_evt_pos;
459  }
460 
461  /** get the size needed for the arena from the previous parse
462  * @warning this is valid only until the next parse */
463  size_t required_size_arena() const
464  {
465  return m_arena_pos;
466  }
467 
/** Predicate to test if the event buffer and the arena successfully
 * accommodated all the data produced by the parse.
 * @warning this is valid only until the next parse */
471  bool fits_buffers() const
472  {
473  return m_evt_pos <= m_evt_size && m_arena_pos <= m_arena.len;
474  }
475 
/** No-op: the arena is the fixed buffer handed to reset(), so there
 * is nothing to reserve. The parameter is accepted and ignored. */
void reserve_arena(int /*arena_size*/)
{
    // does not apply here
}
480 
481  /** @} */
482 
483 public:
484 
485  /** @name parse events
486  * @{ */
487 
488  void start_parse(const char* filename, c4::yml::detail::pfn_relocate_arena relocate_arena, void *relocate_arena_data)
489  {
490  this->_stack_start_parse(filename, relocate_arena, relocate_arena_data);
491  }
492 
494  {
495  if((_num_tag_directives() || m_has_yaml_directive) && !m_has_docs)
496  _RYML_CB_ERR_(m_stack.m_callbacks, "directives cannot be used without a document", {});
497  this->_stack_finish_parse();
498  }
499 
501  {
502  while(m_stack.size() > 1)
503  _pop();
504  }
505 
506  /** @} */
507 
508 public:
509 
510  /** @name YAML stream events */
511  /** @{ */
512 
514  {
515  _send_flag_only_(ievt::BSTR);
516  }
517 
518  void end_stream()
519  {
520  _send_flag_only_(ievt::ESTR);
521  }
522 
523  /** @} */
524 
525 public:
526 
527  /** @name YAML document events */
528  /** @{ */
529 
530  /** implicit doc start (without ---) */
531  void begin_doc()
532  {
533  _c4dbgpf("{}/{}: begin_doc", m_evt_pos, m_evt_size);
534  _send_flag_only_(ievt::BDOC);
535  if(_stack_should_push_on_begin_doc())
536  {
537  _c4dbgp("push!");
538  _push();
539  _enable_(DOC);
540  }
541  m_has_docs = true;
542  }
543  /** implicit doc end (without ...) */
544  void end_doc()
545  {
546  _c4dbgpf("{}/{}: end_doc", m_evt_pos, m_evt_size);
547  _send_flag_only_(ievt::EDOC);
548  if(_stack_should_pop_on_end_doc())
549  {
550  _c4dbgp("pop!");
551  _pop();
552  }
553  }
554 
555  /** explicit doc start, with --- */
557  {
558  _c4dbgpf("{}/{}: begin_doc_expl", m_evt_pos, m_evt_size);
559  _send_flag_only_(ievt::BDOC|ievt::EXPL);
560  _c4dbgp("push!");
561  _push();
562  _enable_(DOC);
563  m_has_docs = true;
564  }
565  /** explicit doc end, with ... */
567  {
568  _c4dbgpf("{}/{}: end_doc_expl", m_evt_pos, m_evt_size);
569  _send_flag_only_(ievt::EDOC|ievt::EXPL);
570  if(_stack_should_pop_on_end_doc())
571  {
572  _c4dbgp("pop!");
573  _pop();
574  }
575  m_has_yaml_directive = false;
576  }
577 
578  /** @} */
579 
580 public:
581 
582  /** @name YAML map functions */
583  /** @{ */
584 
586  {
587  _c4dbgpf("{}/{}: bmap key flow", m_evt_pos, m_evt_size);
588  _send_flag_only_(ievt::KEY_|ievt::BMAP|ievt::FLOW);
589  _mark_parent_with_children_();
591  _push();
592  }
594  {
595  _c4dbgpf("{}/{}: bmap key block", m_evt_pos, m_evt_size);
596  _send_flag_only_(ievt::KEY_|ievt::BMAP|ievt::BLCK);
597  _mark_parent_with_children_();
599  _push();
600  }
601 
603  {
604  _c4dbgpf("{}/{}: bmap flow", m_evt_pos, m_evt_size);
605  _send_flag_only_(ievt::VAL_|ievt::BMAP|ievt::FLOW);
606  _mark_parent_with_children_();
607  _enable_(c4::yml::MAP|c4::yml::FLOW_SL);
608  _push();
609  }
611  {
612  _c4dbgpf("{}/{}: bmap block", m_evt_pos, m_evt_size);
613  _send_flag_only_(ievt::VAL_|ievt::BMAP|ievt::BLCK);
614  _mark_parent_with_children_();
615  _enable_(c4::yml::MAP|c4::yml::BLOCK);
616  _push();
617  }
618 
619  void end_map()
620  {
621  _pop();
622  _send_flag_only_(ievt::EMAP);
623  }
624 
625  /** @} */
626 
627 public:
628 
629  /** @name YAML seq events */
630  /** @{ */
631 
633  {
634  _c4dbgpf("{}/{}: bseq key flow", m_evt_pos, m_evt_size);
635  _send_flag_only_(ievt::KEY_|ievt::BSEQ|ievt::FLOW);
636  _mark_parent_with_children_();
638  _push();
639  }
641  {
642  _c4dbgpf("{}/{}: bseq key block", m_evt_pos, m_evt_size);
643  _send_flag_only_(ievt::KEY_|ievt::BSEQ|ievt::BLCK);
644  _mark_parent_with_children_();
646  _push();
647  }
648 
650  {
651  _c4dbgpf("{}/{}: bseq flow", m_evt_pos, m_evt_size);
652  _send_flag_only_(ievt::VAL_|ievt::BSEQ|ievt::FLOW);
653  _mark_parent_with_children_();
654  _enable_(c4::yml::SEQ|c4::yml::FLOW_SL);
655  _push();
656  }
658  {
659  _c4dbgpf("{}/{}: bseq block", m_evt_pos, m_evt_size);
660  _send_flag_only_(ievt::VAL_|ievt::BSEQ|ievt::BLCK);
661  _mark_parent_with_children_();
662  _enable_(c4::yml::SEQ|c4::yml::BLOCK);
663  _push();
664  }
665 
666  void end_seq()
667  {
668  _pop();
669  _send_flag_only_(ievt::ESEQ);
670  }
671 
672  /** @} */
673 
674 public:
675 
676  /** @name YAML structure events */
677  /** @{ */
678 
679  void add_sibling()
680  {
681  _RYML_CB_ASSERT(m_stack.m_callbacks, m_parent);
682  m_curr->evt_type = {};
683  }
684 
685  /** set the previous val as the first key of a new map, with flow style.
686  *
687  * See the documentation for @ref doc_event_handlers, which has
688  * important notes about this event.
689  */
691  {
692  _c4dbgpf("{}/{}: prev={} actually_val_is_first_key_of_new_map_flow", m_evt_pos, m_evt_size, m_evt_prev);
693  _RYML_CB_ASSERT(m_stack.m_callbacks, m_evt_pos > 2);
694  _RYML_CB_ASSERT(m_stack.m_callbacks, m_evt_prev > 0);
695  // BEFORE
696  // ... flag start len (free)
697  // | |
698  // prev curr
699  // AFTER
700  // ... BMAP flag start len (free)
701  // | |
702  // prev curr
703  if(m_evt_prev + 1 < m_evt_size)
704  {
705  if(m_evt[m_evt_prev] & ievt::WSTR)
706  {
707  _c4dbgpf("{}/{}: WSTR", m_evt_pos, m_evt_size);
708  _RYML_CB_ASSERT(m_stack.m_callbacks, m_evt_prev > 0);
709  int32_t pos = _extend_left_to_include_tag_and_or_anchor(m_evt_prev);
710  if(m_evt_pos + 1 < m_evt_size)
711  {
712  for(int32_t i = pos; i <= m_evt_prev; i = _next(i))
713  {
714  m_evt[i] |= ievt::KEY_;
715  m_evt[i] &= ~ievt::VAL_;
716  }
717  int32_t num_move = m_evt_pos + 1 - pos;
718  _RYML_CB_ASSERT(m_stack.m_callbacks, num_move > 0);
719  memmove(m_evt + pos + 1, m_evt + pos, (size_t)num_move * sizeof(ievt::DataType));
720  }
721  m_evt[pos] = ievt::BMAP|ievt::FLOW|ievt::VAL_;
722  // move PSTR to prev
723  if(m_evt[pos + 1] & ievt::PSTR)
724  {
725  m_evt[pos ] |= ievt::PSTR;
726  m_evt[pos + 1] &= ~ievt::PSTR;
727  }
728  }
729  else
730  {
731  _c4dbgpf("{}/{}: container key", m_evt_pos, m_evt_size);
732  _RYML_CB_ASSERT(m_stack.m_callbacks, (m_evt[m_evt_prev] & (ievt::EMAP|ievt::ESEQ)));
733  int32_t pos;
734  _c4dbgpf("{}/{}: find matching open for {}", m_evt_pos, m_evt_size, m_evt_prev);
735  if(m_evt[m_evt_prev] & ievt::EMAP)
736  {
737  pos = _find_matching_open(ievt::BMAP, ievt::EMAP, m_evt_prev);
738  }
739  else
740  {
741  _RYML_CB_ASSERT(m_stack.m_callbacks, (m_evt[m_evt_prev] & ievt::ESEQ));
742  pos = _find_matching_open(ievt::BSEQ, ievt::ESEQ, m_evt_prev);
743  }
744  _c4dbgpf("{}/{}: matching open for {}={}", m_evt_pos, m_evt_size, m_evt_prev, pos);
745  _RYML_CB_CHECK(m_stack.m_callbacks, pos >= 0); // internal error
746  _RYML_CB_CHECK(m_stack.m_callbacks, pos < m_evt_prev); // internal error
747  _RYML_CB_ASSERT(m_stack.m_callbacks, (m_evt[pos] & ievt::ESEQ) == (m_evt[m_evt_prev] & ievt::BSEQ));
748  _RYML_CB_ASSERT(m_stack.m_callbacks, (m_evt[pos] & ievt::EMAP) == (m_evt[m_evt_prev] & ievt::BMAP));
749  // shift the array one position to the right, starting at pos
750  int32_t posp1 = pos + 1;
751  if(m_evt_pos + 1 < m_evt_size)
752  {
753  int32_t num_move = m_evt_pos + 1 - pos;
754  _RYML_CB_ASSERT(m_stack.m_callbacks, num_move > 0);
755  memmove(m_evt + posp1, m_evt + pos, (size_t)num_move * sizeof(ievt::DataType));
756  }
757  _RYML_CB_ASSERT(m_stack.m_callbacks, posp1 < m_evt_pos);
758  // start the map
759  m_evt[pos] = ievt::BMAP|ievt::FLOW|ievt::VAL_;
760  // set next as key, not val
761  m_evt[posp1] |= ievt::KEY_;
762  m_evt[posp1] &= ~ievt::VAL_;
763  // move PSTR to pos
764  if(m_evt[posp1] & ievt::PSTR)
765  {
766  m_evt[pos] |= ievt::PSTR;
767  m_evt[posp1] &= ~ievt::PSTR;
768  }
769  }
770  }
771  m_curr->evt_id = m_evt_pos - 2;
772  ++m_evt_prev;
773  ++m_evt_pos;
774  _enable_(c4::yml::MAP|c4::yml::FLOW_SL);
775  _push();
776  }
777 
778  /** like its flow counterpart, but this function can only be
779  * called after the end of a flow-val at root or doc level.
780  *
781  * See the documentation for @ref doc_event_handlers, which has
782  * important notes about this event.
783  */
785  {
786  if(m_evt_prev < m_evt_size)
787  {
788  // interpolate BMAP|VAL|BLCK after the last BDOC
789  int32_t pos = _find_last_bdoc(m_evt_pos);
790  if(pos >= 0)
791  {
792  _RYML_CB_ASSERT(m_stack.m_callbacks, pos < m_evt_size);
793  _RYML_CB_ASSERT(m_stack.m_callbacks, pos < m_evt_pos);
794  _RYML_CB_ASSERT(m_stack.m_callbacks, (m_evt[pos] & ievt::BDOC));
795  if(m_evt_pos < m_evt_size)
796  {
797  ++pos; // add 1 to write after BDOC
798  int32_t num_move = m_evt_pos - pos;
799  int32_t posp1 = pos + 1;
800  _RYML_CB_ASSERT(m_stack.m_callbacks, (m_evt[pos] & (ievt::BSEQ|ievt::BMAP)));
801  _RYML_CB_ASSERT(m_stack.m_callbacks, num_move > 0);
802  _RYML_CB_ASSERT(m_stack.m_callbacks, 0 == (m_evt[posp1] & ievt::PSTR));
803  memmove(m_evt + posp1, m_evt + pos, (size_t)num_move * sizeof(ievt::DataType));
804  m_evt[pos] = ievt::VAL_|ievt::BMAP|ievt::BLCK;
805  m_evt[posp1] &= ~ievt::VAL_;
806  m_evt[posp1] |= ievt::KEY_;
807  }
808  }
809  }
810  ++m_curr->evt_id;
811  ++m_evt_prev;
812  ++m_evt_pos;
813  _push();
814  }
815 
816  /** @} */
817 
818 public:
819 
820  /** @cond dev */
821  int32_t _find_last_bdoc(int32_t pos) const
822  {
823  _RYML_CB_ASSERT(m_stack.m_callbacks, m_evt_prev < m_evt_size); // it's safe to read from the array
824  while(pos >= 0)
825  {
826  ievt::DataType e = m_evt[pos];
827  if(e & ievt::BDOC)
828  return pos;
829  pos -= (e & ievt::PSTR) ? 3 : 1;
830  }
831  return -1; // LCOV_EXCL_LINE
832  }
833  int32_t _find_matching_open(ievt::DataType open, ievt::DataType close, int32_t pos) const
834  {
835  _c4dbgpf("find_matching: start at {}", pos);
836  _RYML_CB_ASSERT(m_stack.m_callbacks, (m_evt[pos] & close));
837  _RYML_CB_ASSERT(m_stack.m_callbacks, !(m_evt[pos] & open));
838  pos = _prev(pos); // don't count the starting close token
839  uint32_t count = 0;
840  while(pos >= 0)
841  {
842  ievt::DataType e = m_evt[pos];
843  _c4dbgpf("find_matching: pos={} count={} e={}", pos, count, m_evt[pos]);
844  if(e & close)
845  {
846  _c4dbgpf(".............: pos={} close! count={} e={}", pos, count, m_evt[pos]);
847  ++count;
848  }
849  else if(e & open)
850  {
851  _c4dbgpf(".............: pos={} open! count={} e={}", pos, count, m_evt[pos]);
852  if(!count)
853  return pos;
854  else
855  --count;
856  }
857  pos = _prev(pos);
858  }
859  _c4dbgpf("find_matching: not found!", 0); // LCOV_EXCL_LINE
860  return -1; // LCOV_EXCL_LINE
861  }
862  int32_t _extend_left_to_include_tag_and_or_anchor(int32_t pos) const
863  {
864  int32_t prev = _prev(pos);
865  while((prev > 0) && (m_evt[prev] & (ievt::TAG_|ievt::ANCH)))
866  {
867  _c4dbgpf("{}/{}: {} is anchor/tag. extend to {}", m_evt_pos, m_evt_size, prev, prev);
868  pos = prev;
869  prev = _prev(prev);
870  }
871  return pos;
872  }
873  C4_ALWAYS_INLINE int32_t _next(int32_t pos) const noexcept
874  {
875  _RYML_CB_ASSERT(m_stack.m_callbacks, pos < m_evt_size);
876  return pos + ((m_evt[pos] & ievt::WSTR) ? 3 : 1);
877  }
878  C4_ALWAYS_INLINE int32_t _prev(int32_t pos) const noexcept
879  {
880  _RYML_CB_ASSERT(m_stack.m_callbacks, pos < m_evt_size);
881  return pos - ((m_evt[pos] & ievt::PSTR) ? 3 : 1);
882  }
883  /** @endcond */
884 
885 public:
886 
887  /** @name YAML scalar events */
888  /** @{ */
889 
890 
891  C4_ALWAYS_INLINE void set_key_scalar_plain_empty()
892  {
893  _c4dbgpf("{}/{}: set_key_scalar_plain_empty", m_evt_pos, m_evt_size);
894  _send_key_scalar_(_get_latest_empty_scalar(), ievt::PLAI);
896  }
897  C4_ALWAYS_INLINE void set_val_scalar_plain_empty()
898  {
899  _c4dbgpf("{}/{}: set_val_scalar_plain_empty", m_evt_pos, m_evt_size);
900  _send_val_scalar_(_get_latest_empty_scalar(), ievt::PLAI);
902  }
903  C4_ALWAYS_INLINE csubstr _get_latest_empty_scalar() const
904  {
905  // ideally we should search back in the latest event that has
906  // a scalar, then select a zero-length scalar immediately
907  // after that scalar. But this also works for now:
908  return m_str.first(0);
909  }
910 
911 
912  C4_ALWAYS_INLINE void set_key_scalar_plain(csubstr scalar)
913  {
914  _c4dbgpf("{}/{}: set_key_scalar_plain: @{} [{}]~~~{}~~~", m_evt_pos, m_evt_size, scalar.str-m_str.str, scalar.len, scalar);
915  _send_key_scalar_(scalar, ievt::PLAI);
917  }
918  C4_ALWAYS_INLINE void set_val_scalar_plain(csubstr scalar)
919  {
920  _c4dbgpf("{}/{}: set_val_scalar_plain: @{} [{}]~~~{}~~~", m_evt_pos, m_evt_size, scalar.str-m_str.str, scalar.len, scalar);
921  _send_val_scalar_(scalar, ievt::PLAI);
923  }
924 
925 
926  C4_ALWAYS_INLINE void set_key_scalar_dquoted(csubstr scalar)
927  {
928  _c4dbgpf("{}/{}: set_key_scalar_dquo: @{} [{}]~~~{}~~~", m_evt_pos, m_evt_size, scalar.str-m_str.str, scalar.len, scalar);
929  _send_key_scalar_(scalar, ievt::DQUO);
931  }
932  C4_ALWAYS_INLINE void set_val_scalar_dquoted(csubstr scalar)
933  {
934  _c4dbgpf("{}/{}: set_val_scalar_dquo: @{} [{}]~~~{}~~~", m_evt_pos, m_evt_size, scalar.str-m_str.str, scalar.len, scalar);
935  _send_val_scalar_(scalar, ievt::DQUO);
937  }
938 
939 
940  C4_ALWAYS_INLINE void set_key_scalar_squoted(csubstr scalar)
941  {
942  _c4dbgpf("{}/{}: set_key_scalar_squo: @{} [{}]~~~{}~~~", m_evt_pos, m_evt_size, scalar.str-m_str.str, scalar.len, scalar);
943  _send_key_scalar_(scalar, ievt::SQUO);
945  }
/** Forward @p scalar to _send_val_scalar_() with the SQUO style flag.
 * The debug trace prints the scalar's offset relative to m_str.
 * NOTE(review): the listing numbering jumps from 949 to 951 inside
 * this body, so a statement may be missing from this extract. */
946  C4_ALWAYS_INLINE void set_val_scalar_squoted(csubstr scalar)
947  {
948  _c4dbgpf("{}/{}: set_val_scalar_squo: @{} [{}]~~~{}~~~", m_evt_pos, m_evt_size, scalar.str-m_str.str, scalar.len, scalar);
949  _send_val_scalar_(scalar, ievt::SQUO);
951  }
952 
953 
/** Forward @p scalar to _send_key_scalar_() with the LITL style flag.
 * The debug trace prints the scalar's offset relative to m_str.
 * NOTE(review): the listing numbering jumps from 957 to 959 inside
 * this body, so a statement may be missing from this extract. */
954  C4_ALWAYS_INLINE void set_key_scalar_literal(csubstr scalar)
955  {
956  _c4dbgpf("{}/{}: set_key_scalar_literal: @{} [{}]~~~{}~~~", m_evt_pos, m_evt_size, scalar.str-m_str.str, scalar.len, scalar);
957  _send_key_scalar_(scalar, ievt::LITL);
959  }
/** Forward @p scalar to _send_val_scalar_() with the LITL style flag.
 * The debug trace prints the scalar's offset relative to m_str.
 * NOTE(review): the listing numbering jumps from 963 to 965 inside
 * this body, so a statement may be missing from this extract. */
960  C4_ALWAYS_INLINE void set_val_scalar_literal(csubstr scalar)
961  {
962  _c4dbgpf("{}/{}: set_val_scalar_literal: @{} [{}]~~~{}~~~", m_evt_pos, m_evt_size, scalar.str-m_str.str, scalar.len, scalar);
963  _send_val_scalar_(scalar, ievt::LITL);
965  }
966 
967 
/** Forward @p scalar to _send_key_scalar_() with the FOLD style flag.
 * The debug trace prints the scalar's offset relative to m_str.
 * NOTE(review): the listing numbering jumps from 971 to 973 inside
 * this body, so a statement may be missing from this extract. */
968  C4_ALWAYS_INLINE void set_key_scalar_folded(csubstr scalar)
969  {
970  _c4dbgpf("{}/{}: set_key_scalar_folded: @{} [{}]~~~{}~~~", m_evt_pos, m_evt_size, scalar.str-m_str.str, scalar.len, scalar);
971  _send_key_scalar_(scalar, ievt::FOLD);
973  }
/** Forward @p scalar to _send_val_scalar_() with the FOLD style flag.
 * The debug trace prints the scalar's offset relative to m_str.
 * NOTE(review): the listing numbering jumps from 977 to 979 inside
 * this body, so a statement may be missing from this extract. */
974  C4_ALWAYS_INLINE void set_val_scalar_folded(csubstr scalar)
975  {
976  _c4dbgpf("{}/{}: set_val_scalar_folded: @{} [{}]~~~{}~~~", m_evt_pos, m_evt_size, scalar.str-m_str.str, scalar.len, scalar);
977  _send_val_scalar_(scalar, ievt::FOLD);
979  }
980 
981 
982  C4_ALWAYS_INLINE void mark_key_scalar_unfiltered()
983  {
984  _c4dbgpf("{}/{}: mark_key_scalar_unfiltered", m_evt_pos, m_evt_size);
985  if(m_evt_pos < m_evt_size)
986  m_evt[m_evt_pos] |= ievt::UNFILT;
987  }
988  C4_ALWAYS_INLINE void mark_val_scalar_unfiltered()
989  {
990  _c4dbgpf("{}/{}: mark_val_scalar_unfiltered", m_evt_pos, m_evt_size);
991  if(m_evt_pos < m_evt_size)
992  m_evt[m_evt_pos] |= ievt::UNFILT;
993  }
994 
995  /** @} */
996 
997 public:
998 
999  /** @cond dev*/
/* _add_scalar_(i, scalar): fill in the string part of the event at
 * index i of m_evt.
 *
 * Layout written, relative to i:
 *   m_evt[i]     gets AREN OR-ed in when the scalar is not inside m_str
 *   m_evt[i + 1] offset of scalar.str from m_str.str, or from
 *                m_arena.str in the AREN case
 *   m_evt[i + 2] scalar.len
 *   m_evt[i + 3] overwritten with PSTR
 *
 * Asserted preconditions: the scalar is inside m_str or m_arena, or its
 * str is null; m_evt[i] already has a bit of WSTR set; and
 * i + 3 < m_evt_size.
 *
 * Usage notes: both arguments are expanded several times, so they must
 * be free of side effects. The expansion is a sequence of statements
 * that is not wrapped in do/while, and the last statement has no
 * semicolon: use it inside a braced block and terminate it with ';'.
 *
 * NOTE(review): a null scalar.str passes the first assert; unless
 * is_sub() reports it as part of m_str it takes the arena branch and
 * the offset is computed from a null pointer - confirm this is
 * intended. */
1000 #define _add_scalar_(i, scalar) \
1001  _c4dbgpf("{}/{}: scalar!", i, m_evt_size); \
1002  _RYML_CB_ASSERT(m_stack.m_callbacks, scalar.is_sub(m_str) || scalar.is_sub(m_arena) || (scalar.str == nullptr)); \
1003  _RYML_CB_ASSERT(m_stack.m_callbacks, m_evt[i] & ievt::WSTR); \
1004  _RYML_CB_ASSERT(m_stack.m_callbacks, i + 3 < m_evt_size); \
1005  if(C4_LIKELY(scalar.is_sub(m_str))) \
1006  { \
1007  m_evt[i + 1] = (ievt::DataType)(scalar.str - m_str.str); \
1008  } \
1009  else \
1010  { \
1011  m_evt[i] |= ievt::AREN; \
1012  m_evt[i + 1] = (ievt::DataType)(scalar.str - m_arena.str); \
1013  _c4dbgpf("{}/{}: arena! ->{}", i, m_evt_size, m_evt[i+1]); \
1014  } \
1015  m_evt[i + 2] = (ievt::DataType)scalar.len; \
1016  m_evt[i + 3] = ievt::PSTR
1017  /** @endcond */
1018 
1019  /** @name YAML anchor/reference events */
1020  /** @{ */
1021 
1022  void set_key_anchor(csubstr anchor)
1023  {
1024  _c4dbgpf("{}/{}: set_key_anchor", m_evt_pos, m_evt_size);
1025  _RYML_CB_ASSERT(m_stack.m_callbacks, !_has_any_(KEYREF));
1026  _enable_(c4::yml::KEYANCH);
1027  if(m_evt_pos + 3 < m_evt_size)
1028  {
1029  m_evt[m_evt_pos] |= ievt::KEY_|ievt::ANCH;
1030  _add_scalar_(m_evt_pos, anchor);
1031  }
1032  m_evt_prev = m_evt_pos;
1033  m_evt_pos += 3;
1034  }
1035  void set_val_anchor(csubstr anchor)
1036  {
1037  _c4dbgpf("{}/{}: set_val_anchor", m_evt_pos, m_evt_size);
1038  _RYML_CB_ASSERT(m_stack.m_callbacks, !_has_any_(VALREF));
1039  _enable_(c4::yml::VALANCH);
1040  if(m_evt_pos + 3 < m_evt_size)
1041  {
1042  m_evt[m_evt_pos] |= ievt::VAL_|ievt::ANCH;
1043  _add_scalar_(m_evt_pos, anchor);
1044  }
1045  m_evt_prev = m_evt_pos;
1046  m_evt_pos += 3;
1047  }
1048 
1049  void set_key_ref(csubstr ref)
1050  {
1051  _RYML_CB_ASSERT(m_stack.m_callbacks, ref.begins_with('*'));
1052  if(C4_UNLIKELY(_has_any_(KEYANCH)))
1053  _RYML_CB_ERR_(m_stack.m_callbacks, "key cannot have both anchor and ref", m_curr->pos);
1054  _enable_(c4::yml::KEY|c4::yml::KEYREF);
1055  _send_str_(ref.sub(1), ievt::KEY_|ievt::ALIA); // skip the leading *
1056  }
1057  void set_val_ref(csubstr ref)
1058  {
1059  _RYML_CB_ASSERT(m_stack.m_callbacks, ref.begins_with('*'));
1060  if(C4_UNLIKELY(_has_any_(VALANCH)))
1061  _RYML_CB_ERR_(m_stack.m_callbacks, "val cannot have both anchor and ref", m_curr->pos);
1062  _enable_(c4::yml::VAL|c4::yml::VALREF);
1063  _send_str_(ref.sub(1), ievt::VAL_|ievt::ALIA); // skip the leading *
1064  }
1065 
1066  /** @} */
1067 
1068 public:
1069 
1070  /** @name YAML tag events */
1071  /** @{ */
1072 
1073  void set_key_tag(csubstr tag)
1074  {
1075  _c4dbgpf("{}/{}: set key tag ~~~{}~~~", m_evt_pos, m_evt_size, tag);
1076  _enable_(c4::yml::KEYTAG);
1077  _set_tag(tag, ievt::KEY_);
1078  }
1079  void set_val_tag(csubstr tag)
1080  {
1081  _c4dbgpf("{}/{}: set val tag [{}]~~~{}~~~", m_evt_pos, m_evt_size, tag.len, tag);
1082  _enable_(c4::yml::VALTAG);
1083  _set_tag(tag, ievt::VAL_);
1084  }
1085  void _set_tag(csubstr tag, ievt::DataType which)
1086  {
1087  csubstr ttag = _transform_directive(tag);
1088  _c4dbgpf("{}/{}: transformed_tag [{}]~~~{}~~~", m_evt_pos, m_evt_size, ttag.len, ttag);
1089  if(m_evt_pos + 3 < m_evt_size)
1090  {
1091  m_evt[m_evt_pos] |= which|ievt::TAG_;
1092  _add_scalar_(m_evt_pos, ttag);
1093  }
1094  m_evt_prev = m_evt_pos;
1095  m_evt_pos += 3;
1096  }
1097 
1098  /** @} */
1099 
1100 public:
1101 
1102  /** @name YAML directive events */
1103  /** @{ */
1104 
1105  void add_directive(csubstr directive)
1106  {
1107  _c4dbgpf("{}/{}: add directive ~~~{}~~~", m_evt_pos, m_evt_size, directive);
1108  _RYML_CB_ASSERT(m_stack.m_callbacks, directive.begins_with('%'));
1109  if(directive.begins_with("%TAG"))
1110  {
1111  const id_type pos = _num_tag_directives();
1112  if(C4_UNLIKELY(pos >= RYML_MAX_TAG_DIRECTIVES))
1113  _RYML_CB_ERR_(m_stack.m_callbacks, "too many directives", m_curr->pos);
1114  TagDirective &td = m_tag_directives[pos];
1115  if(C4_UNLIKELY(!td.create_from_str(directive)))
1116  _RYML_CB_ERR_(m_stack.m_callbacks, "failed to add directive", m_curr->pos);
1117  td.next_node_id = (id_type)m_evt_pos;
1118  _send_str_(td.handle, ievt::TAGD);
1119  _send_str_(td.prefix, ievt::TAGV);
1120  }
1121  else if(directive.begins_with("%YAML"))
1122  {
1123  _c4dbgpf("%YAML directive! ignoring...: {}", directive);
1124  if(C4_UNLIKELY(m_has_yaml_directive))
1125  _RYML_CB_ERR_(m_stack.m_callbacks, "multiple yaml directives", m_curr->pos);
1126  m_has_yaml_directive = true;
1127  csubstr rest = directive.sub(5).triml(' ');
1128  _send_str_(rest, ievt::YAML);
1129  }
1130  else
1131  {
1132  _c4dbgpf("unknown directive! ignoring... {}", directive);
1133  }
1134  }
1135 
1136  /** @} */
1137 
1138 public:
1139 
1140  /** @name YAML arena events */
1141  /** @{ */
1142 
1143  substr arena_rem()
1144  {
1145  return C4_LIKELY(m_arena_pos <= m_arena.len) ? m_arena.sub(m_arena_pos) : m_arena.last(0);
1146  }
1147 
1148  /** this may fail, in which case a an empty string is returned */
1149  substr alloc_arena(size_t len)
1150  {
1151  substr s = arena_rem();
1152  if(C4_LIKELY(len <= s.len))
1153  s = s.first(len);
1154  else
1155  s.str = nullptr;
1156  m_arena_pos += len;
1157  return s;
1158  }
1159 
1160  /** this may fail, in which case an empty string is returned */
1161  C4_ALWAYS_INLINE substr alloc_arena(size_t len, substr *relocated)
1162  {
1163  (void)relocated;
1164  return alloc_arena(len);
1165  }
1166 
1167  /** @} */
1168 
1169 public:
1170 
1171  /** push a new parent, add a child to the new parent, and set the
1172  * child as the current node */
1173  void _push()
1174  {
1175  _stack_push();
1176  m_curr->evt_type = {};
1177  }
1178 
1179  /** end the current scope */
1180  void _pop()
1181  {
1182  _stack_pop();
1183  }
1184 
1185  template<c4::yml::type_bits bits> C4_ALWAYS_INLINE void _enable__() noexcept
1186  {
1187  m_curr->evt_type |= bits;
1188  }
1189  template<c4::yml::type_bits bits> C4_ALWAYS_INLINE void _disable__() noexcept
1190  {
1191  m_curr->evt_type &= ~bits;
1192  }
1193  template<c4::yml::type_bits bits> C4_ALWAYS_INLINE bool _has_any__() const noexcept
1194  {
1195  return (m_curr->evt_type & bits) != c4::yml::type_bits(0);
1196  }
1197 
// NOTE(review): the listing numbering jumps from 1197 to 1199, so the
// signature line of this definition is missing from this extract -
// restore it from the original header.
1199  {
// when there is a parent state, flag it as having children
1200  if(m_parent)
1201  m_parent->has_children = true;
1202  }
1203 
1204  C4_ALWAYS_INLINE void _send_flag_only_(ievt::DataType flags)
1205  {
1206  _c4dbgpf("{}/{}: flag only", m_evt_pos, m_evt_size);
1207  if(m_evt_pos < m_evt_size)
1208  m_evt[m_evt_pos] |= flags;
1209  m_curr->evt_id = m_evt_pos;
1210  m_evt_prev = m_evt_pos;
1211  ++m_evt_pos;
1212  if(m_evt_pos < m_evt_size)
1213  m_evt[m_evt_pos] = {};
1214  }
1215 
1216  C4_ALWAYS_INLINE void _send_key_scalar_(csubstr scalar, ievt::DataType flags)
1217  {
1218  _c4dbgpf("{}/{}: key scalar", m_evt_pos, m_evt_size);
1219  if(m_evt_pos + 3 < m_evt_size)
1220  {
1221  m_evt[m_evt_pos] |= ievt::SCLR|ievt::KEY_|flags;
1222  _add_scalar_(m_evt_pos, scalar);
1223  }
1224  m_curr->evt_id = m_evt_pos;
1225  m_evt_prev = m_evt_pos;
1226  m_evt_pos += 3;
1227  }
1228 
1229  C4_ALWAYS_INLINE void _send_val_scalar_(csubstr scalar, ievt::DataType flags)
1230  {
1231  _c4dbgpf("{}/{}: val scalar", m_evt_pos, m_evt_size);
1232  if(m_evt_pos + 3 < m_evt_size)
1233  {
1234  m_evt[m_evt_pos] |= ievt::SCLR|ievt::VAL_|flags;
1235  _add_scalar_(m_evt_pos, scalar);
1236  }
1237  m_curr->evt_id = m_evt_pos;
1238  m_evt_prev = m_evt_pos;
1239  m_evt_pos += 3;
1240  }
1241 
1242  C4_ALWAYS_INLINE void _send_str_(csubstr scalar, ievt::DataType flags)
1243  {
1244  _c4dbgpf("{}/{}: send str", m_evt_pos, m_evt_size);
1245  if(m_evt_pos + 3 < m_evt_size)
1246  {
1247  m_evt[m_evt_pos] |= flags;
1248  _add_scalar_(m_evt_pos, scalar);
1249  }
1250  m_curr->evt_id = m_evt_pos;
1251  m_evt_prev = m_evt_pos;
1252  m_evt_pos += 3;
1253  }
1254 
// NOTE(review): the listing numbering jumps from 1254 to 1256, so the
// signature line of this definition is missing from this extract -
// restore it from the original header.
1256  {
// reset every tag directive slot to a value-initialized TagDirective
1257  for(TagDirective &td : m_tag_directives)
1258  td = {};
1259  }
1260  C4_NODISCARD id_type _num_tag_directives() const
1261  {
1262  // this assumes we have a very small number of tag directives
1263  id_type i = 0;
1264  for(; i < RYML_MAX_TAG_DIRECTIVES; ++i)
1265  if(m_tag_directives[i].handle.empty())
1266  break;
1267  return i;
1268  }
1269  csubstr _transform_directive(csubstr tag)
1270  {
1271  // lookup from the end. We want to find the first directive that
1272  // matches the tag and has a target node id leq than the given
1273  // node_id.
1274  for(id_type i = RYML_MAX_TAG_DIRECTIVES-1; i != NONE; --i)
1275  {
1276  TagDirective const& td = m_tag_directives[i];
1277  if(td.handle.empty())
1278  continue;
1279  if(tag.begins_with(td.handle) && (td.handle != td.prefix))
1280  {
1281  substr rem = arena_rem();
1282  size_t len = td.transform(tag, rem, m_stack.m_callbacks, /*with_brackets*/false);
1283  if(len == 0)
1284  return tag;
1285  alloc_arena(len);
1286  return rem.first(len <= rem.len ? len : 0);
1287  }
1288  }
1289  if(tag.begins_with('!'))
1290  {
1291  if(is_custom_tag(tag))
1292  {
1293  _RYML_CB_ERR_(m_stack.m_callbacks, "tag not found", m_curr->pos);
1294  }
1295  }
1296  return tag;
1297  }
1298 #undef _enable_
1299 #undef _disable_
1300 #undef _has_any_
1301 #undef _add_scalar_
1302 
1303 };
1304 
1305 /** @} */
1306 
1307 } // namespace extra
1308 } // namespace yml
1309 } // namespace c4
1310 
1311 
1312 // NOLINTEND(hicpp-signed-bitwise)
1313 C4_SUPPRESS_WARNING_GCC_CLANG_POP
1314 
1315 #endif /* _C4_YML_EXTRA_EVENT_HANDLER_INTS_HPP_ */
#define _has_any_(bits)
#define RYML_EXPORT
Definition: export.hpp:15
Callbacks const & get_callbacks()
get the global callbacks
Definition: common.cpp:118
int32_t estimate_events_ints_size(csubstr src)
Read YAML source and, without undergoing a full parse, estimate the size of the integer buffer requir...
uint32_t type_bits
the integral type necessary to cover all the bits for NodeType_e
Definition: node_type.hpp:29
@ VALANCH
the val has an &anchor
Definition: node_type.hpp:45
@ KEY_DQUO
mark key scalar as double quoted "
Definition: node_type.hpp:68
@ VALREF
a *reference: the val references an &anchor
Definition: node_type.hpp:43
@ VALNIL
the val is null (eg {a : } results in a null val)
Definition: node_type.hpp:49
@ MAP
a map: a parent of KEYVAL/KEYSEQ/KEYMAP nodes
Definition: node_type.hpp:38
@ KEY
is member of a map
Definition: node_type.hpp:36
@ VAL_FOLDED
mark val scalar as multiline, block folded >
Definition: node_type.hpp:65
@ KEYTAG
the key has a tag
Definition: node_type.hpp:46
@ FLOW_SL
mark container with single-line flow style (seqs as '[val1,val2], maps as '{key: val,...
Definition: node_type.hpp:59
@ VAL
a scalar: has a scalar (ie string) value, possibly empty. must be a leaf node, and cannot be MAP or S...
Definition: node_type.hpp:37
@ VALTAG
the val has a tag
Definition: node_type.hpp:47
@ SEQ
a seq: a parent of VAL/SEQ/MAP nodes
Definition: node_type.hpp:39
@ VAL_SQUO
mark val scalar as single quoted '
Definition: node_type.hpp:67
@ VAL_PLAIN
mark val scalar as plain scalar (unquoted, even when multiline)
Definition: node_type.hpp:71
@ KEYREF
a *reference: the key references an &anchor
Definition: node_type.hpp:42
@ BLOCK
mark container with block style (seqs as '- val ', maps as 'key: val')
Definition: node_type.hpp:61
@ KEYANCH
the key has an &anchor
Definition: node_type.hpp:44
@ VAL_DQUO
mark val scalar as double quoted "
Definition: node_type.hpp:69
@ KEY_SQUO
mark key scalar as single quoted '
Definition: node_type.hpp:66
@ VAL_LITERAL
mark val scalar as multiline, block literal |
Definition: node_type.hpp:63
@ KEY_LITERAL
mark key scalar as multiline, block literal |
Definition: node_type.hpp:62
@ KEY_PLAIN
mark key scalar as plain scalar (unquoted, even when multiline)
Definition: node_type.hpp:70
@ KEY_FOLDED
mark key scalar as multiline, block folded >
Definition: node_type.hpp:64
@ KEYNIL
the key is null (eg { : b} results in a null key)
Definition: node_type.hpp:48
@ DOC
a document
Definition: node_type.hpp:40
bool is_custom_tag(csubstr tag)
Definition: tag.cpp:8
#define RYML_MAX_TAG_DIRECTIVES
the maximum number of tag directives in a Tree
Definition: tag.hpp:19
void(*)(void *, csubstr prev_arena, substr next_arena) pfn_relocate_arena
EventFlags
enumeration of integer event bits.
@ PSTR
special flag to enable look-back in the event array. it signifies that the previous event has a strin...
@ TAGD
tag directive name : TAG <name> .......
@ LITL
block literal scalar (|)
@ UNFILT
special flag to mark a scalar as unfiltered (when the parser is set not to filter).
@ DQUO
double-quoted scalar ("")
@ FOLD
block folded scalar (>)
@ MASK
a mask of all bits in this enumeration with string: mask of all the events that encode a string follo...
@ BSTR
+STR begin stream
@ FLOW
flow container: [] for seqs or {} for maps
@ AREN
IMPORTANT. Marks events whose string was placed in the arena. This happens when the filtered string i...
@ TAGV
tag directive value: TAG ...... <value>
@ ALIA
*ref =ALI alias (reference)
@ LAST
the last flag defined above
@ EXPL
--- (with BDOC) or ... (with EDOC)
@ SQUO
single-quoted scalar (')
int32_t DataType
data type for integer events.
RYML_ID_TYPE id_type
The type of a node id in the YAML tree; to override the default type, define the macro RYML_ID_TYPE t...
Definition: common.hpp:253
@ RTOP
reading at top level
@ RUNK
reading unknown state (when starting): must determine whether scalar, map or seq
@ NONE
an index to none
Definition: common.hpp:260
Definition: common.cpp:12
a c-style callbacks class.
Definition: common.hpp:377
Use this class as a base for event handler implementations, to simplify the stack logic.
bool create_from_str(csubstr directive_)
leaves next_node_id unfilled
Definition: tag.cpp:203
csubstr handle
Eg.
Definition: tag.hpp:60
id_type next_node_id
The next node to which this tag directive applies.
Definition: tag.hpp:64
csubstr prefix
Eg.
Definition: tag.hpp:62
size_t transform(csubstr tag, substr output, Callbacks const &callbacks, bool with_brackets=true) const
Definition: tag.cpp:224
A parser event handler that creates a compact representation of the YAML tree in a buffer of integers...
void begin_doc_expl()
explicit doc start, with ---
void end_doc_expl()
explicit doc end, with ...
void _send_key_scalar_(csubstr scalar, ievt::DataType flags)
void set_key_scalar_literal(csubstr scalar)
substr alloc_arena(size_t len, substr *relocated)
this may fail, in which case an empty string is returned
bool fits_buffers() const
Predicate to test if the event buffer successfully accommodated all the parse events.
void begin_doc()
implicit doc start (without ---)
void _send_flag_only_(ievt::DataType flags)
substr alloc_arena(size_t len)
this may fail, in which case an empty string is returned
void actually_val_is_first_key_of_new_map_flow()
set the previous val as the first key of a new map, with flow style.
void _set_tag(csubstr tag, ievt::DataType which)
void set_key_scalar_squoted(csubstr scalar)
void set_key_scalar_folded(csubstr scalar)
EventHandlerInts(c4::yml::Callbacks const &cb)
void actually_val_is_first_key_of_new_map_block()
like its flow counterpart, but this function can only be called after the end of a flow-val at root o...
void set_val_scalar_literal(csubstr scalar)
int required_size_events() const
get the size needed for the event buffer from the previous parse
void set_val_scalar_folded(csubstr scalar)
void set_val_scalar_dquoted(csubstr scalar)
void _pop()
end the current scope
void _send_str_(csubstr scalar, ievt::DataType flags)
void set_key_scalar_dquoted(csubstr scalar)
void _send_val_scalar_(csubstr scalar, ievt::DataType flags)
size_t required_size_arena() const
get the size needed for the arena from the previous parse
void start_parse(const char *filename, c4::yml::detail::pfn_relocate_arena relocate_arena, void *relocate_arena_data)
void end_doc()
implicit doc end (without ...)
void _push()
push a new parent, add a child to the new parent, and set the child as the current node
void set_val_scalar_squoted(csubstr scalar)
void reset(csubstr str, substr arena, ievt::DataType *dst, int32_t dst_size)