rapidyaml  0.11.0
parse and emit YAML, and do it fast
reference_resolver.cpp
Go to the documentation of this file.
2 #include "c4/yml/common.hpp"
3 #include "c4/yml/detail/dbgprint.hpp"
4 #ifdef RYML_DBG
5 #include "c4/yml/detail/print.hpp"
6 #else
7 #define _c4dbg_tree(...)
8 #define _c4dbg_node(...)
9 #endif
10 
11 namespace c4 {
12 namespace yml {
13 
14 /** @cond dev */
15 
16 id_type ReferenceResolver::count_anchors_and_refs_(id_type n)
17 {
18  id_type c = 0;
19  c += m_tree->has_key_anchor(n);
20  c += m_tree->has_val_anchor(n);
21  c += m_tree->is_key_ref(n);
22  c += m_tree->is_val_ref(n);
23  c += m_tree->has_key(n) && m_tree->key(n) == "<<";
24  for(id_type ch = m_tree->first_child(n); ch != NONE; ch = m_tree->next_sibling(ch))
25  c += count_anchors_and_refs_(ch);
26  return c;
27 }
28 
29 void ReferenceResolver::gather_anchors_and_refs__(id_type n)
30 {
31  // insert key refs BEFORE inserting val refs
32  if(m_tree->has_key(n))
33  {
34  if(!m_tree->is_key_quoted(n) && m_tree->key(n) == "<<")
35  {
36  _c4dbgpf("node[{}]: key is <<", n);
37  if(m_tree->has_val(n))
38  {
39  if(m_tree->is_val_ref(n))
40  {
41  _c4dbgpf("node[{}]: instance[{}]: val ref, inheriting! '{}'", n, m_refs.size(), m_tree->val_ref(n));
42  m_refs.push({VALREF, n, NONE, NONE, NONE, NONE});
43  //m_refs.push({KEYREF, n, NONE, NONE, NONE, NONE});
44  }
45  else
46  {
47  _c4dbgpf("node[{}]: not ref!", n);
48  }
49  }
50  else if(m_tree->is_seq(n))
51  {
52  // for merging multiple inheritance targets
53  // <<: [ *CENTER, *BIG ]
54  _c4dbgpf("node[{}]: is seq!", n);
55  for(id_type ich = m_tree->first_child(n); ich != NONE; ich = m_tree->next_sibling(ich))
56  {
57  _c4dbgpf("node[{}]: instance [{}]: val ref, inheriting multiple: {} '{}'", n, m_refs.size(), ich, m_tree->val_ref(ich));
58  if(C4_UNLIKELY(m_tree->is_container(ich)))
59  _RYML_ERR_VISIT_(m_tree->m_callbacks, m_tree, n, "child={}: refs for << cannot be containers.", ich);
60  m_refs.push({VALREF, ich, NONE, NONE, n, m_tree->next_sibling(n)});
61  }
62  return; // don't descend into the seq
63  }
64  else
65  {
66  _RYML_ERR_VISIT_(m_tree->m_callbacks, m_tree, n, "refs for << must be either val or seq");
67  }
68  }
69  else if(m_tree->is_key_ref(n))
70  {
71  _c4dbgpf("node[{}]: instance[{}]: key ref: '{}', key='{}'", n, m_refs.size(), m_tree->key_ref(n), m_tree->has_key(n) ? m_tree->key(n) : csubstr{"-"});
72  _RYML_ASSERT_VISIT_(m_tree->m_callbacks, m_tree->key(n) != "<<", m_tree, n);
73  _RYML_CHECK_VISIT_(m_tree->m_callbacks, (!m_tree->has_key(n)) || m_tree->key(n).ends_with(m_tree->key_ref(n)), m_tree, n);
74  m_refs.push({KEYREF, n, NONE, NONE, NONE, NONE});
75  }
76  }
77  // val ref
78  if(m_tree->is_val_ref(n) && (!m_tree->has_key(n) || m_tree->key(n) != "<<"))
79  {
80  _c4dbgpf("node[{}]: instance[{}]: val ref: '{}'", n, m_refs.size(), m_tree->val_ref(n));
81  _RYML_CHECK_VISIT((!m_tree->has_val(n)) || m_tree->val(n).ends_with(m_tree->val_ref(n)), m_tree, n);
82  m_refs.push({VALREF, n, NONE, NONE, NONE, NONE});
83  }
84  // anchors
85  if(m_tree->has_key_anchor(n))
86  {
87  _c4dbgpf("node[{}]: instance[{}]: key anchor: '{}'", n, m_refs.size(), m_tree->key_anchor(n));
88  _RYML_CHECK_VISIT(m_tree->has_key(n), m_tree, n);
89  m_refs.push({KEYANCH, n, NONE, NONE, NONE, NONE});
90  }
91  if(m_tree->has_val_anchor(n))
92  {
93  _c4dbgpf("node[{}]: instance[{}]: val anchor: '{}'", n, m_refs.size(), m_tree->val_anchor(n));
94  _RYML_CHECK_VISIT(m_tree->has_val(n) || m_tree->is_container(n), m_tree, n);
95  m_refs.push({VALANCH, n, NONE, NONE, NONE, NONE});
96  }
97  // recurse
98  for(id_type ch = m_tree->first_child(n); ch != NONE; ch = m_tree->next_sibling(ch))
99  gather_anchors_and_refs__(ch);
100 }
101 
102 void ReferenceResolver::gather_anchors_and_refs_()
103 {
104  _c4dbgp("gathering anchors and refs...");
105 
106  // minimize (re-)allocations by counting first
107  id_type num_anchors_and_refs = count_anchors_and_refs_(m_tree->root_id());
108  if(!num_anchors_and_refs)
109  return;
110  m_refs.reserve(num_anchors_and_refs);
111  m_refs.clear();
112 
113  // now descend through the hierarchy
114  gather_anchors_and_refs__(m_tree->root_id());
115 
116  _c4dbgpf("found {} anchors/refs", m_refs.size());
117 
118  // finally connect the reference list
119  id_type prev_anchor = NONE;
120  id_type count = 0;
121  for(auto &rd : m_refs)
122  {
123  rd.prev_anchor = prev_anchor;
124  if(rd.type.has_anchor())
125  prev_anchor = count;
126  ++count;
127  }
128  _c4dbgp("gathering anchors and refs: finished");
129 }
130 
131 id_type ReferenceResolver::lookup_(RefData const* C4_RESTRICT ra)
132 {
133  #ifdef RYML_DBG
134  id_type instance = static_cast<id_type>(ra-m_refs.m_stack);
135  id_type node = ra->node;
136  #endif
137  _RYML_ASSERT_VISIT_(m_tree->m_callbacks, ra->type.is_key_ref() || ra->type.is_val_ref(), m_tree, ra->node);
138  _RYML_ASSERT_VISIT_(m_tree->m_callbacks, ra->type.is_key_ref() != ra->type.is_val_ref(), m_tree, ra->node);
139  csubstr refname;
140  _c4dbgpf("instance[{}:node{}]: lookup from node={}...", instance, node, ra->node);
141  if(ra->type.is_val_ref())
142  {
143  refname = m_tree->val_ref(ra->node);
144  _c4dbgpf("instance[{}:node{}]: valref: '{}'", instance, node, refname);
145  }
146  else
147  {
148  _RYML_ASSERT_VISIT_(m_tree->m_callbacks, ra->type.is_key_ref(), m_tree, ra->node);
149  refname = m_tree->key_ref(ra->node);
150  _c4dbgpf("instance[{}:node{}]: keyref: '{}'", instance, node, refname);
151  }
152  while(ra->prev_anchor != NONE)
153  {
154  ra = &m_refs[ra->prev_anchor];
155  _c4dbgpf("instance[{}:node{}]: lookup '{}' at [{}:node{}]: keyref='{}' valref='{}'", instance, node, refname, ra-m_refs.m_stack, ra->node,
156  (m_tree->has_key_anchor(ra->node) ? m_tree->key_anchor(ra->node) : csubstr("~")),
157  (m_tree->has_val_anchor(ra->node) ? m_tree->val_anchor(ra->node) : csubstr("~")));
158  if(m_tree->has_anchor(ra->node, refname))
159  {
160  _c4dbgpf("instance[{}:node{}]: got it at [{}:node{}]!", instance, node, ra-m_refs.m_stack, ra->node);
161  return ra->node;
162  }
163  }
164  _RYML_ERR_VISIT_(m_tree->m_callbacks, m_tree, ra->node, "anchor not found: '{}'", refname);
165  C4_UNREACHABLE_AFTER_ERR();
166 }
167 
168 void ReferenceResolver::reset_(Tree *t_)
169 {
170  if(t_->callbacks() != m_refs.m_callbacks)
171  {
172  m_refs.m_callbacks = t_->callbacks();
173  }
174  m_tree = t_;
175  m_refs.clear();
176 }
177 
178 void ReferenceResolver::resolve_()
179 {
180  /* from the specs: "an alias node refers to the most recent
181  * node in the serialization having the specified anchor". So
182  * we need to start looking upward from ref nodes.
183  *
184  * @see http://yaml.org/spec/1.2/spec.html#id2765878 */
185  _c4dbgp("matching anchors/refs...");
186  for(id_type i = 0, e = m_refs.size(); i < e; ++i)
187  {
188  RefData &C4_RESTRICT refdata = m_refs.top(i);
189  if( ! refdata.type.is_ref())
190  continue;
191  refdata.target = lookup_(&refdata);
192  }
193  _c4dbgp("matching anchors/refs: finished");
194 
195  // insert the resolved references
196  _c4dbgp("modifying tree...");
197  id_type prev_parent_ref = NONE;
198  id_type prev_parent_ref_after = NONE;
199  for(id_type i = 0, e = m_refs.size(); i < e; ++i)
200  {
201  RefData const& C4_RESTRICT refdata = m_refs[i];
202  _c4dbgpf("instance[{}:node{}]: {}/{}...", i, refdata.node, i+1, e);
203  if( ! refdata.type.is_ref())
204  continue;
205  _c4dbgpf("instance[{}:node{}]: is reference!", i, refdata.node);
206  if(refdata.parent_ref != NONE)
207  {
208  _c4dbgpf("instance[{}:node{}] has parent: {}", i, refdata.node, refdata.parent_ref);
209  _RYML_ASSERT_VISIT_(m_tree->m_callbacks, m_tree->is_seq(refdata.parent_ref), m_tree, refdata.node);
210  const id_type p = m_tree->parent(refdata.parent_ref);
211  const id_type after = (prev_parent_ref != refdata.parent_ref) ?
212  refdata.parent_ref//prev_sibling(rd.parent_ref_sibling)
213  :
214  prev_parent_ref_after;
215  prev_parent_ref = refdata.parent_ref;
216  prev_parent_ref_after = m_tree->duplicate_children_no_rep(refdata.target, p, after);
217  m_tree->remove(refdata.node);
218  }
219  else
220  {
221  _c4dbgpf("instance[{}:node{}] has no parent", i, refdata.node, refdata.parent_ref);
222  if(m_tree->has_key(refdata.node) && m_tree->key(refdata.node) == "<<")
223  {
224  _c4dbgpf("instance[{}:node{}] is inheriting", i, refdata.node);
225  _RYML_ASSERT_VISIT_(m_tree->m_callbacks, m_tree->is_keyval(refdata.node), m_tree, refdata.node);
226  const id_type p = m_tree->parent(refdata.node);
227  const id_type after = m_tree->prev_sibling(refdata.node);
228  _c4dbgpf("instance[{}:node{}] p={} after={}", i, refdata.node, p, after);
229  m_tree->duplicate_children_no_rep(refdata.target, p, after);
230  m_tree->remove(refdata.node);
231  }
232  else if(refdata.type.is_key_ref())
233  {
234  _c4dbgpf("instance[{}:node{}] is key ref", i, refdata.node);
235  _RYML_ASSERT_VISIT_(m_tree->m_callbacks, m_tree->is_key_ref(refdata.node), m_tree, refdata.node);
236  _RYML_ASSERT_VISIT_(m_tree->m_callbacks, m_tree->has_key_anchor(refdata.target) || m_tree->has_val_anchor(refdata.target), m_tree, refdata.node);
237  if(m_tree->has_val_anchor(refdata.target) && m_tree->val_anchor(refdata.target) == m_tree->key_ref(refdata.node))
238  {
239  _c4dbgpf("instance[{}:node{}] target.anchor==val.anchor=={}", i, refdata.node, m_tree->val_anchor(refdata.target));
240  _RYML_CHECK_VISIT_(m_tree->m_callbacks, !m_tree->is_container(refdata.target), m_tree, refdata.target);
241  _RYML_CHECK_VISIT_(m_tree->m_callbacks, m_tree->has_val(refdata.target), m_tree, refdata.target);
242  const type_bits existing_style_flags = VAL_STYLE & m_tree->_p(refdata.target)->m_type.type;
243  static_assert((VAL_STYLE >> 1u) == (KEY_STYLE), "bad flags");
244  m_tree->_p(refdata.node)->m_key.scalar = m_tree->val(refdata.target);
245  m_tree->_add_flags(refdata.node, KEY | (existing_style_flags >> 1u));
246  }
247  else
248  {
249  _c4dbgpf("instance[{}:node{}] don't inherit container flags", i, refdata.node);
250  _RYML_CHECK_BASIC_(m_tree->m_callbacks, m_tree->key_anchor(refdata.target) == m_tree->key_ref(refdata.node));
251  m_tree->_p(refdata.node)->m_key.scalar = m_tree->key(refdata.target);
252  // keys cannot be containers, so don't inherit container flags
253  const type_bits existing_style_flags = KEY_STYLE & m_tree->_p(refdata.target)->m_type.type;
254  m_tree->_add_flags(refdata.node, KEY | existing_style_flags);
255  }
256  }
257  else // val ref
258  {
259  _c4dbgpf("instance[{}:node{}] is val ref", i, refdata.node);
260  _RYML_ASSERT_VISIT_(m_tree->m_callbacks, refdata.type.is_val_ref(), m_tree, refdata.node);
261  if(m_tree->has_key_anchor(refdata.target) && m_tree->key_anchor(refdata.target) == m_tree->val_ref(refdata.node))
262  {
263  _c4dbgpf("instance[{}:node{}] target.anchor==key.anchor=={}", i, refdata.node, m_tree->key_anchor(refdata.target));
264  _RYML_CHECK_BASIC_(m_tree->m_callbacks, !m_tree->is_container(refdata.target));
265  _RYML_CHECK_BASIC_(m_tree->m_callbacks, m_tree->has_val(refdata.target));
266  // keys cannot be containers, so don't inherit container flags
267  const type_bits existing_style_flags = (KEY_STYLE) & m_tree->_p(refdata.target)->m_type.type;
268  static_assert((KEY_STYLE << 1u) == (VAL_STYLE), "bad flags");
269  m_tree->_p(refdata.node)->m_val.scalar = m_tree->key(refdata.target);
270  m_tree->_add_flags(refdata.node, VAL | (existing_style_flags << 1u));
271  }
272  else
273  {
274  _c4dbgpf("instance[{}:node{}] duplicate contents", i, refdata.node);
275  m_tree->duplicate_contents(refdata.target, refdata.node);
276  }
277  }
278  }
279  _c4dbg_tree("after insertion", *m_tree);
280  }
281 }
282 
283 void ReferenceResolver::resolve(Tree *t_, bool clear_anchors)
284 {
285  _c4dbgp("resolving references...");
286 
287  reset_(t_);
288 
289  _c4dbg_tree("unresolved tree", *m_tree);
290 
291  gather_anchors_and_refs_();
292  if(m_refs.empty())
293  return;
294  resolve_();
295  _c4dbg_tree("resolved tree", *m_tree);
296 
297  // clear anchors and refs
298  if(clear_anchors)
299  {
300  _c4dbgp("clearing anchors/refs");
301  auto clear_ = [this]{
302  for(auto const& C4_RESTRICT ar : m_refs)
303  {
304  m_tree->rem_anchor_ref(ar.node);
305  if(ar.parent_ref != NONE)
306  if(m_tree->type(ar.parent_ref) != NOTYPE)
307  m_tree->remove(ar.parent_ref);
308  }
309  };
310  clear_();
311  // some of the elements injected during the resolution may
312  // have nested anchors; these anchors will have been newly
313  // injected during the resolution; collect again, and clear
314  // again, to ensure those are also cleared:
315  gather_anchors_and_refs_();
316  clear_();
317  _c4dbgp("clearing anchors/refs: finished");
318  }
319 
320  _c4dbg_tree("final resolved tree", *m_tree);
321 
322  m_tree = nullptr;
323  _c4dbgp("resolving references: finished");
324 }
325 
326 /** @endcond */
327 
328 } // namespace ryml
329 } // namespace c4
Common utilities and infrastructure used by ryml.
uint32_t type_bits
the integral type necessary to cover all the bits for NodeType_e
Definition: node_type.hpp:29
@ VALANCH
the val has an &anchor
Definition: node_type.hpp:45
@ NOTYPE
no node type or style is set
Definition: node_type.hpp:35
@ VALREF
a *reference: the val references an &anchor
Definition: node_type.hpp:43
@ KEY
is member of a map
Definition: node_type.hpp:36
@ VAL_STYLE
mask of all the scalar styles for val (not container styles!)
Definition: node_type.hpp:92
@ VAL
a scalar: has a scalar (ie string) value, possibly empty. must be a leaf node, and cannot be MAP or S...
Definition: node_type.hpp:37
@ KEY_STYLE
mask of all the scalar styles for key (not container styles!)
Definition: node_type.hpp:91
@ KEYREF
a *reference: the key references an &anchor
Definition: node_type.hpp:42
@ KEYANCH
the key has an &anchor
Definition: node_type.hpp:44
RYML_ID_TYPE id_type
The type of a node id in the YAML tree; to override the default type, define the macro RYML_ID_TYPE t...
Definition: common.hpp:244
@ NONE
an index to none
Definition: common.hpp:251
(Undefined by default) Use shorter error message from checks/asserts: do not show the check condition...
Definition: common.cpp:14
#define _c4dbg_tree(...)
void resolve(Tree *tree, bool clear_anchors=true)
Resolve references: for each reference, look for a matching anchor, and copy its contents to the ref ...