rapidyaml  0.10.0
parse and emit YAML, and do it fast
reference_resolver.cpp
Go to the documentation of this file.
2 #include "c4/yml/common.hpp"
3 #include "c4/yml/detail/dbgprint.hpp"
4 #ifdef RYML_DBG
5 #include "c4/yml/detail/print.hpp"
6 #else
7 #define _c4dbg_tree(...)
8 #define _c4dbg_node(...)
9 #endif
10 
11 namespace c4 {
12 namespace yml {
13 
14 /** @cond dev */
15 
16 id_type ReferenceResolver::count_anchors_and_refs_(id_type n)
17 {
18  id_type c = 0;
19  c += m_tree->has_key_anchor(n);
20  c += m_tree->has_val_anchor(n);
21  c += m_tree->is_key_ref(n);
22  c += m_tree->is_val_ref(n);
23  c += m_tree->has_key(n) && m_tree->key(n) == "<<";
24  for(id_type ch = m_tree->first_child(n); ch != NONE; ch = m_tree->next_sibling(ch))
25  c += count_anchors_and_refs_(ch);
26  return c;
27 }
28 
29 void ReferenceResolver::gather_anchors_and_refs__(id_type n)
30 {
31  // insert key refs BEFORE inserting val refs
32  if(m_tree->has_key(n))
33  {
34  if(!m_tree->is_key_quoted(n) && m_tree->key(n) == "<<")
35  {
36  _c4dbgpf("node[{}]: key is <<", n);
37  if(m_tree->has_val(n))
38  {
39  if(m_tree->is_val_ref(n))
40  {
41  _c4dbgpf("node[{}]: instance[{}]: val ref, inheriting! '{}'", n, m_refs.size(), m_tree->val_ref(n));
42  m_refs.push({VALREF, n, NONE, NONE, NONE, NONE});
43  //m_refs.push({KEYREF, n, NONE, NONE, NONE, NONE});
44  }
45  else
46  {
47  _c4dbgpf("node[{}]: not ref!", n);
48  }
49  }
50  else if(m_tree->is_seq(n))
51  {
52  // for merging multiple inheritance targets
53  // <<: [ *CENTER, *BIG ]
54  _c4dbgpf("node[{}]: is seq!", n);
55  for(id_type ich = m_tree->first_child(n); ich != NONE; ich = m_tree->next_sibling(ich))
56  {
57  _c4dbgpf("node[{}]: instance [{}]: val ref, inheriting multiple: {} '{}'", n, m_refs.size(), ich, m_tree->val_ref(ich));
58  if(m_tree->is_container(ich))
59  {
60  detail::_report_err(m_tree->m_callbacks, "ERROR: node {} child {}: refs for << cannot be containers.'", n, ich);
61  C4_UNREACHABLE_AFTER_ERR();
62  }
63  m_refs.push({VALREF, ich, NONE, NONE, n, m_tree->next_sibling(n)});
64  }
65  return; // don't descend into the seq
66  }
67  else
68  {
69  detail::_report_err(m_tree->m_callbacks, "ERROR: node {}: refs for << must be either val or seq", n);
70  C4_UNREACHABLE_AFTER_ERR();
71  }
72  }
73  else if(m_tree->is_key_ref(n))
74  {
75  _c4dbgpf("node[{}]: instance[{}]: key ref: '{}', key='{}'", n, m_refs.size(), m_tree->key_ref(n), m_tree->has_key(n) ? m_tree->key(n) : csubstr{"-"});
76  _RYML_CB_ASSERT(m_tree->m_callbacks, m_tree->key(n) != "<<");
77  _RYML_CB_CHECK(m_tree->m_callbacks, (!m_tree->has_key(n)) || m_tree->key(n).ends_with(m_tree->key_ref(n)));
78  m_refs.push({KEYREF, n, NONE, NONE, NONE, NONE});
79  }
80  }
81  // val ref
82  if(m_tree->is_val_ref(n) && (!m_tree->has_key(n) || m_tree->key(n) != "<<"))
83  {
84  _c4dbgpf("node[{}]: instance[{}]: val ref: '{}'", n, m_refs.size(), m_tree->val_ref(n));
85  RYML_CHECK((!m_tree->has_val(n)) || m_tree->val(n).ends_with(m_tree->val_ref(n)));
86  m_refs.push({VALREF, n, NONE, NONE, NONE, NONE});
87  }
88  // anchors
89  if(m_tree->has_key_anchor(n))
90  {
91  _c4dbgpf("node[{}]: instance[{}]: key anchor: '{}'", n, m_refs.size(), m_tree->key_anchor(n));
92  RYML_CHECK(m_tree->has_key(n));
93  m_refs.push({KEYANCH, n, NONE, NONE, NONE, NONE});
94  }
95  if(m_tree->has_val_anchor(n))
96  {
97  _c4dbgpf("node[{}]: instance[{}]: val anchor: '{}'", n, m_refs.size(), m_tree->val_anchor(n));
98  RYML_CHECK(m_tree->has_val(n) || m_tree->is_container(n));
99  m_refs.push({VALANCH, n, NONE, NONE, NONE, NONE});
100  }
101  // recurse
102  for(id_type ch = m_tree->first_child(n); ch != NONE; ch = m_tree->next_sibling(ch))
103  gather_anchors_and_refs__(ch);
104 }
105 
106 void ReferenceResolver::gather_anchors_and_refs_()
107 {
108  _c4dbgp("gathering anchors and refs...");
109 
110  // minimize (re-)allocations by counting first
111  id_type num_anchors_and_refs = count_anchors_and_refs_(m_tree->root_id());
112  if(!num_anchors_and_refs)
113  return;
114  m_refs.reserve(num_anchors_and_refs);
115  m_refs.clear();
116 
117  // now descend through the hierarchy
118  gather_anchors_and_refs__(m_tree->root_id());
119 
120  _c4dbgpf("found {} anchors/refs", m_refs.size());
121 
122  // finally connect the reference list
123  id_type prev_anchor = NONE;
124  id_type count = 0;
125  for(auto &rd : m_refs)
126  {
127  rd.prev_anchor = prev_anchor;
128  if(rd.type.has_anchor())
129  prev_anchor = count;
130  ++count;
131  }
132  _c4dbgp("gathering anchors and refs: finished");
133 }
134 
135 id_type ReferenceResolver::lookup_(RefData const* C4_RESTRICT ra)
136 {
137  #ifdef RYML_DBG
138  id_type instance = static_cast<id_type>(ra-m_refs.m_stack);
139  id_type node = ra->node;
140  #endif
141  RYML_ASSERT(ra->type.is_key_ref() || ra->type.is_val_ref());
142  RYML_ASSERT(ra->type.is_key_ref() != ra->type.is_val_ref());
143  csubstr refname;
144  _c4dbgpf("instance[{}:node{}]: lookup from node={}...", instance, node, ra->node);
145  if(ra->type.is_val_ref())
146  {
147  refname = m_tree->val_ref(ra->node);
148  _c4dbgpf("instance[{}:node{}]: valref: '{}'", instance, node, refname);
149  }
150  else
151  {
152  RYML_ASSERT(ra->type.is_key_ref());
153  refname = m_tree->key_ref(ra->node);
154  _c4dbgpf("instance[{}:node{}]: keyref: '{}'", instance, node, refname);
155  }
156  while(ra->prev_anchor != NONE)
157  {
158  ra = &m_refs[ra->prev_anchor];
159  _c4dbgpf("instance[{}:node{}]: lookup '{}' at [{}:node{}]: keyref='{}' valref='{}'", instance, node, refname, ra-m_refs.m_stack, ra->node,
160  (m_tree->has_key_anchor(ra->node) ? m_tree->key_anchor(ra->node) : csubstr("~")),
161  (m_tree->has_val_anchor(ra->node) ? m_tree->val_anchor(ra->node) : csubstr("~")));
162  if(m_tree->has_anchor(ra->node, refname))
163  {
164  _c4dbgpf("instance[{}:node{}]: got it at [{}:node{}]!", instance, node, ra-m_refs.m_stack, ra->node);
165  return ra->node;
166  }
167  }
168  detail::_report_err(m_tree->m_callbacks, "ERROR: anchor not found: '{}'", refname);
169  C4_UNREACHABLE_AFTER_ERR();
170 }
171 
172 void ReferenceResolver::reset_(Tree *t_)
173 {
174  if(t_->callbacks() != m_refs.m_callbacks)
175  {
176  m_refs.m_callbacks = t_->callbacks();
177  }
178  m_tree = t_;
179  m_refs.clear();
180 }
181 
182 void ReferenceResolver::resolve_()
183 {
184  /* from the specs: "an alias node refers to the most recent
185  * node in the serialization having the specified anchor". So
186  * we need to start looking upward from ref nodes.
187  *
188  * @see http://yaml.org/spec/1.2/spec.html#id2765878 */
189  _c4dbgp("matching anchors/refs...");
190  for(id_type i = 0, e = m_refs.size(); i < e; ++i)
191  {
192  RefData &C4_RESTRICT refdata = m_refs.top(i);
193  if( ! refdata.type.is_ref())
194  continue;
195  refdata.target = lookup_(&refdata);
196  }
197  _c4dbgp("matching anchors/refs: finished");
198 
199  // insert the resolved references
200  _c4dbgp("modifying tree...");
201  id_type prev_parent_ref = NONE;
202  id_type prev_parent_ref_after = NONE;
203  for(id_type i = 0, e = m_refs.size(); i < e; ++i)
204  {
205  RefData const& C4_RESTRICT refdata = m_refs[i];
206  _c4dbgpf("instance[{}:node{}]: {}/{}...", i, refdata.node, i+1, e);
207  if( ! refdata.type.is_ref())
208  continue;
209  _c4dbgpf("instance[{}:node{}]: is reference!", i, refdata.node);
210  if(refdata.parent_ref != NONE)
211  {
212  _c4dbgpf("instance[{}:node{}] has parent: {}", i, refdata.node, refdata.parent_ref);
213  _RYML_CB_ASSERT(m_tree->m_callbacks, m_tree->is_seq(refdata.parent_ref));
214  const id_type p = m_tree->parent(refdata.parent_ref);
215  const id_type after = (prev_parent_ref != refdata.parent_ref) ?
216  refdata.parent_ref//prev_sibling(rd.parent_ref_sibling)
217  :
218  prev_parent_ref_after;
219  prev_parent_ref = refdata.parent_ref;
220  prev_parent_ref_after = m_tree->duplicate_children_no_rep(refdata.target, p, after);
221  m_tree->remove(refdata.node);
222  }
223  else
224  {
225  _c4dbgpf("instance[{}:node{}] has no parent", i, refdata.node, refdata.parent_ref);
226  if(m_tree->has_key(refdata.node) && m_tree->key(refdata.node) == "<<")
227  {
228  _c4dbgpf("instance[{}:node{}] is inheriting", i, refdata.node);
229  _RYML_CB_ASSERT(m_tree->m_callbacks, m_tree->is_keyval(refdata.node));
230  const id_type p = m_tree->parent(refdata.node);
231  const id_type after = m_tree->prev_sibling(refdata.node);
232  _c4dbgpf("instance[{}:node{}] p={} after={}", i, refdata.node, p, after);
233  m_tree->duplicate_children_no_rep(refdata.target, p, after);
234  m_tree->remove(refdata.node);
235  }
236  else if(refdata.type.is_key_ref())
237  {
238  _c4dbgpf("instance[{}:node{}] is key ref", i, refdata.node);
239  _RYML_CB_ASSERT(m_tree->m_callbacks, m_tree->is_key_ref(refdata.node));
240  _RYML_CB_ASSERT(m_tree->m_callbacks, m_tree->has_key_anchor(refdata.target) || m_tree->has_val_anchor(refdata.target));
241  if(m_tree->has_val_anchor(refdata.target) && m_tree->val_anchor(refdata.target) == m_tree->key_ref(refdata.node))
242  {
243  _c4dbgpf("instance[{}:node{}] target.anchor==val.anchor=={}", i, refdata.node, m_tree->val_anchor(refdata.target));
244  _RYML_CB_CHECK(m_tree->m_callbacks, !m_tree->is_container(refdata.target));
245  _RYML_CB_CHECK(m_tree->m_callbacks, m_tree->has_val(refdata.target));
246  const type_bits existing_style_flags = VAL_STYLE & m_tree->_p(refdata.target)->m_type.type;
247  static_assert((VAL_STYLE >> 1u) == (KEY_STYLE), "bad flags");
248  m_tree->_p(refdata.node)->m_key.scalar = m_tree->val(refdata.target);
249  m_tree->_add_flags(refdata.node, KEY | (existing_style_flags >> 1u));
250  }
251  else
252  {
253  _c4dbgpf("instance[{}:node{}] don't inherit container flags", i, refdata.node);
254  _RYML_CB_CHECK(m_tree->m_callbacks, m_tree->key_anchor(refdata.target) == m_tree->key_ref(refdata.node));
255  m_tree->_p(refdata.node)->m_key.scalar = m_tree->key(refdata.target);
256  // keys cannot be containers, so don't inherit container flags
257  const type_bits existing_style_flags = KEY_STYLE & m_tree->_p(refdata.target)->m_type.type;
258  m_tree->_add_flags(refdata.node, KEY | existing_style_flags);
259  }
260  }
261  else // val ref
262  {
263  _c4dbgpf("instance[{}:node{}] is val ref", i, refdata.node);
264  _RYML_CB_ASSERT(m_tree->m_callbacks, refdata.type.is_val_ref());
265  if(m_tree->has_key_anchor(refdata.target) && m_tree->key_anchor(refdata.target) == m_tree->val_ref(refdata.node))
266  {
267  _c4dbgpf("instance[{}:node{}] target.anchor==key.anchor=={}", i, refdata.node, m_tree->key_anchor(refdata.target));
268  _RYML_CB_CHECK(m_tree->m_callbacks, !m_tree->is_container(refdata.target));
269  _RYML_CB_CHECK(m_tree->m_callbacks, m_tree->has_val(refdata.target));
270  // keys cannot be containers, so don't inherit container flags
271  const type_bits existing_style_flags = (KEY_STYLE) & m_tree->_p(refdata.target)->m_type.type;
272  static_assert((KEY_STYLE << 1u) == (VAL_STYLE), "bad flags");
273  m_tree->_p(refdata.node)->m_val.scalar = m_tree->key(refdata.target);
274  m_tree->_add_flags(refdata.node, VAL | (existing_style_flags << 1u));
275  }
276  else
277  {
278  _c4dbgpf("instance[{}:node{}] duplicate contents", i, refdata.node);
279  m_tree->duplicate_contents(refdata.target, refdata.node);
280  }
281  }
282  }
283  _c4dbg_tree("after insertion", *m_tree);
284  }
285 }
286 
287 void ReferenceResolver::resolve(Tree *t_, bool clear_anchors)
288 {
289  _c4dbgp("resolving references...");
290 
291  reset_(t_);
292 
293  _c4dbg_tree("unresolved tree", *m_tree);
294 
295  gather_anchors_and_refs_();
296  if(m_refs.empty())
297  return;
298  resolve_();
299  _c4dbg_tree("resolved tree", *m_tree);
300 
301  // clear anchors and refs
302  if(clear_anchors)
303  {
304  _c4dbgp("clearing anchors/refs");
305  auto clear_ = [this]{
306  for(auto const& C4_RESTRICT ar : m_refs)
307  {
308  m_tree->rem_anchor_ref(ar.node);
309  if(ar.parent_ref != NONE)
310  if(m_tree->type(ar.parent_ref) != NOTYPE)
311  m_tree->remove(ar.parent_ref);
312  }
313  };
314  clear_();
315  // some of the elements injected during the resolution may
316  // have nested anchors; these anchors will have been newly
317  // injected during the resolution; collect again, and clear
318  // again, to ensure those are also cleared:
319  gather_anchors_and_refs_();
320  clear_();
321  _c4dbgp("clearing anchors/refs: finished");
322  }
323 
324  _c4dbg_tree("final resolved tree", *m_tree);
325 
326  m_tree = nullptr;
327  _c4dbgp("resolving references: finished");
328 }
329 
330 /** @endcond */
331 
332 } // namespace ryml
333 } // namespace c4
Common utilities and infrastructure used by ryml.
uint32_t type_bits
the integral type necessary to cover all the bits for NodeType_e
Definition: node_type.hpp:29
@ VALANCH
the val has an &anchor
Definition: node_type.hpp:45
@ NOTYPE
no node type or style is set
Definition: node_type.hpp:35
@ VALREF
a *reference: the val references an &anchor
Definition: node_type.hpp:43
@ KEY
is member of a map
Definition: node_type.hpp:36
@ VAL_STYLE
mask of all the scalar styles for val (not container styles!)
Definition: node_type.hpp:92
@ VAL
a scalar: has a scalar (ie string) value, possibly empty. must be a leaf node, and cannot be MAP or S...
Definition: node_type.hpp:37
@ KEY_STYLE
mask of all the scalar styles for key (not container styles!)
Definition: node_type.hpp:91
@ KEYREF
a *reference: the key references an &anchor
Definition: node_type.hpp:42
@ KEYANCH
the key has an &anchor
Definition: node_type.hpp:44
RYML_ID_TYPE id_type
The type of a node id in the YAML tree; to override the default type, define the macro RYML_ID_TYPE t...
Definition: common.hpp:253
@ NONE
an index to none
Definition: common.hpp:260
Definition: common.cpp:12
#define _c4dbg_tree(...)
void resolve(Tree *tree, bool clear_anchors=true)
Resolve references: for each reference, look for a matching anchor, and copy its contents to the ref ...