rapidyaml  0.8.0
parse and emit YAML, and do it fast
reference_resolver.cpp
Go to the documentation of this file.
2 #include "c4/dump.hpp" // this is needed to resolve a function in the next header
3 #include "c4/yml/common.hpp"
4 #include "c4/yml/detail/parser_dbg.hpp"
5 #ifdef RYML_DBG
6 #include "c4/yml/detail/print.hpp"
7 #else
8 #define _c4dbg_tree(...)
9 #define _c4dbg_node(...)
10 #endif
11 
12 namespace c4 {
13 namespace yml {
14 
15 /** @cond dev */
16 
17 id_type ReferenceResolver::count_anchors_and_refs_(id_type n)
18 {
19  id_type c = 0;
20  c += m_tree->has_key_anchor(n);
21  c += m_tree->has_val_anchor(n);
22  c += m_tree->is_key_ref(n);
23  c += m_tree->is_val_ref(n);
24  c += m_tree->has_key(n) && m_tree->key(n) == "<<";
25  for(id_type ch = m_tree->first_child(n); ch != NONE; ch = m_tree->next_sibling(ch))
26  c += count_anchors_and_refs_(ch);
27  return c;
28 }
29 
30 void ReferenceResolver::gather_anchors_and_refs__(id_type n)
31 {
32  // insert key refs BEFORE inserting val refs
33  if(m_tree->has_key(n))
34  {
35  if(m_tree->key(n) == "<<")
36  {
37  _c4dbgpf("node[{}]: key is <<", n);
38  if(m_tree->has_val(n))
39  {
40  if(m_tree->is_val_ref(n))
41  {
42  _c4dbgpf("node[{}]: val ref, inheriting!", n);
43  m_refs.push({VALREF, n, NONE, NONE, NONE, NONE});
44  //m_refs.push({KEYREF, n, NONE, NONE, NONE, NONE});
45  }
46  else
47  {
48  _c4dbgpf("node[{}]: not ref!", n);
49  }
50  }
51  else if(m_tree->is_seq(n))
52  {
53  // for merging multiple inheritance targets
54  // <<: [ *CENTER, *BIG ]
55  _c4dbgpf("node[{}]: is seq!", n);
56  for(id_type ich = m_tree->first_child(n); ich != NONE; ich = m_tree->next_sibling(ich))
57  {
58  _c4dbgpf("node[{}]: val ref, inheriting multiple: {}", n, ich);
59  if(m_tree->is_container(ich))
60  {
61  detail::_report_err(m_tree->m_callbacks, "ERROR: node {} child {}: refs for << cannot be containers.'", n, ich);
62  C4_UNREACHABLE_AFTER_ERR();
63  }
64  m_refs.push({VALREF, ich, NONE, NONE, n, m_tree->next_sibling(n)});
65  }
66  return; // don't descend into the seq
67  }
68  else
69  {
70  detail::_report_err(m_tree->m_callbacks, "ERROR: node {}: refs for << must be either val or seq", n);
71  C4_UNREACHABLE_AFTER_ERR();
72  }
73  }
74  else if(m_tree->is_key_ref(n))
75  {
76  _c4dbgpf("node[{}]: key ref: '{}'", n, m_tree->key_ref(n));
77  _RYML_CB_ASSERT(m_tree->m_callbacks, m_tree->key(n) != "<<");
78  _RYML_CB_CHECK(m_tree->m_callbacks, (!m_tree->has_key(n)) || m_tree->key(n).ends_with(m_tree->key_ref(n)));
79  m_refs.push({KEYREF, n, NONE, NONE, NONE, NONE});
80  }
81  }
82  // val ref
83  if(m_tree->is_val_ref(n) && (!m_tree->has_key(n) || m_tree->key(n) != "<<"))
84  {
85  _c4dbgpf("node[{}]: val ref: '{}'", n, m_tree->val_ref(n));
86  RYML_CHECK((!m_tree->has_val(n)) || m_tree->val(n).ends_with(m_tree->val_ref(n)));
87  m_refs.push({VALREF, n, NONE, NONE, NONE, NONE});
88  }
89  // anchors
90  if(m_tree->has_key_anchor(n))
91  {
92  _c4dbgpf("node[{}]: key anchor: '{}'", n, m_tree->key_anchor(n));
93  RYML_CHECK(m_tree->has_key(n));
94  m_refs.push({KEYANCH, n, NONE, NONE, NONE, NONE});
95  }
96  if(m_tree->has_val_anchor(n))
97  {
98  _c4dbgpf("node[{}]: val anchor: '{}'", n, m_tree->val_anchor(n));
99  RYML_CHECK(m_tree->has_val(n) || m_tree->is_container(n));
100  m_refs.push({VALANCH, n, NONE, NONE, NONE, NONE});
101  }
102  // recurse
103  for(id_type ch = m_tree->first_child(n); ch != NONE; ch = m_tree->next_sibling(ch))
104  gather_anchors_and_refs__(ch);
105 }
106 
107 void ReferenceResolver::gather_anchors_and_refs_()
108 {
109  _c4dbgp("gathering anchors and refs...");
110 
111  // minimize (re-)allocations by counting first
112  id_type num_anchors_and_refs = count_anchors_and_refs_(m_tree->root_id());
113  if(!num_anchors_and_refs)
114  return;
115  m_refs.reserve(num_anchors_and_refs);
116  m_refs.clear();
117 
118  // now descend through the hierarchy
119  gather_anchors_and_refs__(m_tree->root_id());
120 
121  _c4dbgpf("found {} anchors/refs", m_refs.size());
122 
123  // finally connect the reference list
124  id_type prev_anchor = NONE;
125  id_type count = 0;
126  for(auto &rd : m_refs)
127  {
128  rd.prev_anchor = prev_anchor;
129  if(rd.type.has_anchor())
130  prev_anchor = count;
131  ++count;
132  }
133  _c4dbgp("gathering anchors and refs: finished");
134 }
135 
136 id_type ReferenceResolver::lookup_(RefData *C4_RESTRICT ra)
137 {
138  RYML_ASSERT(ra->type.is_key_ref() || ra->type.is_val_ref());
139  RYML_ASSERT(ra->type.is_key_ref() != ra->type.is_val_ref());
140  csubstr refname;
141  if(ra->type.is_val_ref())
142  {
143  refname = m_tree->val_ref(ra->node);
144  }
145  else
146  {
147  RYML_ASSERT(ra->type.is_key_ref());
148  refname = m_tree->key_ref(ra->node);
149  }
150  while(ra->prev_anchor != NONE)
151  {
152  ra = &m_refs[ra->prev_anchor];
153  if(m_tree->has_anchor(ra->node, refname))
154  return ra->node;
155  }
156  detail::_report_err(m_tree->m_callbacks, "ERROR: anchor not found: '{}'", refname);
157  C4_UNREACHABLE_AFTER_ERR();
158 }
159 
160 void ReferenceResolver::reset_(Tree *t_)
161 {
162  if(t_->callbacks() != m_refs.m_callbacks)
163  {
164  m_refs.m_callbacks = t_->callbacks();
165  }
166  m_refs.clear();
167  m_tree = t_;
168 }
169 
170 void ReferenceResolver::resolve(Tree *t_)
171 {
172  _c4dbgp("resolving references...");
173 
174  reset_(t_);
175 
176  _c4dbg_tree("unresolved tree", *m_tree);
177 
178  gather_anchors_and_refs_();
179  if(m_refs.empty())
180  return;
181 
182  /* from the specs: "an alias node refers to the most recent
183  * node in the serialization having the specified anchor". So
184  * we need to start looking upward from ref nodes.
185  *
186  * @see http://yaml.org/spec/1.2/spec.html#id2765878 */
187  _c4dbgp("matching anchors/refs...");
188  for(id_type i = 0, e = m_refs.size(); i < e; ++i)
189  {
190  RefData &C4_RESTRICT refdata = m_refs.top(i);
191  if( ! refdata.type.is_ref())
192  continue;
193  refdata.target = lookup_(&refdata);
194  }
195  _c4dbgp("matching anchors/refs: finished");
196 
197  // insert the resolved references
198  _c4dbgp("modifying tree...");
199  id_type prev_parent_ref = NONE;
200  id_type prev_parent_ref_after = NONE;
201  for(id_type i = 0, e = m_refs.size(); i < e; ++i)
202  {
203  RefData const& C4_RESTRICT refdata = m_refs[i];
204  _c4dbgpf("instance {}/{}...", i, e);
205  if( ! refdata.type.is_ref())
206  continue;
207  _c4dbgpf("instance {} is reference!", i);
208  if(refdata.parent_ref != NONE)
209  {
210  _c4dbgpf("ref {} has parent: {}", i, refdata.parent_ref);
211  _RYML_CB_ASSERT(m_tree->m_callbacks, m_tree->is_seq(refdata.parent_ref));
212  const id_type p = m_tree->parent(refdata.parent_ref);
213  const id_type after = (prev_parent_ref != refdata.parent_ref) ?
214  refdata.parent_ref//prev_sibling(rd.parent_ref_sibling)
215  :
216  prev_parent_ref_after;
217  prev_parent_ref = refdata.parent_ref;
218  prev_parent_ref_after = m_tree->duplicate_children_no_rep(refdata.target, p, after);
219  m_tree->remove(refdata.node);
220  }
221  else
222  {
223  _c4dbgpf("ref {} has no parent", i, refdata.parent_ref);
224  if(m_tree->has_key(refdata.node) && m_tree->key(refdata.node) == "<<")
225  {
226  _c4dbgpf("ref {} is inheriting", i);
227  _RYML_CB_ASSERT(m_tree->m_callbacks, m_tree->is_keyval(refdata.node));
228  const id_type p = m_tree->parent(refdata.node);
229  const id_type after = m_tree->prev_sibling(refdata.node);
230  m_tree->duplicate_children_no_rep(refdata.target, p, after);
231  m_tree->remove(refdata.node);
232  }
233  else if(refdata.type.is_key_ref())
234  {
235  _c4dbgpf("ref {} is key ref", i);
236  _RYML_CB_ASSERT(m_tree->m_callbacks, m_tree->is_key_ref(refdata.node));
237  _RYML_CB_ASSERT(m_tree->m_callbacks, m_tree->has_key_anchor(refdata.target) || m_tree->has_val_anchor(refdata.target));
238  if(m_tree->has_val_anchor(refdata.target) && m_tree->val_anchor(refdata.target) == m_tree->key_ref(refdata.node))
239  {
240  _RYML_CB_CHECK(m_tree->m_callbacks, !m_tree->is_container(refdata.target));
241  _RYML_CB_CHECK(m_tree->m_callbacks, m_tree->has_val(refdata.target));
242  const type_bits existing_style_flags = VAL_STYLE & m_tree->_p(refdata.target)->m_type.type;
243  static_assert((VAL_STYLE >> 1u) == (KEY_STYLE), "bad flags");
244  m_tree->_p(refdata.node)->m_key.scalar = m_tree->val(refdata.target);
245  m_tree->_add_flags(refdata.node, KEY | (existing_style_flags >> 1u));
246  }
247  else
248  {
249  _RYML_CB_CHECK(m_tree->m_callbacks, m_tree->key_anchor(refdata.target) == m_tree->key_ref(refdata.node));
250  m_tree->_p(refdata.node)->m_key.scalar = m_tree->key(refdata.target);
251  // keys cannot be containers, so don't inherit container flags
252  const type_bits existing_style_flags = KEY_STYLE & m_tree->_p(refdata.target)->m_type.type;
253  m_tree->_add_flags(refdata.node, KEY | existing_style_flags);
254  }
255  }
256  else // val ref
257  {
258  _c4dbgpf("ref {} is val ref", i);
259  _RYML_CB_ASSERT(m_tree->m_callbacks, refdata.type.is_val_ref());
260  if(m_tree->has_key_anchor(refdata.target) && m_tree->key_anchor(refdata.target) == m_tree->val_ref(refdata.node))
261  {
262  _RYML_CB_CHECK(m_tree->m_callbacks, !m_tree->is_container(refdata.target));
263  _RYML_CB_CHECK(m_tree->m_callbacks, m_tree->has_val(refdata.target));
264  // keys cannot be containers, so don't inherit container flags
265  const type_bits existing_style_flags = (KEY_STYLE) & m_tree->_p(refdata.target)->m_type.type;
266  static_assert((KEY_STYLE << 1u) == (VAL_STYLE), "bad flags");
267  m_tree->_p(refdata.node)->m_val.scalar = m_tree->key(refdata.target);
268  m_tree->_add_flags(refdata.node, VAL | (existing_style_flags << 1u));
269  }
270  else
271  {
272  m_tree->duplicate_contents(refdata.target, refdata.node);
273  }
274  }
275  }
276  }
277  _c4dbgp("modifying tree: finished");
278 
279  // clear anchors and refs
280  _c4dbgp("clearing anchors/refs");
281  for(auto const& C4_RESTRICT ar : m_refs)
282  {
283  m_tree->rem_anchor_ref(ar.node);
284  if(ar.parent_ref != NONE)
285  if(m_tree->type(ar.parent_ref) != NOTYPE)
286  m_tree->remove(ar.parent_ref);
287  }
288  _c4dbgp("clearing anchors/refs: finished");
289 
290  _c4dbg_tree("resolved tree", *m_tree);
291 
292  m_tree = nullptr;
293  _c4dbgp("resolving references: finished");
294 }
295 
296 /** @endcond */
297 
298 } // namespace ryml
299 } // namespace c4
Common utilities and infrastructure used by ryml.
uint32_t type_bits
the integral type necessary to cover all the bits for NodeType_e
Definition: node_type.hpp:26
@ VALANCH
the val has an &anchor
Definition: node_type.hpp:42
@ NOTYPE
no node type or style is set
Definition: node_type.hpp:32
@ VALREF
a *reference: the val references an &anchor
Definition: node_type.hpp:40
@ KEY
is member of a map
Definition: node_type.hpp:33
@ VAL_STYLE
mask of all the scalar styles for val (not container styles!)
Definition: node_type.hpp:89
@ VAL
a scalar: has a scalar (ie string) value, possibly empty. must be a leaf node, and cannot be MAP or S...
Definition: node_type.hpp:34
@ KEY_STYLE
mask of all the scalar styles for key (not container styles!)
Definition: node_type.hpp:88
@ KEYREF
a *reference: the key references an &anchor
Definition: node_type.hpp:39
@ KEYANCH
the key has an &anchor
Definition: node_type.hpp:41
RYML_ID_TYPE id_type
The type of a node id in the YAML tree; to override the default type, define the macro RYML_ID_TYPE t...
Definition: common.hpp:253
@ NONE
an index to none
Definition: common.hpp:260
Definition: common.cpp:12
#define _c4dbg_tree(...)
void resolve(Tree *t_)
Resolve references: for each reference, look for a matching anchor, and copy its contents to the ref ...