rapidyaml  0.7.2
parse and emit YAML, and do it fast
reference_resolver.cpp
Go to the documentation of this file.
2 #include "c4/dump.hpp" // this is needed to resolve a function in the next header
3 #include "c4/yml/common.hpp"
4 #include "c4/yml/detail/parser_dbg.hpp"
5 #ifdef RYML_DBG
6 #include "c4/yml/detail/print.hpp"
7 #else
8 #define _c4dbg_tree(...)
9 #define _c4dbg_node(...)
10 #endif
11 
12 namespace c4 {
13 namespace yml {
14 
15 id_type ReferenceResolver::count_anchors_and_refs_(id_type n)
16 {
17  id_type c = 0;
18  c += m_tree->has_key_anchor(n);
19  c += m_tree->has_val_anchor(n);
20  c += m_tree->is_key_ref(n);
21  c += m_tree->is_val_ref(n);
22  c += m_tree->has_key(n) && m_tree->key(n) == "<<";
23  for(id_type ch = m_tree->first_child(n); ch != NONE; ch = m_tree->next_sibling(ch))
24  c += count_anchors_and_refs_(ch);
25  return c;
26 }
27 
28 void ReferenceResolver::gather_anchors_and_refs__(id_type n)
29 {
30  // insert key refs BEFORE inserting val refs
31  if(m_tree->has_key(n))
32  {
33  if(m_tree->key(n) == "<<")
34  {
35  _c4dbgpf("node[{}]: key is <<", n);
36  if(m_tree->has_val(n))
37  {
38  if(m_tree->is_val_ref(n))
39  {
40  _c4dbgpf("node[{}]: val ref, inheriting!", n);
41  m_refs.push({VALREF, n, NONE, NONE, NONE, NONE});
42  //m_refs.push({KEYREF, n, NONE, NONE, NONE, NONE});
43  }
44  else
45  {
46  _c4dbgpf("node[{}]: not ref!", n);
47  }
48  }
49  else if(m_tree->is_seq(n))
50  {
51  // for merging multiple inheritance targets
52  // <<: [ *CENTER, *BIG ]
53  _c4dbgpf("node[{}]: is seq!", n);
54  for(id_type ich = m_tree->first_child(n); ich != NONE; ich = m_tree->next_sibling(ich))
55  {
56  _c4dbgpf("node[{}]: val ref, inheriting multiple: {}", n, ich);
57  if(m_tree->is_container(ich))
58  {
59  detail::_report_err(m_tree->m_callbacks, "ERROR: node {} child {}: refs for << cannot be containers.'", n, ich);
60  C4_UNREACHABLE_AFTER_ERR();
61  }
62  m_refs.push({VALREF, ich, NONE, NONE, n, m_tree->next_sibling(n)});
63  }
64  return; // don't descend into the seq
65  }
66  else
67  {
68  detail::_report_err(m_tree->m_callbacks, "ERROR: node {}: refs for << must be either val or seq", n);
69  C4_UNREACHABLE_AFTER_ERR();
70  }
71  }
72  else if(m_tree->is_key_ref(n))
73  {
74  _c4dbgpf("node[{}]: key ref: '{}'", n, m_tree->key_ref(n));
75  _RYML_CB_ASSERT(m_tree->m_callbacks, m_tree->key(n) != "<<");
76  _RYML_CB_CHECK(m_tree->m_callbacks, (!m_tree->has_key(n)) || m_tree->key(n).ends_with(m_tree->key_ref(n)));
77  m_refs.push({KEYREF, n, NONE, NONE, NONE, NONE});
78  }
79  }
80  // val ref
81  if(m_tree->is_val_ref(n) && (!m_tree->has_key(n) || m_tree->key(n) != "<<"))
82  {
83  _c4dbgpf("node[{}]: val ref: '{}'", n, m_tree->val_ref(n));
84  RYML_CHECK((!m_tree->has_val(n)) || m_tree->val(n).ends_with(m_tree->val_ref(n)));
85  m_refs.push({VALREF, n, NONE, NONE, NONE, NONE});
86  }
87  // anchors
88  if(m_tree->has_key_anchor(n))
89  {
90  _c4dbgpf("node[{}]: key anchor: '{}'", n, m_tree->key_anchor(n));
91  RYML_CHECK(m_tree->has_key(n));
92  m_refs.push({KEYANCH, n, NONE, NONE, NONE, NONE});
93  }
94  if(m_tree->has_val_anchor(n))
95  {
96  _c4dbgpf("node[{}]: val anchor: '{}'", n, m_tree->val_anchor(n));
97  RYML_CHECK(m_tree->has_val(n) || m_tree->is_container(n));
98  m_refs.push({VALANCH, n, NONE, NONE, NONE, NONE});
99  }
100  // recurse
101  for(id_type ch = m_tree->first_child(n); ch != NONE; ch = m_tree->next_sibling(ch))
102  gather_anchors_and_refs__(ch);
103 }
104 
105 void ReferenceResolver::gather_anchors_and_refs_()
106 {
107  _c4dbgp("gathering anchors and refs...");
108 
109  // minimize (re-)allocations by counting first
110  id_type num_anchors_and_refs = count_anchors_and_refs_(m_tree->root_id());
111  if(!num_anchors_and_refs)
112  return;
113  m_refs.reserve(num_anchors_and_refs);
114  m_refs.clear();
115 
116  // now descend through the hierarchy
117  gather_anchors_and_refs__(m_tree->root_id());
118 
119  _c4dbgpf("found {} anchors/refs", m_refs.size());
120 
121  // finally connect the reference list
122  id_type prev_anchor = NONE;
123  id_type count = 0;
124  for(auto &rd : m_refs)
125  {
126  rd.prev_anchor = prev_anchor;
127  if(rd.type.has_anchor())
128  prev_anchor = count;
129  ++count;
130  }
131  _c4dbgp("gathering anchors and refs: finished");
132 }
133 
134 id_type ReferenceResolver::lookup_(RefData *C4_RESTRICT ra)
135 {
136  RYML_ASSERT(ra->type.is_key_ref() || ra->type.is_val_ref());
137  RYML_ASSERT(ra->type.is_key_ref() != ra->type.is_val_ref());
138  csubstr refname;
139  if(ra->type.is_val_ref())
140  {
141  refname = m_tree->val_ref(ra->node);
142  }
143  else
144  {
145  RYML_ASSERT(ra->type.is_key_ref());
146  refname = m_tree->key_ref(ra->node);
147  }
148  while(ra->prev_anchor != NONE)
149  {
150  ra = &m_refs[ra->prev_anchor];
151  if(m_tree->has_anchor(ra->node, refname))
152  return ra->node;
153  }
154  detail::_report_err(m_tree->m_callbacks, "ERROR: anchor not found: '{}'", refname);
155  C4_UNREACHABLE_AFTER_ERR();
156 }
157 
158 void ReferenceResolver::reset_(Tree *t_)
159 {
160  if(t_->callbacks() != m_refs.m_callbacks)
161  {
162  m_refs.m_callbacks = t_->callbacks();
163  }
164  m_refs.clear();
165  m_tree = t_;
166 }
167 
169 {
170  _c4dbgp("resolving references...");
171 
172  reset_(t_);
173 
174  _c4dbg_tree("unresolved tree", *m_tree);
175 
176  gather_anchors_and_refs_();
177  if(m_refs.empty())
178  return;
179 
180  /* from the specs: "an alias node refers to the most recent
181  * node in the serialization having the specified anchor". So
182  * we need to start looking upward from ref nodes.
183  *
184  * @see http://yaml.org/spec/1.2/spec.html#id2765878 */
185  _c4dbgp("matching anchors/refs...");
186  for(id_type i = 0, e = m_refs.size(); i < e; ++i)
187  {
188  RefData &C4_RESTRICT refdata = m_refs.top(i);
189  if( ! refdata.type.is_ref())
190  continue;
191  refdata.target = lookup_(&refdata);
192  }
193  _c4dbgp("matching anchors/refs: finished");
194 
195  // insert the resolved references
196  _c4dbgp("modifying tree...");
197  id_type prev_parent_ref = NONE;
198  id_type prev_parent_ref_after = NONE;
199  for(id_type i = 0, e = m_refs.size(); i < e; ++i)
200  {
201  RefData const& C4_RESTRICT refdata = m_refs[i];
202  _c4dbgpf("instance {}/{}...", i, e);
203  if( ! refdata.type.is_ref())
204  continue;
205  _c4dbgpf("instance {} is reference!", i);
206  if(refdata.parent_ref != NONE)
207  {
208  _c4dbgpf("ref {} has parent: {}", i, refdata.parent_ref);
209  _RYML_CB_ASSERT(m_tree->m_callbacks, m_tree->is_seq(refdata.parent_ref));
210  const id_type p = m_tree->parent(refdata.parent_ref);
211  const id_type after = (prev_parent_ref != refdata.parent_ref) ?
212  refdata.parent_ref//prev_sibling(rd.parent_ref_sibling)
213  :
214  prev_parent_ref_after;
215  prev_parent_ref = refdata.parent_ref;
216  prev_parent_ref_after = m_tree->duplicate_children_no_rep(refdata.target, p, after);
217  m_tree->remove(refdata.node);
218  }
219  else
220  {
221  _c4dbgpf("ref {} has no parent", i, refdata.parent_ref);
222  if(m_tree->has_key(refdata.node) && m_tree->key(refdata.node) == "<<")
223  {
224  _c4dbgpf("ref {} is inheriting", i);
225  _RYML_CB_ASSERT(m_tree->m_callbacks, m_tree->is_keyval(refdata.node));
226  const id_type p = m_tree->parent(refdata.node);
227  const id_type after = m_tree->prev_sibling(refdata.node);
228  m_tree->duplicate_children_no_rep(refdata.target, p, after);
229  m_tree->remove(refdata.node);
230  }
231  else if(refdata.type.is_key_ref())
232  {
233  _c4dbgpf("ref {} is key ref", i);
234  _RYML_CB_ASSERT(m_tree->m_callbacks, m_tree->is_key_ref(refdata.node));
235  _RYML_CB_ASSERT(m_tree->m_callbacks, m_tree->has_key_anchor(refdata.target) || m_tree->has_val_anchor(refdata.target));
236  if(m_tree->has_val_anchor(refdata.target) && m_tree->val_anchor(refdata.target) == m_tree->key_ref(refdata.node))
237  {
238  _RYML_CB_CHECK(m_tree->m_callbacks, !m_tree->is_container(refdata.target));
239  _RYML_CB_CHECK(m_tree->m_callbacks, m_tree->has_val(refdata.target));
240  const type_bits existing_style_flags = VAL_STYLE & m_tree->_p(refdata.target)->m_type.type;
241  static_assert((VAL_STYLE >> 1u) == (KEY_STYLE), "bad flags");
242  m_tree->_p(refdata.node)->m_key.scalar = m_tree->val(refdata.target);
243  m_tree->_add_flags(refdata.node, KEY | (existing_style_flags >> 1u));
244  }
245  else
246  {
247  _RYML_CB_CHECK(m_tree->m_callbacks, m_tree->key_anchor(refdata.target) == m_tree->key_ref(refdata.node));
248  m_tree->_p(refdata.node)->m_key.scalar = m_tree->key(refdata.target);
249  // keys cannot be containers, so don't inherit container flags
250  const type_bits existing_style_flags = KEY_STYLE & m_tree->_p(refdata.target)->m_type.type;
251  m_tree->_add_flags(refdata.node, KEY | existing_style_flags);
252  }
253  }
254  else // val ref
255  {
256  _c4dbgpf("ref {} is val ref", i);
257  _RYML_CB_ASSERT(m_tree->m_callbacks, refdata.type.is_val_ref());
258  if(m_tree->has_key_anchor(refdata.target) && m_tree->key_anchor(refdata.target) == m_tree->val_ref(refdata.node))
259  {
260  _RYML_CB_CHECK(m_tree->m_callbacks, !m_tree->is_container(refdata.target));
261  _RYML_CB_CHECK(m_tree->m_callbacks, m_tree->has_val(refdata.target));
262  // keys cannot be containers, so don't inherit container flags
263  const type_bits existing_style_flags = (KEY_STYLE) & m_tree->_p(refdata.target)->m_type.type;
264  static_assert((KEY_STYLE << 1u) == (VAL_STYLE), "bad flags");
265  m_tree->_p(refdata.node)->m_val.scalar = m_tree->key(refdata.target);
266  m_tree->_add_flags(refdata.node, VAL | (existing_style_flags << 1u));
267  }
268  else
269  {
270  m_tree->duplicate_contents(refdata.target, refdata.node);
271  }
272  }
273  }
274  }
275  _c4dbgp("modifying tree: finished");
276 
277  // clear anchors and refs
278  _c4dbgp("clearing anchors/refs");
279  for(auto const& C4_RESTRICT ar : m_refs)
280  {
281  m_tree->rem_anchor_ref(ar.node);
282  if(ar.parent_ref != NONE)
283  if(m_tree->type(ar.parent_ref) != NOTYPE)
284  m_tree->remove(ar.parent_ref);
285  }
286  _c4dbgp("clearing anchors/refs: finished");
287 
288  _c4dbg_tree("resolved tree", *m_tree);
289 
290  m_tree = nullptr;
291  _c4dbgp("resolving references: finished");
292 }
293 
294 
295 } // namespace ryml
296 } // namespace c4
Common utilities and infrastructure used by ryml.
uint32_t type_bits
the integral type necessary to cover all the bits for NodeType_e
Definition: node_type.hpp:26
@ VALANCH
the val has an &anchor
Definition: node_type.hpp:42
@ NOTYPE
no node type or style is set
Definition: node_type.hpp:32
@ VALREF
a *reference: the val references an &anchor
Definition: node_type.hpp:40
@ KEY
is member of a map, must have non-empty key
Definition: node_type.hpp:33
@ VAL_STYLE
mask of all the scalar styles for val (not container styles!)
Definition: node_type.hpp:87
@ VAL
a scalar: has a scalar (ie string) value, possibly empty. must be a leaf node, and cannot be MAP or S...
Definition: node_type.hpp:34
@ KEY_STYLE
mask of all the scalar styles for key (not container styles!)
Definition: node_type.hpp:86
@ KEYREF
a *reference: the key references an &anchor
Definition: node_type.hpp:39
@ KEYANCH
the key has an &anchor
Definition: node_type.hpp:41
RYML_ID_TYPE id_type
The type of a node id in the YAML tree; to override the default type, define the macro RYML_ID_TYPE t...
Definition: common.hpp:252
@ NONE
an index to none
Definition: common.hpp:259
Definition: common.cpp:12
#define _c4dbg_tree(...)
void resolve(Tree *t_)
Resolve references: for each reference, look for a matching anchor, and copy its contents to the ref ...