rapidyaml 0.14.0
parse and emit YAML, and do it fast
Loading...
Searching...
No Matches
tag.cpp
Go to the documentation of this file.
1#include "c4/yml/tag.hpp"
2#include "c4/yml/error.hpp"
3#include "c4/yml/detail/dbgprint.hpp"
4
5
6namespace c4 {
7namespace yml {
8
10{
11 if((tag.len > 2) && (tag.str[0] == '!'))
12 {
13 size_t pos = tag.find('!', 1);
14 return pos != npos && pos > 1 && tag.str[1] != '<';
15 }
16 return false;
17}
18
20{
21 YamlTag_e t = to_tag(tag);
22 if(t != TAG_NONE)
23 return from_tag(t);
24 if(tag.begins_with("!<"))
25 tag = tag.sub(1);
26 if(tag.begins_with("<!"))
27 return tag;
28 return tag;
29}
30
32{
33 YamlTag_e t = to_tag(tag);
34 if(t != TAG_NONE)
35 return from_tag_long(t);
36 if(tag.begins_with("!<"))
37 tag = tag.sub(1);
38 if(tag.begins_with("<!"))
39 return tag;
40 return tag;
41}
42
44{
45 csubstr result = normalize_tag_long(tag);
46 if(result.begins_with("!!"))
47 {
48 _RYML_CHECK_BASIC(!output.overlaps(tag));
49 tag = tag.sub(2);
50 const csubstr pfx = "<tag:yaml.org,2002:";
51 const size_t len = pfx.len + tag.len + 1;
52 if(len <= output.len)
53 {
54 memcpy(output.str , pfx.str, pfx.len);
55 memcpy(output.str + pfx.len, tag.str, tag.len);
56 output[pfx.len + tag.len] = '>';
57 result = output.first(len);
58 }
59 else
60 {
61 result.str = nullptr;
62 result.len = len;
63 }
64 }
65 return result;
66}
67
69{
70 if(tag.begins_with("!<"))
71 tag = tag.sub(1);
72 if(tag.begins_with("!!"))
73 {
74 tag = tag.sub(2);
75 }
76 else if(tag.begins_with('!'))
77 {
78 return TAG_NONE;
79 }
80 else
81 {
82 csubstr pfx = "<tag:yaml.org,2002:";
83 csubstr pfx2 = pfx.sub(1);
84 if(tag.begins_with(pfx2))
85 {
86 tag = tag.sub(pfx2.len);
87 }
88 else if(tag.begins_with(pfx))
89 {
90 tag = tag.sub(pfx.len);
91 if(!tag.len)
92 return TAG_NONE;
93 tag = tag.offs(0, 1);
94 }
95 }
96 if(tag == "map")
97 return TAG_MAP;
98 else if(tag == "omap")
99 return TAG_OMAP;
100 else if(tag == "pairs")
101 return TAG_PAIRS;
102 else if(tag == "set")
103 return TAG_SET;
104 else if(tag == "seq")
105 return TAG_SEQ;
106 else if(tag == "binary")
107 return TAG_BINARY;
108 else if(tag == "bool")
109 return TAG_BOOL;
110 else if(tag == "float")
111 return TAG_FLOAT;
112 else if(tag == "int")
113 return TAG_INT;
114 else if(tag == "merge")
115 return TAG_MERGE;
116 else if(tag == "null")
117 return TAG_NULL;
118 else if(tag == "str")
119 return TAG_STR;
120 else if(tag == "timestamp")
121 return TAG_TIMESTAMP;
122 else if(tag == "value")
123 return TAG_VALUE;
124 else if(tag == "yaml")
125 return TAG_YAML;
126
127 return TAG_NONE;
128}
129
131{
132 switch(tag)
133 {
134 case TAG_MAP:
135 return {"<tag:yaml.org,2002:map>"};
136 case TAG_OMAP:
137 return {"<tag:yaml.org,2002:omap>"};
138 case TAG_PAIRS:
139 return {"<tag:yaml.org,2002:pairs>"};
140 case TAG_SET:
141 return {"<tag:yaml.org,2002:set>"};
142 case TAG_SEQ:
143 return {"<tag:yaml.org,2002:seq>"};
144 case TAG_BINARY:
145 return {"<tag:yaml.org,2002:binary>"};
146 case TAG_BOOL:
147 return {"<tag:yaml.org,2002:bool>"};
148 case TAG_FLOAT:
149 return {"<tag:yaml.org,2002:float>"};
150 case TAG_INT:
151 return {"<tag:yaml.org,2002:int>"};
152 case TAG_MERGE:
153 return {"<tag:yaml.org,2002:merge>"};
154 case TAG_NULL:
155 return {"<tag:yaml.org,2002:null>"};
156 case TAG_STR:
157 return {"<tag:yaml.org,2002:str>"};
158 case TAG_TIMESTAMP:
159 return {"<tag:yaml.org,2002:timestamp>"};
160 case TAG_VALUE:
161 return {"<tag:yaml.org,2002:value>"};
162 case TAG_YAML:
163 return {"<tag:yaml.org,2002:yaml>"};
164 case TAG_NONE:
165 default:
166 return {""};
167 }
168}
169
171{
172 switch(tag)
173 {
174 case TAG_MAP:
175 return {"!!map"};
176 case TAG_OMAP:
177 return {"!!omap"};
178 case TAG_PAIRS:
179 return {"!!pairs"};
180 case TAG_SET:
181 return {"!!set"};
182 case TAG_SEQ:
183 return {"!!seq"};
184 case TAG_BINARY:
185 return {"!!binary"};
186 case TAG_BOOL:
187 return {"!!bool"};
188 case TAG_FLOAT:
189 return {"!!float"};
190 case TAG_INT:
191 return {"!!int"};
192 case TAG_MERGE:
193 return {"!!merge"};
194 case TAG_NULL:
195 return {"!!null"};
196 case TAG_STR:
197 return {"!!str"};
198 case TAG_TIMESTAMP:
199 return {"!!timestamp"};
200 case TAG_VALUE:
201 return {"!!value"};
202 case TAG_YAML:
203 return {"!!yaml"};
204 case TAG_NONE:
205 default:
206 return {""};
207 }
208}
209
211{
212 if(handle.begins_with('!') && handle.ends_with('!'))
213 {
214 _c4dbgpf("handle={}", _prs(handle, true));
215 csubstr trimmed = handle.sub(1);
216 if(trimmed.ends_with('!'))
217 trimmed = trimmed.offs(0, 1);
218 _c4dbgpf("handle_trimmed={}", _prs(trimmed, true));
219 // https://yaml.org/spec/1.2.2/#rule-ns-word-char
220 for(char c : trimmed)
221 {
222 bool ok = (c >= '0' && c <= '9')
223 || (c >= 'a' && c <= 'z')
224 || (c >= 'A' && c <= 'Z')
225 || c == '-';
226 if(!ok)
227 {
228 _c4dbgpf("invalid handle character: '{}'", _c4prc(c));
229 return false;
230 }
231 }
232 return true;
233 }
234 return false;
235}
236
237namespace {
/** Tell whether a character may appear in a tag.
 *
 * ASCII letters and digits are accepted, together with the punctuation
 * listed below. Note that '%' is accepted here; this function does not
 * look at what follows it.
 *
 * @param c the character to classify
 * @return true when the character is accepted */
bool is_valid_tag_char(char c)
{
    // https://yaml.org/spec/1.2.2/#691-node-tags
    const bool is_digit = ('0' <= c && c <= '9');
    const bool is_lower = ('a' <= c && c <= 'z');
    const bool is_upper = ('A' <= c && c <= 'Z');
    if(is_digit || is_lower || is_upper)
        return true;
    // plain char array (no terminator), so '\0' is never matched
    static const char punctuation[] = {
        '-', '#', ';', '/', '?', ':', '@', '&', '=', '+',
        '$', '_', '.', '~', '*', '\'', '(', ')', '%',
    };
    for(const char allowed : punctuation)
    {
        if(c == allowed)
            return true;
    }
    return false;
}
272bool read_hex_char(csubstr suffix, size_t pos, char *out)
273{
274 // must be succeeded by 2 hex digits
275 if(pos + 3 > suffix.len)
276 return false;
277 suffix = suffix.range(pos + 1, pos + 3);
278 uint8_t val = 0;
279 if(C4_UNLIKELY(!read_hex(suffix, &val) || val > 127))
280 return false;
281 *out = static_cast<char>(val);
282 return true;
283}
284} // namespace
285
286
287size_t transform_tag(substr output, csubstr handle, csubstr prefix, csubstr tag,
288 Callbacks const& callbacks, Location const& ymlloc,
289 bool with_brackets)
290{
291 _RYML_ASSERT_BASIC_(callbacks, tag.len >= handle.len);
292 _RYML_ASSERT_BASIC_(callbacks, !output.overlaps(tag));
293 _RYML_ASSERT_BASIC_(callbacks, prefix.len > 0);
294 csubstr rest = tag.sub(handle.len);
295 _c4dbgpf("%TAG: rest={}", _prs(rest));
296 size_t rpos = 0, wpos = 0;
297 auto appendstr = [&](csubstr s) {
298 if(s.len && wpos + s.len <= output.len)
299 memcpy(output.str + wpos, s.str, s.len);
300 wpos += s.len;
301 };
302 auto appendchar = [&](char c) {
303 if(wpos < output.len)
304 output.str[wpos] = c;
305 ++wpos;
306 };
307 if(with_brackets)
308 appendchar('<');
309 appendstr(prefix);
310 const char *errmsg = nullptr;
311 for(size_t pos = 0; pos < rest.len; ++pos)
312 {
313 char c = rest.str[pos];
314 if(C4_LIKELY(is_valid_tag_char(c)))
315 {
316 if(c != '%')
317 continue;
318 else if(read_hex_char(rest, pos, &c))
319 {
320 appendstr(rest.range(rpos, pos));
321 appendchar(c);
322 pos += 2;
323 rpos = pos + 1;
324 continue;
325 }
326 }
327 errmsg = "invalid tag";
328 goto err; // NOLINT
329 }
330 appendstr(rest.sub(rpos));
331 if(with_brackets)
332 appendchar('>');
333 return wpos;
334err:
335 if(ymlloc)
336 {
337 _RYML_ERR_PARSE_(callbacks, ymlloc, errmsg);
338 }
339 else
340 {
341 _RYML_ERR_BASIC_(callbacks, errmsg);
342 }
343}
344
345
346//-----------------------------------------------------------------------------
347
349{
350 // this assumes we have a very small number of tag directives
351 id_type i = 0;
352 for(; i < RYML_MAX_TAG_DIRECTIVES; ++i)
353 if(m_directives[i].handle.empty())
354 break;
355 return i;
356}
357
358TagDirective const* TagDirectives::add(csubstr handle, csubstr prefix, id_type doc_id) noexcept
359{
360 id_type pos = size();
361 TagDirective *C4_RESTRICT td = nullptr;
363 {
364 td = &m_directives[pos];
365 td->handle = handle;
366 td->prefix = prefix;
367 td->doc_id = doc_id;
368 _c4dbgpf("tagd[{}]: added! handle={} prefix={} doc={}", pos, td->handle, td->prefix, td->doc_id);
369 }
370 return td;
371}
372
373void TagDirectives::clear() noexcept
374{
375 for(TagDirective &td : m_directives)
376 {
377 td.handle = {};
378 td.prefix = {};
379 td.doc_id = NONE;
380 }
381}
382
384{
385 TagDirective const* first = nullptr;
386 TagDirective const* last = nullptr;
387 for(id_type i = 0; i < RYML_MAX_TAG_DIRECTIVES; ++i)
388 {
389 TagDirective const& C4_RESTRICT td = m_directives[i];
390 if(doc_id == td.doc_id)
391 {
392 first = m_directives + i;
393 break;
394 }
395 else if(td.handle.empty())
396 {
397 break;
398 }
399 }
400 if(first)
401 {
403 for(TagDirective const* C4_RESTRICT td = first; td < last; ++td)
404 {
405 if(doc_id != td->doc_id || td->handle.empty())
406 {
407 last = td;
408 break;
409 }
410 }
411 }
412 else
413 {
414 first = last = m_directives;
415 }
416 return TagDirectiveRange{first, last};
417}
418
419TagDirective const* TagDirectives::lookup(csubstr tag, id_type doc_id) const noexcept
420{
421 _c4dbgpf("tagd: searching for {}, doc_id={}", _prs(tag), doc_id);
422 for(id_type i = 0; i < RYML_MAX_TAG_DIRECTIVES; ++i)
423 {
424 TagDirective const& C4_RESTRICT td = m_directives[i];
425 if(td.handle.empty())
426 {
427 continue;
428 }
429 _c4dbgpf("tagd[{}]: handle={} prefix={} doc_id={}", i, td.handle, td.prefix, td.doc_id);
430 if(tag.begins_with(td.handle))
431 {
432 if(td.handle == '!' && (
433 tag.begins_with("!!")
434 || tag.begins_with('<')
435 || tag.begins_with("!<")
436 || is_custom_tag(tag)))
437 continue;
438 _c4dbgpf("tagd[{}]: matches handle!", i);
439 if(doc_id == td.doc_id)
440 {
441 _c4dbgpf("tagd[{}]: matches doc={}!", i, doc_id);
442 return &td;
443 }
444 }
445 }
446 return nullptr;
447}
448
449csubstr TagDirectives::resolve(substr buf, size_t *bufsz, csubstr tag, id_type id, Location const& ymlloc, Callbacks const& callbacks, bool with_brackets) const
450{
451 _RYML_ASSERT_BASIC_(callbacks, !buf.overlaps(tag));
452 TagDirective const* C4_RESTRICT td = lookup(tag, id);
453 *bufsz = 0;
454 csubstr handle, prefix, ret;
455 const char *errmsg = nullptr;
456 size_t len;
457 if(td)
458 {
459 handle = td->handle;
460 prefix = td->prefix;
461 }
462 else
463 {
464 _c4dbgp("tagd: no directive found");
465 if(tag.begins_with('<'))
466 {
467 _c4dbgp("tagd: already resolved");
468 if(C4_UNLIKELY(!tag.ends_with('>')))
469 {
470 errmsg = "malformed tag";
471 goto err; // NOLINT
472 }
473 return tag;
474 }
475 else if(tag.begins_with("!<"))
476 {
477 _c4dbgp("tagd: already resolved");
478 if(C4_UNLIKELY(!tag.ends_with('>')))
479 {
480 errmsg = "malformed tag";
481 goto err; // NOLINT
482 }
483 return tag.sub(1);
484 }
485 else if(tag.begins_with("!!"))
486 {
487 _c4dbgp("tagd: !!");
488 YamlTag_e tagenum = to_tag(tag);
489 if(tagenum != TAG_NONE)
490 {
491 _c4dbgpf("tagd: standard tag: {} -> {}", tag, from_tag_long(tagenum));
492 tag = from_tag_long(tagenum);
493 return with_brackets ? tag : tag.offs(1, 1);
494 }
495 handle = "!!";
496 prefix = "tag:yaml.org,2002:";
497 }
498 else if(C4_UNLIKELY(is_custom_tag(tag)))
499 {
500 _c4dbgp("tagd: custom_tag");
501 _c4dbgpf("tag '{}' at id={}: no matching directive was found", tag, id);
502 errmsg = "tag without matching directive";
503 goto err; // NOLINT
504 }
505 else
506 {
507 _c4dbgp("tagd: !");
508 handle = prefix = "!";
509 }
510 }
511 len = transform_tag(buf, handle, prefix, tag, callbacks, ymlloc, with_brackets);
512 *bufsz = len;
513 if(len <= buf.len)
514 {
515 ret = buf.first(len);
516 }
517 else
518 {
519 _c4dbgp("tagd: not enough room");
520 ret.str = nullptr;
521 ret.len = len;
522 }
523 return ret;
524err:
525 if(ymlloc)
526 {
527 _RYML_ERR_PARSE_(callbacks, ymlloc, errmsg);
528 }
529 else
530 {
531 _RYML_ERR_BASIC_(callbacks, errmsg);
532 }
533}
534
535
536//-----------------------------------------------------------------------------
537TagCache::LookupResult TagCache::find(csubstr tag, id_type doc_id, id_type linear_threshold) const noexcept
538{
539 LookupResult ret = {};
540 id_type sz = m_entries.size();
541 if(sz < linear_threshold) // do a linear search on small size
542 {
543 for(size_t i = 0; i < sz; ++i)
544 {
545 Entry const& C4_RESTRICT e = m_entries[i];
546 if(e.tag == tag && e.doc_id == doc_id)
547 {
548 ret.resolved = e.resolved;
549 ret.pos = i;
550 return ret;
551 }
552 else if(e.tag > tag || ((e.tag == tag) && e.doc_id > doc_id))
553 {
554 ret.pos = i;
555 return ret;
556 }
557 }
558 ret.pos = sz;
559 }
560 else // do a binary search on larger size
561 {
562 id_type first = 0;
563 id_type count = sz;
564 while(count)
565 {
566 id_type halfsz = count / id_type(2);
567 id_type mid = first + halfsz;
568 _RYML_ASSERT_BASIC_(m_entries.m_callbacks, mid < sz);
569 Entry const& C4_RESTRICT e = m_entries[mid];
570 if(e.tag < tag || (e.tag == tag && e.doc_id < doc_id))
571 {
572 first = mid + 1;
573 _RYML_ASSERT_BASIC_(m_entries.m_callbacks, count >= halfsz + 1);
574 count -= halfsz + 1;
575 }
576 else
577 {
578 count = halfsz;
579 }
580 }
581 ret.pos = first;
582 if(first < sz)
583 {
584 Entry const& C4_RESTRICT e = m_entries[first];
585 if(e.tag == tag && e.doc_id == doc_id)
586 {
587 ret.resolved = m_entries[first].resolved;
588 }
589 }
590 }
591 return ret;
592}
593
595{
596 const id_type sz = m_entries.size();
597 _RYML_ASSERT_BASIC_(m_entries.m_callbacks, pos <= sz);
598 _RYML_ASSERT_BASIC_(m_entries.m_callbacks, pos == sz || tag < m_entries[pos].tag || (tag == m_entries[pos].tag && doc_id < m_entries[pos].doc_id));
599 m_entries.resize(sz + 1);
600 if(pos < sz)
601 memmove(m_entries.m_stack + pos + 1, m_entries.m_stack + pos, (sz - pos) * sizeof(Entry));
602 m_entries.m_stack[pos].tag = tag;
603 m_entries.m_stack[pos].resolved = resolved;
604 m_entries.m_stack[pos].doc_id = doc_id;
605 _c4dbgpf("tagcache: add entry @pos={}: docid={} {} -> {}", pos, doc_id, tag, _maybe_null_str(resolved));
606}
607
608} // namespace yml
609} // namespace c4
#define RYML_NOEXCEPT
Conditionally expands to noexcept when RYML_USE_ASSERT is 0 and is empty otherwise.
Definition common.hpp:197
Error utilities used by ryml.
bool read_hex(csubstr s, I *v) noexcept
read a hexadecimal integer from a string.
Definition charconv.hpp:903
basic_substring< char > substr
a mutable string view
Definition substr.hpp:2356
basic_substring< const char > csubstr
an immutable string view
Definition substr.hpp:2357
csubstr from_tag_long(YamlTag_e tag)
Definition tag.cpp:130
bool is_valid_tag_handle(csubstr handle)
Definition tag.cpp:210
bool is_custom_tag(csubstr tag)
is a tag of the form !handle!tag?
Definition tag.cpp:9
csubstr normalize_tag_long(csubstr tag)
Definition tag.cpp:31
YamlTag_e
a bit mask for marking tags for types
Definition tag.hpp:33
size_t transform_tag(substr output, csubstr handle, csubstr prefix, csubstr tag, Callbacks const &callbacks, Location const &ymlloc, bool with_brackets)
returns the length of the transformed tag, or 0 to signal that the tag is local and cannot be resolve...
Definition tag.cpp:287
csubstr normalize_tag(csubstr tag)
Definition tag.cpp:19
csubstr from_tag(YamlTag_e tag)
Definition tag.cpp:170
YamlTag_e to_tag(csubstr tag)
Definition tag.cpp:68
#define RYML_MAX_TAG_DIRECTIVES
the maximum number of tag directives in a Tree
Definition tag.hpp:26
@ TAG_SET
!!set
Definition tag.hpp:39
@ TAG_MERGE
!!merge
Definition tag.hpp:46
@ TAG_INT
!!int
Definition tag.hpp:45
@ TAG_SEQ
!!seq
Definition tag.hpp:40
@ TAG_NULL
!!null
Definition tag.hpp:47
@ TAG_YAML
!!yaml
Definition tag.hpp:51
@ TAG_TIMESTAMP
!!timestamp
Definition tag.hpp:49
@ TAG_NONE
Definition tag.hpp:34
@ TAG_STR
!!str
Definition tag.hpp:48
@ TAG_BOOL
!!bool
Definition tag.hpp:43
@ TAG_MAP
!!map
Definition tag.hpp:36
@ TAG_BINARY
!!binary
Definition tag.hpp:42
@ TAG_PAIRS
!!pairs
Definition tag.hpp:38
@ TAG_VALUE
!!value
Definition tag.hpp:50
@ TAG_OMAP
!!omap
Definition tag.hpp:37
@ TAG_FLOAT
!!float
Definition tag.hpp:44
RYML_ID_TYPE id_type
The type of a node id in the YAML tree; to override the default type, define the macro RYML_ID_TYPE t...
Definition common.hpp:249
@ npos
a null string position
Definition common.hpp:263
@ NONE
an index to none
Definition common.hpp:256
(Undefined by default) Use shorter error message from checks/asserts: do not show the check condition...
Definition common.cpp:14
basic_substring range(size_t first, size_t last=npos) const noexcept
return [first,last[.
Definition substr.hpp:520
bool begins_with(const C c) const noexcept
true if the first character of the string is c
Definition substr.hpp:851
basic_substring offs(size_t left, size_t right) const noexcept
offset from the ends: return [left,len-right[ ; ie, trim a number of characters from the left and rig...
Definition substr.hpp:548
size_t len
the length of the substring
Definition substr.hpp:218
bool ends_with(const C c) const noexcept
true if the last character of the string is c
Definition substr.hpp:895
size_t find(const C c, size_t start_pos=0) const
Definition substr.hpp:714
bool overlaps(ro_substr const that) const noexcept
true if there is overlap of at least one element between that and *this
Definition substr.hpp:494
basic_substring first(size_t num) const noexcept
return the first num elements: [0,num[
Definition substr.hpp:530
basic_substring sub(size_t first) const noexcept
return [first,len[
Definition substr.hpp:503
C * str
a restricted pointer to the first character of the substring
Definition substr.hpp:216
A c-style callbacks class to customize behavior on errors or allocation.
Definition common.hpp:546
holds a source or yaml file position, for example when an error is detected; See also location_format...
Definition common.hpp:289
LookupResult find(csubstr tag, id_type doc_id, id_type linear_threshold=Entries::sso_size) const noexcept
Definition tag.cpp:537
id_type const_iterator
Definition tag.hpp:79
void add(csubstr tag, csubstr resolved, id_type doc_id, const_iterator pos) RYML_NOEXCEPT
Definition tag.cpp:594
void clear() noexcept
Definition tag.cpp:373
TagDirective m_directives[RYML_MAX_TAG_DIRECTIVES]
Definition tag.hpp:127
id_type size() const noexcept
Definition tag.cpp:348
TagDirectiveRange lookup_range(id_type doc_id) const noexcept
Definition tag.cpp:383
csubstr resolve(substr buf, size_t *bufsz, csubstr tag, id_type doc_id, Location const &ymlloc, Callbacks const &callbacks, bool with_brackets=true) const
Definition tag.cpp:449
TagDirective const * add(csubstr handle, csubstr prefix, id_type doc_id) noexcept
Definition tag.cpp:358
TagDirective const * lookup(csubstr tag, id_type id) const noexcept
Definition tag.cpp:419