rapidyaml  0.10.0
parse and emit YAML, and do it fast
string.hpp
Go to the documentation of this file.
1 #ifndef _C4_YML_EXTRA_STRING_HPP_
2 #define _C4_YML_EXTRA_STRING_HPP_
3 
4 #ifndef RYML_SINGLE_HEADER
5 #ifndef _C4_YML_COMMON_HPP_
6 #include "c4/yml/common.hpp"
7 #endif
8 #endif
9 
10 #include <new>
11 
12 C4_SUPPRESS_WARNING_GCC_CLANG_PUSH
13 C4_SUPPRESS_WARNING_GCC_CLANG("-Wold-style-cast")
14 C4_SUPPRESS_WARNING_GCC("-Wuseless-cast")
15 
16 #ifndef RYML_STRING_SSO_SIZE
17 #define RYML_STRING_SSO_SIZE 128
18 #endif
19 
20 #ifndef RYML_STRING_LIST_SSO_SIZE
21 #define RYML_STRING_LIST_SSO_SIZE 66
22 #endif
23 
24 namespace c4 {
25 namespace yml {
26 namespace extra {
27 
28 /** An owning string class used by the YAML std event handler (and the
29  * YamlScript handler). We use this instead of std::string because:
30  * 1) it spares the dependency on the standard library, and
31  * 2) it leaves room for adding borrowing semantics in the future. */
32 struct string
33 {
35  char m_buf[sso_size];
36  char *C4_RESTRICT m_str;
39 
40 public:
41 
43  : m_buf()
44  , m_str(m_buf)
45  , m_size(0)
47  {}
48  ~string() noexcept
49  {
50  _free();
51  }
52 
53  string(string const& that) RYML_NOEXCEPT : string()
54  {
55  resize(that.m_size);
56  _cp(&that);
57  }
58 
59  string(string &&that) noexcept : string()
60  {
61  _mv(&that);
62  }
63 
64  string& operator= (string const& that) RYML_NOEXCEPT
65  {
66  resize(that.m_size);
67  _cp(&that);
68  return *this;
69  }
70 
71  string& operator= (string &&that) noexcept
72  {
73  _mv(&that);
74  return *this;
75  }
76 
77 public:
78 
79  C4_ALWAYS_INLINE C4_HOT operator csubstr() const noexcept { return {m_str, m_size}; }
80  C4_ALWAYS_INLINE C4_HOT operator substr() noexcept { return {m_str, m_size}; }
81 
82 public:
83 
84  const char* data() const noexcept { return m_str; }
85  id_type size() const noexcept { return m_size; }
86  id_type capacity() const noexcept { return m_capacity; }
87 
88  void clear()
89  {
90  m_size = 0;
91  }
92 
93  void resize(id_type sz)
94  {
95  reserve(sz);
96  m_size = sz;
97  }
98 
/** Ensure the buffer can hold at least sz characters, keeping the
 * current contents. Returns at once when sz already fits. Otherwise a
 * new buffer is allocated through the global callbacks, the first
 * m_size characters are copied into it, and the previous buffer is
 * freed unless it is the inline m_buf. */
99  void reserve(id_type sz)
100  {
101  if(sz <= m_capacity)
102  return;
// NOTE(review): the listing jumps from line 102 to line 104; the
// declaration and starting value of `cap` (line 103) are not visible in
// this extract, so the growth policy cannot be read from here.
104  cap = cap > sz ? cap : sz;
// no allocation is made while the chosen capacity fits in sso_size
105  if(cap <= sso_size)
106  return;
107  Callbacks cb = get_callbacks();
108  char *buf = (char*) _RYML_CB_ALLOC(cb, char, cap);
109  if(m_size)
110  memcpy(buf, m_str, (size_t)m_size);
// only a buffer that is not the inline one is handed back
111  if(m_str != m_buf)
112  {
// NOTE(review): the size passed here is m_size, while _free() passes
// m_capacity when releasing the same kind of buffer - confirm which of
// the two the callbacks expect.
113  _RYML_CB_FREE(cb, m_str, char, m_size);
114  }
115  m_str = buf;
116  m_capacity = cap;
117  }
118 
119 public:
120 
121  C4_ALWAYS_INLINE C4_HOT void append(char c)
122  {
123  if(C4_UNLIKELY(m_size == m_capacity))
124  reserve(m_size + 1);
125  m_str[m_size++] = c;
126  }
127  C4_ALWAYS_INLINE C4_HOT void append(csubstr cs)
128  {
129  if(cs.len)
130  {
131  const id_type ilen = (id_type)cs.len;
132  if(C4_UNLIKELY(m_size + ilen > m_capacity))
133  reserve(m_size + ilen);
134  memcpy(m_str + m_size, cs.str, cs.len);
135  m_size += ilen;
136  }
137  }
138  C4_ALWAYS_INLINE void insert(char c, id_type pos)
139  {
140  RYML_ASSERT(pos <= m_size);
141  if(pos < m_size)
142  {
143  if(C4_UNLIKELY(m_size == m_capacity))
144  reserve(m_size + 1);
145  char *C4_RESTRICT src = m_str + pos;
146  memmove(src + 1, src, m_size - pos);
147  *src = c;
148  ++m_size;
149  }
150  else
151  {
152  append(c);
153  }
154  }
155  C4_NO_INLINE void insert(csubstr cs, id_type pos)
156  {
157  RYML_ASSERT(pos <= m_size);
158  if(cs.len)
159  {
160  if(pos < m_size)
161  {
162  const id_type ilen = (id_type)cs.len;
163  if(C4_UNLIKELY(m_size + ilen > m_capacity))
164  reserve(m_size + ilen);
165  char *C4_RESTRICT src = m_str + pos;
166  memmove(src + cs.len, src, m_size - pos);
167  memcpy(src, cs.str, cs.len);
168  m_size += ilen;
169  }
170  else
171  {
172  append(cs);
173  }
174  }
175  }
176  C4_NO_INLINE size_t find_last(csubstr pattern) RYML_NOEXCEPT
177  {
178  RYML_ASSERT(pattern.len);
179  if(m_size >= pattern.len)
180  {
181  for(size_t i = m_size - pattern.len; i != (size_t)-1; --i)
182  {
183  if(m_str[i] == pattern[0])
184  {
185  bool gotit = true;
186  for(size_t j = 1; j < pattern.len; ++j)
187  {
188  if(m_str[i + j] != pattern[j])
189  {
190  gotit = false;
191  break;
192  }
193  }
194  if(gotit)
195  return i;
196  }
197  }
198  }
199  return npos;
200  }
201 
202 public:
203 
/** Release the buffer when it is not the inline m_buf, point m_str back
 * at m_buf and set the length to zero. Used by the destructor. */
204  void _free()
205  {
206  RYML_ASSERT(m_str != nullptr); // this structure cannot be memset() to zero
207  if(m_str != m_buf)
208  {
209  _RYML_CB_FREE(get_callbacks(), m_str, char, (size_t)m_capacity);
210  m_str = m_buf;
// NOTE(review): listing line 211 is missing from this extract; given the
// assertion just below, it presumably resets m_capacity to sso_size -
// confirm against the real header.
212  }
213  RYML_ASSERT(m_capacity == sso_size);
214  m_size = 0;
215  }
216 
217  void _cp(string const* C4_RESTRICT that)
218  {
219  #if RYML_USE_ASSERT
220  if(that->m_str != that->m_buf)
221  {
222  RYML_ASSERT(that->m_capacity > sso_size);
223  RYML_ASSERT(that->m_size <= that->m_capacity);
224  }
225  else
226  {
227  RYML_ASSERT(that->m_capacity <= sso_size);
228  RYML_ASSERT(that->m_size <= that->m_capacity);
229  }
230  #endif
231  memcpy(m_str, that->m_str, that->m_size);
232  m_size = that->m_size;
233  m_capacity = that->m_size < sso_size ? sso_size : that->m_size;
234  }
235 
236  void _mv(string *C4_RESTRICT that)
237  {
238  if(that->m_str != that->m_buf)
239  {
240  RYML_ASSERT(that->m_capacity > sso_size);
241  RYML_ASSERT(that->m_size <= that->m_capacity);
242  m_str = that->m_str;
243  }
244  else
245  {
246  RYML_ASSERT(that->m_capacity <= sso_size);
247  RYML_ASSERT(that->m_size <= that->m_capacity);
248  memcpy(m_buf, that->m_buf, that->m_size);
249  m_str = m_buf;
250  }
251  m_size = that->m_size;
252  m_capacity = that->m_capacity;
253  // make sure no deallocation happens on destruction
254  RYML_ASSERT(that->m_str != this->m_buf);
255  that->m_str = that->m_buf;
256  that->m_capacity = sso_size;
257  that->m_size = 0;
258  }
259 };
260 
261 
262 //-----------------------------------------------------------------------------
263 //-----------------------------------------------------------------------------
264 //-----------------------------------------------------------------------------
265 
266 /** A collection of strings used by the event handler. Using this instead
267  * of std::vector spares the dependency on the standard library. */
269 {
271  union {
272  alignas(string) string m_buf[sso_size];
273  alignas(string) char m_buf_bytes[sso_size * sizeof(string)];
274  };
275  string *C4_RESTRICT m_arr;
278 
279 public:
280 
282  : m_arr(m_buf)
283  , m_size(0)
285  {}
286  ~string_vector() noexcept
287  {
288  _free();
289  }
290 
// Body of the copy constructor; its signature (listing line 291) is not
// part of this extract. Storage for that.m_size elements is reserved and
// each element is copy-constructed in place with placement new.
292  {
293  reserve(that.m_size);
294  m_size = that.m_size;
295  for(id_type i = 0; i < that.m_size; ++i)
296  new ((void*)(m_arr+i)) string(that.m_arr[i]);
297  }
298 
// Body of the move constructor; its signature (listing line 299) is not
// part of this extract. Elements are move-constructed one by one into
// this vector's storage; the source array itself is not adopted.
300  {
301  reserve(that.m_size);
302  m_size = that.m_size;
303  for(id_type i = 0; i < that.m_size; ++i)
304  new ((void*)(m_arr+i)) string(std::move(that.m_arr[i]));
// NOTE(review): this runs that's destructor explicitly, and the destructor
// will normally run again when that goes out of scope. _free() leaves the
// object empty, so the second run finds nothing to release, but calling
// that._free() would express the intent without ending that's lifetime.
305  that.~string_vector();
306  }
307 
// Body of the copy-assignment operator; its signature (listing line 308)
// is not part of this extract.
309  {
// NOTE(review): _free() runs first and sets m_size to 0, so when that is
// this same object the loop below sees that.m_size == 0 and the vector
// ends up empty - there is no self-assignment check.
310  _free();
311  reserve(that.m_size);
312  for(id_type i = 0; i < that.m_size; ++i)
// NOTE(review): after _free() no element of m_arr is alive (each was
// destroyed, or the storage was never constructed / was just allocated by
// reserve()), yet string::operator= is invoked on it here. The copy
// constructor uses placement new for the same step - confirm whether
// that was intended here too.
313  m_arr[i].operator=(that.m_arr[i]);
314  m_size = that.m_size;
315  return *this;
316  }
317 
// Body of what is presumably the move-assignment operator; its signature
// (listing line 318) is not part of this extract.
319  {
// NOTE(review): _free() runs first and sets m_size to 0, so a self-move
// leaves the vector empty - there is no self-assignment check.
320  _free();
321  reserve(that.m_size);
322  for(id_type i = 0; i < that.m_size; ++i)
// NOTE(review): after _free() no element of m_arr is alive (destroyed,
// never constructed, or freshly allocated by reserve()), yet
// string::operator= is invoked on it here. The move constructor uses
// placement new for the same step - confirm whether that was intended.
323  m_arr[i].operator=(std::move(that.m_arr[i]));
324  m_size = that.m_size;
// NOTE(review): explicit destructor call on an object whose destructor
// will normally run again later; see _free() for what it resets.
325  that.~string_vector();
326  return *this;
327  }
328 
/** Destroy the first m_size elements, release the array when it is not
 * the inline m_buf, point m_arr back at m_buf and set the size to zero. */
329  void _free()
330  {
331  RYML_ASSERT(m_arr != nullptr); // this structure cannot be memset() to zero
332  for(id_type i = 0; i < m_size; ++i)
333  m_arr[i].~string();
334  if(m_arr != m_buf)
335  {
336  _RYML_CB_FREE(get_callbacks(), m_arr, string, (size_t)m_capacity);
337  m_arr = m_buf;
// NOTE(review): listing line 338 is missing from this extract; given the
// assertion just below, it presumably resets m_capacity to sso_size -
// confirm against the real header.
339  }
340  RYML_ASSERT(m_capacity == sso_size);
341  m_size = 0;
342  }
343 
344 public:
345 
346  id_type size() const noexcept { return m_size; }
347  id_type capacity() const noexcept { return m_capacity; }
348 
349  void clear()
350  {
351  resize(0);
352  }
353 
354  void resize(id_type sz)
355  {
356  reserve(sz);
357  if(sz >= m_size)
358  {
359  for(id_type i = m_size; i < sz; ++i)
360  new ((void*)(m_arr + i)) string();
361  }
362  else
363  {
364  for(id_type i = sz; i < m_size; ++i)
365  m_arr[i].~string();
366  }
367  m_size = sz;
368  }
369 
/** Ensure the array can hold at least sz elements, keeping the current
 * ones. Returns at once when sz already fits. Otherwise a new array is
 * allocated through the global callbacks, the existing elements are
 * move-constructed into it, and the previous array is freed unless it
 * is the inline m_buf. */
370  void reserve(id_type sz)
371  {
372  if(sz <= m_capacity)
373  return;
// NOTE(review): the listing jumps from line 373 to line 375; the
// declaration and starting value of `cap` (line 374) are not visible in
// this extract, so the growth policy cannot be read from here.
375  cap = cap > sz ? cap : sz;
// no allocation is made while the chosen capacity fits in sso_size
376  if(cap <= sso_size)
377  return;
378  Callbacks cb = get_callbacks();
379  string *buf = (string*) _RYML_CB_ALLOC(cb, string, cap);
// the old elements are moved from but their destructors are not run here
380  for(id_type i = 0; i < m_size; ++i)
381  new ((void*)(buf + i)) string(std::move(m_arr[i]));
382  if(m_arr != m_buf)
383  {
// NOTE(review): the count passed here is m_size, while _free() passes
// m_capacity when releasing the same kind of array - confirm which of
// the two the callbacks expect.
384  _RYML_CB_FREE(cb, m_arr, string, m_size);
385  }
386  m_arr = buf;
387  m_capacity = cap;
388  }
389 
390 public:
391 
392  string& emplace_back()
393  {
394  RYML_ASSERT(m_size < m_capacity);
395  if(m_size == m_capacity)
396  reserve(m_size + 1);
397  string& ret = m_arr[m_size++];
398  new ((void*)&ret) string();
399  return ret;
400  }
401  string& operator[] (id_type i)
402  {
403  RYML_ASSERT(m_size <= m_capacity);
404  RYML_ASSERT(i < m_size);
405  return m_arr[i];
406  }
407  string const& operator[] (id_type i) const
408  {
409  RYML_ASSERT(m_size <= m_capacity);
410  RYML_ASSERT(i < m_size);
411  return m_arr[i];
412  }
413 };
414 
415 } // namespace extra
416 } // namespace yml
417 } // namespace c4
418 
419 C4_SUPPRESS_WARNING_GCC_POP
420 
421 #endif /* _C4_YML_EXTRA_STRING_HPP_ */
Common utilities and infrastructure used by ryml.
#define RYML_NOEXCEPT
Conditionally expands to noexcept when RYML_USE_ASSERT is 0 and is empty otherwise.
Definition: common.hpp:167
Callbacks const & get_callbacks()
get the global callbacks
Definition: common.cpp:118
RYML_ID_TYPE id_type
The type of a node id in the YAML tree; to override the default type, define the macro RYML_ID_TYPE t...
Definition: common.hpp:253
@ npos
a null string position
Definition: common.hpp:267
Definition: common.cpp:12
#define RYML_STRING_SSO_SIZE
Definition: string.hpp:17
#define RYML_STRING_LIST_SSO_SIZE
Definition: string.hpp:21
a c-style callbacks class.
Definition: common.hpp:377
a string collection used by the event handler.
Definition: string.hpp:269
string_vector & operator=(string_vector const &that) RYML_NOEXCEPT
Definition: string.hpp:308
string & operator[](id_type i)
Definition: string.hpp:401
void reserve(id_type sz)
Definition: string.hpp:370
string_vector(string_vector &&that) noexcept
Definition: string.hpp:299
string m_buf[sso_size]
Definition: string.hpp:272
string_vector(string_vector const &that) RYML_NOEXCEPT
Definition: string.hpp:291
id_type capacity() const noexcept
Definition: string.hpp:347
id_type size() const noexcept
Definition: string.hpp:346
char m_buf_bytes[sso_size *sizeof(string)]
Definition: string.hpp:273
void resize(id_type sz)
Definition: string.hpp:354
an owning string class used by the yaml std event handler (and the YamlScript handler).
Definition: string.hpp:33
void resize(id_type sz)
Definition: string.hpp:93
string(string &&that) noexcept
Definition: string.hpp:59
void reserve(id_type sz)
Definition: string.hpp:99
~string() noexcept
Definition: string.hpp:48
void insert(char c, id_type pos)
Definition: string.hpp:138
string(string const &that) RYML_NOEXCEPT
Definition: string.hpp:53
id_type size() const noexcept
Definition: string.hpp:85
char m_buf[sso_size]
Definition: string.hpp:35
void _mv(string *that)
Definition: string.hpp:236
id_type capacity() const noexcept
Definition: string.hpp:86
size_t find_last(csubstr pattern) RYML_NOEXCEPT
Definition: string.hpp:176
void insert(csubstr cs, id_type pos)
Definition: string.hpp:155
const char * data() const noexcept
Definition: string.hpp:84
void _cp(string const *that)
Definition: string.hpp:217
void append(csubstr cs)
Definition: string.hpp:127
string & operator=(string const &that) RYML_NOEXCEPT
Definition: string.hpp:64
void append(char c)
Definition: string.hpp:121