rapidyaml  0.13.0
parse and emit YAML, and do it fast
substr.hpp
Go to the documentation of this file.
1 #ifndef _C4_SUBSTR_HPP_
2 #define _C4_SUBSTR_HPP_
3 
4 /** @file substr.hpp read+write string views */
5 
6 #include <string.h>
7 #include <ctype.h>
8 #include <type_traits>
9 
10 #include "c4/export.hpp"
11 #include "c4/language.hpp"
12 #include "c4/error.hpp"
13 #include "c4/substr_fwd.hpp"
14 
15 C4_SUPPRESS_WARNING_GCC_CLANG_PUSH
16 C4_SUPPRESS_WARNING_GCC_CLANG("-Wold-style-cast")
17 C4_SUPPRESS_WARNING_GCC("-Wuseless-cast")
18 C4_SUPPRESS_WARNING_GCC("-Wtype-limits") // disable warnings on size_t>=0, used heavily in assertions below. These assertions are a preparation step for providing the index type as a template parameter.
19 
20 namespace c4 {
21 
22 /** @defgroup doc_substr Substring: read/write string views
23  * @{ */
24 
25 //-----------------------------------------------------------------------------
26 //-----------------------------------------------------------------------------
27 //-----------------------------------------------------------------------------
28 
29 /** @cond dev */
30 namespace detail {
31 template<typename C>
32 static inline void _do_reverse(C *C4_RESTRICT first, C *C4_RESTRICT last)
33 {
34  while(last > first)
35  {
36  C tmp = *last;
37  *last-- = *first;
38  *first++ = tmp;
39  }
40 }
41 } // namespace detail
42 /** @endcond */
43 
44 
45 //-----------------------------------------------------------------------------
46 //-----------------------------------------------------------------------------
47 //-----------------------------------------------------------------------------
48 
49 /** @cond dev */
50 // utility macros to deuglify SFINAE code; undefined after the class.
51 // https://stackoverflow.com/questions/43051882/how-to-disable-a-class-member-funrtion-for-certain-template-types
52 #define C4_REQUIRE_RW(ret_type) \
53  template <typename U=C> \
54  typename std::enable_if< ! std::is_const<U>::value, ret_type>::type
55 /** @endcond */
56 
57 
58 /** a non-owning string-view, consisting of a character pointer
59  * and a length.
60  *
61  * @note The pointer is explicitly restricted.
62  *
63  * @see a [quickstart
64  * sample](https://rapidyaml.readthedocs.io/latest/doxygen/group__doc__quickstart.html#ga43e253da0692c13967019446809c1113)
65  * in rapidyaml's documentation.
66  */
67 template<class C>
68 struct basic_substring // NOLINT(cppcoreguidelines-special-member-functions,hicpp-special-member-functions)
69 {
70 public:
71 
72  /** a restricted pointer to the first character of the substring */
73  C * C4_RESTRICT str;
74  /** the length of the substring */
75  size_t len;
76 
77 public:
78 
79  /** @name Types */
80  /** @{ */
81 
82  using CC = typename std::add_const<C>::type; //!< CC=const char
83  using NCC_ = typename std::remove_const<C>::type; //!< NCC_=non const char
84 
87 
88  using char_type = C;
89  using size_type = size_t;
90 
91  using iterator = C*;
92  using const_iterator = CC*;
93 
94  enum : size_t { npos = (size_t)-1, NONE = (size_t)-1 };
95 
96  /// convert automatically to substring of const C
97  template<class U=C>
98  C4_ALWAYS_INLINE operator typename std::enable_if<!std::is_const<U>::value, ro_substr const&>::type () const noexcept
99  {
100  return *(ro_substr const*)this; // don't call the str+len ctor because it does a check
101  }
102 
103  /** @} */
104 
105 public:
106 
107  /** @name Default construction and assignment */
108  /** @{ */
109 
110  C4_ALWAYS_INLINE constexpr basic_substring() noexcept : str(), len() {}
111 
112  C4_ALWAYS_INLINE basic_substring(basic_substring const&) noexcept = default;
113  C4_ALWAYS_INLINE basic_substring(basic_substring &&) noexcept = default;
114  C4_ALWAYS_INLINE basic_substring(std::nullptr_t) noexcept : str(nullptr), len(0) {}
115 
116  C4_ALWAYS_INLINE basic_substring& operator= (basic_substring const&) noexcept = default;
117  C4_ALWAYS_INLINE basic_substring& operator= (basic_substring &&) noexcept = default;
118  C4_ALWAYS_INLINE basic_substring& operator= (std::nullptr_t) noexcept { str = nullptr; len = 0; return *this; }
119 
120  C4_ALWAYS_INLINE void clear() noexcept { str = nullptr; len = 0; }
121 
122  /** @} */
123 
124 public:
125 
126  /** @name Construction and assignment from characters with the same type */
127  /** @{ */
128 
129  /** Construct from an array.
130  * @warning the input string need not be zero terminated, but the
131  * length is taken as if the string was zero terminated */
132  template<size_t N>
133  C4_ALWAYS_INLINE constexpr basic_substring(C (&s_)[N]) noexcept : str(s_), len(N-1) {}
134  /** Construct from a pointer and length.
135  * @warning the input string need not be zero terminated. */
136  C4_ALWAYS_INLINE basic_substring(C *s_, size_t len_) noexcept : str(s_), len(len_) { C4_ASSERT(str || !len_); }
137  /** Construct from two pointers.
138  * @warning the end pointer MUST BE larger than or equal to the begin pointer
139  * @warning the input string need not be zero terminated */
140  C4_ALWAYS_INLINE basic_substring(C *beg_, C *end_) noexcept : str(beg_), len(static_cast<size_t>(end_ - beg_)) { C4_ASSERT(end_ >= beg_); }
141  /** Construct from a C-string (zero-terminated string)
142  * @warning the input string MUST BE zero terminated.
143  * @warning will call strlen()
144  * @note this overload uses SFINAE to prevent it from overriding the array ctor
145  * @see For a more detailed explanation on why the plain overloads cannot
146  * coexist, see http://cplusplus.bordoon.com/specializeForCharacterArrays.html */
147  template<class U, typename std::enable_if<std::is_same<U, C*>::value || std::is_same<U, NCC_*>::value, int>::type=0>
148  C4_ALWAYS_INLINE basic_substring(U s_) noexcept : str(s_), len(s_ ? strlen(s_) : 0) {}
149 
150  /** Assign from an array.
151  * @warning the input string need not be zero terminated, but the
152  * length is taken as if the string was zero terminated */
153  template<size_t N>
154  C4_ALWAYS_INLINE void assign(C (&s_)[N]) noexcept { str = (s_); len = (N-1); }
155  /** Assign from a pointer and length.
156  * @warning the input string need not be zero terminated. */
157  C4_ALWAYS_INLINE void assign(C *s_, size_t len_) noexcept { str = s_; len = len_; C4_ASSERT(str || !len_); }
158  /** Assign from two pointers.
159  * @warning the end pointer MUST BE larger than or equal to the begin pointer
160  * @warning the input string need not be zero terminated. */
161  C4_ALWAYS_INLINE void assign(C *beg_, C *end_) noexcept { C4_ASSERT(end_ >= beg_); str = (beg_); len = static_cast<size_t>(end_ - beg_); }
162  /** Assign from a C-string (zero-terminated string of type const C* or C*)
163  * @warning the input string must be zero terminated.
164  * @warning will call strlen()
165  * @note this overload uses SFINAE to prevent it from overriding the array ctor
166  * @see For a more detailed explanation on why the plain pointer overloads cannot
167  * coexist with the array overloads, see http://cplusplus.bordoon.com/specializeForCharacterArrays.html */
168  template<class U, typename std::enable_if<std::is_same<U, C*>::value || std::is_same<U, NCC_*>::value, int>::type=0>
169  C4_ALWAYS_INLINE void assign(U s_) noexcept { str = (s_); len = (s_ ? strlen(s_) : 0); }
170 
171  /** Assign from an array.
172  * @warning the input string need not be zero terminated. */
173  template<size_t N>
174  C4_ALWAYS_INLINE basic_substring& operator= (C (&s_)[N]) noexcept { str = (s_); len = (N-1); return *this; }
175  /** Assign from a C-string (zero-terminated string of type const C* or C*)
176  * @warning the input string MUST BE zero terminated.
177  * @warning will call strlen()
178  * @note this overload uses SFINAE to prevent it from overriding the array ctor
179  * @see For a more detailed explanation on why the plain pointer overloads cannot
180  * coexist with the array overloads, see http://cplusplus.bordoon.com/specializeForCharacterArrays.html */
181  template<class U, typename std::enable_if<std::is_same<U, C*>::value || std::is_same<U, NCC_*>::value, int>::type=0>
182  C4_ALWAYS_INLINE basic_substring& operator= (U s_) noexcept { str = s_; len = s_ ? strlen(s_) : 0; return *this; }
183 
184  /** @} */
185 
186 public:
187 
188  /** @name Standard accessor methods */
189  /** @{ */
190 
191  C4_ALWAYS_INLINE C4_PURE bool has_str() const noexcept { return ! empty() && str[0] != C(0); }
192  C4_ALWAYS_INLINE C4_PURE bool empty() const noexcept { return (len == 0 || str == nullptr); }
193  C4_ALWAYS_INLINE C4_PURE bool not_empty() const noexcept { return (len != 0 && str != nullptr); }
194  C4_ALWAYS_INLINE C4_PURE size_t size() const noexcept { return len; }
195 
196  C4_ALWAYS_INLINE C4_PURE iterator begin() noexcept { return str; }
197  C4_ALWAYS_INLINE C4_PURE iterator end () noexcept { return str + len; }
198 
199  C4_ALWAYS_INLINE C4_PURE const_iterator begin() const noexcept { return str; }
200  C4_ALWAYS_INLINE C4_PURE const_iterator end () const noexcept { return str + len; }
201 
202  C4_ALWAYS_INLINE C4_PURE C * data() noexcept { return str; }
203  C4_ALWAYS_INLINE C4_PURE C const* data() const noexcept { return str; }
204 
205  C4_ALWAYS_INLINE C4_PURE C & operator[] (size_t i) noexcept { C4_ASSERT(i >= 0 && i < len); return str[i]; }
206  C4_ALWAYS_INLINE C4_PURE C const& operator[] (size_t i) const noexcept { C4_ASSERT(i >= 0 && i < len); return str[i]; }
207 
208  C4_ALWAYS_INLINE C4_PURE C & front() noexcept { C4_ASSERT(len > 0 && str != nullptr); return *str; }
209  C4_ALWAYS_INLINE C4_PURE C const& front() const noexcept { C4_ASSERT(len > 0 && str != nullptr); return *str; }
210 
211  C4_ALWAYS_INLINE C4_PURE C & back() noexcept { C4_ASSERT(len > 0 && str != nullptr); return *(str + len - 1); }
212  C4_ALWAYS_INLINE C4_PURE C const& back() const noexcept { C4_ASSERT(len > 0 && str != nullptr); return *(str + len - 1); }
213 
214  /** @} */
215 
216 public:
217 
218  /** @name Comparison methods */
219  /** @{ */
220 
221  C4_ALWAYS_INLINE C4_PURE int compare(C const c) const noexcept
222  {
223  C4_XASSERT((str != nullptr) || len == 0);
224  if(C4_LIKELY(str != nullptr && len > 0))
225  return (*str != c) ? *str - c : (static_cast<int>(len) - 1);
226  else
227  return -1;
228  }
229 
230  C4_PURE int compare(C const* C4_RESTRICT that, size_t sz) const noexcept
231  {
232  #if defined(__GNUC__) && (__GNUC__ >= 6)
233  C4_SUPPRESS_WARNING_GCC_WITH_PUSH("-Wnull-dereference")
234  #endif
235  C4_XASSERT(that || sz == 0);
236  C4_XASSERT(str || len == 0);
237  if(C4_LIKELY(str && that))
238  {
239  {
240  const size_t min = len < sz ? len : sz;
241  for(size_t i = 0; i < min; ++i)
242  if(str[i] != that[i])
243  return str[i] < that[i] ? -1 : 1;
244  }
245  if(len < sz)
246  return -1;
247  else if(len == sz)
248  return 0;
249  else
250  return 1;
251  }
252  else if(len == sz)
253  {
254  C4_XASSERT(len == 0 && sz == 0);
255  return 0;
256  }
257  return len < sz ? -1 : 1;
258  #if defined(__GNUC__) && (__GNUC__ >= 6)
259  C4_SUPPRESS_WARNING_GCC_POP
260  #endif
261  }
262 
263  C4_ALWAYS_INLINE C4_PURE int compare(ro_substr const that) const noexcept { return this->compare(that.str, that.len); }
264 
265  C4_ALWAYS_INLINE C4_PURE bool operator== (std::nullptr_t) const noexcept { return str == nullptr; }
266  C4_ALWAYS_INLINE C4_PURE bool operator!= (std::nullptr_t) const noexcept { return str != nullptr; }
267 
268  C4_ALWAYS_INLINE C4_PURE bool operator== (C const c) const noexcept { return this->compare(c) == 0; }
269  C4_ALWAYS_INLINE C4_PURE bool operator!= (C const c) const noexcept { return this->compare(c) != 0; }
270  C4_ALWAYS_INLINE C4_PURE bool operator< (C const c) const noexcept { return this->compare(c) < 0; }
271  C4_ALWAYS_INLINE C4_PURE bool operator> (C const c) const noexcept { return this->compare(c) > 0; }
272  C4_ALWAYS_INLINE C4_PURE bool operator<= (C const c) const noexcept { return this->compare(c) <= 0; }
273  C4_ALWAYS_INLINE C4_PURE bool operator>= (C const c) const noexcept { return this->compare(c) >= 0; }
274 
275  template<class U> C4_ALWAYS_INLINE C4_PURE bool operator== (basic_substring<U> const that) const noexcept { return this->compare(that) == 0; }
276  template<class U> C4_ALWAYS_INLINE C4_PURE bool operator!= (basic_substring<U> const that) const noexcept { return this->compare(that) != 0; }
277  template<class U> C4_ALWAYS_INLINE C4_PURE bool operator< (basic_substring<U> const that) const noexcept { return this->compare(that) < 0; }
278  template<class U> C4_ALWAYS_INLINE C4_PURE bool operator> (basic_substring<U> const that) const noexcept { return this->compare(that) > 0; }
279  template<class U> C4_ALWAYS_INLINE C4_PURE bool operator<= (basic_substring<U> const that) const noexcept { return this->compare(that) <= 0; }
280  template<class U> C4_ALWAYS_INLINE C4_PURE bool operator>= (basic_substring<U> const that) const noexcept { return this->compare(that) >= 0; }
281 
282  template<size_t N> C4_ALWAYS_INLINE C4_PURE bool operator== (const char (&that)[N]) const noexcept { return this->compare(that, N-1) == 0; }
283  template<size_t N> C4_ALWAYS_INLINE C4_PURE bool operator!= (const char (&that)[N]) const noexcept { return this->compare(that, N-1) != 0; }
284  template<size_t N> C4_ALWAYS_INLINE C4_PURE bool operator< (const char (&that)[N]) const noexcept { return this->compare(that, N-1) < 0; }
285  template<size_t N> C4_ALWAYS_INLINE C4_PURE bool operator> (const char (&that)[N]) const noexcept { return this->compare(that, N-1) > 0; }
286  template<size_t N> C4_ALWAYS_INLINE C4_PURE bool operator<= (const char (&that)[N]) const noexcept { return this->compare(that, N-1) <= 0; }
287  template<size_t N> C4_ALWAYS_INLINE C4_PURE bool operator>= (const char (&that)[N]) const noexcept { return this->compare(that, N-1) >= 0; }
288 
289  /** @} */
290 
291 public:
292 
293  /** @name Sub-selection methods */
294  /** @{ */
295 
296  /** true if *this is a substring of that (ie, from the same buffer) */
297  C4_ALWAYS_INLINE C4_PURE bool is_sub(ro_substr const that) const noexcept
298  {
299  return that.is_super(*this);
300  }
301 
302  /** true if that is a substring of *this (ie, from the same buffer) */
303  C4_ALWAYS_INLINE C4_PURE bool is_super(ro_substr const that) const noexcept
304  {
305  if(C4_LIKELY(len > 0))
306  return that.str >= str && that.str+that.len <= str+len;
307  else
308  return that.len == 0 && that.str == str && str != nullptr;
309  }
310 
311  /** true if there is overlap of at least one element between that and *this */
312  C4_ALWAYS_INLINE C4_PURE bool overlaps(ro_substr const that) const noexcept
313  {
314  // thanks @timwynants
315  return that.str+that.len > str && that.str < str+len;
316  }
317 
318 public:
319 
320  /** return [first,len[ */
321  C4_ALWAYS_INLINE C4_PURE basic_substring sub(size_t first) const noexcept
322  {
323  C4_ASSERT(first >= 0 && first <= len);
324  return basic_substring(str + first, len - first);
325  }
326 
327  /** return [first,first+num[. If num==npos, return [first,len[ */
328  C4_ALWAYS_INLINE C4_PURE basic_substring sub(size_t first, size_t num) const noexcept
329  {
330  C4_ASSERT(first >= 0 && first <= len);
331  C4_ASSERT((num >= 0 && num <= len) || (num == npos));
332  size_t rnum = num != npos ? num : len - first;
333  C4_ASSERT((first >= 0 && first + rnum <= len) || (num == 0));
334  return basic_substring(str + first, rnum);
335  }
336 
337  /** return [first,last[. If last==npos, return [first,len[ */
338  C4_ALWAYS_INLINE C4_PURE basic_substring range(size_t first, size_t last=npos) const noexcept
339  {
340  C4_ASSERT(first >= 0 && first <= len);
341  last = last != npos ? last : len;
342  C4_ASSERT(first <= last);
343  C4_ASSERT(last >= 0 && last <= len);
344  return basic_substring(str + first, last - first);
345  }
346 
347  /** return the first @p num elements: [0,num[*/
348  C4_ALWAYS_INLINE C4_PURE basic_substring first(size_t num) const noexcept
349  {
350  C4_ASSERT(num <= len || num == npos);
351  return basic_substring(str, num != npos ? num : len);
352  }
353 
354  /** return the last @p num elements: [len-num,len[*/
355  C4_ALWAYS_INLINE C4_PURE basic_substring last(size_t num) const noexcept
356  {
357  C4_ASSERT(num <= len || num == npos);
358  return num != npos ?
359  basic_substring(str + len - num, num) :
360  *this;
361  }
362 
363  /** offset from the ends: return [left,len-right[ ; ie, trim a
364  number of characters from the left and right. This is
365  equivalent to python's negative list indices. */
366  C4_ALWAYS_INLINE C4_PURE basic_substring offs(size_t left, size_t right) const noexcept
367  {
368  C4_ASSERT(left >= 0 && left <= len);
369  C4_ASSERT(right >= 0 && right <= len);
370  C4_ASSERT(left <= len - right + 1);
371  return basic_substring(str + left, len - right - left);
372  }
373 
374  /** return [0, pos[ . Same as .first(pos), but provided for compatibility with .right_of() */
375  C4_ALWAYS_INLINE C4_PURE basic_substring left_of(size_t pos) const noexcept
376  {
377  C4_ASSERT(pos <= len || pos == npos);
378  return (pos != npos) ?
379  basic_substring(str, pos) :
380  *this;
381  }
382 
383  /** return [0, pos+include_pos[ . Same as .first(pos+1), but provided for compatibility with .right_of() */
384  C4_ALWAYS_INLINE C4_PURE basic_substring left_of(size_t pos, bool include_pos) const noexcept
385  {
386  C4_ASSERT(pos <= len || pos == npos);
387  return (pos != npos) ?
388  basic_substring(str, pos+include_pos) :
389  *this;
390  }
391 
392  /** return [pos+1, len[ */
393  C4_ALWAYS_INLINE C4_PURE basic_substring right_of(size_t pos) const noexcept
394  {
395  C4_ASSERT(pos <= len || pos == npos);
396  return (pos != npos) ?
397  basic_substring(str + (pos + 1), len - (pos + 1)) :
398  basic_substring(str + len, size_t(0));
399  }
400 
401  /** return [pos+!include_pos, len[ */
402  C4_ALWAYS_INLINE C4_PURE basic_substring right_of(size_t pos, bool include_pos) const noexcept
403  {
404  C4_ASSERT(pos <= len || pos == npos);
405  return (pos != npos) ?
406  basic_substring(str + (pos + !include_pos), len - (pos + !include_pos)) :
407  basic_substring(str + len, size_t(0));
408  }
409 
410 public:
411 
412  /** given @p subs a substring of the current string, get the
413  * portion of the current string to the left of it */
414  C4_ALWAYS_INLINE C4_PURE basic_substring left_of(ro_substr const subs) const noexcept
415  {
416  C4_ASSERT(is_super(subs) || subs.empty());
417  auto ssb = subs.begin();
418  auto b = begin();
419  auto e = end();
420  if(ssb >= b && ssb <= e)
421  return sub(0, static_cast<size_t>(ssb - b));
422  else
423  return sub(0, 0);
424  }
425 
426  /** given @p subs a substring of the current string, get the
427  * portion of the current string to the right of it */
428  C4_ALWAYS_INLINE C4_PURE basic_substring right_of(ro_substr const subs) const noexcept
429  {
430  C4_ASSERT(is_super(subs) || subs.empty());
431  auto sse = subs.end();
432  auto b = begin();
433  auto e = end();
434  if(sse >= b && sse <= e)
435  return sub(static_cast<size_t>(sse - b), static_cast<size_t>(e - sse));
436  else
437  return sub(0, 0);
438  }
439 
440  /** @} */
441 
442 public:
443 
444  /** @name Removing characters (trim()) / patterns (strip()) from the tips of the string */
445  /** @{ */
446 
447  /** trim left */
448  basic_substring triml(const C c) const
449  {
450  if( ! empty())
451  {
452  size_t pos = first_not_of(c);
453  if(pos != npos)
454  return sub(pos);
455  }
456  return sub(0, 0);
457  }
458  /** trim left ANY of the characters.
459  * @see stripl() to remove a pattern from the left */
461  {
462  if( ! empty())
463  {
464  size_t pos = first_not_of(chars);
465  if(pos != npos)
466  return sub(pos);
467  }
468  return sub(0, 0);
469  }
470 
471  /** trim the character c from the right */
472  basic_substring trimr(const C c) const
473  {
474  if( ! empty())
475  {
476  size_t pos = last_not_of(c, npos);
477  if(pos != npos)
478  return sub(0, pos+1);
479  }
480  return sub(0, 0);
481  }
482  /** trim right ANY of the characters
483  * @see stripr() to remove a pattern from the right */
485  {
486  if( ! empty())
487  {
488  size_t pos = last_not_of(chars, npos);
489  if(pos != npos)
490  return sub(0, pos+1);
491  }
492  return sub(0, 0);
493  }
494 
495  /** trim the character c left and right */
496  basic_substring trim(const C c) const
497  {
498  return triml(c).trimr(c);
499  }
500  /** trim left and right ANY of the characters
501  * @see strip() to remove a pattern from the left and right */
502  basic_substring trim(ro_substr const chars) const
503  {
504  return triml(chars).trimr(chars);
505  }
506 
507  /** remove a pattern from the left
508  * @see triml() to remove characters*/
510  {
511  if( ! begins_with(pattern))
512  return *this;
513  return sub(pattern.len < len ? pattern.len : len);
514  }
515 
516  /** remove a pattern from the right
517  * @see trimr() to remove characters*/
519  {
520  if( ! ends_with(pattern))
521  return *this;
522  return left_of(len - (pattern.len < len ? pattern.len : len));
523  }
524 
525  /** @} */
526 
527 public:
528 
529  /** @name Lookup methods */
530  /** @{ */
531 
532  size_t find(const C c, size_t start_pos=0) const
533  {
534  return first_of(c, start_pos);
535  }
536  size_t find(ro_substr pattern, size_t start_pos=0) const
537  {
538  C4_ASSERT(start_pos == npos || (start_pos >= 0 && start_pos <= len));
539  if(len < pattern.len) return npos;
540  for(size_t i = start_pos, e = len - pattern.len + 1; i < e; ++i)
541  {
542  bool gotit = true;
543  for(size_t j = 0; j < pattern.len; ++j)
544  {
545  C4_ASSERT(i + j < len);
546  if(str[i + j] != pattern.str[j])
547  {
548  gotit = false;
549  break;
550  }
551  }
552  if(gotit)
553  {
554  return i;
555  }
556  }
557  return npos;
558  }
559 
560 public:
561 
562  /** count the number of occurrences of c */
563  size_t count(const C c, size_t pos=0) const
564  {
565  C4_ASSERT(pos >= 0 && pos <= len);
566  size_t num = 0;
567  pos = find(c, pos);
568  while(pos != npos)
569  {
570  ++num;
571  pos = find(c, pos + 1);
572  }
573  return num;
574  }
575 
576  /** count the number of occurrences of s */
577  size_t count(ro_substr c, size_t pos=0) const
578  {
579  C4_ASSERT(pos >= 0 && pos <= len);
580  size_t num = 0;
581  pos = find(c, pos);
582  while(pos != npos)
583  {
584  ++num;
585  pos = find(c, pos + c.len);
586  }
587  return num;
588  }
589 
590  /** get the substr consisting of the first occurrence of @p c after @p pos, or an empty substr if none occurs */
591  basic_substring select(const C c, size_t pos=0) const
592  {
593  pos = find(c, pos);
594  return pos != npos ? sub(pos, 1) : basic_substring();
595  }
596 
597  /** get the substr consisting of the first occurrence of @p pattern after @p pos, or an empty substr if none occurs */
598  basic_substring select(ro_substr pattern, size_t pos=0) const
599  {
600  pos = find(pattern, pos);
601  return pos != npos ? sub(pos, pattern.len) : basic_substring();
602  }
603 
604 public:
605 
607  {
608  size_t which;
609  size_t pos;
610  operator bool() const { return which != NONE && pos != npos; }
611  };
612 
614  {
615  ro_substr s[2] = {s0, s1};
616  return first_of_any_iter(&s[0], &s[0] + 2);
617  }
618 
620  {
621  ro_substr s[3] = {s0, s1, s2};
622  return first_of_any_iter(&s[0], &s[0] + 3);
623  }
624 
626  {
627  ro_substr s[4] = {s0, s1, s2, s3};
628  return first_of_any_iter(&s[0], &s[0] + 4);
629  }
630 
632  {
633  ro_substr s[5] = {s0, s1, s2, s3, s4};
634  return first_of_any_iter(&s[0], &s[0] + 5);
635  }
636 
637  template<class It>
638  first_of_any_result first_of_any_iter(It first_span, It last_span) const
639  {
640  for(size_t i = 0; i < len; ++i)
641  {
642  size_t curr = 0;
643  for(It it = first_span; it != last_span; ++curr, ++it)
644  {
645  auto const& chars = *it;
646  if((i + chars.len) > len) continue;
647  bool gotit = true;
648  for(size_t j = 0; j < chars.len; ++j)
649  {
650  C4_ASSERT(i + j < len);
651  if(str[i + j] != chars[j])
652  {
653  gotit = false;
654  break;
655  }
656  }
657  if(gotit)
658  {
659  return {curr, i};
660  }
661  }
662  }
663  return {NONE, npos};
664  }
665 
666 public:
667 
668  /** true if the first character of the string is @p c */
669  bool begins_with(const C c) const
670  {
671  #if defined(__GNUC__) && (__GNUC__ >= 6)
672  C4_SUPPRESS_WARNING_GCC_WITH_PUSH("-Wnull-dereference")
673  #endif
674  return len > 0 ? str[0] == c : false;
675  #if defined(__GNUC__) && (__GNUC__ >= 6)
676  C4_SUPPRESS_WARNING_GCC_POP
677  #endif
678  }
679 
680  /** true if the first @p num characters of the string are @p c */
681  bool begins_with(const C c, size_t num) const
682  {
683  if(len < num)
684  {
685  return false;
686  }
687  for(size_t i = 0; i < num; ++i)
688  {
689  if(str[i] != c)
690  {
691  return false;
692  }
693  }
694  return true;
695  }
696 
697  /** true if the string begins with the given @p pattern */
698  bool begins_with(ro_substr pattern) const
699  {
700  if(len < pattern.len)
701  {
702  return false;
703  }
704  for(size_t i = 0; i < pattern.len; ++i)
705  {
706  if(str[i] != pattern[i])
707  {
708  return false;
709  }
710  }
711  return true;
712  }
713 
714  /** true if the first character of the string is any of the given @p chars */
715  bool begins_with_any(ro_substr chars) const
716  {
717  if(len == 0)
718  {
719  return false;
720  }
721  for(size_t i = 0; i < chars.len; ++i)
722  {
723  if(str[0] == chars.str[i])
724  {
725  return true;
726  }
727  }
728  return false;
729  }
730 
731  /** true if the last character of the string is @p c */
732  bool ends_with(const C c) const
733  {
734  return len > 0 ? str[len-1] == c : false;
735  }
736 
737  /** true if the last @p num characters of the string are @p c */
738  bool ends_with(const C c, size_t num) const
739  {
740  if(len < num)
741  {
742  return false;
743  }
744  for(size_t i = len - num; i < len; ++i)
745  {
746  if(str[i] != c)
747  {
748  return false;
749  }
750  }
751  return true;
752  }
753 
754  /** true if the string ends with the given @p pattern */
755  bool ends_with(ro_substr pattern) const
756  {
757  if(len < pattern.len)
758  {
759  return false;
760  }
761  for(size_t i = 0, s = len-pattern.len; i < pattern.len; ++i)
762  {
763  if(str[s+i] != pattern[i])
764  {
765  return false;
766  }
767  }
768  return true;
769  }
770 
771  /** true if the last character of the string is any of the given @p chars */
772  bool ends_with_any(ro_substr chars) const
773  {
774  if(len == 0)
775  {
776  return false;
777  }
778  for(size_t i = 0; i < chars.len; ++i)
779  {
780  if(str[len - 1] == chars[i])
781  {
782  return true;
783  }
784  }
785  return false;
786  }
787 
788 public:
789 
790  /** @return the first position where c is found in the string, or npos if none is found */
791  size_t first_of(const C c, size_t start=0) const
792  {
793  C4_ASSERT(start == npos || (start >= 0 && start <= len));
794  for(size_t i = start; i < len; ++i)
795  {
796  if(str[i] == c)
797  return i;
798  }
799  return npos;
800  }
801 
802  /** @return the last position where c is found in the string, or npos if none is found */
803  size_t last_of(const C c, size_t start=npos) const
804  {
805  C4_ASSERT(start == npos || (start >= 0 && start <= len));
806  if(start == npos)
807  start = len;
808  for(size_t i = start-1; i != size_t(-1); --i)
809  {
810  if(str[i] == c)
811  return i;
812  }
813  return npos;
814  }
815 
816  /** @return the first position where ANY of the chars is found in the string, or npos if none is found */
817  size_t first_of(ro_substr chars, size_t start=0) const
818  {
819  C4_ASSERT(start == npos || (start >= 0 && start <= len));
820  for(size_t i = start; i < len; ++i)
821  {
822  for(size_t j = 0; j < chars.len; ++j)
823  {
824  if(str[i] == chars[j])
825  return i;
826  }
827  }
828  return npos;
829  }
830 
831  /** @return the last position where ANY of the chars is found in the string, or npos if none is found */
832  size_t last_of(ro_substr chars, size_t start=npos) const
833  {
834  C4_ASSERT(start == npos || (start >= 0 && start <= len));
835  if(start == npos)
836  start = len;
837  for(size_t i = start-1; i != size_t(-1); --i)
838  {
839  for(size_t j = 0; j < chars.len; ++j)
840  {
841  if(str[i] == chars[j])
842  return i;
843  }
844  }
845  return npos;
846  }
847 
848 public:
849 
850  size_t first_not_of(const C c) const
851  {
852  for(size_t i = 0; i < len; ++i)
853  {
854  if(str[i] != c)
855  return i;
856  }
857  return npos;
858  }
859 
860  size_t first_not_of(const C c, size_t start) const
861  {
862  C4_ASSERT((start >= 0 && start <= len) || (start == len && len == 0));
863  for(size_t i = start; i < len; ++i)
864  {
865  if(str[i] != c)
866  return i;
867  }
868  return npos;
869  }
870 
871  size_t last_not_of(const C c) const
872  {
873  for(size_t i = len-1; i != size_t(-1); --i)
874  {
875  if(str[i] != c)
876  return i;
877  }
878  return npos;
879  }
880 
881  size_t last_not_of(const C c, size_t start) const
882  {
883  C4_ASSERT(start == npos || (start >= 0 && start <= len));
884  if(start == npos)
885  start = len;
886  for(size_t i = start-1; i != size_t(-1); --i)
887  {
888  if(str[i] != c)
889  return i;
890  }
891  return npos;
892  }
893 
894  size_t first_not_of(ro_substr chars) const
895  {
896  for(size_t i = 0; i < len; ++i)
897  {
898  bool gotit = true;
899  for(size_t j = 0; j < chars.len; ++j)
900  {
901  if(str[i] == chars.str[j])
902  {
903  gotit = false;
904  break;
905  }
906  }
907  if(gotit)
908  {
909  return i;
910  }
911  }
912  return npos;
913  }
914 
915  size_t first_not_of(ro_substr chars, size_t start) const
916  {
917  C4_ASSERT((start >= 0 && start <= len) || (start == len && len == 0));
918  for(size_t i = start; i < len; ++i)
919  {
920  bool gotit = true;
921  for(size_t j = 0; j < chars.len; ++j)
922  {
923  if(str[i] == chars.str[j])
924  {
925  gotit = false;
926  break;
927  }
928  }
929  if(gotit)
930  {
931  return i;
932  }
933  }
934  return npos;
935  }
936 
937  size_t last_not_of(ro_substr chars) const
938  {
939  for(size_t i = len-1; i != size_t(-1); --i)
940  {
941  bool gotit = true;
942  for(size_t j = 0; j < chars.len; ++j)
943  {
944  if(str[i] == chars.str[j])
945  {
946  gotit = false;
947  break;
948  }
949  }
950  if(gotit)
951  {
952  return i;
953  }
954  }
955  return npos;
956  }
957 
958  size_t last_not_of(ro_substr chars, size_t start) const
959  {
960  C4_ASSERT(start == npos || (start >= 0 && start <= len));
961  if(start == npos)
962  start = len;
963  for(size_t i = start-1; i != size_t(-1); --i)
964  {
965  bool gotit = true;
966  for(size_t j = 0; j < chars.len; ++j)
967  {
968  if(str[i] == chars.str[j])
969  {
970  gotit = false;
971  break;
972  }
973  }
974  if(gotit)
975  {
976  return i;
977  }
978  }
979  return npos;
980  }
981 
982  /** @} */
983 
984 public:
985 
986  /** @name Range lookup methods */
987  /** @{ */
988 
989  /** get the range delimited by an open-close pair of characters.
990  * @note There must be no nested pairs.
991  * @note No checks for escapes are performed. */
992  basic_substring pair_range(CC open, CC close) const
993  {
994  size_t b = find(open);
995  if(b == npos)
996  return basic_substring();
997  size_t e = find(close, b+1);
998  if(e == npos)
999  return basic_substring();
1000  basic_substring ret = range(b, e+1);
1001  C4_ASSERT(ret.sub(1).find(open) == npos);
1002  return ret;
1003  }
1004 
1005  /** get the range delimited by a single open-close character (eg, quotes).
1006  * @note The open-close character can be escaped. */
1007  basic_substring pair_range_esc(CC open_close, CC escape=CC('\\'))
1008  {
1009  size_t b = find(open_close);
1010  if(b == npos) return basic_substring();
1011  for(size_t i = b+1; i < len; ++i)
1012  {
1013  CC c = str[i];
1014  if(c == open_close)
1015  {
1016  if(str[i-1] != escape)
1017  {
1018  return range(b, i+1);
1019  }
1020  }
1021  }
1022  return basic_substring();
1023  }
1024 
1025  /** get the range delimited by an open-close pair of characters,
1026  * with possibly nested occurrences. No checks for escapes are
1027  * performed. */
1029  {
1030  size_t b = find(open);
1031  if(b == npos) return basic_substring();
1032  size_t e, curr = b+1, count = 0;
1033  const char both[] = {open, close, '\0'};
1034  while((e = first_of(both, curr)) != npos)
1035  {
1036  if(str[e] == open)
1037  {
1038  ++count;
1039  curr = e+1;
1040  }
1041  else if(str[e] == close)
1042  {
1043  if(count == 0) return range(b, e+1);
1044  --count;
1045  curr = e+1;
1046  }
1047  }
1048  return basic_substring();
1049  }
1050 
1052  {
1053  constexpr const C dq('"'), sq('\'');
1054  if(len >= 2 && (str[len - 2] != C('\\')) &&
1055  ((begins_with(sq) && ends_with(sq))
1056  ||
1057  (begins_with(dq) && ends_with(dq))))
1058  {
1059  return range(1, len -1);
1060  }
1061  return *this;
1062  }
1063 
1064  /** @} */
1065 
1066 public:
1067 
1068  /** @name Number-matching query methods */
1069  /** @{ */
1070 
1071  /** @return true if the substring contents are a floating-point or integer number.
1072  * @note any leading or trailing whitespace will return false. */
1073  bool is_number() const
1074  {
1075  if(empty() || (first_non_empty_span().empty()))
1076  return false;
1077  if(first_uint_span() == *this)
1078  return true;
1079  if(first_int_span() == *this)
1080  return true;
1081  if(first_real_span() == *this)
1082  return true;
1083  return false;
1084  }
1085 
1086  /** @return true if the substring contents are a real number.
1087  * @note any leading or trailing whitespace will return false. */
1088  bool is_real() const
1089  {
1090  if(empty() || (first_non_empty_span().empty()))
1091  return false;
1092  if(first_real_span() == *this)
1093  return true;
1094  return false;
1095  }
1096 
1097  /** @return true if the substring contents are an integer number.
1098  * @note any leading or trailing whitespace will return false. */
1099  bool is_integer() const
1100  {
1101  if(empty() || (first_non_empty_span().empty()))
1102  return false;
1103  if(first_uint_span() == *this)
1104  return true;
1105  if(first_int_span() == *this)
1106  return true;
1107  return false;
1108  }
1109 
1110  /** @return true if the substring contents are an unsigned integer number.
1111  * @note any leading or trailing whitespace will return false. */
1112  bool is_unsigned_integer() const
1113  {
1114  if(empty() || (first_non_empty_span().empty()))
1115  return false;
1116  if(first_uint_span() == *this)
1117  return true;
1118  return false;
1119  }
1120 
1121  /** get the first span consisting exclusively of non-empty characters */
1123  {
1124  constexpr const ro_substr empty_chars(" \n\r\t");
1125  size_t pos = first_not_of(empty_chars);
1126  if(pos == npos)
1127  return first(0);
1128  auto ret = sub(pos);
1129  pos = ret.first_of(empty_chars);
1130  return ret.first(pos);
1131  }
1132 
1133  /** get the first span which can be interpreted as an unsigned integer */
1135  {
1136  basic_substring ne = first_non_empty_span();
1137  if(ne.empty())
1138  return ne;
1139  if(ne.str[0] == '-')
1140  return first(0);
1141  size_t skip_start = size_t(ne.str[0] == '+');
1142  return ne._first_integral_span(skip_start);
1143  }
1144 
1145  /** get the first span which can be interpreted as a signed integer */
1147  {
1148  basic_substring ne = first_non_empty_span();
1149  if(ne.empty())
1150  return ne;
1151  size_t skip_start = size_t(ne.str[0] == '+' || ne.str[0] == '-');
1152  return ne._first_integral_span(skip_start);
1153  }
1154 
1155  basic_substring _first_integral_span(size_t skip_start) const
1156  {
1157  C4_ASSERT(!empty());
1158  if(skip_start == len)
1159  return first(0);
1160  C4_ASSERT(skip_start < len);
1161  if(len >= skip_start + 3)
1162  {
1163  if(str[skip_start] != '0')
1164  {
1165  for(size_t i = skip_start; i < len; ++i)
1166  {
1167  char c = str[i];
1168  if(c < '0' || c > '9')
1169  return i > skip_start && _is_delim_char(c) ? first(i) : first(0);
1170  }
1171  }
1172  else
1173  {
1174  char next = str[skip_start + 1];
1175  if(next == 'x' || next == 'X')
1176  {
1177  skip_start += 2;
1178  for(size_t i = skip_start; i < len; ++i)
1179  {
1180  const char c = str[i];
1181  if( ! _is_hex_char(c))
1182  return i > skip_start && _is_delim_char(c) ? first(i) : first(0);
1183  }
1184  return *this;
1185  }
1186  else if(next == 'b' || next == 'B')
1187  {
1188  skip_start += 2;
1189  for(size_t i = skip_start; i < len; ++i)
1190  {
1191  const char c = str[i];
1192  if(c != '0' && c != '1')
1193  return i > skip_start && _is_delim_char(c) ? first(i) : first(0);
1194  }
1195  return *this;
1196  }
1197  else if(next == 'o' || next == 'O')
1198  {
1199  skip_start += 2;
1200  for(size_t i = skip_start; i < len; ++i)
1201  {
1202  const char c = str[i];
1203  if(c < '0' || c > '7')
1204  return i > skip_start && _is_delim_char(c) ? first(i) : first(0);
1205  }
1206  return *this;
1207  }
1208  }
1209  }
1210  // must be a decimal, or it is not a an number
1211  for(size_t i = skip_start; i < len; ++i)
1212  {
1213  const char c = str[i];
1214  if(c < '0' || c > '9')
1215  return i > skip_start && _is_delim_char(c) ? first(i) : first(0);
1216  }
1217  return *this;
1218  }
1219 
1220  /** get the first span which can be interpreted as a real (floating-point) number */
1222  {
1223  basic_substring ne = first_non_empty_span();
1224  if(ne.empty())
1225  return ne;
1226  const size_t skip_start = (ne.str[0] == '+' || ne.str[0] == '-');
1227  C4_ASSERT(skip_start == 0 || skip_start == 1);
1228  // if we have at least three digits after the leading sign, it
1229  // can be decimal, or hex, or bin or oct. Ex:
1230  // non-decimal: 0x0, 0b0, 0o0
1231  // decimal: 1.0, 10., 1e1, 100, inf, nan, infinity
1232  if(ne.len >= skip_start+3)
1233  {
1234  // if it does not have leading 0, it must be decimal, or it is not a real
1235  if(ne.str[skip_start] != '0')
1236  {
1237  if(ne.str[skip_start] == 'i') // is it infinity or inf?
1238  {
1239  basic_substring word = ne._word_follows(skip_start + 1, "nfinity");
1240  if(word.len)
1241  return word;
1242  return ne._word_follows(skip_start + 1, "nf");
1243  }
1244  else if(ne.str[skip_start] == 'n') // is it nan?
1245  {
1246  return ne._word_follows(skip_start + 1, "an");
1247  }
1248  else // must be a decimal, or it is not a real
1249  {
1250  return ne._first_real_span_dec(skip_start);
1251  }
1252  }
1253  else // starts with 0. is it 0x, 0b or 0o?
1254  {
1255  const char next = ne.str[skip_start + 1];
1256  // hexadecimal
1257  if(next == 'x' || next == 'X')
1258  return ne._first_real_span_hex(skip_start + 2);
1259  // binary
1260  else if(next == 'b' || next == 'B')
1261  return ne._first_real_span_bin(skip_start + 2);
1262  // octal
1263  else if(next == 'o' || next == 'O')
1264  return ne._first_real_span_oct(skip_start + 2);
1265  // none of the above. may still be a decimal.
1266  else
1267  return ne._first_real_span_dec(skip_start); // do not skip the 0.
1268  }
1269  }
1270  // less than 3 chars after the leading sign. It is either a
1271  // decimal or it is not a real. (cannot be any of 0x0, etc).
1272  return ne._first_real_span_dec(skip_start);
1273  }
1274 
1275  /** true if the character is a delimiter character *at the end* */
1276  static constexpr C4_ALWAYS_INLINE C4_CONST bool _is_delim_char(char c) noexcept
1277  {
1278  return c == ' ' || c == '\n'
1279  || c == ']' || c == ')' || c == '}'
1280  || c == ',' || c == ';' || c == '\r' || c == '\t' || c == '\0';
1281  }
1282 
1283  /** true if the character is in [0-9a-fA-F] */
1284  static constexpr C4_ALWAYS_INLINE C4_CONST bool _is_hex_char(char c) noexcept
1285  {
1286  return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
1287  }
1288 
1289  C4_NO_INLINE C4_PURE basic_substring _word_follows(size_t pos, csubstr word) const noexcept
1290  {
1291  size_t posend = pos + word.len;
1292  if(len >= posend && sub(pos, word.len) == word)
1293  if(len == posend || _is_delim_char(str[posend]))
1294  return first(posend);
1295  return first(0);
1296  }
1297 
1298  // this function is declared inside the class to avoid a VS error with __declspec(dllimport)
1299  C4_NO_INLINE C4_PURE basic_substring _first_real_span_dec(size_t pos) const noexcept
1300  {
1301  bool intchars = false;
1302  bool fracchars = false;
1303  bool powchars;
1304  // integral part
1305  for( ; pos < len; ++pos)
1306  {
1307  const char c = str[pos];
1308  if(c >= '0' && c <= '9')
1309  {
1310  intchars = true;
1311  }
1312  else if(c == '.')
1313  {
1314  ++pos;
1315  goto fractional_part_dec; // NOLINT
1316  }
1317  else if(c == 'e' || c == 'E')
1318  {
1319  ++pos;
1320  goto power_part_dec; // NOLINT
1321  }
1322  else if(_is_delim_char(c))
1323  {
1324  return intchars ? first(pos) : first(0);
1325  }
1326  else
1327  {
1328  return first(0);
1329  }
1330  }
1331  // no . or p were found; this is either an integral number
1332  // or not a number at all
1333  return intchars ?
1334  *this :
1335  first(0);
1336  fractional_part_dec:
1337  C4_ASSERT(pos > 0);
1338  C4_ASSERT(str[pos - 1] == '.');
1339  for( ; pos < len; ++pos)
1340  {
1341  const char c = str[pos];
1342  if(c >= '0' && c <= '9')
1343  {
1344  fracchars = true;
1345  }
1346  else if(c == 'e' || c == 'E')
1347  {
1348  ++pos;
1349  goto power_part_dec; // NOLINT
1350  }
1351  else if(_is_delim_char(c))
1352  {
1353  return intchars || fracchars ? first(pos) : first(0);
1354  }
1355  else
1356  {
1357  return first(0);
1358  }
1359  }
1360  return intchars || fracchars ?
1361  *this :
1362  first(0);
1363  power_part_dec:
1364  C4_ASSERT(pos > 0);
1365  C4_ASSERT(str[pos - 1] == 'e' || str[pos - 1] == 'E');
1366  // either digits, or +, or - are expected here, followed by more digits.
1367  if((len == pos) || ((!intchars) && (!fracchars)))
1368  return first(0);
1369  if(str[pos] == '-' || str[pos] == '+')
1370  ++pos; // skip the sign
1371  powchars = false;
1372  for( ; pos < len; ++pos)
1373  {
1374  const char c = str[pos];
1375  if(c >= '0' && c <= '9')
1376  powchars = true;
1377  else if(powchars && _is_delim_char(c))
1378  return first(pos);
1379  else
1380  return first(0);
1381  }
1382  return powchars ? *this : first(0);
1383  }
1384 
1385  // this function is declared inside the class to avoid a VS error with __declspec(dllimport)
1386  C4_NO_INLINE C4_PURE basic_substring _first_real_span_hex(size_t pos) const noexcept
1387  {
1388  bool intchars = false;
1389  bool fracchars = false;
1390  bool powchars;
1391  // integral part
1392  for( ; pos < len; ++pos)
1393  {
1394  const char c = str[pos];
1395  if(_is_hex_char(c))
1396  {
1397  intchars = true;
1398  }
1399  else if(c == '.')
1400  {
1401  ++pos;
1402  goto fractional_part_hex; // NOLINT
1403  }
1404  else if(c == 'p' || c == 'P')
1405  {
1406  ++pos;
1407  goto power_part_hex; // NOLINT
1408  }
1409  else if(_is_delim_char(c))
1410  {
1411  return intchars ? first(pos) : first(0);
1412  }
1413  else
1414  {
1415  return first(0);
1416  }
1417  }
1418  // no . or p were found; this is either an integral number
1419  // or not a number at all
1420  return intchars ?
1421  *this :
1422  first(0);
1423  fractional_part_hex:
1424  C4_ASSERT(pos > 0);
1425  C4_ASSERT(str[pos - 1] == '.');
1426  for( ; pos < len; ++pos)
1427  {
1428  const char c = str[pos];
1429  if(_is_hex_char(c))
1430  {
1431  fracchars = true;
1432  }
1433  else if(c == 'p' || c == 'P')
1434  {
1435  ++pos;
1436  goto power_part_hex; // NOLINT
1437  }
1438  else if(_is_delim_char(c))
1439  {
1440  return intchars || fracchars ? first(pos) : first(0);
1441  }
1442  else
1443  {
1444  return first(0);
1445  }
1446  }
1447  return intchars || fracchars ?
1448  *this :
1449  first(0);
1450  power_part_hex:
1451  C4_ASSERT(pos > 0);
1452  C4_ASSERT(str[pos - 1] == 'p' || str[pos - 1] == 'P');
1453  // either a + or a - is expected here, followed by more chars.
1454  // also, using (pos+1) in this check will cause an early
1455  // return when no more chars follow the sign.
1456  if(len <= (pos+1) || (str[pos] != '+' && str[pos] != '-') || ((!intchars) && (!fracchars)))
1457  return first(0);
1458  ++pos; // this was the sign.
1459  // ... so the (pos+1) ensures that we enter the loop and
1460  // hence that there exist chars in the power part
1461  powchars = false;
1462  for( ; pos < len; ++pos)
1463  {
1464  const char c = str[pos];
1465  if(c >= '0' && c <= '9')
1466  powchars = true;
1467  else if(powchars && _is_delim_char(c))
1468  return first(pos);
1469  else
1470  return first(0);
1471  }
1472  return *this;
1473  }
1474 
1475  // this function is declared inside the class to avoid a VS error with __declspec(dllimport)
1476  C4_NO_INLINE C4_PURE basic_substring _first_real_span_bin(size_t pos) const noexcept
1477  {
1478  bool intchars = false;
1479  bool fracchars = false;
1480  bool powchars;
1481  // integral part
1482  for( ; pos < len; ++pos)
1483  {
1484  const char c = str[pos];
1485  if(c == '0' || c == '1')
1486  {
1487  intchars = true;
1488  }
1489  else if(c == '.')
1490  {
1491  ++pos;
1492  goto fractional_part_bin; // NOLINT
1493  }
1494  else if(c == 'p' || c == 'P')
1495  {
1496  ++pos;
1497  goto power_part_bin; // NOLINT
1498  }
1499  else if(_is_delim_char(c))
1500  {
1501  return intchars ? first(pos) : first(0);
1502  }
1503  else
1504  {
1505  return first(0);
1506  }
1507  }
1508  // no . or p were found; this is either an integral number
1509  // or not a number at all
1510  return intchars ?
1511  *this :
1512  first(0);
1513  fractional_part_bin:
1514  C4_ASSERT(pos > 0);
1515  C4_ASSERT(str[pos - 1] == '.');
1516  for( ; pos < len; ++pos)
1517  {
1518  const char c = str[pos];
1519  if(c == '0' || c == '1')
1520  {
1521  fracchars = true;
1522  }
1523  else if(c == 'p' || c == 'P')
1524  {
1525  ++pos;
1526  goto power_part_bin; // NOLINT
1527  }
1528  else if(_is_delim_char(c))
1529  {
1530  return intchars || fracchars ? first(pos) : first(0);
1531  }
1532  else
1533  {
1534  return first(0);
1535  }
1536  }
1537  return intchars || fracchars ?
1538  *this :
1539  first(0);
1540  power_part_bin:
1541  C4_ASSERT(pos > 0);
1542  C4_ASSERT(str[pos - 1] == 'p' || str[pos - 1] == 'P');
1543  // either a + or a - is expected here, followed by more chars.
1544  // also, using (pos+1) in this check will cause an early
1545  // return when no more chars follow the sign.
1546  if(len <= (pos+1) || (str[pos] != '+' && str[pos] != '-') || ((!intchars) && (!fracchars)))
1547  return first(0);
1548  ++pos; // this was the sign.
1549  // ... so the (pos+1) ensures that we enter the loop and
1550  // hence that there exist chars in the power part
1551  powchars = false;
1552  for( ; pos < len; ++pos)
1553  {
1554  const char c = str[pos];
1555  if(c >= '0' && c <= '9')
1556  powchars = true;
1557  else if(powchars && _is_delim_char(c))
1558  return first(pos);
1559  else
1560  return first(0);
1561  }
1562  return *this;
1563  }
1564 
1565  // this function is declared inside the class to avoid a VS error with __declspec(dllimport)
1566  C4_NO_INLINE C4_PURE basic_substring _first_real_span_oct(size_t pos) const noexcept
1567  {
1568  bool intchars = false;
1569  bool fracchars = false;
1570  bool powchars;
1571  // integral part
1572  for( ; pos < len; ++pos)
1573  {
1574  const char c = str[pos];
1575  if(c >= '0' && c <= '7')
1576  {
1577  intchars = true;
1578  }
1579  else if(c == '.')
1580  {
1581  ++pos;
1582  goto fractional_part_oct; // NOLINT
1583  }
1584  else if(c == 'p' || c == 'P')
1585  {
1586  ++pos;
1587  goto power_part_oct; // NOLINT
1588  }
1589  else if(_is_delim_char(c))
1590  {
1591  return intchars ? first(pos) : first(0);
1592  }
1593  else
1594  {
1595  return first(0);
1596  }
1597  }
1598  // no . or p were found; this is either an integral number
1599  // or not a number at all
1600  return intchars ?
1601  *this :
1602  first(0);
1603  fractional_part_oct:
1604  C4_ASSERT(pos > 0);
1605  C4_ASSERT(str[pos - 1] == '.');
1606  for( ; pos < len; ++pos)
1607  {
1608  const char c = str[pos];
1609  if(c >= '0' && c <= '7')
1610  {
1611  fracchars = true;
1612  }
1613  else if(c == 'p' || c == 'P')
1614  {
1615  ++pos;
1616  goto power_part_oct; // NOLINT
1617  }
1618  else if(_is_delim_char(c))
1619  {
1620  return intchars || fracchars ? first(pos) : first(0);
1621  }
1622  else
1623  {
1624  return first(0);
1625  }
1626  }
1627  return intchars || fracchars ?
1628  *this :
1629  first(0);
1630  power_part_oct:
1631  C4_ASSERT(pos > 0);
1632  C4_ASSERT(str[pos - 1] == 'p' || str[pos - 1] == 'P');
1633  // either a + or a - is expected here, followed by more chars.
1634  // also, using (pos+1) in this check will cause an early
1635  // return when no more chars follow the sign.
1636  if(len <= (pos+1) || (str[pos] != '+' && str[pos] != '-') || ((!intchars) && (!fracchars)))
1637  return first(0);
1638  ++pos; // this was the sign.
1639  // ... so the (pos+1) ensures that we enter the loop and
1640  // hence that there exist chars in the power part
1641  powchars = false;
1642  for( ; pos < len; ++pos)
1643  {
1644  const char c = str[pos];
1645  if(c >= '0' && c <= '9')
1646  powchars = true;
1647  else if(powchars && _is_delim_char(c))
1648  return first(pos);
1649  else
1650  return first(0);
1651  }
1652  return *this;
1653  }
1654 
1655  /** @} */
1656 
1657 public:
1658 
1659  /** @name Splitting methods */
1660  /** @{ */
1661 
1662  /** returns true if the string has not been exhausted yet, meaning
1663  * it's ok to call next_split() again. When no instance of sep
1664  * exists in the string, returns the full string. When the input
1665  * is an empty string, the output string is the empty string. */
1666  bool next_split(C sep, size_t *C4_RESTRICT start_pos, basic_substring *C4_RESTRICT out) const
1667  {
1668  if(C4_LIKELY(*start_pos < len))
1669  {
1670  for(size_t i = *start_pos; i < len; i++)
1671  {
1672  if(str[i] == sep)
1673  {
1674  out->assign(str + *start_pos, i - *start_pos);
1675  *start_pos = i+1;
1676  return true;
1677  }
1678  }
1679  out->assign(str + *start_pos, len - *start_pos);
1680  *start_pos = len + 1;
1681  return true;
1682  }
1683  else
1684  {
1685  bool valid = len > 0 && (*start_pos == len);
1686  if(valid && str && str[len-1] == sep)
1687  {
1688  out->assign(str + len, size_t(0)); // the cast is needed to prevent overload ambiguity
1689  }
1690  else
1691  {
1692  out->assign(str + len + 1, size_t(0)); // the cast is needed to prevent overload ambiguity
1693  }
1694  *start_pos = len + 1;
1695  return valid;
1696  }
1697  }
1698 
1699 private:
1700 
1701  struct split_proxy_impl
1702  {
1704  {
1705  split_proxy_impl const* m_proxy;
1707  size_t m_pos;
1709 
1710  split_iterator_impl(split_proxy_impl const* proxy, size_t pos, C sep)
1711  : m_proxy(proxy), m_pos(pos), m_sep(sep)
1712  {
1713  _tick();
1714  }
1715 
1716  void _tick()
1717  {
1718  m_proxy->m_str.next_split(m_sep, &m_pos, &m_str);
1719  }
1720 
1721  split_iterator_impl& operator++ () { _tick(); return *this; }
1722  split_iterator_impl operator++ (int) { split_iterator_impl it = *this; _tick(); return it; } // NOLINT
1723 
1724  basic_substring& operator* () { return m_str; }
1725  basic_substring* operator-> () { return &m_str; }
1726 
1727  bool operator!= (split_iterator_impl const& that) const
1728  {
1729  return !(this->operator==(that));
1730  }
1731  bool operator== (split_iterator_impl const& that) const
1732  {
1733  C4_XASSERT((m_sep == that.m_sep) && "cannot compare split iterators with different separators");
1734  if(m_str.size() != that.m_str.size())
1735  return false;
1736  if(m_str.data() != that.m_str.data())
1737  return false;
1738  return m_pos == that.m_pos;
1739  }
1740  };
1741 
1742  basic_substring m_str;
1743  size_t m_start_pos;
1744  C m_sep;
1745 
1746  split_proxy_impl(basic_substring str_, size_t start_pos, C sep)
1747  : m_str(str_), m_start_pos(start_pos), m_sep(sep)
1748  {
1749  }
1750 
1751  split_iterator_impl begin() const
1752  {
1753  auto it = split_iterator_impl(this, m_start_pos, m_sep);
1754  return it;
1755  }
1756  split_iterator_impl end() const
1757  {
1758  size_t pos = m_str.size() + 1;
1759  auto it = split_iterator_impl(this, pos, m_sep);
1760  return it;
1761  }
1762  };
1763 
1764 public:
1765 
1766  using split_proxy = split_proxy_impl;
1767 
1768  /** a view into the splits */
1769  split_proxy split(C sep, size_t start_pos=0) const
1770  {
1771  C4_XASSERT((start_pos >= 0 && start_pos < len) || empty());
1772  auto ss = sub(0, len);
1773  auto it = split_proxy(ss, start_pos, sep);
1774  return it;
1775  }
1776 
1777 public:
1778 
1779  /** pop right: return the first split from the right. Use
1780  * gpop_left() to get the reciprocal part.
1781  */
1782  basic_substring pop_right(C sep=C('/'), bool skip_empty=false) const
1783  {
1784  if(C4_LIKELY(len > 1))
1785  {
1786  auto pos = last_of(sep);
1787  if(pos != npos)
1788  {
1789  if(pos + 1 < len) // does not end with sep
1790  {
1791  return sub(pos + 1); // return from sep to end
1792  }
1793  else // the string ends with sep
1794  {
1795  if( ! skip_empty)
1796  {
1797  return sub(pos + 1, 0);
1798  }
1799  auto ppos = last_not_of(sep); // skip repeated seps
1800  if(ppos == npos) // the string is all made of seps
1801  {
1802  return sub(0, 0);
1803  }
1804  // find the previous sep
1805  auto pos0 = last_of(sep, ppos);
1806  if(pos0 == npos) // only the last sep exists
1807  {
1808  return sub(0); // return the full string (because skip_empty is true)
1809  }
1810  ++pos0;
1811  return sub(pos0);
1812  }
1813  }
1814  else // no sep was found, return the full string
1815  {
1816  return *this;
1817  }
1818  }
1819  else if(len == 1)
1820  {
1821  if(begins_with(sep))
1822  {
1823  return sub(0, 0);
1824  }
1825  return *this;
1826  }
1827  else // an empty string
1828  {
1829  return basic_substring();
1830  }
1831  }
1832 
1833  /** return the first split from the left. Use gpop_right() to get
1834  * the reciprocal part. */
1835  basic_substring pop_left(C sep = C('/'), bool skip_empty=false) const
1836  {
1837  if(C4_LIKELY(len > 1))
1838  {
1839  auto pos = first_of(sep);
1840  if(pos != npos)
1841  {
1842  if(pos > 0) // does not start with sep
1843  {
1844  return sub(0, pos); // return everything up to it
1845  }
1846  else // the string starts with sep
1847  {
1848  if( ! skip_empty)
1849  {
1850  return sub(0, 0);
1851  }
1852  auto ppos = first_not_of(sep); // skip repeated seps
1853  if(ppos == npos) // the string is all made of seps
1854  {
1855  return sub(0, 0);
1856  }
1857  // find the next sep
1858  auto pos0 = first_of(sep, ppos);
1859  if(pos0 == npos) // only the first sep exists
1860  {
1861  return sub(0); // return the full string (because skip_empty is true)
1862  }
1863  C4_XASSERT(pos0 > 0);
1864  // return everything up to the second sep
1865  return sub(0, pos0);
1866  }
1867  }
1868  else // no sep was found, return the full string
1869  {
1870  return sub(0);
1871  }
1872  }
1873  else if(len == 1)
1874  {
1875  if(begins_with(sep))
1876  {
1877  return sub(0, 0);
1878  }
1879  return sub(0);
1880  }
1881  else // an empty string
1882  {
1883  return basic_substring();
1884  }
1885  }
1886 
1887 public:
1888 
1889  /** greedy pop left. eg, csubstr("a/b/c").gpop_left('/')="c" */
1890  basic_substring gpop_left(C sep = C('/'), bool skip_empty=false) const
1891  {
1892  auto ss = pop_right(sep, skip_empty);
1893  ss = left_of(ss);
1894  if(ss.find(sep) != npos)
1895  {
1896  if(ss.ends_with(sep))
1897  {
1898  if(skip_empty)
1899  {
1900  ss = ss.trimr(sep);
1901  }
1902  else
1903  {
1904  ss = ss.sub(0, ss.len-1); // safe to subtract because ends_with(sep) is true
1905  }
1906  }
1907  }
1908  return ss;
1909  }
1910 
1911  /** greedy pop right. eg, csubstr("a/b/c").gpop_right('/')="a" */
1912  basic_substring gpop_right(C sep = C('/'), bool skip_empty=false) const
1913  {
1914  auto ss = pop_left(sep, skip_empty);
1915  ss = right_of(ss);
1916  if(ss.find(sep) != npos)
1917  {
1918  if(ss.begins_with(sep))
1919  {
1920  if(skip_empty)
1921  {
1922  ss = ss.triml(sep);
1923  }
1924  else
1925  {
1926  ss = ss.sub(1);
1927  }
1928  }
1929  }
1930  return ss;
1931  }
1932 
1933  /** @} */
1934 
1935 public:
1936 
1937  /** @name Path-like manipulation methods */
1938  /** @{ */
1939 
1940  basic_substring basename(C sep=C('/')) const
1941  {
1942  auto ss = pop_right(sep, /*skip_empty*/true);
1943  ss = ss.trimr(sep);
1944  return ss;
1945  }
1946 
1947  basic_substring dirname(C sep=C('/')) const
1948  {
1949  auto ss = basename(sep);
1950  ss = ss.empty() ? *this : left_of(ss);
1951  return ss;
1952  }
1953 
1954  C4_ALWAYS_INLINE basic_substring name_wo_extshort() const
1955  {
1956  return gpop_left('.');
1957  }
1958 
1959  C4_ALWAYS_INLINE basic_substring name_wo_extlong() const
1960  {
1961  return pop_left('.');
1962  }
1963 
1964  C4_ALWAYS_INLINE basic_substring extshort() const
1965  {
1966  return pop_right('.');
1967  }
1968 
1969  C4_ALWAYS_INLINE basic_substring extlong() const
1970  {
1971  return gpop_right('.');
1972  }
1973 
1974  /** @} */
1975 
1976 public:
1977 
1978  /** @name Content-modification methods (only for non-const C) */
1979  /** @{ */
1980 
1981  /** convert the string to upper-case
1982  * @note this method requires that the string memory is writeable and is SFINAEd out for const C */
1983  C4_REQUIRE_RW(void) toupper()
1984  {
1985  for(size_t i = 0; i < len; ++i)
1986  {
1987  str[i] = static_cast<C>(::toupper(str[i]));
1988  }
1989  }
1990 
1991  /** convert the string to lower-case
1992  * @note this method requires that the string memory is writeable and is SFINAEd out for const C */
1993  C4_REQUIRE_RW(void) tolower()
1994  {
1995  for(size_t i = 0; i < len; ++i)
1996  {
1997  str[i] = static_cast<C>(::tolower(str[i]));
1998  }
1999  }
2000 
2001 public:
2002 
2003  /** fill the entire contents with the given @p val
2004  * @note this method requires that the string memory is writeable and is SFINAEd out for const C */
2005  C4_REQUIRE_RW(void) fill(C val)
2006  {
2007  for(size_t i = 0; i < len; ++i)
2008  str[i] = val;
2009  }
2010 
2011 public:
2012 
2013  /** copy a string to this substr, starting at 0
2014  * @note this method requires that the string memory is writeable and is SFINAEd out for const C */
2015  C4_REQUIRE_RW(void) copy_from(ro_substr that)
2016  {
2017  C4_ASSERT(!overlaps(that));
2018  size_t num = that.len <= len ? that.len : len;
2019  // calling memcpy with zero len is undefined behavior
2020  // and will wreak havoc in calling code's branches.
2021  // see https://github.com/biojppm/rapidyaml/pull/264#issuecomment-1262133637
2022  if(num)
2023  memcpy(str, that.str, sizeof(C) * num);
2024  }
2025 
2026  /** copy a string to this substr, starting at a specified given position
2027  * @note this method requires that the string memory is writeable and is SFINAEd out for const C */
2028  C4_REQUIRE_RW(void) copy_from(ro_substr that, size_t ifirst, size_t num=npos)
2029  {
2030  C4_ASSERT(ifirst >= 0 && ifirst <= len);
2031  num = num != npos ? num : len - ifirst;
2032  num = num < that.len ? num : that.len;
2033  C4_ASSERT(ifirst + num >= 0 && ifirst + num <= len);
2034  // calling memcpy with zero len is undefined behavior
2035  // and will wreak havoc in calling code's branches.
2036  // see https://github.com/biojppm/rapidyaml/pull/264#issuecomment-1262133637
2037  if(num)
2038  memcpy(str + (sizeof(C) * ifirst), that.str, sizeof(C) * num);
2039  }
2040 
2041 public:
2042 
2043  /** reverse in place
2044  * @note this method requires that the string memory is writeable and is SFINAEd out for const C */
2045  C4_REQUIRE_RW(void) reverse()
2046  {
2047  if(len == 0) return;
2048  detail::_do_reverse(str, str + len - 1);
2049  }
2050 
2051  /** revert a subpart in place
2052  * @note this method requires that the string memory is writeable and is SFINAEd out for const C */
2053  C4_REQUIRE_RW(void) reverse_sub(size_t ifirst, size_t num)
2054  {
2055  C4_ASSERT(ifirst >= 0 && ifirst <= len);
2056  C4_ASSERT(ifirst + num >= 0 && ifirst + num <= len);
2057  if(num == 0) return;
2058  detail::_do_reverse(str + ifirst, str + ifirst + num - 1);
2059  }
2060 
2061  /** revert a range in place
2062  * @note this method requires that the string memory is writeable and is SFINAEd out for const C */
2063  C4_REQUIRE_RW(void) reverse_range(size_t ifirst, size_t ilast)
2064  {
2065  C4_ASSERT(ifirst >= 0 && ifirst <= len);
2066  C4_ASSERT(ilast >= 0 && ilast <= len);
2067  if(ifirst == ilast) return;
2068  detail::_do_reverse(str + ifirst, str + ilast - 1);
2069  }
2070 
2071 public:
2072 
2073  /** erase part of the string. eg, with char s[] = "0123456789",
2074  * substr(s).erase(3, 2) = "01256789", and s is now "01245678989"
2075  * @note this method requires that the string memory is writeable and is SFINAEd out for const C */
2076  C4_REQUIRE_RW(basic_substring) erase(size_t pos, size_t num)
2077  {
2078  C4_ASSERT(pos >= 0 && pos+num <= len);
2079  size_t num_to_move = len - pos - num;
2080  memmove(str + pos, str + pos + num, sizeof(C) * num_to_move);
2081  return basic_substring{str, len - num};
2082  }
2083 
2084  /** @note this method requires that the string memory is writeable and is SFINAEd out for const C */
2085  C4_REQUIRE_RW(basic_substring) erase_range(size_t first, size_t last)
2086  {
2087  C4_ASSERT(first <= last);
2088  return erase(first, static_cast<size_t>(last-first)); // NOLINT
2089  }
2090 
2091  /** erase a part of the string.
2092  * @note @p sub must be a substring of this string
2093  * @note this method requires that the string memory is writeable and is SFINAEd out for const C */
2094  C4_REQUIRE_RW(basic_substring) erase(ro_substr sub)
2095  {
2096  C4_ASSERT(is_super(sub));
2097  C4_ASSERT(sub.str >= str);
2098  return erase(static_cast<size_t>(sub.str - str), sub.len);
2099  }
2100 
2101 public:
2102 
2103  /** replace every occurrence of character @p value with the character @p repl
2104  * @return the number of characters that were replaced
2105  * @note this method requires that the string memory is writeable and is SFINAEd out for const C */
2106  C4_REQUIRE_RW(size_t) replace(C value, C repl, size_t pos=0)
2107  {
2108  C4_ASSERT((pos >= 0 && pos <= len) || pos == npos);
2109  size_t did_it = 0;
2110  while((pos = find(value, pos)) != npos)
2111  {
2112  str[pos++] = repl;
2113  ++did_it;
2114  }
2115  return did_it;
2116  }
2117 
2118  /** replace every occurrence of each character in @p value with
2119  * the character @p repl.
2120  * @return the number of characters that were replaced
2121  * @note this method requires that the string memory is writeable and is SFINAEd out for const C */
2122  C4_REQUIRE_RW(size_t) replace(ro_substr chars, C repl, size_t pos=0)
2123  {
2124  C4_ASSERT((pos >= 0 && pos <= len) || pos == npos);
2125  size_t did_it = 0;
2126  while((pos = first_of(chars, pos)) != npos)
2127  {
2128  str[pos++] = repl;
2129  ++did_it;
2130  }
2131  return did_it;
2132  }
2133 
2134  /** replace @p pattern with @p repl, and write the result into
2135  * @p dst. pattern and repl don't need equal sizes.
2136  *
2137  * @return the required size for dst. No overflow occurs if
2138  * dst.len is smaller than the required size; this can be used to
2139  * determine the required size for an existing container. */
2140  size_t replace_all(rw_substr dst, ro_substr pattern, ro_substr repl, size_t pos=0) const
2141  {
2142  C4_ASSERT( ! pattern.empty()); //!< @todo relax this precondition
2143  C4_ASSERT( ! this ->overlaps(dst)); //!< @todo relax this precondition
2144  C4_ASSERT( ! pattern.overlaps(dst));
2145  C4_ASSERT( ! repl .overlaps(dst));
2146  C4_ASSERT((pos >= 0 && pos <= len) || pos == npos);
2147  C4_SUPPRESS_WARNING_GCC_PUSH
2148  C4_SUPPRESS_WARNING_GCC("-Warray-bounds") // gcc11 has a false positive here
2149  #if (!defined(__clang__)) && (defined(__GNUC__) && (__GNUC__ >= 7))
2150  C4_SUPPRESS_WARNING_GCC("-Wstringop-overflow") // gcc11 has a false positive here
2151  #endif
2152  #define _c4append(first, last) \
2153  { \
2154  C4_ASSERT((last) >= (first)); \
2155  size_t num = static_cast<size_t>((last) - (first)); \
2156  if(num > 0 && sz + num <= dst.len) \
2157  { \
2158  memcpy(dst.str + sz, first, num * sizeof(C)); \
2159  } \
2160  sz += num; \
2161  }
2162  size_t sz = 0;
2163  size_t b = pos;
2164  _c4append(str, str + pos);
2165  do {
2166  size_t e = find(pattern, b);
2167  if(e == npos)
2168  {
2169  _c4append(str + b, str + len);
2170  break;
2171  }
2172  _c4append(str + b, str + e);
2173  _c4append(repl.begin(), repl.end());
2174  b = e + pattern.size();
2175  } while(b < len && b != npos);
2176  return sz;
2177  #undef _c4append
2178  C4_SUPPRESS_WARNING_GCC_POP
2179  }
2180 
2181  /** @} */
2182 
2183 }; // template class basic_substring
2184 
2185 #undef C4_REQUIRE_RW
2186 
2187 
2188 //-----------------------------------------------------------------------------
2189 //-----------------------------------------------------------------------------
2190 //-----------------------------------------------------------------------------
2191 
2192 
2193 /** @defgroup doc_substr_adapters substr adapters
2194  *
2195  * to_substr() and to_csubstr() are used in generic code like
2196  * format(), and allow adding construction of substrings from new
2197  * types like containers.
2198  * @{ */
2199 
2200 
2201 /** neutral version for use in generic code */
2202 C4_ALWAYS_INLINE substr to_substr(substr s) noexcept { return s; }
2203 /** neutral version for use in generic code */
2204 C4_ALWAYS_INLINE csubstr to_csubstr(substr s) noexcept { return csubstr{s.str, s.len}; }
2205 /** neutral version for use in generic code */
2206 C4_ALWAYS_INLINE csubstr to_csubstr(csubstr s) noexcept { return s; }
2207 
2208 
2209 template<size_t N> C4_ALWAYS_INLINE substr to_substr(char (&s)[N]) noexcept
2210 {
2211  return substr(s, N-1);
2212 }
2213 template<size_t N> C4_ALWAYS_INLINE csubstr to_csubstr(const char (&s)[N]) noexcept
2214 {
2215  return csubstr(s, N-1);
2216 }
2217 
2218 
2219 /** @note this overload uses SFINAE to prevent it from overriding the array overload
2220  * @see For a more detailed explanation on why the plain overloads cannot
2221  * coexist, see http://cplusplus.bordoon.com/specializeForCharacterArrays.html */
2222 template<class U> C4_ALWAYS_INLINE auto to_substr(U s) noexcept
2223  -> typename std::enable_if<std::is_same<U, char*>::value, substr>::type
2224 {
2225  return substr(s);
2226 }
2227 /** @note this overload uses SFINAE to prevent it from overriding the array overload
2228  * @see For a more detailed explanation on why the plain overloads cannot
2229  * coexist, see http://cplusplus.bordoon.com/specializeForCharacterArrays.html */
2230 template<class U> C4_ALWAYS_INLINE auto to_csubstr(U s) noexcept
2231  -> typename std::enable_if<std::is_same<U, const char*>::value || std::is_same<U, char*>::value, csubstr>::type
2232 {
2233  return csubstr(s);
2234 }
2235 
2236 
2237 /** a traits class to mark a type as a string type
2238  * (meaning @ref c4::to_csubstr() can be used directly). */
2239 template<class T> struct is_string : public std::false_type {};
2240 /** a traits class to mark a type as a writeable string type
2241  * (meaning @ref c4::to_substr() can be used directly). */
2242 template<class T> struct is_writeable_string : public std::false_type {};
2243 
2244 template<typename C> struct is_string<basic_substring<C>> : public std::true_type {};
2245 template<> struct is_writeable_string<basic_substring<char>> : public std::true_type {};
2246 template<> struct is_writeable_string<basic_substring<const char>> : public std::false_type {};
2247 
2248 template<> struct is_string<const char*> : public std::true_type {};
2249 template<> struct is_writeable_string<const char*> : public std::false_type {};
2250 
2251 template<> struct is_string<char*> : public std::true_type {};
2252 template<> struct is_writeable_string<char*> : public std::true_type {};
2253 
2254 template<size_t N> struct is_string<const char[N]> : public std::true_type {};
2255 template<size_t N> struct is_writeable_string<const char[N]> : public std::false_type {};
2256 
2257 template<size_t N> struct is_string<char[N]> : public std::true_type {};
2258 template<size_t N> struct is_writeable_string<char[N]> : public std::true_type {};
2259 
2260 template<size_t N> struct is_string<const char (&)[N]> : public std::true_type {};
2261 template<size_t N> struct is_writeable_string<const char (&)[N]> : public std::false_type {};
2262 
2263 template<size_t N> struct is_string<char (&)[N]> : public std::true_type {};
2264 template<size_t N> struct is_writeable_string<char (&)[N]> : public std::true_type {};
2265 
2266 template<size_t N> struct is_string<const char (&&)[N]> : public std::true_type {};
2267 template<size_t N> struct is_writeable_string<const char (&&)[N]> : public std::false_type {};
2268 
2269 template<size_t N> struct is_string<char (&&)[N]> : public std::true_type {};
2270 template<size_t N> struct is_writeable_string<char (&&)[N]> : public std::true_type {};
2271 
2272 /** @} */
2273 
2274 
2275 //-----------------------------------------------------------------------------
2276 //-----------------------------------------------------------------------------
2277 //-----------------------------------------------------------------------------
2278 
2279 /** @defgroup doc_substr_cmp substr comparison operators
2280  * @{ */
2281 
2282 template<typename C, size_t N> inline bool operator== (const char (&s)[N], basic_substring<C> const that) noexcept { return that.compare(s, N-1) == 0; }
2283 template<typename C, size_t N> inline bool operator!= (const char (&s)[N], basic_substring<C> const that) noexcept { return that.compare(s, N-1) != 0; }
2284 template<typename C, size_t N> inline bool operator< (const char (&s)[N], basic_substring<C> const that) noexcept { return that.compare(s, N-1) > 0; }
2285 template<typename C, size_t N> inline bool operator> (const char (&s)[N], basic_substring<C> const that) noexcept { return that.compare(s, N-1) < 0; }
2286 template<typename C, size_t N> inline bool operator<= (const char (&s)[N], basic_substring<C> const that) noexcept { return that.compare(s, N-1) >= 0; }
2287 template<typename C, size_t N> inline bool operator>= (const char (&s)[N], basic_substring<C> const that) noexcept { return that.compare(s, N-1) <= 0; }
2288 
2289 template<typename C> inline bool operator== (const char c, basic_substring<C> const that) noexcept { return that.compare(c) == 0; }
2290 template<typename C> inline bool operator!= (const char c, basic_substring<C> const that) noexcept { return that.compare(c) != 0; }
2291 template<typename C> inline bool operator< (const char c, basic_substring<C> const that) noexcept { return that.compare(c) > 0; }
2292 template<typename C> inline bool operator> (const char c, basic_substring<C> const that) noexcept { return that.compare(c) < 0; }
2293 template<typename C> inline bool operator<= (const char c, basic_substring<C> const that) noexcept { return that.compare(c) >= 0; }
2294 template<typename C> inline bool operator>= (const char c, basic_substring<C> const that) noexcept { return that.compare(c) <= 0; }
2295 
2296 /** @} */
2297 
2298 
2299 //-----------------------------------------------------------------------------
2300 //-----------------------------------------------------------------------------
2301 //-----------------------------------------------------------------------------
2302 
2303 /* C4_SUBSTR_NO_OSTREAM_LSHIFT doctest does not deal well with
2304  * template operator<<
2305  * @see https://github.com/onqtam/doctest/pull/431 */
2306 #ifndef C4_SUBSTR_NO_OSTREAM_LSHIFT
2307 #ifdef __clang__
2308 # pragma clang diagnostic push
2309 # pragma clang diagnostic ignored "-Wsign-conversion"
2310 #elif defined(__GNUC__)
2311 # pragma GCC diagnostic push
2312 # pragma GCC diagnostic ignored "-Wsign-conversion"
2313 #endif
2314 
2315 /** output the string to a stream */
2316 template<class OStream, class C>
2317 inline OStream& operator<< (OStream& os, basic_substring<C> s)
2318 {
2319  os.write(s.str, s.len);
2320  return os;
2321 }
2322 
2323 // this causes ambiguity
2324 ///** this is used by google test */
2325 //template<class OStream, class C>
2326 //inline void PrintTo(basic_substring<C> s, OStream* os)
2327 //{
2328 // os->write(s.str, s.len);
2329 //}
2330 
2331 #ifdef __clang__
2332 # pragma clang diagnostic pop
2333 #elif defined(__GNUC__)
2334 # pragma GCC diagnostic pop
2335 #endif
2336 #endif // !C4_SUBSTR_NO_OSTREAM_LSHIFT
2337 
2338 /** @} */
2339 
2340 } // namespace c4
2341 
2342 
2343 C4_SUPPRESS_WARNING_GCC_CLANG_POP
2344 
2345 #endif /* _C4_SUBSTR_HPP_ */
left_< T > left(T val, size_t width, char padchar=' ')
tag function to mark an argument to be aligned left
Definition: format.hpp:519
right_< T > right(T val, size_t width, char padchar=' ')
tag function to mark an argument to be aligned right
Definition: format.hpp:557
auto to_csubstr(U s) noexcept -> typename std::enable_if< std::is_same< U, const char * >::value||std::is_same< U, char * >::value, csubstr >::type
Definition: substr.hpp:2230
auto to_substr(U s) noexcept -> typename std::enable_if< std::is_same< U, char * >::value, substr >::type
Definition: substr.hpp:2222
bool operator<(const char c, basic_substring< C > const that) noexcept
Definition: substr.hpp:2291
bool operator!=(const char c, basic_substring< C > const that) noexcept
Definition: substr.hpp:2290
bool operator==(const char c, basic_substring< C > const that) noexcept
Definition: substr.hpp:2289
bool operator>(const char c, basic_substring< C > const that) noexcept
Definition: substr.hpp:2292
bool operator>=(const char c, basic_substring< C > const that) noexcept
Definition: substr.hpp:2294
bool operator<=(const char c, basic_substring< C > const that) noexcept
Definition: substr.hpp:2293
OStream & operator<<(OStream &os, basic_substring< C > s)
output the string to a stream
Definition: substr.hpp:2317
@ npos
a null string position
Definition: common.hpp:258
@ NONE
an index to none
Definition: common.hpp:251
(Undefined by default) Use shorter error message from checks/asserts: do not show the check condition...
Definition: common.cpp:14
split_iterator_impl(split_proxy_impl const *proxy, size_t pos, C sep)
Definition: substr.hpp:1710
a non-owning string-view, consisting of a character pointer and a length.
Definition: substr.hpp:69
basic_substring _first_real_span_hex(size_t pos) const noexcept
Definition: substr.hpp:1386
void reverse()
reverse in place
Definition: substr.hpp:2045
basic_substring first_uint_span() const
get the first span which can be interpreted as an unsigned integer
Definition: substr.hpp:1134
first_of_any_result first_of_any_iter(It first_span, It last_span) const
Definition: substr.hpp:638
int compare(ro_substr const that) const noexcept
Definition: substr.hpp:263
basic_substring(U s_) noexcept
Construct from a C-string (zero-terminated string)
Definition: substr.hpp:148
size_t first_not_of(ro_substr chars) const
Definition: substr.hpp:894
basic_substring gpop_right(C sep=C('/'), bool skip_empty=false) const
greedy pop right.
Definition: substr.hpp:1912
basic_substring trim(const C c) const
trim the character c left and right
Definition: substr.hpp:496
C const & front() const noexcept
Definition: substr.hpp:209
size_t count(const C c, size_t pos=0) const
count the number of occurrences of c
Definition: substr.hpp:563
basic_substring _first_real_span_oct(size_t pos) const noexcept
Definition: substr.hpp:1566
bool begins_with(const C c) const
true if the first character of the string is c
Definition: substr.hpp:669
first_of_any_result first_of_any(ro_substr s0, ro_substr s1) const
Definition: substr.hpp:613
basic_substring sub(size_t first, size_t num) const noexcept
return [first,first+num[.
Definition: substr.hpp:328
basic_substring pair_range(CC open, CC close) const
get the range delimited by an open-close pair of characters.
Definition: substr.hpp:992
basic_substring pop_right(C sep=C('/'), bool skip_empty=false) const
pop right: return the first split from the right.
Definition: substr.hpp:1782
basic_substring range(size_t first, size_t last=npos) const noexcept
return [first,last[.
Definition: substr.hpp:338
int compare(C const c) const noexcept
Definition: substr.hpp:221
C const & back() const noexcept
Definition: substr.hpp:212
size_t first_not_of(const C c) const
Definition: substr.hpp:850
const_iterator begin() const noexcept
Definition: substr.hpp:199
size_t last_not_of(const C c, size_t start) const
Definition: substr.hpp:881
basic_substring left_of(ro_substr const subs) const noexcept
given subs a substring of the current string, get the portion of the current string to the left of it
Definition: substr.hpp:414
basic_substring triml(const C c) const
trim left
Definition: substr.hpp:448
bool ends_with(const C c) const
true if the last character of the string is c
Definition: substr.hpp:732
size_t last_of(const C c, size_t start=npos) const
Definition: substr.hpp:803
bool is_integer() const
Definition: substr.hpp:1099
void tolower()
convert the string to lower-case
Definition: substr.hpp:1993
basic_substring trimr(ro_substr chars) const
trim right ANY of the characters
Definition: substr.hpp:484
basic_substring(basic_substring const &) noexcept=default
basic_substring _first_real_span_bin(size_t pos) const noexcept
Definition: substr.hpp:1476
void toupper()
convert the string to upper-case
Definition: substr.hpp:1983
first_of_any_result first_of_any(ro_substr s0, ro_substr s1, ro_substr s2, ro_substr s3, ro_substr s4) const
Definition: substr.hpp:631
basic_substring offs(size_t left, size_t right) const noexcept
offset from the ends: return [left,len-right[ ; ie, trim a number of characters from the left and rig...
Definition: substr.hpp:366
basic_substring first_real_span() const
get the first span which can be interpreted as a real (floating-point) number
Definition: substr.hpp:1221
basic_substring unquoted() const
Definition: substr.hpp:1051
C & front() noexcept
Definition: substr.hpp:208
split_proxy_impl split_proxy
Definition: substr.hpp:1766
basic_substring first_int_span() const
get the first span which can be interpreted as a signed integer
Definition: substr.hpp:1146
basic_substring select(ro_substr pattern, size_t pos=0) const
get the substr consisting of the first occurrence of pattern after pos, or an empty substr if none oc...
Definition: substr.hpp:598
basic_substring pair_range_esc(CC open_close, CC escape=CC('\\'))
get the range delimited by a single open-close character (eg, quotes).
Definition: substr.hpp:1007
size_t replace_all(rw_substr dst, ro_substr pattern, ro_substr repl, size_t pos=0) const
replace pattern with repl, and write the result into dst.
Definition: substr.hpp:2140
size_t count(ro_substr c, size_t pos=0) const
count the number of occurrences of s
Definition: substr.hpp:577
basic_substring name_wo_extshort() const
Definition: substr.hpp:1954
split_proxy split(C sep, size_t start_pos=0) const
a view into the splits
Definition: substr.hpp:1769
basic_substring name_wo_extlong() const
Definition: substr.hpp:1959
basic_substring erase(ro_substr sub)
erase a part of the string.
Definition: substr.hpp:2094
C & back() noexcept
Definition: substr.hpp:211
size_t last_not_of(ro_substr chars) const
Definition: substr.hpp:937
bool is_real() const
Definition: substr.hpp:1088
constexpr basic_substring() noexcept
Definition: substr.hpp:110
basic_substring triml(ro_substr chars) const
trim left ANY of the characters.
Definition: substr.hpp:460
size_t len
the length of the substring
Definition: substr.hpp:75
basic_substring left_of(size_t pos, bool include_pos) const noexcept
return [0, pos+include_pos[ .
Definition: substr.hpp:384
size_t last_not_of(ro_substr chars, size_t start) const
Definition: substr.hpp:958
basic_substring last(size_t num) const noexcept
return the last num elements: [len-num,len[
Definition: substr.hpp:355
size_t first_of(const C c, size_t start=0) const
Definition: substr.hpp:791
basic_substring stripl(ro_substr pattern) const
remove a pattern from the left
Definition: substr.hpp:509
bool ends_with(ro_substr pattern) const
true if the string ends with the given pattern
Definition: substr.hpp:755
basic_substring right_of(size_t pos, bool include_pos) const noexcept
return [pos+!include_pos, len[
Definition: substr.hpp:402
size_t find(const C c, size_t start_pos=0) const
Definition: substr.hpp:532
basic_substring trim(ro_substr const chars) const
trim left and right ANY of the characters
Definition: substr.hpp:502
const_iterator end() const noexcept
Definition: substr.hpp:200
typename std::add_const< C >::type CC
CC=const char.
Definition: substr.hpp:82
basic_substring(C *s_, size_t len_) noexcept
Construct from a pointer and length.
Definition: substr.hpp:136
basic_substring erase_range(size_t first, size_t last)
Definition: substr.hpp:2085
basic_substring extshort() const
Definition: substr.hpp:1964
void assign(U s_) noexcept
Assign from a C-string (zero-terminated string of type const C* or C*)
Definition: substr.hpp:169
basic_substring stripr(ro_substr pattern) const
remove a pattern from the right
Definition: substr.hpp:518
size_t first_of(ro_substr chars, size_t start=0) const
Definition: substr.hpp:817
size_t find(ro_substr pattern, size_t start_pos=0) const
Definition: substr.hpp:536
void assign(C *s_, size_t len_) noexcept
Assign from a pointer and length.
Definition: substr.hpp:157
iterator begin() noexcept
Definition: substr.hpp:196
void assign(C(&s_)[N]) noexcept
Assign from an array.
Definition: substr.hpp:154
int compare(C const *that, size_t sz) const noexcept
Definition: substr.hpp:230
bool ends_with_any(ro_substr chars) const
true if the last character of the string is any of the given chars
Definition: substr.hpp:772
void fill(C val)
fill the entire contents with the given val
Definition: substr.hpp:2005
size_t size() const noexcept
Definition: substr.hpp:194
basic_substring basename(C sep=C('/')) const
Definition: substr.hpp:1940
basic_substring(C *beg_, C *end_) noexcept
Construct from two pointers.
Definition: substr.hpp:140
bool is_unsigned_integer() const
Definition: substr.hpp:1112
bool overlaps(ro_substr const that) const noexcept
true if there is overlap of at least one element between that and *this
Definition: substr.hpp:312
basic_substring _first_integral_span(size_t skip_start) const
Definition: substr.hpp:1155
iterator end() noexcept
Definition: substr.hpp:197
bool not_empty() const noexcept
Definition: substr.hpp:193
basic_substring first(size_t num) const noexcept
return the first num elements: [0,num[
Definition: substr.hpp:348
basic_substring left_of(size_t pos) const noexcept
return [0, pos[ .
Definition: substr.hpp:375
void copy_from(ro_substr that, size_t ifirst, size_t num=npos)
copy a string to this substr, starting at a specified given position
Definition: substr.hpp:2028
void reverse_range(size_t ifirst, size_t ilast)
reverse a range in place
Definition: substr.hpp:2063
basic_substring pair_range_nested(CC open, CC close) const
get the range delimited by an open-close pair of characters, with possibly nested occurrences.
Definition: substr.hpp:1028
C const * data() const noexcept
Definition: substr.hpp:203
size_t replace(C value, C repl, size_t pos=0)
replace every occurrence of character value with the character repl
Definition: substr.hpp:2106
bool has_str() const noexcept
Definition: substr.hpp:191
bool begins_with(ro_substr pattern) const
true if the string begins with the given pattern
Definition: substr.hpp:698
bool is_number() const
Definition: substr.hpp:1073
basic_substring sub(size_t first) const noexcept
return [first,len[
Definition: substr.hpp:321
basic_substring _first_real_span_dec(size_t pos) const noexcept
Definition: substr.hpp:1299
first_of_any_result first_of_any(ro_substr s0, ro_substr s1, ro_substr s2) const
Definition: substr.hpp:619
void copy_from(ro_substr that)
copy a string to this substr, starting at 0
Definition: substr.hpp:2015
bool begins_with_any(ro_substr chars) const
true if the first character of the string is any of the given chars
Definition: substr.hpp:715
bool next_split(C sep, size_t *start_pos, basic_substring *out) const
returns true if the string has not been exhausted yet, meaning it's ok to call next_split() again.
Definition: substr.hpp:1666
basic_substring(basic_substring &&) noexcept=default
bool empty() const noexcept
Definition: substr.hpp:192
basic_substring right_of(ro_substr const subs) const noexcept
given subs a substring of the current string, get the portion of the current string to the right of i...
Definition: substr.hpp:428
size_t first_not_of(const C c, size_t start) const
Definition: substr.hpp:860
void assign(C *beg_, C *end_) noexcept
Assign from two pointers.
Definition: substr.hpp:161
basic_substring trimr(const C c) const
trim the character c from the right
Definition: substr.hpp:472
first_of_any_result first_of_any(ro_substr s0, ro_substr s1, ro_substr s2, ro_substr s3) const
Definition: substr.hpp:625
constexpr basic_substring(C(&s_)[N]) noexcept
Construct from an array.
Definition: substr.hpp:133
basic_substring dirname(C sep=C('/')) const
Definition: substr.hpp:1947
size_t replace(ro_substr chars, C repl, size_t pos=0)
replace every occurrence of each character in chars with the character repl.
Definition: substr.hpp:2122
bool is_super(ro_substr const that) const noexcept
true if that is a substring of *this (ie, from the same buffer)
Definition: substr.hpp:303
size_t last_not_of(const C c) const
Definition: substr.hpp:871
bool ends_with(const C c, size_t num) const
true if the last num characters of the string are c
Definition: substr.hpp:738
bool begins_with(const C c, size_t num) const
true if the first num characters of the string are c
Definition: substr.hpp:681
static constexpr C4_CONST bool _is_hex_char(char c) noexcept
true if the character is in [0-9a-fA-F]
Definition: substr.hpp:1284
basic_substring erase(size_t pos, size_t num)
erase part of the string.
Definition: substr.hpp:2076
C * data() noexcept
Definition: substr.hpp:202
basic_substring _word_follows(size_t pos, csubstr word) const noexcept
Definition: substr.hpp:1289
void reverse_sub(size_t ifirst, size_t num)
reverse a subpart in place
Definition: substr.hpp:2053
basic_substring gpop_left(C sep=C('/'), bool skip_empty=false) const
greedy pop left.
Definition: substr.hpp:1890
size_t first_not_of(ro_substr chars, size_t start) const
Definition: substr.hpp:915
void clear() noexcept
Definition: substr.hpp:120
size_t last_of(ro_substr chars, size_t start=npos) const
Definition: substr.hpp:832
typename std::remove_const< C >::type NCC_
NCC_=non const char.
Definition: substr.hpp:83
basic_substring first_non_empty_span() const
get the first span consisting exclusively of non-empty characters
Definition: substr.hpp:1122
C * str
a restricted pointer to the first character of the substring
Definition: substr.hpp:73
basic_substring right_of(size_t pos) const noexcept
return [pos+1, len[
Definition: substr.hpp:393
basic_substring pop_left(C sep=C('/'), bool skip_empty=false) const
return the first split from the left.
Definition: substr.hpp:1835
static constexpr C4_CONST bool _is_delim_char(char c) noexcept
true if the character is a delimiter character at the end
Definition: substr.hpp:1276
bool is_sub(ro_substr const that) const noexcept
true if *this is a substring of that (ie, from the same buffer)
Definition: substr.hpp:297
basic_substring select(const C c, size_t pos=0) const
get the substr consisting of the first occurrence of c after pos, or an empty substr if none occurs
Definition: substr.hpp:591
basic_substring extlong() const
Definition: substr.hpp:1969
a traits class to mark a type as a string type (meaning c4::to_csubstr() can be used directly).
Definition: substr.hpp:2239
a traits class to mark a type as a writeable string type (meaning c4::to_substr() can be used directl...
Definition: substr.hpp:2242
#define _c4append(first, last)