rapidyaml  0.7.2
parse and emit YAML, and do it fast
substr.hpp
Go to the documentation of this file.
1 #ifndef _C4_SUBSTR_HPP_
2 #define _C4_SUBSTR_HPP_
3 
4 /** @file substr.hpp read+write string views */
5 
6 #include <string.h>
7 #include <ctype.h>
8 #include <type_traits>
9 
10 #include "c4/config.hpp"
11 #include "c4/error.hpp"
12 #include "c4/substr_fwd.hpp"
13 
14 #ifdef __clang__
15 # pragma clang diagnostic push
16 # pragma clang diagnostic ignored "-Wold-style-cast"
17 #elif defined(__GNUC__)
18 # pragma GCC diagnostic push
19 # pragma GCC diagnostic ignored "-Wtype-limits" // disable warnings on size_t>=0, used heavily in assertions below. These assertions are a preparation step for providing the index type as a template parameter.
20 # pragma GCC diagnostic ignored "-Wuseless-cast"
21 # pragma GCC diagnostic ignored "-Wold-style-cast"
22 #endif
23 
24 
25 namespace c4 {
26 
27 /** @defgroup doc_substr Substring: read/write string views
28  * @{ */
29 
30 //-----------------------------------------------------------------------------
31 //-----------------------------------------------------------------------------
32 //-----------------------------------------------------------------------------
33 
34 /** @cond dev */
35 namespace detail {
36 template<typename C> // in-place reversal of the characters in the inclusive range [first,last]
37 static inline void _do_reverse(C *C4_RESTRICT first, C *C4_RESTRICT last)
38 {
39  for( ; first < last; ++first, --last) // walk both ends inwards until they meet
40  {
41  C const held = *last;
42  *last = *first;
43  *first = held;
44  }
45 }
46 } // namespace detail
47 /** @endcond */
48 
49 //-----------------------------------------------------------------------------
50 //-----------------------------------------------------------------------------
51 //-----------------------------------------------------------------------------
52 
53 /** @cond dev */
54 // C4_REQUIRE_RW(ret_type): SFINAE helper to deuglify member declarations. It expands to a member-template header whose return type is ret_type, and which is only enabled when the character type C is non-const. Undefined after the class.
55 // https://stackoverflow.com/questions/43051882/how-to-disable-a-class-member-funrtion-for-certain-template-types
56 #define C4_REQUIRE_RW(ret_type) \
57  template <typename U=C> \
58  typename std::enable_if< ! std::is_const<U>::value, ret_type>::type
59 /** @endcond */
60 
61 
62 /** a non-owning string-view, consisting of a character pointer
63  * and a length.
64  *
65  * @note The pointer is explicitly restricted.
66  *
67  * @see a [quickstart
68  * sample](https://rapidyaml.readthedocs.io/latest/doxygen/group__doc__quickstart.html#ga43e253da0692c13967019446809c1113)
69  * in rapidyaml's documentation.
70  *
71  * @see @ref substr and @ref to_substr()
72  * @see @ref csubstr and @ref to_csubstr()
73  */
74 template<class C>
75 struct C4CORE_EXPORT basic_substring
76 {
77 public:
78 
79  /** a restricted pointer to the first character of the substring */
80  C * C4_RESTRICT str;
81  /** the length of the substring */
82  size_t len;
83 
84 public:
85 
86  /** @name Types */
87  /** @{ */
88 
89  using CC = typename std::add_const<C>::type; //!< CC=const char
90  using NCC_ = typename std::remove_const<C>::type; //!< NCC_=non const char
91 
92  using ro_substr = basic_substring<CC>; //!< read-only view: same template over the const character type
93  using rw_substr = basic_substring<NCC_>; //!< read-write view: same template over the non-const character type
94 
95  using char_type = C;
96  using size_type = size_t;
97 
98  using iterator = C*;
99  using const_iterator = CC*;
100 
101  enum : size_t { npos = (size_t)-1, NONE = (size_t)-1 }; //!< both sentinels have the same value: the largest size_t
102 
103  /// convert automatically to substring of const C. Only enabled when C is non-const (SFINAE on U=C); the result is a reference to *this reinterpreted as ro_substr, not a new object.
104  template<class U=C>
105  C4_ALWAYS_INLINE operator typename std::enable_if<!std::is_const<U>::value, ro_substr const&>::type () const noexcept
106  {
107  return *(ro_substr const*)this; // don't call the str+len ctor because it does a check
108  }
109 
110  /** @} */
111 
112 public:
113 
114  /** @name Default construction and assignment */
115  /** @{ */
116  /// default construction: a null, zero-length view (both members are value-initialized)
117  C4_ALWAYS_INLINE constexpr basic_substring() noexcept : str(), len() {}
118  // copy and move construction are compiler-generated; constructing from nullptr gives a null, zero-length view
119  C4_ALWAYS_INLINE basic_substring(basic_substring const&) noexcept = default;
120  C4_ALWAYS_INLINE basic_substring(basic_substring &&) noexcept = default;
121  C4_ALWAYS_INLINE basic_substring(std::nullptr_t) noexcept : str(nullptr), len(0) {}
122  // assignment mirrors construction; assigning nullptr resets to a null, zero-length view
123  C4_ALWAYS_INLINE basic_substring& operator= (basic_substring const&) noexcept = default;
124  C4_ALWAYS_INLINE basic_substring& operator= (basic_substring &&) noexcept = default;
125  C4_ALWAYS_INLINE basic_substring& operator= (std::nullptr_t) noexcept { str = nullptr; len = 0; return *this; }
126  // reset to a null, zero-length view (same effect as assigning nullptr); the viewed memory is not touched
127  C4_ALWAYS_INLINE void clear() noexcept { str = nullptr; len = 0; }
128 
129  /** @} */
130 
131 public:
132 
133  /** @name Construction and assignment from characters with the same type */
134  /** @{ */
135 
136  /** Construct from a character array of N elements.
137  * @warning the length is always set to N-1, as if the final element were
138  * the zero terminator, whether or not the array is actually zero terminated */
139  template<size_t N>
140  C4_ALWAYS_INLINE constexpr basic_substring(C (&s_)[N]) noexcept : str(s_), len(N-1) {}
141  /** Construct from a pointer and length.
142  * @warning the input string need not be zero terminated; asserts that the pointer is non-null unless the length is zero. */
143  C4_ALWAYS_INLINE basic_substring(C *s_, size_t len_) noexcept : str(s_), len(len_) { C4_ASSERT(str || !len_); }
144  /** Construct from two pointers delimiting [beg_,end_[.
145  * @warning the end pointer MUST BE larger than or equal to the begin pointer (asserted)
146  * @warning the input string need not be zero terminated */
147  C4_ALWAYS_INLINE basic_substring(C *beg_, C *end_) noexcept : str(beg_), len(static_cast<size_t>(end_ - beg_)) { C4_ASSERT(end_ >= beg_); }
148  /** Construct from a C-string (zero-terminated string)
149  * @warning the input string MUST BE zero terminated.
150  * @warning will call strlen(), unless the pointer is null, in which case the length is set to 0
151  * @note this overload uses SFINAE to prevent it from overriding the array ctor
152  * @see For a more detailed explanation on why the plain overloads cannot
153  * coexist, see http://cplusplus.bordoon.com/specializeForCharacterArrays.html */
154  template<class U, typename std::enable_if<std::is_same<U, C*>::value || std::is_same<U, NCC_*>::value, int>::type=0>
155  C4_ALWAYS_INLINE basic_substring(U s_) noexcept : str(s_), len(s_ ? strlen(s_) : 0) {}
156 
157  /** Assign from a character array of N elements.
158  * @warning the length is always set to N-1, as if the final element were
159  * the zero terminator, whether or not the array is actually zero terminated */
160  template<size_t N>
161  C4_ALWAYS_INLINE void assign(C (&s_)[N]) noexcept { str = (s_); len = (N-1); }
162  /** Assign from a pointer and length.
163  * @warning the input string need not be zero terminated; asserts that the pointer is non-null unless the length is zero. */
164  C4_ALWAYS_INLINE void assign(C *s_, size_t len_) noexcept { str = s_; len = len_; C4_ASSERT(str || !len_); }
165  /** Assign from two pointers delimiting [beg_,end_[.
166  * @warning the end pointer MUST BE larger than or equal to the begin pointer (asserted)
167  * @warning the input string need not be zero terminated. */
168  C4_ALWAYS_INLINE void assign(C *beg_, C *end_) noexcept { C4_ASSERT(end_ >= beg_); str = (beg_); len = static_cast<size_t>(end_ - beg_); }
169  /** Assign from a C-string (zero-terminated string)
170  * @warning the input string must be zero terminated.
171  * @warning will call strlen(), unless the pointer is null, in which case the length is set to 0
172  * @note this overload uses SFINAE to prevent it from overriding the array overload
173  * @see For a more detailed explanation on why the plain overloads cannot
174  * coexist, see http://cplusplus.bordoon.com/specializeForCharacterArrays.html */
175  template<class U, typename std::enable_if<std::is_same<U, C*>::value || std::is_same<U, NCC_*>::value, int>::type=0>
176  C4_ALWAYS_INLINE void assign(U s_) noexcept { str = (s_); len = (s_ ? strlen(s_) : 0); }
177 
178  /** Assign from a character array of N elements.
179  * @warning the input string need not be zero terminated, but the length is always set to N-1, as if the final element were the zero terminator. */
180  template<size_t N>
181  C4_ALWAYS_INLINE basic_substring& operator= (C (&s_)[N]) noexcept { str = (s_); len = (N-1); return *this; }
182  /** Assign from a C-string (zero-terminated string)
183  * @warning the input string MUST BE zero terminated.
184  * @warning will call strlen(), unless the pointer is null, in which case the length is set to 0
185  * @note this overload uses SFINAE to prevent it from overriding the array overload
186  * @see For a more detailed explanation on why the plain overloads cannot
187  * coexist, see http://cplusplus.bordoon.com/specializeForCharacterArrays.html */
188  template<class U, typename std::enable_if<std::is_same<U, C*>::value || std::is_same<U, NCC_*>::value, int>::type=0>
189  C4_ALWAYS_INLINE basic_substring& operator= (U s_) noexcept { str = s_; len = s_ ? strlen(s_) : 0; return *this; }
190 
191  /** @} */
192 
193 public:
194 
195  /** @name Standard accessor methods */
196  /** @{ */
197  // state queries: empty() is true for a zero length OR a null pointer; has_str() additionally requires the first character to be non-zero; size() is the stored length
198  C4_ALWAYS_INLINE C4_PURE bool has_str() const noexcept { return ! empty() && str[0] != C(0); }
199  C4_ALWAYS_INLINE C4_PURE bool empty() const noexcept { return (len == 0 || str == nullptr); }
200  C4_ALWAYS_INLINE C4_PURE bool not_empty() const noexcept { return (len != 0 && str != nullptr); }
201  C4_ALWAYS_INLINE C4_PURE size_t size() const noexcept { return len; }
202  // iterators are plain pointers over [str, str+len[
203  C4_ALWAYS_INLINE C4_PURE iterator begin() noexcept { return str; }
204  C4_ALWAYS_INLINE C4_PURE iterator end () noexcept { return str + len; }
205  // const overloads of the iterators
206  C4_ALWAYS_INLINE C4_PURE const_iterator begin() const noexcept { return str; }
207  C4_ALWAYS_INLINE C4_PURE const_iterator end () const noexcept { return str + len; }
208  // raw access to the first character; may be null
209  C4_ALWAYS_INLINE C4_PURE C * data() noexcept { return str; }
210  C4_ALWAYS_INLINE C4_PURE C const* data() const noexcept { return str; }
211  // element access; the index is checked only through C4_ASSERT
212  C4_ALWAYS_INLINE C4_PURE C & operator[] (size_t i) noexcept { C4_ASSERT(i >= 0 && i < len); return str[i]; }
213  C4_ALWAYS_INLINE C4_PURE C const& operator[] (size_t i) const noexcept { C4_ASSERT(i >= 0 && i < len); return str[i]; }
214  // first character; asserts that the view has at least one character and a non-null pointer
215  C4_ALWAYS_INLINE C4_PURE C & front() noexcept { C4_ASSERT(len > 0 && str != nullptr); return *str; }
216  C4_ALWAYS_INLINE C4_PURE C const& front() const noexcept { C4_ASSERT(len > 0 && str != nullptr); return *str; }
217  // last character; asserts that the view has at least one character and a non-null pointer
218  C4_ALWAYS_INLINE C4_PURE C & back() noexcept { C4_ASSERT(len > 0 && str != nullptr); return *(str + len - 1); }
219  C4_ALWAYS_INLINE C4_PURE C const& back() const noexcept { C4_ASSERT(len > 0 && str != nullptr); return *(str + len - 1); }
220 
221  /** @} */
222 
223 public:
224 
225  /** @name Comparison methods */
226  /** @{ */
227 
228  C4_PURE int compare(C const c) const noexcept
229  {
230  C4_XASSERT((str != nullptr) || len == 0);
231  if(C4_LIKELY(str != nullptr && len > 0))
232  return (*str == c) ? (static_cast<int>(len) - 1) : (*str - c); // the first character decides; on a tie, any extra length makes *this larger
233  // an empty string sorts before any character
234  return -1;
235  }
236 
237  C4_PURE int compare(const char *C4_RESTRICT that, size_t sz) const noexcept
238  {
239  C4_XASSERT(that || sz == 0);
240  C4_XASSERT(str || len == 0);
241  if(C4_LIKELY(str && that))
242  {
243  // lexicographic comparison over the common prefix
244  const size_t common = len < sz ? len : sz;
245  for(size_t pos = 0; pos < common; ++pos)
246  {
247  if(str[pos] != that[pos])
248  return str[pos] < that[pos] ? -1 : 1;
249  }
250  // the common prefix is equal: the shorter string sorts first
251  if(len == sz)
252  return 0;
253  return len < sz ? -1 : 1;
254  }
255  // at least one of the pointers is null: only the lengths can be compared
256  if(len == sz)
257  {
258  C4_XASSERT(len == 0 && sz == 0);
259  return 0;
260  }
261  return len < sz ? -1 : 1;
262  }
263  /// three-way comparison with another view; forwards to the pointer+length overload of compare()
264  C4_ALWAYS_INLINE C4_PURE int compare(ro_substr const that) const noexcept { return this->compare(that.str, that.len); }
265  // comparison with nullptr looks only at the pointer: a zero-length view with a non-null pointer is != nullptr
266  C4_ALWAYS_INLINE C4_PURE bool operator== (std::nullptr_t) const noexcept { return str == nullptr; }
267  C4_ALWAYS_INLINE C4_PURE bool operator!= (std::nullptr_t) const noexcept { return str != nullptr; }
268  // comparison with a single character, through compare(C)
269  C4_ALWAYS_INLINE C4_PURE bool operator== (C const c) const noexcept { return this->compare(c) == 0; }
270  C4_ALWAYS_INLINE C4_PURE bool operator!= (C const c) const noexcept { return this->compare(c) != 0; }
271  C4_ALWAYS_INLINE C4_PURE bool operator< (C const c) const noexcept { return this->compare(c) < 0; }
272  C4_ALWAYS_INLINE C4_PURE bool operator> (C const c) const noexcept { return this->compare(c) > 0; }
273  C4_ALWAYS_INLINE C4_PURE bool operator<= (C const c) const noexcept { return this->compare(c) <= 0; }
274  C4_ALWAYS_INLINE C4_PURE bool operator>= (C const c) const noexcept { return this->compare(c) >= 0; }
275  // comparison with another view of either constness, through compare()
276  template<class U> C4_ALWAYS_INLINE C4_PURE bool operator== (basic_substring<U> const that) const noexcept { return this->compare(that) == 0; }
277  template<class U> C4_ALWAYS_INLINE C4_PURE bool operator!= (basic_substring<U> const that) const noexcept { return this->compare(that) != 0; }
278  template<class U> C4_ALWAYS_INLINE C4_PURE bool operator< (basic_substring<U> const that) const noexcept { return this->compare(that) < 0; }
279  template<class U> C4_ALWAYS_INLINE C4_PURE bool operator> (basic_substring<U> const that) const noexcept { return this->compare(that) > 0; }
280  template<class U> C4_ALWAYS_INLINE C4_PURE bool operator<= (basic_substring<U> const that) const noexcept { return this->compare(that) <= 0; }
281  template<class U> C4_ALWAYS_INLINE C4_PURE bool operator>= (basic_substring<U> const that) const noexcept { return this->compare(that) >= 0; }
282  // comparison with a character array: only the first N-1 elements are compared (the final element is taken to be the terminator)
283  template<size_t N> C4_ALWAYS_INLINE C4_PURE bool operator== (const char (&that)[N]) const noexcept { return this->compare(that, N-1) == 0; }
284  template<size_t N> C4_ALWAYS_INLINE C4_PURE bool operator!= (const char (&that)[N]) const noexcept { return this->compare(that, N-1) != 0; }
285  template<size_t N> C4_ALWAYS_INLINE C4_PURE bool operator< (const char (&that)[N]) const noexcept { return this->compare(that, N-1) < 0; }
286  template<size_t N> C4_ALWAYS_INLINE C4_PURE bool operator> (const char (&that)[N]) const noexcept { return this->compare(that, N-1) > 0; }
287  template<size_t N> C4_ALWAYS_INLINE C4_PURE bool operator<= (const char (&that)[N]) const noexcept { return this->compare(that, N-1) <= 0; }
288  template<size_t N> C4_ALWAYS_INLINE C4_PURE bool operator>= (const char (&that)[N]) const noexcept { return this->compare(that, N-1) >= 0; }
289 
290  /** @} */
291 
292 public:
293 
294  /** @name Sub-selection methods */
295  /** @{ */
296 
297  /** true if *this is a substring of that (ie, from the same buffer). This is the mirror of is_super(): it evaluates that.is_super(*this). */
298  C4_ALWAYS_INLINE C4_PURE bool is_sub(ro_substr const that) const noexcept
299  {
300  return that.is_super(*this);
301  }
302 
303  /** true if the memory range viewed by @p that lies entirely within the range viewed by *this (ie, both come from the same buffer) */
304  C4_ALWAYS_INLINE C4_PURE bool is_super(ro_substr const that) const noexcept
305  {
306  if(C4_LIKELY(len > 0))
307  return str <= that.str && str+len >= that.str+that.len;
308  // *this is empty: only an empty view at the very same non-null address qualifies
309  return str != nullptr && that.str == str && that.len == 0;
310  }
311 
312  /** true if the memory ranges of that and *this share at least one element */
313  C4_ALWAYS_INLINE C4_PURE bool overlaps(ro_substr const that) const noexcept
314  {
315  // thanks @timwynants
316  return str < that.str+that.len && str+len > that.str;
317  }
318 
319 public:
320 
321  /** return [first,len[ : the part of the string from index @p first up to the end. @p first may be equal to len, which gives an empty view at the end. */
322  C4_ALWAYS_INLINE C4_PURE basic_substring sub(size_t first) const noexcept
323  {
324  C4_ASSERT(first >= 0 && first <= len);
325  return basic_substring(str + first, len - first);
326  }
327 
328  /** return the @p num characters starting at index @p first: [first,first+num[. Passing npos as @p num selects up to the end: [first,len[ */
329  C4_ALWAYS_INLINE C4_PURE basic_substring sub(size_t first, size_t num) const noexcept
330  {
331  C4_ASSERT(first >= 0 && first <= len);
332  C4_ASSERT((num >= 0 && num <= len) || (num == npos));
333  const size_t rnum = (num == npos) ? (len - first) : num; // npos selects everything up to the end
334  C4_ASSERT((first >= 0 && first + rnum <= len) || (num == 0));
335  return basic_substring(str + first, rnum);
336  }
337 
338  /** return the characters between the indices @p first (included) and @p last (excluded): [first,last[. Passing npos as @p last selects up to the end: [first,len[ */
339  C4_ALWAYS_INLINE C4_PURE basic_substring range(size_t first, size_t last=npos) const noexcept
340  {
341  C4_ASSERT(first >= 0 && first <= len);
342  if(last == npos) last = len; // npos selects everything up to the end
343  C4_ASSERT(first <= last);
344  C4_ASSERT(last >= 0 && last <= len);
345  return basic_substring(str + first, last - first);
346  }
347 
348  /** return the first @p num elements: [0,num[ ; passing npos selects the whole string */
349  C4_ALWAYS_INLINE C4_PURE basic_substring first(size_t num) const noexcept
350  {
351  C4_ASSERT(num <= len || num == npos);
352  return basic_substring(str, num == npos ? len : num);
353  }
354 
355  /** return the last @p num elements: [len-num,len[ ; passing npos selects the whole string */
356  C4_ALWAYS_INLINE C4_PURE basic_substring last(size_t num) const noexcept
357  {
358  C4_ASSERT(num <= len || num == npos);
359  if(num == npos)
360  return *this; // npos selects the whole string
361  return basic_substring(str + len - num, num);
362  }
363 
364  /** offset from the ends: return [left,len-right[ ; ie, trim a
365  number of characters from the left and right. This is
366  equivalent to python's negative list indices. Requires left+right <= len. */
367  C4_ALWAYS_INLINE C4_PURE basic_substring offs(size_t left, size_t right) const noexcept
368  {
369  C4_ASSERT(left >= 0 && left <= len);
370  C4_ASSERT(right >= 0 && right <= len);
371  C4_ASSERT(left <= len - right); // the trimmed ends must not cross, or the length computed below would wrap around
372  return basic_substring(str + left, len - right - left);
373  }
374 
375  /** return [0, pos[ , or the whole string when @p pos is npos. Same as .first(pos), but provided for compatibility with .right_of() */
376  C4_ALWAYS_INLINE C4_PURE basic_substring left_of(size_t pos) const noexcept
377  {
378  C4_ASSERT(pos <= len || pos == npos);
379  if(pos == npos)
380  return *this; // npos selects the whole string
381  return basic_substring(str, pos);
382  }
383 
384  /** return [0, pos+include_pos[ , or the whole string when @p pos is npos. Same as .first(pos+1) when @p include_pos is true; provided for compatibility with .right_of() */
385  C4_ALWAYS_INLINE C4_PURE basic_substring left_of(size_t pos, bool include_pos) const noexcept
386  {
387  C4_ASSERT(pos <= len || pos == npos);
388  if(pos == npos)
389  return *this; // npos selects the whole string
390  return basic_substring(str, pos+include_pos);
391  }
392 
393  /** return [pos+1, len[ . When @p pos is npos or is not the index of a character (pos == len), there is nothing to the right: an empty view at the end of the string is returned. */
394  C4_ALWAYS_INLINE C4_PURE basic_substring right_of(size_t pos) const noexcept
395  {
396  C4_ASSERT(pos <= len || pos == npos);
397  return (pos != npos && pos < len) ? // pos == len must not reach the subtraction below, which would wrap around
398  basic_substring(str + (pos + 1), len - (pos + 1)) :
399  basic_substring(str + len, size_t(0));
400  }
401 
402  /** return [pos+!include_pos, len[ . When @p pos is npos or is not the index of a character (pos == len), an empty view at the end of the string is returned. */
403  C4_ALWAYS_INLINE C4_PURE basic_substring right_of(size_t pos, bool include_pos) const noexcept
404  {
405  C4_ASSERT(pos <= len || pos == npos);
406  return (pos != npos && pos < len) ? // pos == len must not reach the subtraction below, which would wrap around when include_pos is false
407  basic_substring(str + (pos + !include_pos), len - (pos + !include_pos)) :
408  basic_substring(str + len, size_t(0));
409  }
410 
411 public:
412 
413  /** given @p subs a substring of the current string, get the
414  * portion of the current string that comes before its first character */
415  C4_ALWAYS_INLINE C4_PURE basic_substring left_of(ro_substr const subs) const noexcept
416  {
417  C4_ASSERT(is_super(subs) || subs.empty());
418  auto const sub_beg = subs.begin();
419  auto const beg = begin();
420  auto const fin = end();
421  const bool inside = (beg <= sub_beg) && (sub_beg <= fin);
422  // when subs does not begin inside *this, the result is empty
423  const size_t num = inside ? static_cast<size_t>(sub_beg - beg) : size_t(0);
424  return sub(0, num);
425  }
426 
427  /** given @p subs a substring of the current string, get the
428  * portion of the current string that comes after its last character */
429  C4_ALWAYS_INLINE C4_PURE basic_substring right_of(ro_substr const subs) const noexcept
430  {
431  C4_ASSERT(is_super(subs) || subs.empty());
432  auto const sub_fin = subs.end();
433  auto const beg = begin();
434  auto const fin = end();
435  if((sub_fin < beg) || (sub_fin > fin))
436  return sub(0, 0); // subs does not end inside *this
437  // everything from the end of subs up to the end of *this
438  return sub(static_cast<size_t>(sub_fin - beg), static_cast<size_t>(fin - sub_fin));
439  }
440 
441  /** @} */
442 
443 public:
444 
445  /** @name Removing characters (trim()) / patterns (strip()) from the tips of the string */
446  /** @{ */
447 
448  /** trim left: remove every leading occurrence of the character @p c */
449  basic_substring triml(const C c) const
450  {
451  if(empty())
452  return sub(0, 0);
453  // index of the first character that survives the trimming
454  const size_t keep = first_not_of(c);
455  if(keep == npos)
456  return sub(0, 0); // the string consists only of c
457  return sub(keep);
458  }
459  /** trim left ANY of the characters: remove leading characters for as long as they are found in @p chars.
460  * @see stripl() to remove a pattern from the left */
461  basic_substring triml(ro_substr chars) const
462  {
463  if( ! empty())
464  {
465  size_t pos = first_not_of(chars); // first character that is not in chars
466  if(pos != npos)
467  return sub(pos);
468  }
469  return sub(0, 0); // empty input, or every character is in chars
470  }
471 
472  /** trim right: remove every trailing occurrence of the character @p c */
473  basic_substring trimr(const C c) const
474  {
475  if(empty())
476  return sub(0, 0);
477  // index of the last character that survives the trimming
478  const size_t keep = last_not_of(c, npos);
479  if(keep == npos)
480  return sub(0, 0); // the string consists only of c
481  return sub(0, keep+1);
482  }
483  /** trim right ANY of the characters: remove trailing characters for as long as they are found in @p chars.
484  * @see stripr() to remove a pattern from the right */
485  basic_substring trimr(ro_substr chars) const
486  {
487  if( ! empty())
488  {
489  size_t pos = last_not_of(chars, npos); // last character that is not in chars
490  if(pos != npos)
491  return sub(0, pos+1);
492  }
493  return sub(0, 0); // empty input, or every character is in chars
494  }
495 
496  /** trim the character c left and right: the result of triml(c) is passed through trimr(c) */
497  basic_substring trim(const C c) const
498  {
499  return triml(c).trimr(c);
500  }
501  /** trim left and right ANY of the characters: the result of triml(chars) is passed through trimr(chars)
502  * @see strip() to remove a pattern from the left and right */
503  basic_substring trim(ro_substr const chars) const
504  {
505  return triml(chars).trimr(chars);
506  }
507 
508  /** remove a pattern from the left: when the string begins with @p pattern, return what follows it; otherwise return the string unchanged
509  * @see triml() to remove characters*/
510  basic_substring stripl(ro_substr pattern) const
511  {
512  if( ! begins_with(pattern))
513  return *this;
514  return sub(pattern.len < len ? pattern.len : len);
515  }
516 
517  /** remove a pattern from the right: when the string ends with @p pattern, return what precedes it; otherwise return the string unchanged
518  * @see trimr() to remove characters*/
519  basic_substring stripr(ro_substr pattern) const
520  {
521  if( ! ends_with(pattern))
522  return *this;
523  return left_of(len - (pattern.len < len ? pattern.len : len));
524  }
525 
526  /** @} */
527 
528 public:
529 
530  /** @name Lookup methods */
531  /** @{ */
532  /// position of the first occurrence of the character c at or after start_pos, or npos if there is none; forwards to first_of()
533  inline size_t find(const C c, size_t start_pos=0) const
534  {
535  return first_of(c, start_pos);
536  }
537  inline size_t find(ro_substr pattern, size_t start_pos=0) const
538  {
539  C4_ASSERT(start_pos == npos || (start_pos >= 0 && start_pos <= len));
540  if(pattern.len > len)
541  return npos; // the pattern cannot fit
542  // one past the last position at which the whole pattern still fits
543  const size_t stop = len - pattern.len + 1;
544  for(size_t at = start_pos; at < stop; ++at)
545  {
546  size_t matched = 0;
547  while(matched < pattern.len)
548  {
549  C4_ASSERT(at + matched < len);
550  if(str[at + matched] != pattern.str[matched])
551  break;
552  ++matched;
553  }
554  // every character of the pattern matched at this position
555  if(matched == pattern.len)
556  return at;
557  }
558  return npos;
559  }
560 
561 public:
562 
563  /** count the number of occurrences of the character @p c, starting the search at @p pos */
564  inline size_t count(const C c, size_t pos=0) const
565  {
566  C4_ASSERT(pos >= 0 && pos <= len);
567  size_t total = 0;
568  // hop from one occurrence to the next, resuming right after each hit
569  for(pos = find(c, pos); pos != npos; pos = find(c, pos + 1))
570  {
571  ++total;
572  }
573  // pos is npos here: there are no further occurrences
574  return total;
575  }
576 
577  /** count the number of non-overlapping occurrences of the pattern @p c, starting the search at @p pos. An empty pattern counts as zero occurrences. */
578  inline size_t count(ro_substr c, size_t pos=0) const
579  {
580  C4_ASSERT(pos >= 0 && pos <= len);
581  size_t num = 0;
582  if(c.len == 0) return num; // find() matches an empty pattern without advancing, so the loop below would never end
583  for(pos = find(c, pos); pos != npos; pos = find(c, pos + c.len))
584  {
585  // the search resumes right after the match, so occurrences never overlap
586  ++num;
587  }
588  return num;
589  }
590 
591  /** get the one-character substr at the first occurrence of @p c at or after @p pos, or a default-constructed (null) substr if none occurs */
592  inline basic_substring select(const C c, size_t pos=0) const
593  {
594  const size_t where = find(c, pos);
595  return where == npos ? basic_substring() : sub(where, 1);
596  }
597 
598  /** get the substr spanning the first occurrence of @p pattern at or after @p pos, or a default-constructed (null) substr if none occurs */
599  inline basic_substring select(ro_substr pattern, size_t pos=0) const
600  {
601  const size_t where = find(pattern, pos);
602  return where == npos ? basic_substring() : sub(where, pattern.len);
603  }
604 
605 public:
606  /** result of a search for any of several patterns (see first_of_any_iter()); converts to false when nothing was found */
607  struct first_of_any_result
608  {
609  size_t which; //!< index of the matching pattern within the searched sequence, or NONE
610  size_t pos; //!< position in the string where the match begins, or npos
611  inline operator bool() const { return which != NONE && pos != npos; }
612  };
613  // find the first occurrence of any of the given patterns; each overload packs its arguments into an array and forwards to first_of_any_iter()
614  first_of_any_result first_of_any(ro_substr s0, ro_substr s1) const
615  {
616  ro_substr s[2] = {s0, s1};
617  return first_of_any_iter(&s[0], &s[0] + 2);
618  }
619 
620  first_of_any_result first_of_any(ro_substr s0, ro_substr s1, ro_substr s2) const
621  {
622  ro_substr s[3] = {s0, s1, s2};
623  return first_of_any_iter(&s[0], &s[0] + 3);
624  }
625 
626  first_of_any_result first_of_any(ro_substr s0, ro_substr s1, ro_substr s2, ro_substr s3) const
627  {
628  ro_substr s[4] = {s0, s1, s2, s3};
629  return first_of_any_iter(&s[0], &s[0] + 4);
630  }
631 
632  first_of_any_result first_of_any(ro_substr s0, ro_substr s1, ro_substr s2, ro_substr s3, ro_substr s4) const
633  {
634  ro_substr s[5] = {s0, s1, s2, s3, s4};
635  return first_of_any_iter(&s[0], &s[0] + 5);
636  }
637 
638  template<class It>
639  first_of_any_result first_of_any_iter(It first_span, It last_span) const
640  {
641  // scan the positions left to right; at each one, try the patterns in the given order
642  for(size_t at = 0; at < len; ++at)
643  {
644  size_t which = 0;
645  for(It span = first_span; span != last_span; ++span, ++which)
646  {
647  auto const& pattern = *span;
648  if((at + pattern.len) > len)
649  continue; // this pattern no longer fits in what remains of the string
650  size_t matched = 0;
651  while(matched < pattern.len)
652  {
653  C4_ASSERT(at + matched < len);
654  if(str[at + matched] != pattern[matched])
655  break;
656  ++matched;
657  }
658  // the whole pattern matched: report its index and the position
659  if(matched == pattern.len)
660  return {which, at};
661  }
662  }
663  // none of the patterns occurs anywhere in the string
664  return {NONE, npos};
665  }
666 
667 public:
668 
669  /** true if the string is not empty and its first character is @p c */
670  bool begins_with(const C c) const
671  {
672  return (len > 0) && (str[0] == c);
673  }
674 
675  /** true if the string has at least @p num characters and the first @p num of them are all @p c */
676  bool begins_with(const C c, size_t num) const
677  {
678  // the string must hold at least num characters
679  if(num > len)
680  return false;
681  // count how many of the leading characters are equal to c
682  size_t same = 0;
683  while(same < num)
684  {
685  if(str[same] != c)
686  break;
687  ++same;
688  }
689  return same == num;
690  }
691 
692  /** true if the given @p pattern is a prefix of the string */
693  bool begins_with(ro_substr pattern) const
694  {
695  if(pattern.len > len)
696  return false; // the pattern cannot fit
697  // length of the common prefix between the string and the pattern
698  size_t same = 0;
699  while(same < pattern.len)
700  {
701  if(str[same] != pattern[same])
702  break;
703  ++same;
704  }
705  // the pattern is a prefix only if it was matched in full
706  return same == pattern.len;
707  }
708 
709  /** true if the string is not empty and its first character is one of the given @p chars */
710  bool begins_with_any(ro_substr chars) const
711  {
712  if(len == 0)
713  return false; // there is no first character
714  C const head = str[0];
715  // look for the first character among the candidates
716  size_t at = 0;
717  while(at < chars.len)
718  {
719  if(chars.str[at] == head)
720  return true;
721  ++at;
722  }
723  return false;
724  }
725 
726  /** true if the string is not empty and its last character is @p c */
727  bool ends_with(const C c) const
728  {
729  return (len > 0) && (str[len-1] == c);
730  }
731 
732  /** true if the string has at least @p num characters and the last @p num of them are all @p c */
733  bool ends_with(const C c, size_t num) const
734  {
735  // the string must hold at least num characters
736  if(num > len)
737  return false;
738  // walk the tail, stopping at the first character that differs from c
739  size_t at = len - num;
740  while(at < len)
741  {
742  if(str[at] != c)
743  break;
744  ++at;
745  }
746  return at == len;
747  }
748 
749  /** true if the given @p pattern is a suffix of the string */
750  bool ends_with(ro_substr pattern) const
751  {
752  if(pattern.len > len)
753  return false; // the pattern cannot fit
754  // position at which the candidate suffix begins
755  const size_t tail = len - pattern.len;
756  size_t same = 0;
757  while(same < pattern.len)
758  {
759  if(str[tail + same] != pattern[same])
760  break;
761  ++same;
762  }
763  return same == pattern.len;
764  }
765 
766  /** true if the string is not empty and its last character is one of the given @p chars */
767  bool ends_with_any(ro_substr chars) const
768  {
769  if(len == 0)
770  return false; // there is no last character
771  C const tail = str[len - 1];
772  // look for the last character among the candidates
773  size_t at = 0;
774  while(at < chars.len)
775  {
776  if(chars[at] == tail)
777  return true;
778  ++at;
779  }
780  return false;
781  }
782 
783 public:
784 
785  /** @return the first position at or after @p start where c is found in the string, or npos if none is found */
786  size_t first_of(const C c, size_t start=0) const
787  {
788  C4_ASSERT(start == npos || (start >= 0 && start <= len));
789  size_t at = start;
790  // advance until c is hit or the string is exhausted
791  while(at < len && str[at] != c)
792  ++at;
793  // (when start is npos the loop does not run, and npos is returned)
794  return at < len ? at : npos;
795  }
796 
797  /** @return the last position before @p start where c is found in the string, or npos if none is found */
798  size_t last_of(const C c, size_t start=npos) const
799  {
800  C4_ASSERT(start == npos || (start >= 0 && start <= len));
801  size_t idx = (start == npos) ? len : start; // npos means: search from the very end
802  while(idx > 0)
803  {
804  --idx; // step to the next candidate on the left
805  if(str[idx] == c)
806  return idx;
807  }
808  return npos;
809  }
810 
811  /** @return the first position at or after @p start where ANY of the chars is found in the string, or npos if none is found */
812  size_t first_of(ro_substr chars, size_t start=0) const
813  {
814  C4_ASSERT(start == npos || (start >= 0 && start <= len));
815  for(size_t at = start; at < len; ++at)
816  {
817  C const curr = str[at]; // the character under test
818  for(size_t which = 0; which != chars.len; ++which)
819  {
820  if(chars[which] == curr) return at;
821  }
822  }
823  return npos;
824  }
825 
826  /** @return the last position before @p start where ANY of the chars is found in the string, or npos if none is found */
827  size_t last_of(ro_substr chars, size_t start=npos) const
828  {
829  C4_ASSERT(start == npos || (start >= 0 && start <= len));
830  size_t idx = (start == npos) ? len : start; // npos means: search from the very end
831  while(idx > 0)
832  {
833  --idx; // step to the next candidate on the left
834  C const curr = str[idx];
835  for(size_t which = 0; which != chars.len; ++which)
836  {
837  if(chars[which] == curr) return idx;
838  }
839  }
840  return npos;
841  }
842 
843 public:
844  /** @return the first position holding a character different from c, or npos if there is none */
845  size_t first_not_of(const C c) const
846  {
847  size_t at = 0;
848  // skip the leading run of c
849  while(at < len && str[at] == c)
850  ++at;
851  // at == len means that every character is c, or that the string is empty
852  return at < len ? at : npos;
853  }
854  /** @return the first position at or after @p start holding a character different from c, or npos if there is none */
855  size_t first_not_of(const C c, size_t start) const
856  {
857  C4_ASSERT((start >= 0 && start <= len) || (start == len && len == 0));
858  size_t at = start;
859  // skip the run of c that begins at start
860  while(at < len && str[at] == c)
861  ++at;
862  // at >= len means that no other character was found
863  return at < len ? at : npos;
864  }
865  /** @return the last position holding a character different from c, or npos if there is none */
866  size_t last_not_of(const C c) const
867  {
868  for(size_t idx = len; idx-- > 0; ) // idx visits len-1 down to 0
869  {
870  if(str[idx] != c)
871  return idx;
872  }
873  return npos;
874  }
875  /** @return the last position before @p start holding a character different from c, or npos if there is none */
876  size_t last_not_of(const C c, size_t start) const
877  {
878  C4_ASSERT(start == npos || (start >= 0 && start <= len));
879  // npos means: search from the very end
880  if(start == npos) start = len;
881  for(size_t idx = start; idx-- > 0; )
882  {
883  if(str[idx] != c)
884  return idx;
885  }
886  return npos;
887  }
888  /** @return the first position holding a character that is NONE of the given chars, or npos if there is none */
889  size_t first_not_of(ro_substr chars) const
890  {
891  for(size_t at = 0; at < len; ++at)
892  {
893  // search str[at] among the given characters
894  size_t which = 0;
895  while(which < chars.len)
896  {
897  if(str[at] == chars.str[which])
898  break;
899  ++which;
900  }
901  // the search ran to the end: str[at] is none of the characters
902  if(which == chars.len)
903  {
904  return at;
905  }
906  }
907  return npos;
908  }
909  /** @return the first position at or after @p start holding a character that is NONE of the given chars, or npos if there is none */
910  size_t first_not_of(ro_substr chars, size_t start) const
911  {
912  C4_ASSERT((start >= 0 && start <= len) || (start == len && len == 0));
913  for(size_t at = start; at < len; ++at)
914  {
915  // search str[at] among the given characters
916  size_t which = 0;
917  while(which < chars.len)
918  {
919  if(str[at] == chars.str[which])
920  break;
921  ++which;
922  }
923  // the search ran to the end: str[at] is none of the characters
924  if(which == chars.len)
925  {
926  return at;
927  }
928  }
929  return npos;
930  }
931  /** @return the last position holding a character that is NONE of the given chars, or npos if there is none */
932  size_t last_not_of(ro_substr chars) const
933  {
934  for(size_t idx = len; idx-- > 0; ) // idx visits len-1 down to 0
935  {
936  // search str[idx] among the given characters
937  size_t which = 0;
938  while(which < chars.len)
939  {
940  if(str[idx] == chars.str[which])
941  break;
942  ++which;
943  }
944  // the search ran to the end: str[idx] is none of the characters
945  if(which == chars.len)
946  {
947  return idx;
948  }
949  }
950  return npos;
951  }
952  /** @return the last position before @p start holding a character that is NONE of the given chars, or npos if there is none */
953  size_t last_not_of(ro_substr chars, size_t start) const
954  {
955  C4_ASSERT(start == npos || (start >= 0 && start <= len));
956  // npos means: search from the very end
957  if(start == npos) start = len;
958  for(size_t idx = start; idx-- > 0; )
959  {
960  // search str[idx] among the given characters
961  size_t which = 0;
962  while(which < chars.len)
963  {
964  if(str[idx] == chars.str[which])
965  break;
966  ++which;
967  }
968  // the search ran to the end: str[idx] is none of the characters
969  if(which == chars.len)
970  {
971  return idx;
972  }
973  }
974  return npos;
975  }
976 
977  /** @} */
978 
979 public:
980 
981  /** @name Range lookup methods */
982  /** @{ */
983 
984  /** get the range delimited by the first @p open character and the first @p close character after it; both delimiters are part of the result.
985  * @note There must be no nested pairs.
986  * @note No checks for escapes are performed. */
987  basic_substring pair_range(CC open, CC close) const
988  {
989  const size_t opening = find(open);
990  if(opening == npos)
991  return basic_substring(); // there is no opening character
992  const size_t closing = find(close, opening+1);
993  if(closing == npos)
994  return basic_substring(); // the pair is never closed
995  basic_substring span = range(opening, closing+1); // both delimiters are included
996  C4_ASSERT(span.sub(1).find(open) == npos);
997  return span;
998  }
999 
1000  /** get the range delimited by a single open-close character (eg, quotes); both delimiters are part of the result.
1001  * @note The open-close character can be escaped: an occurrence directly preceded by @p escape does not close the range. The escape character itself is not checked for being escaped. */
1002  basic_substring pair_range_esc(CC open_close, CC escape=CC('\\')) const
1003  {
1004  size_t b = find(open_close);
1005  if(b == npos) return basic_substring();
1006  for(size_t i = b+1; i < len; ++i)
1007  {
1008  CC c = str[i];
1009  if(c == open_close)
1010  {
1011  if(str[i-1] != escape) // i > b, so i-1 is always a valid index
1012  {
1013  return range(b, i+1);
1014  }
1015  }
1016  }
1017  return basic_substring(); // opened but never closed
1018  }
1019 
1020  /** get the range delimited by an open-close pair of characters,
1021  * with possibly nested occurrences. No checks for escapes are
1022  * performed. Returns a default-constructed (null) substr when the first open character is never matched. */
1023  basic_substring pair_range_nested(CC open, CC close) const
1024  {
1025  size_t b = find(open);
1026  if(b == npos) return basic_substring();
1027  size_t e, curr = b+1, count = 0; // count is the current nesting depth below the outermost pair
1028  const char both[] = {open, close, '\0'};
1029  while((e = first_of(both, curr)) != npos)
1030  {
1031  if(str[e] == open)
1032  {
1033  ++count;
1034  curr = e+1;
1035  }
1036  else if(str[e] == close)
1037  {
1038  if(count == 0) return range(b, e+1); // this close matches the outermost open
1039  --count;
1040  curr = e+1;
1041  }
1042  }
1043  return basic_substring();
1044  }
1045 
// Strips one pair of matching surrounding quotes: when the string has at
// least two characters, both starts and ends with the same quote (single
// or double), and the character before the last one is not a backslash,
// the interior range is returned; otherwise the string is returned
// unchanged.
// NOTE(review): the declaration line of this function is absent from this
// listing; presumably it is a const member returning basic_substring -
// confirm against the original header.
1047  {
1048  constexpr const C dq('"'), sq('\'');
      // str[len - 2] is the char right before the last one: a backslash
      // there means the closing quote is escaped
1049  if(len >= 2 && (str[len - 2] != C('\\')) &&
1050  ((begins_with(sq) && ends_with(sq))
1051  ||
1052  (begins_with(dq) && ends_with(dq))))
1053  {
1054  return range(1, len -1);
1055  }
1056  return *this;
1057  }
1058 
1059  /** @} */
1060 
1061 public:
1062 
1063  /** @name Number-matching query methods */
1064  /** @{ */
1065 
1066  /** @return true if the substring contents are a floating-point or integer number.
1067  * @note any leading or trailing whitespace will return false. */
1068  bool is_number() const
1069  {
1070  if(empty() || (first_non_empty_span().empty()))
1071  return false;
1072  if(first_uint_span() == *this)
1073  return true;
1074  if(first_int_span() == *this)
1075  return true;
1076  if(first_real_span() == *this)
1077  return true;
1078  return false;
1079  }
1080 
1081  /** @return true if the substring contents are a real number.
1082  * @note any leading or trailing whitespace will return false. */
1083  bool is_real() const
1084  {
1085  if(empty() || (first_non_empty_span().empty()))
1086  return false;
1087  if(first_real_span() == *this)
1088  return true;
1089  return false;
1090  }
1091 
1092  /** @return true if the substring contents are an integer number.
1093  * @note any leading or trailing whitespace will return false. */
1094  bool is_integer() const
1095  {
1096  if(empty() || (first_non_empty_span().empty()))
1097  return false;
1098  if(first_uint_span() == *this)
1099  return true;
1100  if(first_int_span() == *this)
1101  return true;
1102  return false;
1103  }
1104 
1105  /** @return true if the substring contents are an unsigned integer number.
1106  * @note any leading or trailing whitespace will return false. */
1107  bool is_unsigned_integer() const
1108  {
1109  if(empty() || (first_non_empty_span().empty()))
1110  return false;
1111  if(first_uint_span() == *this)
1112  return true;
1113  return false;
1114  }
1115 
1116  /** get the first span consisting exclusively of non-empty characters */
1118  {
1119  constexpr const ro_substr empty_chars(" \n\r\t");
1120  size_t pos = first_not_of(empty_chars);
1121  if(pos == npos)
1122  return first(0);
1123  auto ret = sub(pos);
1124  pos = ret.first_of(empty_chars);
1125  return ret.first(pos);
1126  }
1127 
1128  /** get the first span which can be interpreted as an unsigned integer */
1130  {
1131  basic_substring ne = first_non_empty_span();
1132  if(ne.empty())
1133  return ne;
1134  if(ne.str[0] == '-')
1135  return first(0);
1136  size_t skip_start = size_t(ne.str[0] == '+');
1137  return ne._first_integral_span(skip_start);
1138  }
1139 
1140  /** get the first span which can be interpreted as a signed integer */
1142  {
1143  basic_substring ne = first_non_empty_span();
1144  if(ne.empty())
1145  return ne;
1146  size_t skip_start = size_t(ne.str[0] == '+' || ne.str[0] == '-');
1147  return ne._first_integral_span(skip_start);
1148  }
1149 
1150  basic_substring _first_integral_span(size_t skip_start) const
1151  {
1152  C4_ASSERT(!empty());
1153  if(skip_start == len)
1154  return first(0);
1155  C4_ASSERT(skip_start < len);
1156  if(len >= skip_start + 3)
1157  {
1158  if(str[skip_start] != '0')
1159  {
1160  for(size_t i = skip_start; i < len; ++i)
1161  {
1162  char c = str[i];
1163  if(c < '0' || c > '9')
1164  return i > skip_start && _is_delim_char(c) ? first(i) : first(0);
1165  }
1166  }
1167  else
1168  {
1169  char next = str[skip_start + 1];
1170  if(next == 'x' || next == 'X')
1171  {
1172  skip_start += 2;
1173  for(size_t i = skip_start; i < len; ++i)
1174  {
1175  const char c = str[i];
1176  if( ! _is_hex_char(c))
1177  return i > skip_start && _is_delim_char(c) ? first(i) : first(0);
1178  }
1179  return *this;
1180  }
1181  else if(next == 'b' || next == 'B')
1182  {
1183  skip_start += 2;
1184  for(size_t i = skip_start; i < len; ++i)
1185  {
1186  const char c = str[i];
1187  if(c != '0' && c != '1')
1188  return i > skip_start && _is_delim_char(c) ? first(i) : first(0);
1189  }
1190  return *this;
1191  }
1192  else if(next == 'o' || next == 'O')
1193  {
1194  skip_start += 2;
1195  for(size_t i = skip_start; i < len; ++i)
1196  {
1197  const char c = str[i];
1198  if(c < '0' || c > '7')
1199  return i > skip_start && _is_delim_char(c) ? first(i) : first(0);
1200  }
1201  return *this;
1202  }
1203  }
1204  }
1205  // must be a decimal, or it is not a an number
1206  for(size_t i = skip_start; i < len; ++i)
1207  {
1208  const char c = str[i];
1209  if(c < '0' || c > '9')
1210  return i > skip_start && _is_delim_char(c) ? first(i) : first(0);
1211  }
1212  return *this;
1213  }
1214 
1215  /** get the first span which can be interpreted as a real (floating-point) number */
1217  {
1218  basic_substring ne = first_non_empty_span();
1219  if(ne.empty())
1220  return ne;
1221  const size_t skip_start = (ne.str[0] == '+' || ne.str[0] == '-');
1222  C4_ASSERT(skip_start == 0 || skip_start == 1);
1223  // if we have at least three digits after the leading sign, it
1224  // can be decimal, or hex, or bin or oct. Ex:
1225  // non-decimal: 0x0, 0b0, 0o0
1226  // decimal: 1.0, 10., 1e1, 100, inf, nan, infinity
1227  if(ne.len >= skip_start+3)
1228  {
1229  // if it does not have leading 0, it must be decimal, or it is not a real
1230  if(ne.str[skip_start] != '0')
1231  {
1232  if(ne.str[skip_start] == 'i') // is it infinity or inf?
1233  {
1234  basic_substring word = ne._word_follows(skip_start + 1, "nfinity");
1235  if(word.len)
1236  return word;
1237  return ne._word_follows(skip_start + 1, "nf");
1238  }
1239  else if(ne.str[skip_start] == 'n') // is it nan?
1240  {
1241  return ne._word_follows(skip_start + 1, "an");
1242  }
1243  else // must be a decimal, or it is not a real
1244  {
1245  return ne._first_real_span_dec(skip_start);
1246  }
1247  }
1248  else // starts with 0. is it 0x, 0b or 0o?
1249  {
1250  const char next = ne.str[skip_start + 1];
1251  // hexadecimal
1252  if(next == 'x' || next == 'X')
1253  return ne._first_real_span_hex(skip_start + 2);
1254  // binary
1255  else if(next == 'b' || next == 'B')
1256  return ne._first_real_span_bin(skip_start + 2);
1257  // octal
1258  else if(next == 'o' || next == 'O')
1259  return ne._first_real_span_oct(skip_start + 2);
1260  // none of the above. may still be a decimal.
1261  else
1262  return ne._first_real_span_dec(skip_start); // do not skip the 0.
1263  }
1264  }
1265  // less than 3 chars after the leading sign. It is either a
1266  // decimal or it is not a real. (cannot be any of 0x0, etc).
1267  return ne._first_real_span_dec(skip_start);
1268  }
1269 
1270  /** true if the character is a delimiter character *at the end* */
1271  static constexpr C4_ALWAYS_INLINE C4_CONST bool _is_delim_char(char c) noexcept
1272  {
1273  return c == ' ' || c == '\n'
1274  || c == ']' || c == ')' || c == '}'
1275  || c == ',' || c == ';' || c == '\r' || c == '\t' || c == '\0';
1276  }
1277 
1278  /** true if the character is in [0-9a-fA-F] */
1279  static constexpr C4_ALWAYS_INLINE C4_CONST bool _is_hex_char(char c) noexcept
1280  {
1281  return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
1282  }
1283 
1284  C4_NO_INLINE C4_PURE basic_substring _word_follows(size_t pos, csubstr word) const noexcept
1285  {
1286  size_t posend = pos + word.len;
1287  if(len >= posend && sub(pos, word.len) == word)
1288  if(len == posend || _is_delim_char(str[posend]))
1289  return first(posend);
1290  return first(0);
1291  }
1292 
1293  // this function is declared inside the class to avoid a VS error with __declspec(dllimport)
      //
      // Scan a decimal real number starting at pos (ie, after any sign). The
      // scan goes through up to three parts: integral digits, fractional
      // digits after a '.', and an exponent after 'e'/'E' with an optional
      // sign. Returns the whole string when the number runs to the end, the
      // leading part up to a delimiter char when the number is terminated by
      // one, and a zero-length substring when this is not a number.
1294  C4_NO_INLINE C4_PURE basic_substring _first_real_span_dec(size_t pos) const noexcept
1295  {
1296  bool intchars = false;
1297  bool fracchars = false;
      // left uninitialized on purpose: the gotos below jump past this point
      // and it is assigned in the power part before being read
1298  bool powchars;
1299  // integral part
1300  for( ; pos < len; ++pos)
1301  {
1302  const char c = str[pos];
1303  if(c >= '0' && c <= '9')
1304  {
1305  intchars = true;
1306  }
1307  else if(c == '.')
1308  {
1309  ++pos;
1310  goto fractional_part_dec;
1311  }
1312  else if(c == 'e' || c == 'E')
1313  {
1314  ++pos;
1315  goto power_part_dec;
1316  }
1317  else if(_is_delim_char(c))
1318  {
1319  return intchars ? first(pos) : first(0);
1320  }
1321  else
1322  {
1323  return first(0);
1324  }
1325  }
1326  // no . or e were found; this is either an integral number
1327  // or not a number at all
1328  return intchars ?
1329  *this :
1330  first(0);
1331  fractional_part_dec:
1332  C4_ASSERT(pos > 0);
1333  C4_ASSERT(str[pos - 1] == '.');
1334  for( ; pos < len; ++pos)
1335  {
1336  const char c = str[pos];
1337  if(c >= '0' && c <= '9')
1338  {
1339  fracchars = true;
1340  }
1341  else if(c == 'e' || c == 'E')
1342  {
1343  ++pos;
1344  goto power_part_dec;
1345  }
1346  else if(_is_delim_char(c))
1347  {
      // digits on either side of the dot are enough
1348  return intchars || fracchars ? first(pos) : first(0);
1349  }
1350  else
1351  {
1352  return first(0);
1353  }
1354  }
1355  return intchars || fracchars ?
1356  *this :
1357  first(0);
1358  power_part_dec:
1359  C4_ASSERT(pos > 0);
1360  C4_ASSERT(str[pos - 1] == 'e' || str[pos - 1] == 'E');
1361  // either digits, or +, or - are expected here, followed by more digits.
      // reject when nothing follows the e, or when no digits preceded it
1362  if((len == pos) || ((!intchars) && (!fracchars)))
1363  return first(0);
1364  if(str[pos] == '-' || str[pos] == '+')
1365  ++pos; // skip the sign
1366  powchars = false;
1367  for( ; pos < len; ++pos)
1368  {
1369  const char c = str[pos];
1370  if(c >= '0' && c <= '9')
1371  powchars = true;
1372  else if(powchars && _is_delim_char(c))
1373  return first(pos);
1374  else
1375  return first(0);
1376  }
      // the exponent needs at least one digit
1377  return powchars ? *this : first(0);
1378  }
1379 
1380  // this function is declared inside the class to avoid a VS error with __declspec(dllimport)
1381  C4_NO_INLINE C4_PURE basic_substring _first_real_span_hex(size_t pos) const noexcept
1382  {
1383  bool intchars = false;
1384  bool fracchars = false;
1385  bool powchars;
1386  // integral part
1387  for( ; pos < len; ++pos)
1388  {
1389  const char c = str[pos];
1390  if(_is_hex_char(c))
1391  {
1392  intchars = true;
1393  }
1394  else if(c == '.')
1395  {
1396  ++pos;
1397  goto fractional_part_hex;
1398  }
1399  else if(c == 'p' || c == 'P')
1400  {
1401  ++pos;
1402  goto power_part_hex;
1403  }
1404  else if(_is_delim_char(c))
1405  {
1406  return intchars ? first(pos) : first(0);
1407  }
1408  else
1409  {
1410  return first(0);
1411  }
1412  }
1413  // no . or p were found; this is either an integral number
1414  // or not a number at all
1415  return intchars ?
1416  *this :
1417  first(0);
1418  fractional_part_hex:
1419  C4_ASSERT(pos > 0);
1420  C4_ASSERT(str[pos - 1] == '.');
1421  for( ; pos < len; ++pos)
1422  {
1423  const char c = str[pos];
1424  if(_is_hex_char(c))
1425  {
1426  fracchars = true;
1427  }
1428  else if(c == 'p' || c == 'P')
1429  {
1430  ++pos;
1431  goto power_part_hex;
1432  }
1433  else if(_is_delim_char(c))
1434  {
1435  return intchars || fracchars ? first(pos) : first(0);
1436  }
1437  else
1438  {
1439  return first(0);
1440  }
1441  }
1442  return intchars || fracchars ?
1443  *this :
1444  first(0);
1445  power_part_hex:
1446  C4_ASSERT(pos > 0);
1447  C4_ASSERT(str[pos - 1] == 'p' || str[pos - 1] == 'P');
1448  // either a + or a - is expected here, followed by more chars.
1449  // also, using (pos+1) in this check will cause an early
1450  // return when no more chars follow the sign.
1451  if(len <= (pos+1) || (str[pos] != '+' && str[pos] != '-') || ((!intchars) && (!fracchars)))
1452  return first(0);
1453  ++pos; // this was the sign.
1454  // ... so the (pos+1) ensures that we enter the loop and
1455  // hence that there exist chars in the power part
1456  powchars = false;
1457  for( ; pos < len; ++pos)
1458  {
1459  const char c = str[pos];
1460  if(c >= '0' && c <= '9')
1461  powchars = true;
1462  else if(powchars && _is_delim_char(c))
1463  return first(pos);
1464  else
1465  return first(0);
1466  }
1467  return *this;
1468  }
1469 
1470  // this function is declared inside the class to avoid a VS error with __declspec(dllimport)
1471  C4_NO_INLINE C4_PURE basic_substring _first_real_span_bin(size_t pos) const noexcept
1472  {
1473  bool intchars = false;
1474  bool fracchars = false;
1475  bool powchars;
1476  // integral part
1477  for( ; pos < len; ++pos)
1478  {
1479  const char c = str[pos];
1480  if(c == '0' || c == '1')
1481  {
1482  intchars = true;
1483  }
1484  else if(c == '.')
1485  {
1486  ++pos;
1487  goto fractional_part_bin;
1488  }
1489  else if(c == 'p' || c == 'P')
1490  {
1491  ++pos;
1492  goto power_part_bin;
1493  }
1494  else if(_is_delim_char(c))
1495  {
1496  return intchars ? first(pos) : first(0);
1497  }
1498  else
1499  {
1500  return first(0);
1501  }
1502  }
1503  // no . or p were found; this is either an integral number
1504  // or not a number at all
1505  return intchars ?
1506  *this :
1507  first(0);
1508  fractional_part_bin:
1509  C4_ASSERT(pos > 0);
1510  C4_ASSERT(str[pos - 1] == '.');
1511  for( ; pos < len; ++pos)
1512  {
1513  const char c = str[pos];
1514  if(c == '0' || c == '1')
1515  {
1516  fracchars = true;
1517  }
1518  else if(c == 'p' || c == 'P')
1519  {
1520  ++pos;
1521  goto power_part_bin;
1522  }
1523  else if(_is_delim_char(c))
1524  {
1525  return intchars || fracchars ? first(pos) : first(0);
1526  }
1527  else
1528  {
1529  return first(0);
1530  }
1531  }
1532  return intchars || fracchars ?
1533  *this :
1534  first(0);
1535  power_part_bin:
1536  C4_ASSERT(pos > 0);
1537  C4_ASSERT(str[pos - 1] == 'p' || str[pos - 1] == 'P');
1538  // either a + or a - is expected here, followed by more chars.
1539  // also, using (pos+1) in this check will cause an early
1540  // return when no more chars follow the sign.
1541  if(len <= (pos+1) || (str[pos] != '+' && str[pos] != '-') || ((!intchars) && (!fracchars)))
1542  return first(0);
1543  ++pos; // this was the sign.
1544  // ... so the (pos+1) ensures that we enter the loop and
1545  // hence that there exist chars in the power part
1546  powchars = false;
1547  for( ; pos < len; ++pos)
1548  {
1549  const char c = str[pos];
1550  if(c >= '0' && c <= '9')
1551  powchars = true;
1552  else if(powchars && _is_delim_char(c))
1553  return first(pos);
1554  else
1555  return first(0);
1556  }
1557  return *this;
1558  }
1559 
1560  // this function is declared inside the class to avoid a VS error with __declspec(dllimport)
1561  C4_NO_INLINE C4_PURE basic_substring _first_real_span_oct(size_t pos) const noexcept
1562  {
1563  bool intchars = false;
1564  bool fracchars = false;
1565  bool powchars;
1566  // integral part
1567  for( ; pos < len; ++pos)
1568  {
1569  const char c = str[pos];
1570  if(c >= '0' && c <= '7')
1571  {
1572  intchars = true;
1573  }
1574  else if(c == '.')
1575  {
1576  ++pos;
1577  goto fractional_part_oct;
1578  }
1579  else if(c == 'p' || c == 'P')
1580  {
1581  ++pos;
1582  goto power_part_oct;
1583  }
1584  else if(_is_delim_char(c))
1585  {
1586  return intchars ? first(pos) : first(0);
1587  }
1588  else
1589  {
1590  return first(0);
1591  }
1592  }
1593  // no . or p were found; this is either an integral number
1594  // or not a number at all
1595  return intchars ?
1596  *this :
1597  first(0);
1598  fractional_part_oct:
1599  C4_ASSERT(pos > 0);
1600  C4_ASSERT(str[pos - 1] == '.');
1601  for( ; pos < len; ++pos)
1602  {
1603  const char c = str[pos];
1604  if(c >= '0' && c <= '7')
1605  {
1606  fracchars = true;
1607  }
1608  else if(c == 'p' || c == 'P')
1609  {
1610  ++pos;
1611  goto power_part_oct;
1612  }
1613  else if(_is_delim_char(c))
1614  {
1615  return intchars || fracchars ? first(pos) : first(0);
1616  }
1617  else
1618  {
1619  return first(0);
1620  }
1621  }
1622  return intchars || fracchars ?
1623  *this :
1624  first(0);
1625  power_part_oct:
1626  C4_ASSERT(pos > 0);
1627  C4_ASSERT(str[pos - 1] == 'p' || str[pos - 1] == 'P');
1628  // either a + or a - is expected here, followed by more chars.
1629  // also, using (pos+1) in this check will cause an early
1630  // return when no more chars follow the sign.
1631  if(len <= (pos+1) || (str[pos] != '+' && str[pos] != '-') || ((!intchars) && (!fracchars)))
1632  return first(0);
1633  ++pos; // this was the sign.
1634  // ... so the (pos+1) ensures that we enter the loop and
1635  // hence that there exist chars in the power part
1636  powchars = false;
1637  for( ; pos < len; ++pos)
1638  {
1639  const char c = str[pos];
1640  if(c >= '0' && c <= '9')
1641  powchars = true;
1642  else if(powchars && _is_delim_char(c))
1643  return first(pos);
1644  else
1645  return first(0);
1646  }
1647  return *this;
1648  }
1649 
1650  /** @} */
1651 
1652 public:
1653 
1654  /** @name Splitting methods */
1655  /** @{ */
1656 
1657  /** returns true if the string has not been exhausted yet, meaning
1658  * it's ok to call next_split() again. When no instance of sep
1659  * exists in the string, returns the full string. When the input
1660  * is an empty string, the output string is the empty string. */
1661  bool next_split(C sep, size_t *C4_RESTRICT start_pos, basic_substring *C4_RESTRICT out) const
1662  {
1663  if(C4_LIKELY(*start_pos < len))
1664  {
1665  for(size_t i = *start_pos; i < len; i++)
1666  {
1667  if(str[i] == sep)
1668  {
1669  out->assign(str + *start_pos, i - *start_pos);
1670  *start_pos = i+1;
1671  return true;
1672  }
1673  }
1674  out->assign(str + *start_pos, len - *start_pos);
1675  *start_pos = len + 1;
1676  return true;
1677  }
1678  else
1679  {
1680  bool valid = len > 0 && (*start_pos == len);
1681  if(valid && str && str[len-1] == sep)
1682  {
1683  out->assign(str + len, size_t(0)); // the cast is needed to prevent overload ambiguity
1684  }
1685  else
1686  {
1687  out->assign(str + len + 1, size_t(0)); // the cast is needed to prevent overload ambiguity
1688  }
1689  *start_pos = len + 1;
1690  return valid;
1691  }
1692  }
1693 
1694 private:
1695 
1696  struct split_proxy_impl
1697  {
1699  {
1700  split_proxy_impl const* m_proxy;
1702  size_t m_pos;
1704 
1705  split_iterator_impl(split_proxy_impl const* proxy, size_t pos, C sep)
1706  : m_proxy(proxy), m_pos(pos), m_sep(sep)
1707  {
1708  _tick();
1709  }
1710 
1711  void _tick()
1712  {
1713  m_proxy->m_str.next_split(m_sep, &m_pos, &m_str);
1714  }
1715 
1716  split_iterator_impl& operator++ () { _tick(); return *this; }
1717  split_iterator_impl operator++ (int) { split_iterator_impl it = *this; _tick(); return it; }
1718 
1719  basic_substring& operator* () { return m_str; }
1720  basic_substring* operator-> () { return &m_str; }
1721 
1722  bool operator!= (split_iterator_impl const& that) const
1723  {
1724  return !(this->operator==(that));
1725  }
1726  bool operator== (split_iterator_impl const& that) const
1727  {
1728  C4_XASSERT((m_sep == that.m_sep) && "cannot compare split iterators with different separators");
1729  if(m_str.size() != that.m_str.size())
1730  return false;
1731  if(m_str.data() != that.m_str.data())
1732  return false;
1733  return m_pos == that.m_pos;
1734  }
1735  };
1736 
1737  basic_substring m_str;
1738  size_t m_start_pos;
1739  C m_sep;
1740 
1741  split_proxy_impl(basic_substring str_, size_t start_pos, C sep)
1742  : m_str(str_), m_start_pos(start_pos), m_sep(sep)
1743  {
1744  }
1745 
1746  split_iterator_impl begin() const
1747  {
1748  auto it = split_iterator_impl(this, m_start_pos, m_sep);
1749  return it;
1750  }
1751  split_iterator_impl end() const
1752  {
1753  size_t pos = m_str.size() + 1;
1754  auto it = split_iterator_impl(this, pos, m_sep);
1755  return it;
1756  }
1757  };
1758 
1759 public:
1760 
1761  using split_proxy = split_proxy_impl;
1762 
1763  /** a view into the splits */
1764  split_proxy split(C sep, size_t start_pos=0) const
1765  {
1766  C4_XASSERT((start_pos >= 0 && start_pos < len) || empty());
1767  auto ss = sub(0, len);
1768  auto it = split_proxy(ss, start_pos, sep);
1769  return it;
1770  }
1771 
1772 public:
1773 
1774  /** pop right: return the first split from the right. Use
1775  * gpop_left() to get the reciprocal part.
1776  */
1777  basic_substring pop_right(C sep=C('/'), bool skip_empty=false) const
1778  {
1779  if(C4_LIKELY(len > 1))
1780  {
1781  auto pos = last_of(sep);
1782  if(pos != npos)
1783  {
1784  if(pos + 1 < len) // does not end with sep
1785  {
1786  return sub(pos + 1); // return from sep to end
1787  }
1788  else // the string ends with sep
1789  {
1790  if( ! skip_empty)
1791  {
1792  return sub(pos + 1, 0);
1793  }
1794  auto ppos = last_not_of(sep); // skip repeated seps
1795  if(ppos == npos) // the string is all made of seps
1796  {
1797  return sub(0, 0);
1798  }
1799  // find the previous sep
1800  auto pos0 = last_of(sep, ppos);
1801  if(pos0 == npos) // only the last sep exists
1802  {
1803  return sub(0); // return the full string (because skip_empty is true)
1804  }
1805  ++pos0;
1806  return sub(pos0);
1807  }
1808  }
1809  else // no sep was found, return the full string
1810  {
1811  return *this;
1812  }
1813  }
1814  else if(len == 1)
1815  {
1816  if(begins_with(sep))
1817  {
1818  return sub(0, 0);
1819  }
1820  return *this;
1821  }
1822  else // an empty string
1823  {
1824  return basic_substring();
1825  }
1826  }
1827 
1828  /** return the first split from the left. Use gpop_right() to get
1829  * the reciprocal part. */
1830  basic_substring pop_left(C sep = C('/'), bool skip_empty=false) const
1831  {
1832  if(C4_LIKELY(len > 1))
1833  {
1834  auto pos = first_of(sep);
1835  if(pos != npos)
1836  {
1837  if(pos > 0) // does not start with sep
1838  {
1839  return sub(0, pos); // return everything up to it
1840  }
1841  else // the string starts with sep
1842  {
1843  if( ! skip_empty)
1844  {
1845  return sub(0, 0);
1846  }
1847  auto ppos = first_not_of(sep); // skip repeated seps
1848  if(ppos == npos) // the string is all made of seps
1849  {
1850  return sub(0, 0);
1851  }
1852  // find the next sep
1853  auto pos0 = first_of(sep, ppos);
1854  if(pos0 == npos) // only the first sep exists
1855  {
1856  return sub(0); // return the full string (because skip_empty is true)
1857  }
1858  C4_XASSERT(pos0 > 0);
1859  // return everything up to the second sep
1860  return sub(0, pos0);
1861  }
1862  }
1863  else // no sep was found, return the full string
1864  {
1865  return sub(0);
1866  }
1867  }
1868  else if(len == 1)
1869  {
1870  if(begins_with(sep))
1871  {
1872  return sub(0, 0);
1873  }
1874  return sub(0);
1875  }
1876  else // an empty string
1877  {
1878  return basic_substring();
1879  }
1880  }
1881 
1882 public:
1883 
1884  /** greedy pop left. eg, csubstr("a/b/c").gpop_left('/')="c" */
1885  basic_substring gpop_left(C sep = C('/'), bool skip_empty=false) const
1886  {
1887  auto ss = pop_right(sep, skip_empty);
1888  ss = left_of(ss);
1889  if(ss.find(sep) != npos)
1890  {
1891  if(ss.ends_with(sep))
1892  {
1893  if(skip_empty)
1894  {
1895  ss = ss.trimr(sep);
1896  }
1897  else
1898  {
1899  ss = ss.sub(0, ss.len-1); // safe to subtract because ends_with(sep) is true
1900  }
1901  }
1902  }
1903  return ss;
1904  }
1905 
1906  /** greedy pop right. eg, csubstr("a/b/c").gpop_right('/')="a" */
1907  basic_substring gpop_right(C sep = C('/'), bool skip_empty=false) const
1908  {
1909  auto ss = pop_left(sep, skip_empty);
1910  ss = right_of(ss);
1911  if(ss.find(sep) != npos)
1912  {
1913  if(ss.begins_with(sep))
1914  {
1915  if(skip_empty)
1916  {
1917  ss = ss.triml(sep);
1918  }
1919  else
1920  {
1921  ss = ss.sub(1);
1922  }
1923  }
1924  }
1925  return ss;
1926  }
1927 
1928  /** @} */
1929 
1930 public:
1931 
1932  /** @name Path-like manipulation methods */
1933  /** @{ */
1934 
1935  basic_substring basename(C sep=C('/')) const
1936  {
1937  auto ss = pop_right(sep, /*skip_empty*/true);
1938  ss = ss.trimr(sep);
1939  return ss;
1940  }
1941 
1942  basic_substring dirname(C sep=C('/')) const
1943  {
1944  auto ss = basename(sep);
1945  ss = ss.empty() ? *this : left_of(ss);
1946  return ss;
1947  }
1948 
/** the name without its short extension: delegates to gpop_left('.'),
 * ie everything to the left of the last '.'-separated split */
1949  C4_ALWAYS_INLINE basic_substring name_wo_extshort() const
1950  {
1951  return gpop_left('.');
1952  }
1953 
      /** the name without its long extension: delegates to pop_left('.'),
       * ie the first '.'-separated split from the left */
1954  C4_ALWAYS_INLINE basic_substring name_wo_extlong() const
1955  {
1956  return pop_left('.');
1957  }
1958 
      /** the short extension: delegates to pop_right('.'), ie the first
       * '.'-separated split from the right */
1959  C4_ALWAYS_INLINE basic_substring extshort() const
1960  {
1961  return pop_right('.');
1962  }
1963 
      /** the long extension: delegates to gpop_right('.'), ie everything
       * to the right of the first '.'-separated split */
1964  C4_ALWAYS_INLINE basic_substring extlong() const
1965  {
1966  return gpop_right('.');
1967  }
1968 
1969  /** @} */
1970 
1971 public:
1972 
1973  /** @name Content-modification methods (only for non-const C) */
1974  /** @{ */
1975 
1976  /** convert the string to upper-case
1977  * @note this method requires that the string memory is writeable and is SFINAEd out for const C */
1978  C4_REQUIRE_RW(void) toupper()
1979  {
1980  for(size_t i = 0; i < len; ++i)
1981  {
1982  str[i] = static_cast<C>(::toupper(str[i]));
1983  }
1984  }
1985 
1986  /** convert the string to lower-case
1987  * @note this method requires that the string memory is writeable and is SFINAEd out for const C */
1988  C4_REQUIRE_RW(void) tolower()
1989  {
1990  for(size_t i = 0; i < len; ++i)
1991  {
1992  str[i] = static_cast<C>(::tolower(str[i]));
1993  }
1994  }
1995 
1996 public:
1997 
1998  /** fill the entire contents with the given @p val
1999  * @note this method requires that the string memory is writeable and is SFINAEd out for const C */
2000  C4_REQUIRE_RW(void) fill(C val)
2001  {
2002  for(size_t i = 0; i < len; ++i)
2003  {
2004  str[i] = val;
2005  }
2006  }
2007 
2008 public:
2009 
2010  /** set the current substring to a copy of the given csubstr
2011  * @note this method requires that the string memory is writeable and is SFINAEd out for const C */
2012  C4_REQUIRE_RW(void) copy_from(ro_substr that, size_t ifirst=0, size_t num=npos)
2013  {
2014  C4_ASSERT(ifirst >= 0 && ifirst <= len);
2015  num = num != npos ? num : len - ifirst;
2016  num = num < that.len ? num : that.len;
2017  C4_ASSERT(ifirst + num >= 0 && ifirst + num <= len);
2018  // calling memcpy with null strings is undefined behavior
2019  // and will wreak havoc in calling code's branches.
2020  // see https://github.com/biojppm/rapidyaml/pull/264#issuecomment-1262133637
2021  if(num)
2022  memcpy(str + sizeof(C) * ifirst, that.str, sizeof(C) * num);
2023  }
2024 
2025 public:
2026 
2027  /** reverse in place
2028  * @note this method requires that the string memory is writeable and is SFINAEd out for const C */
2029  C4_REQUIRE_RW(void) reverse()
2030  {
2031  if(len == 0) return;
2032  detail::_do_reverse(str, str + len - 1);
2033  }
2034 
2035  /** revert a subpart in place
2036  * @note this method requires that the string memory is writeable and is SFINAEd out for const C */
2037  C4_REQUIRE_RW(void) reverse_sub(size_t ifirst, size_t num)
2038  {
2039  C4_ASSERT(ifirst >= 0 && ifirst <= len);
2040  C4_ASSERT(ifirst + num >= 0 && ifirst + num <= len);
2041  if(num == 0) return;
2042  detail::_do_reverse(str + ifirst, str + ifirst + num - 1);
2043  }
2044 
2045  /** revert a range in place
2046  * @note this method requires that the string memory is writeable and is SFINAEd out for const C */
2047  C4_REQUIRE_RW(void) reverse_range(size_t ifirst, size_t ilast)
2048  {
2049  C4_ASSERT(ifirst >= 0 && ifirst <= len);
2050  C4_ASSERT(ilast >= 0 && ilast <= len);
2051  if(ifirst == ilast) return;
2052  detail::_do_reverse(str + ifirst, str + ilast - 1);
2053  }
2054 
2055 public:
2056 
2057  /** erase part of the string. eg, with char s[] = "0123456789",
2058  * substr(s).erase(3, 2) = "01256789", and s is now "01245678989"
2059  * @note this method requires that the string memory is writeable and is SFINAEd out for const C */
2060  C4_REQUIRE_RW(basic_substring) erase(size_t pos, size_t num)
2061  {
2062  C4_ASSERT(pos >= 0 && pos+num <= len);
2063  size_t num_to_move = len - pos - num;
2064  memmove(str + pos, str + pos + num, sizeof(C) * num_to_move);
2065  return basic_substring{str, len - num};
2066  }
2067 
2068  /** @note this method requires that the string memory is writeable and is SFINAEd out for const C */
2069  C4_REQUIRE_RW(basic_substring) erase_range(size_t first, size_t last)
2070  {
2071  C4_ASSERT(first <= last);
2072  return erase(first, static_cast<size_t>(last-first));
2073  }
2074 
2075  /** erase a part of the string.
2076  * @note @p sub must be a substring of this string
2077  * @note this method requires that the string memory is writeable and is SFINAEd out for const C */
2078  C4_REQUIRE_RW(basic_substring) erase(ro_substr sub)
2079  {
2080  C4_ASSERT(is_super(sub));
2081  C4_ASSERT(sub.str >= str);
2082  return erase(static_cast<size_t>(sub.str - str), sub.len);
2083  }
2084 
2085 public:
2086 
2087  /** replace every occurrence of character @p value with the character @p repl
2088  * @return the number of characters that were replaced
2089  * @note this method requires that the string memory is writeable and is SFINAEd out for const C */
2090  C4_REQUIRE_RW(size_t) replace(C value, C repl, size_t pos=0)
2091  {
2092  C4_ASSERT((pos >= 0 && pos <= len) || pos == npos);
2093  size_t did_it = 0;
2094  while((pos = find(value, pos)) != npos)
2095  {
2096  str[pos++] = repl;
2097  ++did_it;
2098  }
2099  return did_it;
2100  }
2101 
2102  /** replace every occurrence of each character in @p value with
2103  * the character @p repl.
2104  * @return the number of characters that were replaced
2105  * @note this method requires that the string memory is writeable and is SFINAEd out for const C */
2106  C4_REQUIRE_RW(size_t) replace(ro_substr chars, C repl, size_t pos=0)
2107  {
2108  C4_ASSERT((pos >= 0 && pos <= len) || pos == npos);
2109  size_t did_it = 0;
2110  while((pos = first_of(chars, pos)) != npos)
2111  {
2112  str[pos++] = repl;
2113  ++did_it;
2114  }
2115  return did_it;
2116  }
2117 
2118  /** replace @p pattern with @p repl, and write the result into
2119  * @p dst. pattern and repl don't need equal sizes. The characters
      * before @p pos are copied as they are; the search for @p pattern
      * starts at @p pos.
2120  *
2121  * @return the required size for dst. No overflow occurs if
2122  * dst.len is smaller than the required size; this can be used to
2123  * determine the required size for an existing container. */
2124  size_t replace_all(rw_substr dst, ro_substr pattern, ro_substr repl, size_t pos=0) const
2125  {
2126  C4_ASSERT( ! pattern.empty()); //!< @todo relax this precondition
2127  C4_ASSERT( ! this ->overlaps(dst)); //!< @todo relax this precondition
2128  C4_ASSERT( ! pattern.overlaps(dst));
2129  C4_ASSERT( ! repl .overlaps(dst));
      // NOTE(review): this assertion admits pos == npos, but the first
      // append below then forms str + npos - confirm that callers never
      // pass npos here.
2130  C4_ASSERT((pos >= 0 && pos <= len) || pos == npos);
2131  C4_SUPPRESS_WARNING_GCC_PUSH
2132  C4_SUPPRESS_WARNING_GCC("-Warray-bounds") // gcc11 has a false positive here
2133  #if (!defined(__clang__)) && (defined(__GNUC__) && (__GNUC__ >= 7))
2134  C4_SUPPRESS_WARNING_GCC("-Wstringop-overflow") // gcc11 has a false positive here
2135  #endif
      // append the range [first,last) to dst at offset sz. The copy is
      // skipped when it would not fit in dst, but sz always grows, so
      // that it ends up holding the required size.
2136  #define _c4append(first, last) \
2137  { \
2138  C4_ASSERT((last) >= (first)); \
2139  size_t num = static_cast<size_t>((last) - (first)); \
2140  if(num > 0 && sz + num <= dst.len) \
2141  { \
2142  memcpy(dst.str + sz, first, num * sizeof(C)); \
2143  } \
2144  sz += num; \
2145  }
      // sz: number of chars produced so far; b: start of the part of this
      // string that is not yet processed
2146  size_t sz = 0;
2147  size_t b = pos;
2148  _c4append(str, str + pos);
2149  do {
2150  size_t e = find(pattern, b);
2151  if(e == npos)
2152  {
      // no more matches: copy the remainder and finish
2153  _c4append(str + b, str + len);
2154  break;
2155  }
      // copy the chars preceding the match, then the replacement
2156  _c4append(str + b, str + e);
2157  _c4append(repl.begin(), repl.end());
2158  b = e + pattern.size();
2159  } while(b < len && b != npos);
2160  return sz;
      // the directives below are still processed although they follow the return
2161  #undef _c4append
2162  C4_SUPPRESS_WARNING_GCC_POP
2163  }
2164 
2165  /** @} */
2166 
2167 }; // template class basic_substring
2168 
2169 
2170 #undef C4_REQUIRE_RW
2171 
2172 
2173 //-----------------------------------------------------------------------------
2174 //-----------------------------------------------------------------------------
2175 //-----------------------------------------------------------------------------
2176 
2177 
2178 /** @defgroup doc_substr_adapters substr adapters
2179  *
2180  * to_substr() and to_csubstr() are used in generic code like
2181  * format(); overloading them for new types, such as containers,
2182  * allows substrings to be constructed from those types.
2183  * @{ */
2184 
2185 
2186 /** neutral version for use in generic code */
2187 C4_ALWAYS_INLINE substr to_substr(substr s) noexcept { return s; }
2188 /** neutral version for use in generic code */
2189 C4_ALWAYS_INLINE csubstr to_csubstr(substr s) noexcept { return s; }
2190 /** neutral version for use in generic code */
2191 C4_ALWAYS_INLINE csubstr to_csubstr(csubstr s) noexcept { return s; }
2192 
2193 
2194 template<size_t N>
2195 C4_ALWAYS_INLINE substr
2196 to_substr(char (&s)[N]) noexcept { substr ss(s, N-1); return ss; }
2197 template<size_t N>
2198 C4_ALWAYS_INLINE csubstr
2199 to_csubstr(const char (&s)[N]) noexcept { csubstr ss(s, N-1); return ss; }
2200 
2201 
2202 /** @note this overload uses SFINAE to prevent it from overriding the array overload
2203  * @see For a more detailed explanation on why the plain overloads cannot
2204  * coexist, see http://cplusplus.bordoon.com/specializeForCharacterArrays.html */
2205 template<class U>
2206 C4_ALWAYS_INLINE typename std::enable_if<std::is_same<U, char*>::value, substr>::type
2207 to_substr(U s) noexcept { substr ss(s); return ss; }
2208 /** @note this overload uses SFINAE to prevent it from overriding the array overload
2209  * @see For a more detailed explanation on why the plain overloads cannot
2210  * coexist, see http://cplusplus.bordoon.com/specializeForCharacterArrays.html */
2211 template<class U>
2212 C4_ALWAYS_INLINE typename std::enable_if<std::is_same<U, const char*>::value || std::is_same<U, char*>::value, csubstr>::type
2213 to_csubstr(U s) noexcept { csubstr ss(s); return ss; }
2214 
2215 
2216 /** @} */
2217 
2218 
2219 //-----------------------------------------------------------------------------
2220 //-----------------------------------------------------------------------------
2221 //-----------------------------------------------------------------------------
2222 
2223 /** @defgroup doc_substr_cmp substr comparison operators
2224  * @{ */
2225 
2226 template<typename C, size_t N> inline bool operator== (const char (&s)[N], basic_substring<C> const that) noexcept { return that.compare(s, N-1) == 0; }
2227 template<typename C, size_t N> inline bool operator!= (const char (&s)[N], basic_substring<C> const that) noexcept { return that.compare(s, N-1) != 0; }
2228 template<typename C, size_t N> inline bool operator< (const char (&s)[N], basic_substring<C> const that) noexcept { return that.compare(s, N-1) > 0; }
2229 template<typename C, size_t N> inline bool operator> (const char (&s)[N], basic_substring<C> const that) noexcept { return that.compare(s, N-1) < 0; }
2230 template<typename C, size_t N> inline bool operator<= (const char (&s)[N], basic_substring<C> const that) noexcept { return that.compare(s, N-1) >= 0; }
2231 template<typename C, size_t N> inline bool operator>= (const char (&s)[N], basic_substring<C> const that) noexcept { return that.compare(s, N-1) <= 0; }
2232 
2233 template<typename C> inline bool operator== (const char c, basic_substring<C> const that) noexcept { return that.compare(c) == 0; }
2234 template<typename C> inline bool operator!= (const char c, basic_substring<C> const that) noexcept { return that.compare(c) != 0; }
2235 template<typename C> inline bool operator< (const char c, basic_substring<C> const that) noexcept { return that.compare(c) > 0; }
2236 template<typename C> inline bool operator> (const char c, basic_substring<C> const that) noexcept { return that.compare(c) < 0; }
2237 template<typename C> inline bool operator<= (const char c, basic_substring<C> const that) noexcept { return that.compare(c) >= 0; }
2238 template<typename C> inline bool operator>= (const char c, basic_substring<C> const that) noexcept { return that.compare(c) <= 0; }
2239 
2240 /** @} */
2241 
2242 
2243 //-----------------------------------------------------------------------------
2244 //-----------------------------------------------------------------------------
2245 //-----------------------------------------------------------------------------
2246 
2247 /* C4_SUBSTR_NO_OSTREAM_LSHIFT doctest does not deal well with
2248  * template operator<<
2249  * @see https://github.com/onqtam/doctest/pull/431 */
2250 #ifndef C4_SUBSTR_NO_OSTREAM_LSHIFT
2251 #ifdef __clang__
2252 # pragma clang diagnostic push
2253 # pragma clang diagnostic ignored "-Wsign-conversion"
2254 #elif defined(__GNUC__)
2255 # pragma GCC diagnostic push
2256 # pragma GCC diagnostic ignored "-Wsign-conversion"
2257 #endif
2258 
2259 /** output the string to a stream */
2260 template<class OStream, class C>
2261 inline OStream& operator<< (OStream& os, basic_substring<C> s)
2262 {
2263  os.write(s.str, s.len);
2264  return os;
2265 }
2266 
2267 // this causes ambiguity
2268 ///** this is used by google test */
2269 //template<class OStream, class C>
2270 //inline void PrintTo(basic_substring<C> s, OStream* os)
2271 //{
2272 // os->write(s.str, s.len);
2273 //}
2274 
2275 #ifdef __clang__
2276 # pragma clang diagnostic pop
2277 #elif defined(__GNUC__)
2278 # pragma GCC diagnostic pop
2279 #endif
2280 #endif // !C4_SUBSTR_NO_OSTREAM_LSHIFT
2281 
2282 /** @} */
2283 
2284 } // namespace c4
2285 
2286 
2287 #ifdef __clang__
2288 # pragma clang diagnostic pop
2289 #elif defined(__GNUC__)
2290 # pragma GCC diagnostic pop
2291 #endif
2292 
2293 #endif /* _C4_SUBSTR_HPP_ */
left_< T > left(T val, size_t width, char padchar=' ')
mark an argument to be aligned left
Definition: format.hpp:524
right_< T > right(T val, size_t width, char padchar=' ')
mark an argument to be aligned right
Definition: format.hpp:531
csubstr to_csubstr(substr s) noexcept
neutral version for use in generic code
Definition: substr.hpp:2189
substr to_substr(substr s) noexcept
neutral version for use in generic code
Definition: substr.hpp:2187
bool operator!=(const char(&s)[N], basic_substring< C > const that) noexcept
Definition: substr.hpp:2227
bool operator>(const char(&s)[N], basic_substring< C > const that) noexcept
Definition: substr.hpp:2229
bool operator>=(const char(&s)[N], basic_substring< C > const that) noexcept
Definition: substr.hpp:2231
bool operator<=(const char(&s)[N], basic_substring< C > const that) noexcept
Definition: substr.hpp:2230
bool operator==(const char(&s)[N], basic_substring< C > const that) noexcept
Definition: substr.hpp:2226
bool operator<(const char(&s)[N], basic_substring< C > const that) noexcept
Definition: substr.hpp:2228
OStream & operator<<(OStream &os, basic_substring< C > s)
output the string to a stream
Definition: substr.hpp:2261
@ npos
a null string position
Definition: common.hpp:266
@ NONE
an index to none
Definition: common.hpp:259
Definition: common.cpp:12
split_iterator_impl(split_proxy_impl const *proxy, size_t pos, C sep)
Definition: substr.hpp:1705
a non-owning string-view, consisting of a character pointer and a length.
Definition: substr.hpp:76
basic_substring _first_real_span_hex(size_t pos) const noexcept
Definition: substr.hpp:1381
void reverse()
reverse in place
Definition: substr.hpp:2029
basic_substring first_uint_span() const
get the first span which can be interpreted as an unsigned integer
Definition: substr.hpp:1129
first_of_any_result first_of_any_iter(It first_span, It last_span) const
Definition: substr.hpp:639
int compare(ro_substr const that) const noexcept
Definition: substr.hpp:264
basic_substring(U s_) noexcept
Construct from a C-string (zero-terminated string)
Definition: substr.hpp:155
size_t first_not_of(ro_substr chars) const
Definition: substr.hpp:889
basic_substring gpop_right(C sep=C('/'), bool skip_empty=false) const
greedy pop right.
Definition: substr.hpp:1907
basic_substring trim(const C c) const
trim the character c left and right
Definition: substr.hpp:497
C const & front() const noexcept
Definition: substr.hpp:216
size_t count(const C c, size_t pos=0) const
count the number of occurrences of c
Definition: substr.hpp:564
basic_substring _first_real_span_oct(size_t pos) const noexcept
Definition: substr.hpp:1561
bool begins_with(const C c) const
true if the first character of the string is c
Definition: substr.hpp:670
first_of_any_result first_of_any(ro_substr s0, ro_substr s1) const
Definition: substr.hpp:614
basic_substring sub(size_t first, size_t num) const noexcept
return [first,first+num[.
Definition: substr.hpp:329
basic_substring pair_range(CC open, CC close) const
get the range delimited by an open-close pair of characters.
Definition: substr.hpp:987
basic_substring pop_right(C sep=C('/'), bool skip_empty=false) const
pop right: return the first split from the right.
Definition: substr.hpp:1777
basic_substring range(size_t first, size_t last=npos) const noexcept
return [first,last[.
Definition: substr.hpp:339
int compare(C const c) const noexcept
Definition: substr.hpp:228
C const & back() const noexcept
Definition: substr.hpp:219
size_t first_not_of(const C c) const
Definition: substr.hpp:845
const_iterator begin() const noexcept
Definition: substr.hpp:206
size_t last_not_of(const C c, size_t start) const
Definition: substr.hpp:876
basic_substring left_of(ro_substr const subs) const noexcept
given subs a substring of the current string, get the portion of the current string to the left of it
Definition: substr.hpp:415
basic_substring triml(const C c) const
trim left
Definition: substr.hpp:449
bool ends_with(const C c) const
true if the last character of the string is c
Definition: substr.hpp:727
size_t last_of(const C c, size_t start=npos) const
Definition: substr.hpp:798
bool is_integer() const
Definition: substr.hpp:1094
void tolower()
convert the string to lower-case
Definition: substr.hpp:1988
basic_substring trimr(ro_substr chars) const
trim right ANY of the characters
Definition: substr.hpp:485
basic_substring(basic_substring const &) noexcept=default
basic_substring _first_real_span_bin(size_t pos) const noexcept
Definition: substr.hpp:1471
void toupper()
convert the string to upper-case
Definition: substr.hpp:1978
first_of_any_result first_of_any(ro_substr s0, ro_substr s1, ro_substr s2, ro_substr s3, ro_substr s4) const
Definition: substr.hpp:632
basic_substring offs(size_t left, size_t right) const noexcept
offset from the ends: return [left,len-right[ ; ie, trim a number of characters from the left and right
Definition: substr.hpp:367
basic_substring first_real_span() const
get the first span which can be interpreted as a real (floating-point) number
Definition: substr.hpp:1216
basic_substring unquoted() const
Definition: substr.hpp:1046
C & front() noexcept
Definition: substr.hpp:215
split_proxy_impl split_proxy
Definition: substr.hpp:1761
basic_substring first_int_span() const
get the first span which can be interpreted as a signed integer
Definition: substr.hpp:1141
basic_substring select(ro_substr pattern, size_t pos=0) const
get the substr consisting of the first occurrence of pattern after pos, or an empty substr if none occurs
Definition: substr.hpp:599
basic_substring pair_range_esc(CC open_close, CC escape=CC('\\'))
get the range delimited by a single open-close character (eg, quotes).
Definition: substr.hpp:1002
size_t replace_all(rw_substr dst, ro_substr pattern, ro_substr repl, size_t pos=0) const
replace pattern with repl, and write the result into dst.
Definition: substr.hpp:2124
size_t count(ro_substr c, size_t pos=0) const
count the number of occurrences of s
Definition: substr.hpp:578
basic_substring name_wo_extshort() const
Definition: substr.hpp:1949
split_proxy split(C sep, size_t start_pos=0) const
a view into the splits
Definition: substr.hpp:1764
basic_substring name_wo_extlong() const
Definition: substr.hpp:1954
basic_substring erase(ro_substr sub)
erase a part of the string.
Definition: substr.hpp:2078
C & back() noexcept
Definition: substr.hpp:218
size_t last_not_of(ro_substr chars) const
Definition: substr.hpp:932
bool is_real() const
Definition: substr.hpp:1083
constexpr basic_substring() noexcept
Definition: substr.hpp:117
basic_substring triml(ro_substr chars) const
trim left ANY of the characters.
Definition: substr.hpp:461
size_t len
the length of the substring
Definition: substr.hpp:82
basic_substring left_of(size_t pos, bool include_pos) const noexcept
return [0, pos+include_pos[ .
Definition: substr.hpp:385
size_t last_not_of(ro_substr chars, size_t start) const
Definition: substr.hpp:953
basic_substring last(size_t num) const noexcept
return the last num elements: [len-num,len[
Definition: substr.hpp:356
size_t first_of(const C c, size_t start=0) const
Definition: substr.hpp:786
basic_substring stripl(ro_substr pattern) const
remove a pattern from the left
Definition: substr.hpp:510
bool ends_with(ro_substr pattern) const
true if the string ends with the given pattern
Definition: substr.hpp:750
basic_substring right_of(size_t pos, bool include_pos) const noexcept
return [pos+!include_pos, len[
Definition: substr.hpp:403
size_t find(const C c, size_t start_pos=0) const
Definition: substr.hpp:533
basic_substring trim(ro_substr const chars) const
trim left and right ANY of the characters
Definition: substr.hpp:503
const_iterator end() const noexcept
Definition: substr.hpp:207
typename std::add_const< C >::type CC
CC=const char.
Definition: substr.hpp:89
basic_substring(C *s_, size_t len_) noexcept
Construct from a pointer and length.
Definition: substr.hpp:143
basic_substring erase_range(size_t first, size_t last)
Definition: substr.hpp:2069
basic_substring extshort() const
Definition: substr.hpp:1959
void assign(U s_) noexcept
Assign from a C-string (zero-terminated string)
Definition: substr.hpp:176
basic_substring stripr(ro_substr pattern) const
remove a pattern from the right
Definition: substr.hpp:519
size_t first_of(ro_substr chars, size_t start=0) const
Definition: substr.hpp:812
size_t find(ro_substr pattern, size_t start_pos=0) const
Definition: substr.hpp:537
void assign(C *s_, size_t len_) noexcept
Assign from a pointer and length.
Definition: substr.hpp:164
iterator begin() noexcept
Definition: substr.hpp:203
void assign(C(&s_)[N]) noexcept
Assign from an array.
Definition: substr.hpp:161
bool ends_with_any(ro_substr chars) const
true if the last character of the string is any of the given chars
Definition: substr.hpp:767
void fill(C val)
fill the entire contents with the given val
Definition: substr.hpp:2000
size_t size() const noexcept
Definition: substr.hpp:201
basic_substring basename(C sep=C('/')) const
Definition: substr.hpp:1935
basic_substring(C *beg_, C *end_) noexcept
Construct from two pointers.
Definition: substr.hpp:147
bool is_unsigned_integer() const
Definition: substr.hpp:1107
bool overlaps(ro_substr const that) const noexcept
true if there is overlap of at least one element between that and *this
Definition: substr.hpp:313
basic_substring _first_integral_span(size_t skip_start) const
Definition: substr.hpp:1150
iterator end() noexcept
Definition: substr.hpp:204
bool not_empty() const noexcept
Definition: substr.hpp:200
basic_substring first(size_t num) const noexcept
return the first num elements: [0,num[
Definition: substr.hpp:349
basic_substring left_of(size_t pos) const noexcept
return [0, pos[ .
Definition: substr.hpp:376
void reverse_range(size_t ifirst, size_t ilast)
revert a range in place
Definition: substr.hpp:2047
basic_substring pair_range_nested(CC open, CC close) const
get the range delimited by an open-close pair of characters, with possibly nested occurrences.
Definition: substr.hpp:1023
C const * data() const noexcept
Definition: substr.hpp:210
size_t replace(C value, C repl, size_t pos=0)
replace every occurrence of character value with the character repl
Definition: substr.hpp:2090
bool has_str() const noexcept
Definition: substr.hpp:198
bool begins_with(ro_substr pattern) const
true if the string begins with the given pattern
Definition: substr.hpp:693
bool is_number() const
Definition: substr.hpp:1068
basic_substring sub(size_t first) const noexcept
return [first,len[
Definition: substr.hpp:322
basic_substring _first_real_span_dec(size_t pos) const noexcept
Definition: substr.hpp:1294
first_of_any_result first_of_any(ro_substr s0, ro_substr s1, ro_substr s2) const
Definition: substr.hpp:620
bool begins_with_any(ro_substr chars) const
true if the first character of the string is any of the given chars
Definition: substr.hpp:710
bool next_split(C sep, size_t *start_pos, basic_substring *out) const
returns true if the string has not been exhausted yet, meaning it's ok to call next_split() again.
Definition: substr.hpp:1661
basic_substring(basic_substring &&) noexcept=default
bool empty() const noexcept
Definition: substr.hpp:199
basic_substring right_of(ro_substr const subs) const noexcept
given subs a substring of the current string, get the portion of the current string to the right of it
Definition: substr.hpp:429
size_t first_not_of(const C c, size_t start) const
Definition: substr.hpp:855
void assign(C *beg_, C *end_) noexcept
Assign from two pointers.
Definition: substr.hpp:168
basic_substring trimr(const C c) const
trim the character c from the right
Definition: substr.hpp:473
first_of_any_result first_of_any(ro_substr s0, ro_substr s1, ro_substr s2, ro_substr s3) const
Definition: substr.hpp:626
int compare(const char *that, size_t sz) const noexcept
Definition: substr.hpp:237
constexpr basic_substring(C(&s_)[N]) noexcept
Construct from an array.
Definition: substr.hpp:140
basic_substring dirname(C sep=C('/')) const
Definition: substr.hpp:1942
size_t replace(ro_substr chars, C repl, size_t pos=0)
replace every occurrence of each character in chars with the character repl.
Definition: substr.hpp:2106
bool is_super(ro_substr const that) const noexcept
true if that is a substring of *this (ie, from the same buffer)
Definition: substr.hpp:304
size_t last_not_of(const C c) const
Definition: substr.hpp:866
bool ends_with(const C c, size_t num) const
true if the last num characters of the string are c
Definition: substr.hpp:733
bool begins_with(const C c, size_t num) const
true if the first num characters of the string are c
Definition: substr.hpp:676
static constexpr C4_CONST bool _is_hex_char(char c) noexcept
true if the character is in [0-9a-fA-F]
Definition: substr.hpp:1279
basic_substring erase(size_t pos, size_t num)
erase part of the string.
Definition: substr.hpp:2060
C * data() noexcept
Definition: substr.hpp:209
basic_substring _word_follows(size_t pos, csubstr word) const noexcept
Definition: substr.hpp:1284
void reverse_sub(size_t ifirst, size_t num)
reverse a subpart in place
Definition: substr.hpp:2037
basic_substring gpop_left(C sep=C('/'), bool skip_empty=false) const
greedy pop left.
Definition: substr.hpp:1885
size_t first_not_of(ro_substr chars, size_t start) const
Definition: substr.hpp:910
void clear() noexcept
Definition: substr.hpp:127
size_t last_of(ro_substr chars, size_t start=npos) const
Definition: substr.hpp:827
void copy_from(ro_substr that, size_t ifirst=0, size_t num=npos)
set the current substring to a copy of the given csubstr
Definition: substr.hpp:2012
typename std::remove_const< C >::type NCC_
NCC_=non const char.
Definition: substr.hpp:90
basic_substring first_non_empty_span() const
get the first span consisting exclusively of non-empty characters
Definition: substr.hpp:1117
C * str
a restricted pointer to the first character of the substring
Definition: substr.hpp:80
basic_substring right_of(size_t pos) const noexcept
return [pos+1, len[
Definition: substr.hpp:394
basic_substring pop_left(C sep=C('/'), bool skip_empty=false) const
return the first split from the left.
Definition: substr.hpp:1830
static constexpr C4_CONST bool _is_delim_char(char c) noexcept
true if the character is a delimiter character at the end
Definition: substr.hpp:1271
bool is_sub(ro_substr const that) const noexcept
true if *this is a substring of that (ie, from the same buffer)
Definition: substr.hpp:298
basic_substring select(const C c, size_t pos=0) const
get the substr consisting of the first occurrence of c after pos, or an empty substr if none occurs
Definition: substr.hpp:592
basic_substring extlong() const
Definition: substr.hpp:1964
#define _c4append(first, last)