rapidyaml  0.9.0
parse and emit YAML, and do it fast
substr.hpp
Go to the documentation of this file.
1 #ifndef _C4_SUBSTR_HPP_
2 #define _C4_SUBSTR_HPP_
3 
4 /** @file substr.hpp read+write string views */
5 
6 #include <string.h>
7 #include <ctype.h>
8 #include <type_traits>
9 
10 #include "c4/config.hpp"
11 #include "c4/error.hpp"
12 #include "c4/substr_fwd.hpp"
13 
14 #ifdef __clang__
15 # pragma clang diagnostic push
16 # pragma clang diagnostic ignored "-Wold-style-cast"
17 #elif defined(__GNUC__)
18 # pragma GCC diagnostic push
19 # pragma GCC diagnostic ignored "-Wtype-limits" // disable warnings on size_t>=0, used heavily in assertions below. These assertions are a preparation step for providing the index type as a template parameter.
20 # pragma GCC diagnostic ignored "-Wuseless-cast"
21 # pragma GCC diagnostic ignored "-Wold-style-cast"
22 #endif
23 
24 
25 namespace c4 {
26 
27 /** @defgroup doc_substr Substring: read/write string views
28  * @{ */
29 
30 //-----------------------------------------------------------------------------
31 //-----------------------------------------------------------------------------
32 //-----------------------------------------------------------------------------
33 
34 /** @cond dev */
35 namespace detail {
36 template<typename C>
37 static inline void _do_reverse(C *C4_RESTRICT first, C *C4_RESTRICT last)
38 {
39  while(last > first)
40  {
41  C tmp = *last;
42  *last-- = *first;
43  *first++ = tmp;
44  }
45 }
46 } // namespace detail
47 /** @endcond */
48 
49 //-----------------------------------------------------------------------------
50 //-----------------------------------------------------------------------------
51 //-----------------------------------------------------------------------------
52 
53 /** @cond dev */
54 // utility macros to deuglify SFINAE code; undefined after the class.
55 // https://stackoverflow.com/questions/43051882/how-to-disable-a-class-member-funrtion-for-certain-template-types
56 #define C4_REQUIRE_RW(ret_type) \
57  template <typename U=C> \
58  typename std::enable_if< ! std::is_const<U>::value, ret_type>::type
59 /** @endcond */
60 
61 
62 /** a non-owning string-view, consisting of a character pointer
63  * and a length.
64  *
65  * @note The pointer is explicitly restricted.
66  *
67  * @see a [quickstart
68  * sample](https://rapidyaml.readthedocs.io/latest/doxygen/group__doc__quickstart.html#ga43e253da0692c13967019446809c1113)
69  * in rapidyaml's documentation.
70  */
71 template<class C>
72 struct C4CORE_EXPORT basic_substring // NOLINT(cppcoreguidelines-special-member-functions,hicpp-special-member-functions)
73 {
74 public:
75 
76  /** a restricted pointer to the first character of the substring */
77  C * C4_RESTRICT str;
78  /** the length of the substring */
79  size_t len;
80 
81 public:
82 
83  /** @name Types */
84  /** @{ */
85 
86  using CC = typename std::add_const<C>::type; //!< CC=const char
87  using NCC_ = typename std::remove_const<C>::type; //!< NCC_=non const char
88 
91 
92  using char_type = C;
93  using size_type = size_t;
94 
95  using iterator = C*;
96  using const_iterator = CC*;
97 
98  enum : size_t { npos = (size_t)-1, NONE = (size_t)-1 };
99 
100  /// convert automatically to substring of const C
101  template<class U=C>
102  C4_ALWAYS_INLINE operator typename std::enable_if<!std::is_const<U>::value, ro_substr const&>::type () const noexcept
103  {
104  return *(ro_substr const*)this; // don't call the str+len ctor because it does a check
105  }
106 
107  /** @} */
108 
109 public:
110 
111  /** @name Default construction and assignment */
112  /** @{ */
113 
114  C4_ALWAYS_INLINE constexpr basic_substring() noexcept : str(), len() {}
115 
116  C4_ALWAYS_INLINE basic_substring(basic_substring const&) noexcept = default;
117  C4_ALWAYS_INLINE basic_substring(basic_substring &&) noexcept = default;
118  C4_ALWAYS_INLINE basic_substring(std::nullptr_t) noexcept : str(nullptr), len(0) {}
119 
120  C4_ALWAYS_INLINE basic_substring& operator= (basic_substring const&) noexcept = default;
121  C4_ALWAYS_INLINE basic_substring& operator= (basic_substring &&) noexcept = default;
122  C4_ALWAYS_INLINE basic_substring& operator= (std::nullptr_t) noexcept { str = nullptr; len = 0; return *this; }
123 
124  C4_ALWAYS_INLINE void clear() noexcept { str = nullptr; len = 0; }
125 
126  /** @} */
127 
128 public:
129 
130  /** @name Construction and assignment from characters with the same type */
131  /** @{ */
132 
133  /** Construct from an array.
134  * @warning the input string need not be zero terminated, but the
135  * length is taken as if the string was zero terminated */
136  template<size_t N>
137  C4_ALWAYS_INLINE constexpr basic_substring(C (&s_)[N]) noexcept : str(s_), len(N-1) {}
138  /** Construct from a pointer and length.
139  * @warning the input string need not be zero terminated. */
140  C4_ALWAYS_INLINE basic_substring(C *s_, size_t len_) noexcept : str(s_), len(len_) { C4_ASSERT(str || !len_); }
141  /** Construct from two pointers.
142  * @warning the end pointer MUST BE larger than or equal to the begin pointer
143  * @warning the input string need not be zero terminated */
144  C4_ALWAYS_INLINE basic_substring(C *beg_, C *end_) noexcept : str(beg_), len(static_cast<size_t>(end_ - beg_)) { C4_ASSERT(end_ >= beg_); }
145  /** Construct from a C-string (zero-terminated string)
146  * @warning the input string MUST BE zero terminated.
147  * @warning will call strlen()
148  * @note this overload uses SFINAE to prevent it from overriding the array ctor
149  * @see For a more detailed explanation on why the plain overloads cannot
150  * coexist, see http://cplusplus.bordoon.com/specializeForCharacterArrays.html */
151  template<class U, typename std::enable_if<std::is_same<U, C*>::value || std::is_same<U, NCC_*>::value, int>::type=0>
152  C4_ALWAYS_INLINE basic_substring(U s_) noexcept : str(s_), len(s_ ? strlen(s_) : 0) {}
153 
154  /** Assign from an array.
155  * @warning the input string need not be zero terminated, but the
156  * length is taken as if the string was zero terminated */
157  template<size_t N>
158  C4_ALWAYS_INLINE void assign(C (&s_)[N]) noexcept { str = (s_); len = (N-1); }
159  /** Assign from a pointer and length.
160  * @warning the input string need not be zero terminated. */
161  C4_ALWAYS_INLINE void assign(C *s_, size_t len_) noexcept { str = s_; len = len_; C4_ASSERT(str || !len_); }
162  /** Assign from two pointers.
163  * @warning the end pointer MUST BE larger than or equal to the begin pointer
164  * @warning the input string need not be zero terminated. */
165  C4_ALWAYS_INLINE void assign(C *beg_, C *end_) noexcept { C4_ASSERT(end_ >= beg_); str = (beg_); len = static_cast<size_t>(end_ - beg_); }
166  /** Assign from a C-string (zero-terminated string)
167  * @warning the input string must be zero terminated.
168  * @warning will call strlen()
169  * @note this overload uses SFINAE to prevent it from overriding the array ctor
170  * @see For a more detailed explanation on why the plain overloads cannot
171  * coexist, see http://cplusplus.bordoon.com/specializeForCharacterArrays.html */
172  template<class U, typename std::enable_if<std::is_same<U, C*>::value || std::is_same<U, NCC_*>::value, int>::type=0>
173  C4_ALWAYS_INLINE void assign(U s_) noexcept { str = (s_); len = (s_ ? strlen(s_) : 0); }
174 
175  /** Assign from an array.
176  * @warning the input string need not be zero terminated. */
177  template<size_t N>
178  C4_ALWAYS_INLINE basic_substring& operator= (C (&s_)[N]) noexcept { str = (s_); len = (N-1); return *this; }
179  /** Assign from a C-string (zero-terminated string)
180  * @warning the input string MUST BE zero terminated.
181  * @warning will call strlen()
182  * @note this overload uses SFINAE to prevent it from overriding the array ctor
183  * @see For a more detailed explanation on why the plain overloads cannot
184  * coexist, see http://cplusplus.bordoon.com/specializeForCharacterArrays.html */
185  template<class U, typename std::enable_if<std::is_same<U, C*>::value || std::is_same<U, NCC_*>::value, int>::type=0>
186  C4_ALWAYS_INLINE basic_substring& operator= (U s_) noexcept { str = s_; len = s_ ? strlen(s_) : 0; return *this; }
187 
188  /** @} */
189 
190 public:
191 
192  /** @name Standard accessor methods */
193  /** @{ */
194 
195  C4_ALWAYS_INLINE C4_PURE bool has_str() const noexcept { return ! empty() && str[0] != C(0); }
196  C4_ALWAYS_INLINE C4_PURE bool empty() const noexcept { return (len == 0 || str == nullptr); }
197  C4_ALWAYS_INLINE C4_PURE bool not_empty() const noexcept { return (len != 0 && str != nullptr); }
198  C4_ALWAYS_INLINE C4_PURE size_t size() const noexcept { return len; }
199 
200  C4_ALWAYS_INLINE C4_PURE iterator begin() noexcept { return str; }
201  C4_ALWAYS_INLINE C4_PURE iterator end () noexcept { return str + len; }
202 
203  C4_ALWAYS_INLINE C4_PURE const_iterator begin() const noexcept { return str; }
204  C4_ALWAYS_INLINE C4_PURE const_iterator end () const noexcept { return str + len; }
205 
206  C4_ALWAYS_INLINE C4_PURE C * data() noexcept { return str; }
207  C4_ALWAYS_INLINE C4_PURE C const* data() const noexcept { return str; }
208 
209  C4_ALWAYS_INLINE C4_PURE C & operator[] (size_t i) noexcept { C4_ASSERT(i >= 0 && i < len); return str[i]; }
210  C4_ALWAYS_INLINE C4_PURE C const& operator[] (size_t i) const noexcept { C4_ASSERT(i >= 0 && i < len); return str[i]; }
211 
212  C4_ALWAYS_INLINE C4_PURE C & front() noexcept { C4_ASSERT(len > 0 && str != nullptr); return *str; }
213  C4_ALWAYS_INLINE C4_PURE C const& front() const noexcept { C4_ASSERT(len > 0 && str != nullptr); return *str; }
214 
215  C4_ALWAYS_INLINE C4_PURE C & back() noexcept { C4_ASSERT(len > 0 && str != nullptr); return *(str + len - 1); }
216  C4_ALWAYS_INLINE C4_PURE C const& back() const noexcept { C4_ASSERT(len > 0 && str != nullptr); return *(str + len - 1); }
217 
218  /** @} */
219 
220 public:
221 
222  /** @name Comparison methods */
223  /** @{ */
224 
225  C4_PURE int compare(C const c) const noexcept
226  {
227  C4_XASSERT((str != nullptr) || len == 0);
228  if(C4_LIKELY(str != nullptr && len > 0))
229  return (*str != c) ? *str - c : (static_cast<int>(len) - 1);
230  else
231  return -1;
232  }
233 
234  C4_PURE int compare(const char *C4_RESTRICT that, size_t sz) const noexcept
235  {
236  C4_XASSERT(that || sz == 0);
237  C4_XASSERT(str || len == 0);
238  if(C4_LIKELY(str && that))
239  {
240  {
241  const size_t min = len < sz ? len : sz;
242  for(size_t i = 0; i < min; ++i)
243  if(str[i] != that[i])
244  return str[i] < that[i] ? -1 : 1;
245  }
246  if(len < sz)
247  return -1;
248  else if(len == sz)
249  return 0;
250  else
251  return 1;
252  }
253  else if(len == sz)
254  {
255  C4_XASSERT(len == 0 && sz == 0);
256  return 0;
257  }
258  return len < sz ? -1 : 1;
259  }
260 
261  C4_ALWAYS_INLINE C4_PURE int compare(ro_substr const that) const noexcept { return this->compare(that.str, that.len); }
262 
263  C4_ALWAYS_INLINE C4_PURE bool operator== (std::nullptr_t) const noexcept { return str == nullptr; }
264  C4_ALWAYS_INLINE C4_PURE bool operator!= (std::nullptr_t) const noexcept { return str != nullptr; }
265 
266  C4_ALWAYS_INLINE C4_PURE bool operator== (C const c) const noexcept { return this->compare(c) == 0; }
267  C4_ALWAYS_INLINE C4_PURE bool operator!= (C const c) const noexcept { return this->compare(c) != 0; }
268  C4_ALWAYS_INLINE C4_PURE bool operator< (C const c) const noexcept { return this->compare(c) < 0; }
269  C4_ALWAYS_INLINE C4_PURE bool operator> (C const c) const noexcept { return this->compare(c) > 0; }
270  C4_ALWAYS_INLINE C4_PURE bool operator<= (C const c) const noexcept { return this->compare(c) <= 0; }
271  C4_ALWAYS_INLINE C4_PURE bool operator>= (C const c) const noexcept { return this->compare(c) >= 0; }
272 
273  template<class U> C4_ALWAYS_INLINE C4_PURE bool operator== (basic_substring<U> const that) const noexcept { return this->compare(that) == 0; }
274  template<class U> C4_ALWAYS_INLINE C4_PURE bool operator!= (basic_substring<U> const that) const noexcept { return this->compare(that) != 0; }
275  template<class U> C4_ALWAYS_INLINE C4_PURE bool operator< (basic_substring<U> const that) const noexcept { return this->compare(that) < 0; }
276  template<class U> C4_ALWAYS_INLINE C4_PURE bool operator> (basic_substring<U> const that) const noexcept { return this->compare(that) > 0; }
277  template<class U> C4_ALWAYS_INLINE C4_PURE bool operator<= (basic_substring<U> const that) const noexcept { return this->compare(that) <= 0; }
278  template<class U> C4_ALWAYS_INLINE C4_PURE bool operator>= (basic_substring<U> const that) const noexcept { return this->compare(that) >= 0; }
279 
280  template<size_t N> C4_ALWAYS_INLINE C4_PURE bool operator== (const char (&that)[N]) const noexcept { return this->compare(that, N-1) == 0; }
281  template<size_t N> C4_ALWAYS_INLINE C4_PURE bool operator!= (const char (&that)[N]) const noexcept { return this->compare(that, N-1) != 0; }
282  template<size_t N> C4_ALWAYS_INLINE C4_PURE bool operator< (const char (&that)[N]) const noexcept { return this->compare(that, N-1) < 0; }
283  template<size_t N> C4_ALWAYS_INLINE C4_PURE bool operator> (const char (&that)[N]) const noexcept { return this->compare(that, N-1) > 0; }
284  template<size_t N> C4_ALWAYS_INLINE C4_PURE bool operator<= (const char (&that)[N]) const noexcept { return this->compare(that, N-1) <= 0; }
285  template<size_t N> C4_ALWAYS_INLINE C4_PURE bool operator>= (const char (&that)[N]) const noexcept { return this->compare(that, N-1) >= 0; }
286 
287  /** @} */
288 
289 public:
290 
291  /** @name Sub-selection methods */
292  /** @{ */
293 
294  /** true if *this is a substring of that (ie, from the same buffer) */
295  C4_ALWAYS_INLINE C4_PURE bool is_sub(ro_substr const that) const noexcept
296  {
297  return that.is_super(*this);
298  }
299 
300  /** true if that is a substring of *this (ie, from the same buffer) */
301  C4_ALWAYS_INLINE C4_PURE bool is_super(ro_substr const that) const noexcept
302  {
303  if(C4_LIKELY(len > 0))
304  return that.str >= str && that.str+that.len <= str+len;
305  else
306  return that.len == 0 && that.str == str && str != nullptr;
307  }
308 
309  /** true if there is overlap of at least one element between that and *this */
310  C4_ALWAYS_INLINE C4_PURE bool overlaps(ro_substr const that) const noexcept
311  {
312  // thanks @timwynants
313  return that.str+that.len > str && that.str < str+len;
314  }
315 
316 public:
317 
318  /** return [first,len[ */
319  C4_ALWAYS_INLINE C4_PURE basic_substring sub(size_t first) const noexcept
320  {
321  C4_ASSERT(first >= 0 && first <= len);
322  return basic_substring(str + first, len - first);
323  }
324 
325  /** return [first,first+num[. If num==npos, return [first,len[ */
326  C4_ALWAYS_INLINE C4_PURE basic_substring sub(size_t first, size_t num) const noexcept
327  {
328  C4_ASSERT(first >= 0 && first <= len);
329  C4_ASSERT((num >= 0 && num <= len) || (num == npos));
330  size_t rnum = num != npos ? num : len - first;
331  C4_ASSERT((first >= 0 && first + rnum <= len) || (num == 0));
332  return basic_substring(str + first, rnum);
333  }
334 
335  /** return [first,last[. If last==npos, return [first,len[ */
336  C4_ALWAYS_INLINE C4_PURE basic_substring range(size_t first, size_t last=npos) const noexcept
337  {
338  C4_ASSERT(first >= 0 && first <= len);
339  last = last != npos ? last : len;
340  C4_ASSERT(first <= last);
341  C4_ASSERT(last >= 0 && last <= len);
342  return basic_substring(str + first, last - first);
343  }
344 
345  /** return the first @p num elements: [0,num[*/
346  C4_ALWAYS_INLINE C4_PURE basic_substring first(size_t num) const noexcept
347  {
348  C4_ASSERT(num <= len || num == npos);
349  return basic_substring(str, num != npos ? num : len);
350  }
351 
352  /** return the last @p num elements: [len-num,len[*/
353  C4_ALWAYS_INLINE C4_PURE basic_substring last(size_t num) const noexcept
354  {
355  C4_ASSERT(num <= len || num == npos);
356  return num != npos ?
357  basic_substring(str + len - num, num) :
358  *this;
359  }
360 
361  /** offset from the ends: return [left,len-right[ ; ie, trim a
362  number of characters from the left and right. This is
363  equivalent to python's negative list indices. */
364  C4_ALWAYS_INLINE C4_PURE basic_substring offs(size_t left, size_t right) const noexcept
365  {
366  C4_ASSERT(left >= 0 && left <= len);
367  C4_ASSERT(right >= 0 && right <= len);
368  C4_ASSERT(left <= len - right + 1);
369  return basic_substring(str + left, len - right - left);
370  }
371 
372  /** return [0, pos[ . Same as .first(pos), but provided for compatibility with .right_of() */
373  C4_ALWAYS_INLINE C4_PURE basic_substring left_of(size_t pos) const noexcept
374  {
375  C4_ASSERT(pos <= len || pos == npos);
376  return (pos != npos) ?
377  basic_substring(str, pos) :
378  *this;
379  }
380 
381  /** return [0, pos+include_pos[ . Same as .first(pos+1), but provided for compatibility with .right_of() */
382  C4_ALWAYS_INLINE C4_PURE basic_substring left_of(size_t pos, bool include_pos) const noexcept
383  {
384  C4_ASSERT(pos <= len || pos == npos);
385  return (pos != npos) ?
386  basic_substring(str, pos+include_pos) :
387  *this;
388  }
389 
390  /** return [pos+1, len[ */
391  C4_ALWAYS_INLINE C4_PURE basic_substring right_of(size_t pos) const noexcept
392  {
393  C4_ASSERT(pos <= len || pos == npos);
394  return (pos != npos) ?
395  basic_substring(str + (pos + 1), len - (pos + 1)) :
396  basic_substring(str + len, size_t(0));
397  }
398 
399  /** return [pos+!include_pos, len[ */
400  C4_ALWAYS_INLINE C4_PURE basic_substring right_of(size_t pos, bool include_pos) const noexcept
401  {
402  C4_ASSERT(pos <= len || pos == npos);
403  return (pos != npos) ?
404  basic_substring(str + (pos + !include_pos), len - (pos + !include_pos)) :
405  basic_substring(str + len, size_t(0));
406  }
407 
408 public:
409 
410  /** given @p subs a substring of the current string, get the
411  * portion of the current string to the left of it */
412  C4_ALWAYS_INLINE C4_PURE basic_substring left_of(ro_substr const subs) const noexcept
413  {
414  C4_ASSERT(is_super(subs) || subs.empty());
415  auto ssb = subs.begin();
416  auto b = begin();
417  auto e = end();
418  if(ssb >= b && ssb <= e)
419  return sub(0, static_cast<size_t>(ssb - b));
420  else
421  return sub(0, 0);
422  }
423 
424  /** given @p subs a substring of the current string, get the
425  * portion of the current string to the right of it */
426  C4_ALWAYS_INLINE C4_PURE basic_substring right_of(ro_substr const subs) const noexcept
427  {
428  C4_ASSERT(is_super(subs) || subs.empty());
429  auto sse = subs.end();
430  auto b = begin();
431  auto e = end();
432  if(sse >= b && sse <= e)
433  return sub(static_cast<size_t>(sse - b), static_cast<size_t>(e - sse));
434  else
435  return sub(0, 0);
436  }
437 
438  /** @} */
439 
440 public:
441 
442  /** @name Removing characters (trim()) / patterns (strip()) from the tips of the string */
443  /** @{ */
444 
445  /** trim left */
446  basic_substring triml(const C c) const
447  {
448  if( ! empty())
449  {
450  size_t pos = first_not_of(c);
451  if(pos != npos)
452  return sub(pos);
453  }
454  return sub(0, 0);
455  }
456  /** trim left ANY of the characters.
457  * @see stripl() to remove a pattern from the left */
459  {
460  if( ! empty())
461  {
462  size_t pos = first_not_of(chars);
463  if(pos != npos)
464  return sub(pos);
465  }
466  return sub(0, 0);
467  }
468 
469  /** trim the character c from the right */
470  basic_substring trimr(const C c) const
471  {
472  if( ! empty())
473  {
474  size_t pos = last_not_of(c, npos);
475  if(pos != npos)
476  return sub(0, pos+1);
477  }
478  return sub(0, 0);
479  }
480  /** trim right ANY of the characters
481  * @see stripr() to remove a pattern from the right */
483  {
484  if( ! empty())
485  {
486  size_t pos = last_not_of(chars, npos);
487  if(pos != npos)
488  return sub(0, pos+1);
489  }
490  return sub(0, 0);
491  }
492 
493  /** trim the character c left and right */
494  basic_substring trim(const C c) const
495  {
496  return triml(c).trimr(c);
497  }
498  /** trim left and right ANY of the characters
499  * @see strip() to remove a pattern from the left and right */
500  basic_substring trim(ro_substr const chars) const
501  {
502  return triml(chars).trimr(chars);
503  }
504 
505  /** remove a pattern from the left
506  * @see triml() to remove characters*/
508  {
509  if( ! begins_with(pattern))
510  return *this;
511  return sub(pattern.len < len ? pattern.len : len);
512  }
513 
514  /** remove a pattern from the right
515  * @see trimr() to remove characters*/
517  {
518  if( ! ends_with(pattern))
519  return *this;
520  return left_of(len - (pattern.len < len ? pattern.len : len));
521  }
522 
523  /** @} */
524 
525 public:
526 
527  /** @name Lookup methods */
528  /** @{ */
529 
530  size_t find(const C c, size_t start_pos=0) const
531  {
532  return first_of(c, start_pos);
533  }
534  size_t find(ro_substr pattern, size_t start_pos=0) const
535  {
536  C4_ASSERT(start_pos == npos || (start_pos >= 0 && start_pos <= len));
537  if(len < pattern.len) return npos;
538  for(size_t i = start_pos, e = len - pattern.len + 1; i < e; ++i)
539  {
540  bool gotit = true;
541  for(size_t j = 0; j < pattern.len; ++j)
542  {
543  C4_ASSERT(i + j < len);
544  if(str[i + j] != pattern.str[j])
545  {
546  gotit = false;
547  break;
548  }
549  }
550  if(gotit)
551  {
552  return i;
553  }
554  }
555  return npos;
556  }
557 
558 public:
559 
560  /** count the number of occurrences of c */
561  size_t count(const C c, size_t pos=0) const
562  {
563  C4_ASSERT(pos >= 0 && pos <= len);
564  size_t num = 0;
565  pos = find(c, pos);
566  while(pos != npos)
567  {
568  ++num;
569  pos = find(c, pos + 1);
570  }
571  return num;
572  }
573 
574  /** count the number of occurrences of s */
575  size_t count(ro_substr c, size_t pos=0) const
576  {
577  C4_ASSERT(pos >= 0 && pos <= len);
578  size_t num = 0;
579  pos = find(c, pos);
580  while(pos != npos)
581  {
582  ++num;
583  pos = find(c, pos + c.len);
584  }
585  return num;
586  }
587 
588  /** get the substr consisting of the first occurrence of @p c after @p pos, or an empty substr if none occurs */
589  basic_substring select(const C c, size_t pos=0) const
590  {
591  pos = find(c, pos);
592  return pos != npos ? sub(pos, 1) : basic_substring();
593  }
594 
595  /** get the substr consisting of the first occurrence of @p pattern after @p pos, or an empty substr if none occurs */
596  basic_substring select(ro_substr pattern, size_t pos=0) const
597  {
598  pos = find(pattern, pos);
599  return pos != npos ? sub(pos, pattern.len) : basic_substring();
600  }
601 
602 public:
603 
605  {
606  size_t which;
607  size_t pos;
608  operator bool() const { return which != NONE && pos != npos; }
609  };
610 
612  {
613  ro_substr s[2] = {s0, s1};
614  return first_of_any_iter(&s[0], &s[0] + 2);
615  }
616 
618  {
619  ro_substr s[3] = {s0, s1, s2};
620  return first_of_any_iter(&s[0], &s[0] + 3);
621  }
622 
624  {
625  ro_substr s[4] = {s0, s1, s2, s3};
626  return first_of_any_iter(&s[0], &s[0] + 4);
627  }
628 
630  {
631  ro_substr s[5] = {s0, s1, s2, s3, s4};
632  return first_of_any_iter(&s[0], &s[0] + 5);
633  }
634 
635  template<class It>
636  first_of_any_result first_of_any_iter(It first_span, It last_span) const
637  {
638  for(size_t i = 0; i < len; ++i)
639  {
640  size_t curr = 0;
641  for(It it = first_span; it != last_span; ++curr, ++it)
642  {
643  auto const& chars = *it;
644  if((i + chars.len) > len) continue;
645  bool gotit = true;
646  for(size_t j = 0; j < chars.len; ++j)
647  {
648  C4_ASSERT(i + j < len);
649  if(str[i + j] != chars[j])
650  {
651  gotit = false;
652  break;
653  }
654  }
655  if(gotit)
656  {
657  return {curr, i};
658  }
659  }
660  }
661  return {NONE, npos};
662  }
663 
664 public:
665 
666  /** true if the first character of the string is @p c */
667  bool begins_with(const C c) const
668  {
669  return len > 0 ? str[0] == c : false;
670  }
671 
672  /** true if the first @p num characters of the string are @p c */
673  bool begins_with(const C c, size_t num) const
674  {
675  if(len < num)
676  {
677  return false;
678  }
679  for(size_t i = 0; i < num; ++i)
680  {
681  if(str[i] != c)
682  {
683  return false;
684  }
685  }
686  return true;
687  }
688 
689  /** true if the string begins with the given @p pattern */
690  bool begins_with(ro_substr pattern) const
691  {
692  if(len < pattern.len)
693  {
694  return false;
695  }
696  for(size_t i = 0; i < pattern.len; ++i)
697  {
698  if(str[i] != pattern[i])
699  {
700  return false;
701  }
702  }
703  return true;
704  }
705 
706  /** true if the first character of the string is any of the given @p chars */
707  bool begins_with_any(ro_substr chars) const
708  {
709  if(len == 0)
710  {
711  return false;
712  }
713  for(size_t i = 0; i < chars.len; ++i)
714  {
715  if(str[0] == chars.str[i])
716  {
717  return true;
718  }
719  }
720  return false;
721  }
722 
723  /** true if the last character of the string is @p c */
724  bool ends_with(const C c) const
725  {
726  return len > 0 ? str[len-1] == c : false;
727  }
728 
729  /** true if the last @p num characters of the string are @p c */
730  bool ends_with(const C c, size_t num) const
731  {
732  if(len < num)
733  {
734  return false;
735  }
736  for(size_t i = len - num; i < len; ++i)
737  {
738  if(str[i] != c)
739  {
740  return false;
741  }
742  }
743  return true;
744  }
745 
746  /** true if the string ends with the given @p pattern */
747  bool ends_with(ro_substr pattern) const
748  {
749  if(len < pattern.len)
750  {
751  return false;
752  }
753  for(size_t i = 0, s = len-pattern.len; i < pattern.len; ++i)
754  {
755  if(str[s+i] != pattern[i])
756  {
757  return false;
758  }
759  }
760  return true;
761  }
762 
763  /** true if the last character of the string is any of the given @p chars */
764  bool ends_with_any(ro_substr chars) const
765  {
766  if(len == 0)
767  {
768  return false;
769  }
770  for(size_t i = 0; i < chars.len; ++i)
771  {
772  if(str[len - 1] == chars[i])
773  {
774  return true;
775  }
776  }
777  return false;
778  }
779 
780 public:
781 
782  /** @return the first position where c is found in the string, or npos if none is found */
783  size_t first_of(const C c, size_t start=0) const
784  {
785  C4_ASSERT(start == npos || (start >= 0 && start <= len));
786  for(size_t i = start; i < len; ++i)
787  {
788  if(str[i] == c)
789  return i;
790  }
791  return npos;
792  }
793 
794  /** @return the last position where c is found in the string, or npos if none is found */
795  size_t last_of(const C c, size_t start=npos) const
796  {
797  C4_ASSERT(start == npos || (start >= 0 && start <= len));
798  if(start == npos)
799  start = len;
800  for(size_t i = start-1; i != size_t(-1); --i)
801  {
802  if(str[i] == c)
803  return i;
804  }
805  return npos;
806  }
807 
808  /** @return the first position where ANY of the chars is found in the string, or npos if none is found */
809  size_t first_of(ro_substr chars, size_t start=0) const
810  {
811  C4_ASSERT(start == npos || (start >= 0 && start <= len));
812  for(size_t i = start; i < len; ++i)
813  {
814  for(size_t j = 0; j < chars.len; ++j)
815  {
816  if(str[i] == chars[j])
817  return i;
818  }
819  }
820  return npos;
821  }
822 
823  /** @return the last position where ANY of the chars is found in the string, or npos if none is found */
824  size_t last_of(ro_substr chars, size_t start=npos) const
825  {
826  C4_ASSERT(start == npos || (start >= 0 && start <= len));
827  if(start == npos)
828  start = len;
829  for(size_t i = start-1; i != size_t(-1); --i)
830  {
831  for(size_t j = 0; j < chars.len; ++j)
832  {
833  if(str[i] == chars[j])
834  return i;
835  }
836  }
837  return npos;
838  }
839 
840 public:
841 
842  size_t first_not_of(const C c) const
843  {
844  for(size_t i = 0; i < len; ++i)
845  {
846  if(str[i] != c)
847  return i;
848  }
849  return npos;
850  }
851 
852  size_t first_not_of(const C c, size_t start) const
853  {
854  C4_ASSERT((start >= 0 && start <= len) || (start == len && len == 0));
855  for(size_t i = start; i < len; ++i)
856  {
857  if(str[i] != c)
858  return i;
859  }
860  return npos;
861  }
862 
863  size_t last_not_of(const C c) const
864  {
865  for(size_t i = len-1; i != size_t(-1); --i)
866  {
867  if(str[i] != c)
868  return i;
869  }
870  return npos;
871  }
872 
873  size_t last_not_of(const C c, size_t start) const
874  {
875  C4_ASSERT(start == npos || (start >= 0 && start <= len));
876  if(start == npos)
877  start = len;
878  for(size_t i = start-1; i != size_t(-1); --i)
879  {
880  if(str[i] != c)
881  return i;
882  }
883  return npos;
884  }
885 
886  size_t first_not_of(ro_substr chars) const
887  {
888  for(size_t i = 0; i < len; ++i)
889  {
890  bool gotit = true;
891  for(size_t j = 0; j < chars.len; ++j)
892  {
893  if(str[i] == chars.str[j])
894  {
895  gotit = false;
896  break;
897  }
898  }
899  if(gotit)
900  {
901  return i;
902  }
903  }
904  return npos;
905  }
906 
907  size_t first_not_of(ro_substr chars, size_t start) const
908  {
909  C4_ASSERT((start >= 0 && start <= len) || (start == len && len == 0));
910  for(size_t i = start; i < len; ++i)
911  {
912  bool gotit = true;
913  for(size_t j = 0; j < chars.len; ++j)
914  {
915  if(str[i] == chars.str[j])
916  {
917  gotit = false;
918  break;
919  }
920  }
921  if(gotit)
922  {
923  return i;
924  }
925  }
926  return npos;
927  }
928 
929  size_t last_not_of(ro_substr chars) const
930  {
931  for(size_t i = len-1; i != size_t(-1); --i)
932  {
933  bool gotit = true;
934  for(size_t j = 0; j < chars.len; ++j)
935  {
936  if(str[i] == chars.str[j])
937  {
938  gotit = false;
939  break;
940  }
941  }
942  if(gotit)
943  {
944  return i;
945  }
946  }
947  return npos;
948  }
949 
950  size_t last_not_of(ro_substr chars, size_t start) const
951  {
952  C4_ASSERT(start == npos || (start >= 0 && start <= len));
953  if(start == npos)
954  start = len;
955  for(size_t i = start-1; i != size_t(-1); --i)
956  {
957  bool gotit = true;
958  for(size_t j = 0; j < chars.len; ++j)
959  {
960  if(str[i] == chars.str[j])
961  {
962  gotit = false;
963  break;
964  }
965  }
966  if(gotit)
967  {
968  return i;
969  }
970  }
971  return npos;
972  }
973 
974  /** @} */
975 
976 public:
977 
978  /** @name Range lookup methods */
979  /** @{ */
980 
981  /** get the range delimited by an open-close pair of characters.
982  * @note There must be no nested pairs.
983  * @note No checks for escapes are performed. */
984  basic_substring pair_range(CC open, CC close) const
985  {
986  size_t b = find(open);
987  if(b == npos)
988  return basic_substring();
989  size_t e = find(close, b+1);
990  if(e == npos)
991  return basic_substring();
992  basic_substring ret = range(b, e+1);
993  C4_ASSERT(ret.sub(1).find(open) == npos);
994  return ret;
995  }
996 
997  /** get the range delimited by a single open-close character (eg, quotes).
998  * @note The open-close character can be escaped. */
999  basic_substring pair_range_esc(CC open_close, CC escape=CC('\\'))
1000  {
1001  size_t b = find(open_close);
1002  if(b == npos) return basic_substring();
1003  for(size_t i = b+1; i < len; ++i)
1004  {
1005  CC c = str[i];
1006  if(c == open_close)
1007  {
1008  if(str[i-1] != escape)
1009  {
1010  return range(b, i+1);
1011  }
1012  }
1013  }
1014  return basic_substring();
1015  }
1016 
1017  /** get the range delimited by an open-close pair of characters,
1018  * with possibly nested occurrences. No checks for escapes are
1019  * performed. */
1021  {
1022  size_t b = find(open);
1023  if(b == npos) return basic_substring();
1024  size_t e, curr = b+1, count = 0;
1025  const char both[] = {open, close, '\0'};
1026  while((e = first_of(both, curr)) != npos)
1027  {
1028  if(str[e] == open)
1029  {
1030  ++count;
1031  curr = e+1;
1032  }
1033  else if(str[e] == close)
1034  {
1035  if(count == 0) return range(b, e+1);
1036  --count;
1037  curr = e+1;
1038  }
1039  }
1040  return basic_substring();
1041  }
1042 
1044  {
1045  constexpr const C dq('"'), sq('\'');
1046  if(len >= 2 && (str[len - 2] != C('\\')) &&
1047  ((begins_with(sq) && ends_with(sq))
1048  ||
1049  (begins_with(dq) && ends_with(dq))))
1050  {
1051  return range(1, len -1);
1052  }
1053  return *this;
1054  }
1055 
1056  /** @} */
1057 
1058 public:
1059 
1060  /** @name Number-matching query methods */
1061  /** @{ */
1062 
1063  /** @return true if the substring contents are a floating-point or integer number.
1064  * @note any leading or trailing whitespace will return false. */
1065  bool is_number() const
1066  {
1067  if(empty() || (first_non_empty_span().empty()))
1068  return false;
1069  if(first_uint_span() == *this)
1070  return true;
1071  if(first_int_span() == *this)
1072  return true;
1073  if(first_real_span() == *this)
1074  return true;
1075  return false;
1076  }
1077 
1078  /** @return true if the substring contents are a real number.
1079  * @note any leading or trailing whitespace will return false. */
1080  bool is_real() const
1081  {
1082  if(empty() || (first_non_empty_span().empty()))
1083  return false;
1084  if(first_real_span() == *this)
1085  return true;
1086  return false;
1087  }
1088 
1089  /** @return true if the substring contents are an integer number.
1090  * @note any leading or trailing whitespace will return false. */
1091  bool is_integer() const
1092  {
1093  if(empty() || (first_non_empty_span().empty()))
1094  return false;
1095  if(first_uint_span() == *this)
1096  return true;
1097  if(first_int_span() == *this)
1098  return true;
1099  return false;
1100  }
1101 
1102  /** @return true if the substring contents are an unsigned integer number.
1103  * @note any leading or trailing whitespace will return false. */
1104  bool is_unsigned_integer() const
1105  {
1106  if(empty() || (first_non_empty_span().empty()))
1107  return false;
1108  if(first_uint_span() == *this)
1109  return true;
1110  return false;
1111  }
1112 
1113  /** get the first span consisting exclusively of non-empty characters */
1115  {
1116  constexpr const ro_substr empty_chars(" \n\r\t");
1117  size_t pos = first_not_of(empty_chars);
1118  if(pos == npos)
1119  return first(0);
1120  auto ret = sub(pos);
1121  pos = ret.first_of(empty_chars);
1122  return ret.first(pos);
1123  }
1124 
1125  /** get the first span which can be interpreted as an unsigned integer */
1127  {
1128  basic_substring ne = first_non_empty_span();
1129  if(ne.empty())
1130  return ne;
1131  if(ne.str[0] == '-')
1132  return first(0);
1133  size_t skip_start = size_t(ne.str[0] == '+');
1134  return ne._first_integral_span(skip_start);
1135  }
1136 
1137  /** get the first span which can be interpreted as a signed integer */
1139  {
1140  basic_substring ne = first_non_empty_span();
1141  if(ne.empty())
1142  return ne;
1143  size_t skip_start = size_t(ne.str[0] == '+' || ne.str[0] == '-');
1144  return ne._first_integral_span(skip_start);
1145  }
1146 
1147  basic_substring _first_integral_span(size_t skip_start) const
1148  {
1149  C4_ASSERT(!empty());
1150  if(skip_start == len)
1151  return first(0);
1152  C4_ASSERT(skip_start < len);
1153  if(len >= skip_start + 3)
1154  {
1155  if(str[skip_start] != '0')
1156  {
1157  for(size_t i = skip_start; i < len; ++i)
1158  {
1159  char c = str[i];
1160  if(c < '0' || c > '9')
1161  return i > skip_start && _is_delim_char(c) ? first(i) : first(0);
1162  }
1163  }
1164  else
1165  {
1166  char next = str[skip_start + 1];
1167  if(next == 'x' || next == 'X')
1168  {
1169  skip_start += 2;
1170  for(size_t i = skip_start; i < len; ++i)
1171  {
1172  const char c = str[i];
1173  if( ! _is_hex_char(c))
1174  return i > skip_start && _is_delim_char(c) ? first(i) : first(0);
1175  }
1176  return *this;
1177  }
1178  else if(next == 'b' || next == 'B')
1179  {
1180  skip_start += 2;
1181  for(size_t i = skip_start; i < len; ++i)
1182  {
1183  const char c = str[i];
1184  if(c != '0' && c != '1')
1185  return i > skip_start && _is_delim_char(c) ? first(i) : first(0);
1186  }
1187  return *this;
1188  }
1189  else if(next == 'o' || next == 'O')
1190  {
1191  skip_start += 2;
1192  for(size_t i = skip_start; i < len; ++i)
1193  {
1194  const char c = str[i];
1195  if(c < '0' || c > '7')
1196  return i > skip_start && _is_delim_char(c) ? first(i) : first(0);
1197  }
1198  return *this;
1199  }
1200  }
1201  }
1202  // must be a decimal, or it is not a an number
1203  for(size_t i = skip_start; i < len; ++i)
1204  {
1205  const char c = str[i];
1206  if(c < '0' || c > '9')
1207  return i > skip_start && _is_delim_char(c) ? first(i) : first(0);
1208  }
1209  return *this;
1210  }
1211 
1212  /** get the first span which can be interpreted as a real (floating-point) number */
1214  {
1215  basic_substring ne = first_non_empty_span();
1216  if(ne.empty())
1217  return ne;
1218  const size_t skip_start = (ne.str[0] == '+' || ne.str[0] == '-');
1219  C4_ASSERT(skip_start == 0 || skip_start == 1);
1220  // if we have at least three digits after the leading sign, it
1221  // can be decimal, or hex, or bin or oct. Ex:
1222  // non-decimal: 0x0, 0b0, 0o0
1223  // decimal: 1.0, 10., 1e1, 100, inf, nan, infinity
1224  if(ne.len >= skip_start+3)
1225  {
1226  // if it does not have leading 0, it must be decimal, or it is not a real
1227  if(ne.str[skip_start] != '0')
1228  {
1229  if(ne.str[skip_start] == 'i') // is it infinity or inf?
1230  {
1231  basic_substring word = ne._word_follows(skip_start + 1, "nfinity");
1232  if(word.len)
1233  return word;
1234  return ne._word_follows(skip_start + 1, "nf");
1235  }
1236  else if(ne.str[skip_start] == 'n') // is it nan?
1237  {
1238  return ne._word_follows(skip_start + 1, "an");
1239  }
1240  else // must be a decimal, or it is not a real
1241  {
1242  return ne._first_real_span_dec(skip_start);
1243  }
1244  }
1245  else // starts with 0. is it 0x, 0b or 0o?
1246  {
1247  const char next = ne.str[skip_start + 1];
1248  // hexadecimal
1249  if(next == 'x' || next == 'X')
1250  return ne._first_real_span_hex(skip_start + 2);
1251  // binary
1252  else if(next == 'b' || next == 'B')
1253  return ne._first_real_span_bin(skip_start + 2);
1254  // octal
1255  else if(next == 'o' || next == 'O')
1256  return ne._first_real_span_oct(skip_start + 2);
1257  // none of the above. may still be a decimal.
1258  else
1259  return ne._first_real_span_dec(skip_start); // do not skip the 0.
1260  }
1261  }
1262  // less than 3 chars after the leading sign. It is either a
1263  // decimal or it is not a real. (cannot be any of 0x0, etc).
1264  return ne._first_real_span_dec(skip_start);
1265  }
1266 
1267  /** true if the character is a delimiter character *at the end* */
1268  static constexpr C4_ALWAYS_INLINE C4_CONST bool _is_delim_char(char c) noexcept
1269  {
1270  return c == ' ' || c == '\n'
1271  || c == ']' || c == ')' || c == '}'
1272  || c == ',' || c == ';' || c == '\r' || c == '\t' || c == '\0';
1273  }
1274 
1275  /** true if the character is in [0-9a-fA-F] */
1276  static constexpr C4_ALWAYS_INLINE C4_CONST bool _is_hex_char(char c) noexcept
1277  {
1278  return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
1279  }
1280 
1281  C4_NO_INLINE C4_PURE basic_substring _word_follows(size_t pos, csubstr word) const noexcept
1282  {
1283  size_t posend = pos + word.len;
1284  if(len >= posend && sub(pos, word.len) == word)
1285  if(len == posend || _is_delim_char(str[posend]))
1286  return first(posend);
1287  return first(0);
1288  }
1289 
1290  // this function is declared inside the class to avoid a VS error with __declspec(dllimport)
1291  C4_NO_INLINE C4_PURE basic_substring _first_real_span_dec(size_t pos) const noexcept
1292  {
1293  bool intchars = false;
1294  bool fracchars = false;
1295  bool powchars;
1296  // integral part
1297  for( ; pos < len; ++pos)
1298  {
1299  const char c = str[pos];
1300  if(c >= '0' && c <= '9')
1301  {
1302  intchars = true;
1303  }
1304  else if(c == '.')
1305  {
1306  ++pos;
1307  goto fractional_part_dec; // NOLINT
1308  }
1309  else if(c == 'e' || c == 'E')
1310  {
1311  ++pos;
1312  goto power_part_dec; // NOLINT
1313  }
1314  else if(_is_delim_char(c))
1315  {
1316  return intchars ? first(pos) : first(0);
1317  }
1318  else
1319  {
1320  return first(0);
1321  }
1322  }
1323  // no . or p were found; this is either an integral number
1324  // or not a number at all
1325  return intchars ?
1326  *this :
1327  first(0);
1328  fractional_part_dec:
1329  C4_ASSERT(pos > 0);
1330  C4_ASSERT(str[pos - 1] == '.');
1331  for( ; pos < len; ++pos)
1332  {
1333  const char c = str[pos];
1334  if(c >= '0' && c <= '9')
1335  {
1336  fracchars = true;
1337  }
1338  else if(c == 'e' || c == 'E')
1339  {
1340  ++pos;
1341  goto power_part_dec; // NOLINT
1342  }
1343  else if(_is_delim_char(c))
1344  {
1345  return intchars || fracchars ? first(pos) : first(0);
1346  }
1347  else
1348  {
1349  return first(0);
1350  }
1351  }
1352  return intchars || fracchars ?
1353  *this :
1354  first(0);
1355  power_part_dec:
1356  C4_ASSERT(pos > 0);
1357  C4_ASSERT(str[pos - 1] == 'e' || str[pos - 1] == 'E');
1358  // either digits, or +, or - are expected here, followed by more digits.
1359  if((len == pos) || ((!intchars) && (!fracchars)))
1360  return first(0);
1361  if(str[pos] == '-' || str[pos] == '+')
1362  ++pos; // skip the sign
1363  powchars = false;
1364  for( ; pos < len; ++pos)
1365  {
1366  const char c = str[pos];
1367  if(c >= '0' && c <= '9')
1368  powchars = true;
1369  else if(powchars && _is_delim_char(c))
1370  return first(pos);
1371  else
1372  return first(0);
1373  }
1374  return powchars ? *this : first(0);
1375  }
1376 
1377  // this function is declared inside the class to avoid a VS error with __declspec(dllimport)
1378  C4_NO_INLINE C4_PURE basic_substring _first_real_span_hex(size_t pos) const noexcept
1379  {
1380  bool intchars = false;
1381  bool fracchars = false;
1382  bool powchars;
1383  // integral part
1384  for( ; pos < len; ++pos)
1385  {
1386  const char c = str[pos];
1387  if(_is_hex_char(c))
1388  {
1389  intchars = true;
1390  }
1391  else if(c == '.')
1392  {
1393  ++pos;
1394  goto fractional_part_hex; // NOLINT
1395  }
1396  else if(c == 'p' || c == 'P')
1397  {
1398  ++pos;
1399  goto power_part_hex; // NOLINT
1400  }
1401  else if(_is_delim_char(c))
1402  {
1403  return intchars ? first(pos) : first(0);
1404  }
1405  else
1406  {
1407  return first(0);
1408  }
1409  }
1410  // no . or p were found; this is either an integral number
1411  // or not a number at all
1412  return intchars ?
1413  *this :
1414  first(0);
1415  fractional_part_hex:
1416  C4_ASSERT(pos > 0);
1417  C4_ASSERT(str[pos - 1] == '.');
1418  for( ; pos < len; ++pos)
1419  {
1420  const char c = str[pos];
1421  if(_is_hex_char(c))
1422  {
1423  fracchars = true;
1424  }
1425  else if(c == 'p' || c == 'P')
1426  {
1427  ++pos;
1428  goto power_part_hex; // NOLINT
1429  }
1430  else if(_is_delim_char(c))
1431  {
1432  return intchars || fracchars ? first(pos) : first(0);
1433  }
1434  else
1435  {
1436  return first(0);
1437  }
1438  }
1439  return intchars || fracchars ?
1440  *this :
1441  first(0);
1442  power_part_hex:
1443  C4_ASSERT(pos > 0);
1444  C4_ASSERT(str[pos - 1] == 'p' || str[pos - 1] == 'P');
1445  // either a + or a - is expected here, followed by more chars.
1446  // also, using (pos+1) in this check will cause an early
1447  // return when no more chars follow the sign.
1448  if(len <= (pos+1) || (str[pos] != '+' && str[pos] != '-') || ((!intchars) && (!fracchars)))
1449  return first(0);
1450  ++pos; // this was the sign.
1451  // ... so the (pos+1) ensures that we enter the loop and
1452  // hence that there exist chars in the power part
1453  powchars = false;
1454  for( ; pos < len; ++pos)
1455  {
1456  const char c = str[pos];
1457  if(c >= '0' && c <= '9')
1458  powchars = true;
1459  else if(powchars && _is_delim_char(c))
1460  return first(pos);
1461  else
1462  return first(0);
1463  }
1464  return *this;
1465  }
1466 
1467  // this function is declared inside the class to avoid a VS error with __declspec(dllimport)
1468  C4_NO_INLINE C4_PURE basic_substring _first_real_span_bin(size_t pos) const noexcept
1469  {
1470  bool intchars = false;
1471  bool fracchars = false;
1472  bool powchars;
1473  // integral part
1474  for( ; pos < len; ++pos)
1475  {
1476  const char c = str[pos];
1477  if(c == '0' || c == '1')
1478  {
1479  intchars = true;
1480  }
1481  else if(c == '.')
1482  {
1483  ++pos;
1484  goto fractional_part_bin; // NOLINT
1485  }
1486  else if(c == 'p' || c == 'P')
1487  {
1488  ++pos;
1489  goto power_part_bin; // NOLINT
1490  }
1491  else if(_is_delim_char(c))
1492  {
1493  return intchars ? first(pos) : first(0);
1494  }
1495  else
1496  {
1497  return first(0);
1498  }
1499  }
1500  // no . or p were found; this is either an integral number
1501  // or not a number at all
1502  return intchars ?
1503  *this :
1504  first(0);
1505  fractional_part_bin:
1506  C4_ASSERT(pos > 0);
1507  C4_ASSERT(str[pos - 1] == '.');
1508  for( ; pos < len; ++pos)
1509  {
1510  const char c = str[pos];
1511  if(c == '0' || c == '1')
1512  {
1513  fracchars = true;
1514  }
1515  else if(c == 'p' || c == 'P')
1516  {
1517  ++pos;
1518  goto power_part_bin; // NOLINT
1519  }
1520  else if(_is_delim_char(c))
1521  {
1522  return intchars || fracchars ? first(pos) : first(0);
1523  }
1524  else
1525  {
1526  return first(0);
1527  }
1528  }
1529  return intchars || fracchars ?
1530  *this :
1531  first(0);
1532  power_part_bin:
1533  C4_ASSERT(pos > 0);
1534  C4_ASSERT(str[pos - 1] == 'p' || str[pos - 1] == 'P');
1535  // either a + or a - is expected here, followed by more chars.
1536  // also, using (pos+1) in this check will cause an early
1537  // return when no more chars follow the sign.
1538  if(len <= (pos+1) || (str[pos] != '+' && str[pos] != '-') || ((!intchars) && (!fracchars)))
1539  return first(0);
1540  ++pos; // this was the sign.
1541  // ... so the (pos+1) ensures that we enter the loop and
1542  // hence that there exist chars in the power part
1543  powchars = false;
1544  for( ; pos < len; ++pos)
1545  {
1546  const char c = str[pos];
1547  if(c >= '0' && c <= '9')
1548  powchars = true;
1549  else if(powchars && _is_delim_char(c))
1550  return first(pos);
1551  else
1552  return first(0);
1553  }
1554  return *this;
1555  }
1556 
1557  // this function is declared inside the class to avoid a VS error with __declspec(dllimport)
1558  C4_NO_INLINE C4_PURE basic_substring _first_real_span_oct(size_t pos) const noexcept
1559  {
1560  bool intchars = false;
1561  bool fracchars = false;
1562  bool powchars;
1563  // integral part
1564  for( ; pos < len; ++pos)
1565  {
1566  const char c = str[pos];
1567  if(c >= '0' && c <= '7')
1568  {
1569  intchars = true;
1570  }
1571  else if(c == '.')
1572  {
1573  ++pos;
1574  goto fractional_part_oct; // NOLINT
1575  }
1576  else if(c == 'p' || c == 'P')
1577  {
1578  ++pos;
1579  goto power_part_oct; // NOLINT
1580  }
1581  else if(_is_delim_char(c))
1582  {
1583  return intchars ? first(pos) : first(0);
1584  }
1585  else
1586  {
1587  return first(0);
1588  }
1589  }
1590  // no . or p were found; this is either an integral number
1591  // or not a number at all
1592  return intchars ?
1593  *this :
1594  first(0);
1595  fractional_part_oct:
1596  C4_ASSERT(pos > 0);
1597  C4_ASSERT(str[pos - 1] == '.');
1598  for( ; pos < len; ++pos)
1599  {
1600  const char c = str[pos];
1601  if(c >= '0' && c <= '7')
1602  {
1603  fracchars = true;
1604  }
1605  else if(c == 'p' || c == 'P')
1606  {
1607  ++pos;
1608  goto power_part_oct; // NOLINT
1609  }
1610  else if(_is_delim_char(c))
1611  {
1612  return intchars || fracchars ? first(pos) : first(0);
1613  }
1614  else
1615  {
1616  return first(0);
1617  }
1618  }
1619  return intchars || fracchars ?
1620  *this :
1621  first(0);
1622  power_part_oct:
1623  C4_ASSERT(pos > 0);
1624  C4_ASSERT(str[pos - 1] == 'p' || str[pos - 1] == 'P');
1625  // either a + or a - is expected here, followed by more chars.
1626  // also, using (pos+1) in this check will cause an early
1627  // return when no more chars follow the sign.
1628  if(len <= (pos+1) || (str[pos] != '+' && str[pos] != '-') || ((!intchars) && (!fracchars)))
1629  return first(0);
1630  ++pos; // this was the sign.
1631  // ... so the (pos+1) ensures that we enter the loop and
1632  // hence that there exist chars in the power part
1633  powchars = false;
1634  for( ; pos < len; ++pos)
1635  {
1636  const char c = str[pos];
1637  if(c >= '0' && c <= '9')
1638  powchars = true;
1639  else if(powchars && _is_delim_char(c))
1640  return first(pos);
1641  else
1642  return first(0);
1643  }
1644  return *this;
1645  }
1646 
1647  /** @} */
1648 
1649 public:
1650 
1651  /** @name Splitting methods */
1652  /** @{ */
1653 
1654  /** returns true if the string has not been exhausted yet, meaning
1655  * it's ok to call next_split() again. When no instance of sep
1656  * exists in the string, returns the full string. When the input
1657  * is an empty string, the output string is the empty string. */
1658  bool next_split(C sep, size_t *C4_RESTRICT start_pos, basic_substring *C4_RESTRICT out) const
1659  {
1660  if(C4_LIKELY(*start_pos < len))
1661  {
1662  for(size_t i = *start_pos; i < len; i++)
1663  {
1664  if(str[i] == sep)
1665  {
1666  out->assign(str + *start_pos, i - *start_pos);
1667  *start_pos = i+1;
1668  return true;
1669  }
1670  }
1671  out->assign(str + *start_pos, len - *start_pos);
1672  *start_pos = len + 1;
1673  return true;
1674  }
1675  else
1676  {
1677  bool valid = len > 0 && (*start_pos == len);
1678  if(valid && str && str[len-1] == sep)
1679  {
1680  out->assign(str + len, size_t(0)); // the cast is needed to prevent overload ambiguity
1681  }
1682  else
1683  {
1684  out->assign(str + len + 1, size_t(0)); // the cast is needed to prevent overload ambiguity
1685  }
1686  *start_pos = len + 1;
1687  return valid;
1688  }
1689  }
1690 
1691 private:
1692 
1693  struct split_proxy_impl
1694  {
1696  {
1697  split_proxy_impl const* m_proxy;
1699  size_t m_pos;
1701 
1702  split_iterator_impl(split_proxy_impl const* proxy, size_t pos, C sep)
1703  : m_proxy(proxy), m_pos(pos), m_sep(sep)
1704  {
1705  _tick();
1706  }
1707 
1708  void _tick()
1709  {
1710  m_proxy->m_str.next_split(m_sep, &m_pos, &m_str);
1711  }
1712 
1713  split_iterator_impl& operator++ () { _tick(); return *this; }
1714  split_iterator_impl operator++ (int) { split_iterator_impl it = *this; _tick(); return it; } // NOLINT
1715 
1716  basic_substring& operator* () { return m_str; }
1717  basic_substring* operator-> () { return &m_str; }
1718 
1719  bool operator!= (split_iterator_impl const& that) const
1720  {
1721  return !(this->operator==(that));
1722  }
1723  bool operator== (split_iterator_impl const& that) const
1724  {
1725  C4_XASSERT((m_sep == that.m_sep) && "cannot compare split iterators with different separators");
1726  if(m_str.size() != that.m_str.size())
1727  return false;
1728  if(m_str.data() != that.m_str.data())
1729  return false;
1730  return m_pos == that.m_pos;
1731  }
1732  };
1733 
1734  basic_substring m_str;
1735  size_t m_start_pos;
1736  C m_sep;
1737 
1738  split_proxy_impl(basic_substring str_, size_t start_pos, C sep)
1739  : m_str(str_), m_start_pos(start_pos), m_sep(sep)
1740  {
1741  }
1742 
1743  split_iterator_impl begin() const
1744  {
1745  auto it = split_iterator_impl(this, m_start_pos, m_sep);
1746  return it;
1747  }
1748  split_iterator_impl end() const
1749  {
1750  size_t pos = m_str.size() + 1;
1751  auto it = split_iterator_impl(this, pos, m_sep);
1752  return it;
1753  }
1754  };
1755 
1756 public:
1757 
1758  using split_proxy = split_proxy_impl;
1759 
1760  /** a view into the splits */
1761  split_proxy split(C sep, size_t start_pos=0) const
1762  {
1763  C4_XASSERT((start_pos >= 0 && start_pos < len) || empty());
1764  auto ss = sub(0, len);
1765  auto it = split_proxy(ss, start_pos, sep);
1766  return it;
1767  }
1768 
1769 public:
1770 
1771  /** pop right: return the first split from the right. Use
1772  * gpop_left() to get the reciprocal part.
1773  */
1774  basic_substring pop_right(C sep=C('/'), bool skip_empty=false) const
1775  {
1776  if(C4_LIKELY(len > 1))
1777  {
1778  auto pos = last_of(sep);
1779  if(pos != npos)
1780  {
1781  if(pos + 1 < len) // does not end with sep
1782  {
1783  return sub(pos + 1); // return from sep to end
1784  }
1785  else // the string ends with sep
1786  {
1787  if( ! skip_empty)
1788  {
1789  return sub(pos + 1, 0);
1790  }
1791  auto ppos = last_not_of(sep); // skip repeated seps
1792  if(ppos == npos) // the string is all made of seps
1793  {
1794  return sub(0, 0);
1795  }
1796  // find the previous sep
1797  auto pos0 = last_of(sep, ppos);
1798  if(pos0 == npos) // only the last sep exists
1799  {
1800  return sub(0); // return the full string (because skip_empty is true)
1801  }
1802  ++pos0;
1803  return sub(pos0);
1804  }
1805  }
1806  else // no sep was found, return the full string
1807  {
1808  return *this;
1809  }
1810  }
1811  else if(len == 1)
1812  {
1813  if(begins_with(sep))
1814  {
1815  return sub(0, 0);
1816  }
1817  return *this;
1818  }
1819  else // an empty string
1820  {
1821  return basic_substring();
1822  }
1823  }
1824 
1825  /** return the first split from the left. Use gpop_right() to get
1826  * the reciprocal part. */
1827  basic_substring pop_left(C sep = C('/'), bool skip_empty=false) const
1828  {
1829  if(C4_LIKELY(len > 1))
1830  {
1831  auto pos = first_of(sep);
1832  if(pos != npos)
1833  {
1834  if(pos > 0) // does not start with sep
1835  {
1836  return sub(0, pos); // return everything up to it
1837  }
1838  else // the string starts with sep
1839  {
1840  if( ! skip_empty)
1841  {
1842  return sub(0, 0);
1843  }
1844  auto ppos = first_not_of(sep); // skip repeated seps
1845  if(ppos == npos) // the string is all made of seps
1846  {
1847  return sub(0, 0);
1848  }
1849  // find the next sep
1850  auto pos0 = first_of(sep, ppos);
1851  if(pos0 == npos) // only the first sep exists
1852  {
1853  return sub(0); // return the full string (because skip_empty is true)
1854  }
1855  C4_XASSERT(pos0 > 0);
1856  // return everything up to the second sep
1857  return sub(0, pos0);
1858  }
1859  }
1860  else // no sep was found, return the full string
1861  {
1862  return sub(0);
1863  }
1864  }
1865  else if(len == 1)
1866  {
1867  if(begins_with(sep))
1868  {
1869  return sub(0, 0);
1870  }
1871  return sub(0);
1872  }
1873  else // an empty string
1874  {
1875  return basic_substring();
1876  }
1877  }
1878 
1879 public:
1880 
1881  /** greedy pop left. eg, csubstr("a/b/c").gpop_left('/')="c" */
1882  basic_substring gpop_left(C sep = C('/'), bool skip_empty=false) const
1883  {
1884  auto ss = pop_right(sep, skip_empty);
1885  ss = left_of(ss);
1886  if(ss.find(sep) != npos)
1887  {
1888  if(ss.ends_with(sep))
1889  {
1890  if(skip_empty)
1891  {
1892  ss = ss.trimr(sep);
1893  }
1894  else
1895  {
1896  ss = ss.sub(0, ss.len-1); // safe to subtract because ends_with(sep) is true
1897  }
1898  }
1899  }
1900  return ss;
1901  }
1902 
1903  /** greedy pop right. eg, csubstr("a/b/c").gpop_right('/')="a" */
1904  basic_substring gpop_right(C sep = C('/'), bool skip_empty=false) const
1905  {
1906  auto ss = pop_left(sep, skip_empty);
1907  ss = right_of(ss);
1908  if(ss.find(sep) != npos)
1909  {
1910  if(ss.begins_with(sep))
1911  {
1912  if(skip_empty)
1913  {
1914  ss = ss.triml(sep);
1915  }
1916  else
1917  {
1918  ss = ss.sub(1);
1919  }
1920  }
1921  }
1922  return ss;
1923  }
1924 
1925  /** @} */
1926 
1927 public:
1928 
1929  /** @name Path-like manipulation methods */
1930  /** @{ */
1931 
1932  basic_substring basename(C sep=C('/')) const
1933  {
1934  auto ss = pop_right(sep, /*skip_empty*/true);
1935  ss = ss.trimr(sep);
1936  return ss;
1937  }
1938 
1939  basic_substring dirname(C sep=C('/')) const
1940  {
1941  auto ss = basename(sep);
1942  ss = ss.empty() ? *this : left_of(ss);
1943  return ss;
1944  }
1945 
1946  C4_ALWAYS_INLINE basic_substring name_wo_extshort() const
1947  {
1948  return gpop_left('.');
1949  }
1950 
1951  C4_ALWAYS_INLINE basic_substring name_wo_extlong() const
1952  {
1953  return pop_left('.');
1954  }
1955 
1956  C4_ALWAYS_INLINE basic_substring extshort() const
1957  {
1958  return pop_right('.');
1959  }
1960 
1961  C4_ALWAYS_INLINE basic_substring extlong() const
1962  {
1963  return gpop_right('.');
1964  }
1965 
1966  /** @} */
1967 
1968 public:
1969 
1970  /** @name Content-modification methods (only for non-const C) */
1971  /** @{ */
1972 
1973  /** convert the string to upper-case
1974  * @note this method requires that the string memory is writeable and is SFINAEd out for const C */
1975  C4_REQUIRE_RW(void) toupper()
1976  {
1977  for(size_t i = 0; i < len; ++i)
1978  {
1979  str[i] = static_cast<C>(::toupper(str[i]));
1980  }
1981  }
1982 
1983  /** convert the string to lower-case
1984  * @note this method requires that the string memory is writeable and is SFINAEd out for const C */
1985  C4_REQUIRE_RW(void) tolower()
1986  {
1987  for(size_t i = 0; i < len; ++i)
1988  {
1989  str[i] = static_cast<C>(::tolower(str[i]));
1990  }
1991  }
1992 
1993 public:
1994 
1995  /** fill the entire contents with the given @p val
1996  * @note this method requires that the string memory is writeable and is SFINAEd out for const C */
1997  C4_REQUIRE_RW(void) fill(C val)
1998  {
1999  for(size_t i = 0; i < len; ++i)
2000  {
2001  str[i] = val;
2002  }
2003  }
2004 
2005 public:
2006 
2007  /** set the current substring to a copy of the given csubstr
2008  * @note this method requires that the string memory is writeable and is SFINAEd out for const C */
2009  C4_REQUIRE_RW(void) copy_from(ro_substr that, size_t ifirst=0, size_t num=npos)
2010  {
2011  C4_ASSERT(ifirst >= 0 && ifirst <= len);
2012  num = num != npos ? num : len - ifirst;
2013  num = num < that.len ? num : that.len;
2014  C4_ASSERT(ifirst + num >= 0 && ifirst + num <= len);
2015  // calling memcpy with null strings is undefined behavior
2016  // and will wreak havoc in calling code's branches.
2017  // see https://github.com/biojppm/rapidyaml/pull/264#issuecomment-1262133637
2018  if(num)
2019  memcpy(str + sizeof(C) * ifirst, that.str, sizeof(C) * num);
2020  }
2021 
2022 public:
2023 
2024  /** reverse in place
2025  * @note this method requires that the string memory is writeable and is SFINAEd out for const C */
2026  C4_REQUIRE_RW(void) reverse()
2027  {
2028  if(len == 0) return;
2029  detail::_do_reverse(str, str + len - 1);
2030  }
2031 
2032  /** revert a subpart in place
2033  * @note this method requires that the string memory is writeable and is SFINAEd out for const C */
2034  C4_REQUIRE_RW(void) reverse_sub(size_t ifirst, size_t num)
2035  {
2036  C4_ASSERT(ifirst >= 0 && ifirst <= len);
2037  C4_ASSERT(ifirst + num >= 0 && ifirst + num <= len);
2038  if(num == 0) return;
2039  detail::_do_reverse(str + ifirst, str + ifirst + num - 1);
2040  }
2041 
2042  /** revert a range in place
2043  * @note this method requires that the string memory is writeable and is SFINAEd out for const C */
2044  C4_REQUIRE_RW(void) reverse_range(size_t ifirst, size_t ilast)
2045  {
2046  C4_ASSERT(ifirst >= 0 && ifirst <= len);
2047  C4_ASSERT(ilast >= 0 && ilast <= len);
2048  if(ifirst == ilast) return;
2049  detail::_do_reverse(str + ifirst, str + ilast - 1);
2050  }
2051 
2052 public:
2053 
2054  /** erase part of the string. eg, with char s[] = "0123456789",
2055  * substr(s).erase(3, 2) = "01256789", and s is now "01245678989"
2056  * @note this method requires that the string memory is writeable and is SFINAEd out for const C */
2057  C4_REQUIRE_RW(basic_substring) erase(size_t pos, size_t num)
2058  {
2059  C4_ASSERT(pos >= 0 && pos+num <= len);
2060  size_t num_to_move = len - pos - num;
2061  memmove(str + pos, str + pos + num, sizeof(C) * num_to_move);
2062  return basic_substring{str, len - num};
2063  }
2064 
2065  /** @note this method requires that the string memory is writeable and is SFINAEd out for const C */
2066  C4_REQUIRE_RW(basic_substring) erase_range(size_t first, size_t last)
2067  {
2068  C4_ASSERT(first <= last);
2069  return erase(first, static_cast<size_t>(last-first)); // NOLINT
2070  }
2071 
2072  /** erase a part of the string.
2073  * @note @p sub must be a substring of this string
2074  * @note this method requires that the string memory is writeable and is SFINAEd out for const C */
2075  C4_REQUIRE_RW(basic_substring) erase(ro_substr sub)
2076  {
2077  C4_ASSERT(is_super(sub));
2078  C4_ASSERT(sub.str >= str);
2079  return erase(static_cast<size_t>(sub.str - str), sub.len);
2080  }
2081 
2082 public:
2083 
2084  /** replace every occurrence of character @p value with the character @p repl
2085  * @return the number of characters that were replaced
2086  * @note this method requires that the string memory is writeable and is SFINAEd out for const C */
2087  C4_REQUIRE_RW(size_t) replace(C value, C repl, size_t pos=0)
2088  {
2089  C4_ASSERT((pos >= 0 && pos <= len) || pos == npos);
2090  size_t did_it = 0;
2091  while((pos = find(value, pos)) != npos)
2092  {
2093  str[pos++] = repl;
2094  ++did_it;
2095  }
2096  return did_it;
2097  }
2098 
2099  /** replace every occurrence of each character in @p value with
2100  * the character @p repl.
2101  * @return the number of characters that were replaced
2102  * @note this method requires that the string memory is writeable and is SFINAEd out for const C */
2103  C4_REQUIRE_RW(size_t) replace(ro_substr chars, C repl, size_t pos=0)
2104  {
2105  C4_ASSERT((pos >= 0 && pos <= len) || pos == npos);
2106  size_t did_it = 0;
2107  while((pos = first_of(chars, pos)) != npos)
2108  {
2109  str[pos++] = repl;
2110  ++did_it;
2111  }
2112  return did_it;
2113  }
2114 
2115  /** replace @p pattern with @p repl, and write the result into
2116  * @p dst. pattern and repl don't need equal sizes.
2117  *
2118  * @return the required size for dst. No overflow occurs if
2119  * dst.len is smaller than the required size; this can be used to
2120  * determine the required size for an existing container. */
2121  size_t replace_all(rw_substr dst, ro_substr pattern, ro_substr repl, size_t pos=0) const
2122  {
2123  C4_ASSERT( ! pattern.empty()); //!< @todo relax this precondition
2124  C4_ASSERT( ! this ->overlaps(dst)); //!< @todo relax this precondition
2125  C4_ASSERT( ! pattern.overlaps(dst));
2126  C4_ASSERT( ! repl .overlaps(dst));
2127  C4_ASSERT((pos >= 0 && pos <= len) || pos == npos);
2128  C4_SUPPRESS_WARNING_GCC_PUSH
2129  C4_SUPPRESS_WARNING_GCC("-Warray-bounds") // gcc11 has a false positive here
2130  #if (!defined(__clang__)) && (defined(__GNUC__) && (__GNUC__ >= 7))
2131  C4_SUPPRESS_WARNING_GCC("-Wstringop-overflow") // gcc11 has a false positive here
2132  #endif
2133  #define _c4append(first, last) \
2134  { \
2135  C4_ASSERT((last) >= (first)); \
2136  size_t num = static_cast<size_t>((last) - (first)); \
2137  if(num > 0 && sz + num <= dst.len) \
2138  { \
2139  memcpy(dst.str + sz, first, num * sizeof(C)); \
2140  } \
2141  sz += num; \
2142  }
2143  size_t sz = 0;
2144  size_t b = pos;
2145  _c4append(str, str + pos);
2146  do {
2147  size_t e = find(pattern, b);
2148  if(e == npos)
2149  {
2150  _c4append(str + b, str + len);
2151  break;
2152  }
2153  _c4append(str + b, str + e);
2154  _c4append(repl.begin(), repl.end());
2155  b = e + pattern.size();
2156  } while(b < len && b != npos);
2157  return sz;
2158  #undef _c4append
2159  C4_SUPPRESS_WARNING_GCC_POP
2160  }
2161 
2162  /** @} */
2163 
2164 }; // template class basic_substring
2165 
2166 
2167 #undef C4_REQUIRE_RW
2168 
2169 
2170 //-----------------------------------------------------------------------------
2171 //-----------------------------------------------------------------------------
2172 //-----------------------------------------------------------------------------
2173 
2174 
/** @defgroup doc_substr_adapters substr adapters
 *
 * to_substr() and to_csubstr() are used in generic code like
 * format(), and allow adding construction of substrings from new
 * types like containers.
 * @{ */
2181 
2182 
2183 /** neutral version for use in generic code */
2184 C4_ALWAYS_INLINE substr to_substr(substr s) noexcept { return s; }
2185 /** neutral version for use in generic code */
2186 C4_ALWAYS_INLINE csubstr to_csubstr(substr s) noexcept { return s; }
2187 /** neutral version for use in generic code */
2188 C4_ALWAYS_INLINE csubstr to_csubstr(csubstr s) noexcept { return s; }
2189 
2190 
2191 template<size_t N>
2192 C4_ALWAYS_INLINE substr
2193 to_substr(char (&s)[N]) noexcept { substr ss(s, N-1); return ss; }
2194 template<size_t N>
2195 C4_ALWAYS_INLINE csubstr
2196 to_csubstr(const char (&s)[N]) noexcept { csubstr ss(s, N-1); return ss; }
2197 
2198 
2199 /** @note this overload uses SFINAE to prevent it from overriding the array overload
2200  * @see For a more detailed explanation on why the plain overloads cannot
2201  * coexist, see http://cplusplus.bordoon.com/specializeForCharacterArrays.html */
2202 template<class U>
2203 C4_ALWAYS_INLINE typename std::enable_if<std::is_same<U, char*>::value, substr>::type
2204 to_substr(U s) noexcept { substr ss(s); return ss; }
2205 /** @note this overload uses SFINAE to prevent it from overriding the array overload
2206  * @see For a more detailed explanation on why the plain overloads cannot
2207  * coexist, see http://cplusplus.bordoon.com/specializeForCharacterArrays.html */
2208 template<class U>
2209 C4_ALWAYS_INLINE typename std::enable_if<std::is_same<U, const char*>::value || std::is_same<U, char*>::value, csubstr>::type
2210 to_csubstr(U s) noexcept { csubstr ss(s); return ss; }
2211 
2212 
2213 /** @} */
2214 
2215 
2216 //-----------------------------------------------------------------------------
2217 //-----------------------------------------------------------------------------
2218 //-----------------------------------------------------------------------------
2219 
2220 /** @defgroup doc_substr_cmp substr comparison operators
2221  * @{ */
2222 
2223 template<typename C, size_t N> inline bool operator== (const char (&s)[N], basic_substring<C> const that) noexcept { return that.compare(s, N-1) == 0; }
2224 template<typename C, size_t N> inline bool operator!= (const char (&s)[N], basic_substring<C> const that) noexcept { return that.compare(s, N-1) != 0; }
2225 template<typename C, size_t N> inline bool operator< (const char (&s)[N], basic_substring<C> const that) noexcept { return that.compare(s, N-1) > 0; }
2226 template<typename C, size_t N> inline bool operator> (const char (&s)[N], basic_substring<C> const that) noexcept { return that.compare(s, N-1) < 0; }
2227 template<typename C, size_t N> inline bool operator<= (const char (&s)[N], basic_substring<C> const that) noexcept { return that.compare(s, N-1) >= 0; }
2228 template<typename C, size_t N> inline bool operator>= (const char (&s)[N], basic_substring<C> const that) noexcept { return that.compare(s, N-1) <= 0; }
2229 
2230 template<typename C> inline bool operator== (const char c, basic_substring<C> const that) noexcept { return that.compare(c) == 0; }
2231 template<typename C> inline bool operator!= (const char c, basic_substring<C> const that) noexcept { return that.compare(c) != 0; }
2232 template<typename C> inline bool operator< (const char c, basic_substring<C> const that) noexcept { return that.compare(c) > 0; }
2233 template<typename C> inline bool operator> (const char c, basic_substring<C> const that) noexcept { return that.compare(c) < 0; }
2234 template<typename C> inline bool operator<= (const char c, basic_substring<C> const that) noexcept { return that.compare(c) >= 0; }
2235 template<typename C> inline bool operator>= (const char c, basic_substring<C> const that) noexcept { return that.compare(c) <= 0; }
2236 
2237 /** @} */
2238 
2239 
2240 //-----------------------------------------------------------------------------
2241 //-----------------------------------------------------------------------------
2242 //-----------------------------------------------------------------------------
2243 
2244 /* C4_SUBSTR_NO_OSTREAM_LSHIFT doctest does not deal well with
2245  * template operator<<
2246  * @see https://github.com/onqtam/doctest/pull/431 */
2247 #ifndef C4_SUBSTR_NO_OSTREAM_LSHIFT
2248 #ifdef __clang__
2249 # pragma clang diagnostic push
2250 # pragma clang diagnostic ignored "-Wsign-conversion"
2251 #elif defined(__GNUC__)
2252 # pragma GCC diagnostic push
2253 # pragma GCC diagnostic ignored "-Wsign-conversion"
2254 #endif
2255 
2256 /** output the string to a stream */
2257 template<class OStream, class C>
2258 inline OStream& operator<< (OStream& os, basic_substring<C> s)
2259 {
2260  os.write(s.str, s.len);
2261  return os;
2262 }
2263 
2264 // this causes ambiguity
2265 ///** this is used by google test */
2266 //template<class OStream, class C>
2267 //inline void PrintTo(basic_substring<C> s, OStream* os)
2268 //{
2269 // os->write(s.str, s.len);
2270 //}
2271 
2272 #ifdef __clang__
2273 # pragma clang diagnostic pop
2274 #elif defined(__GNUC__)
2275 # pragma GCC diagnostic pop
2276 #endif
2277 #endif // !C4_SUBSTR_NO_OSTREAM_LSHIFT
2278 
2279 /** @} */
2280 
2281 } // namespace c4
2282 
2283 
2284 #ifdef __clang__
2285 # pragma clang diagnostic pop
2286 #elif defined(__GNUC__)
2287 # pragma GCC diagnostic pop
2288 #endif
2289 
2290 #endif /* _C4_SUBSTR_HPP_ */
left_< T > left(T val, size_t width, char padchar=' ')
mark an argument to be aligned left
Definition: format.hpp:525
right_< T > right(T val, size_t width, char padchar=' ')
mark an argument to be aligned right
Definition: format.hpp:532
csubstr to_csubstr(substr s) noexcept
neutral version for use in generic code
Definition: substr.hpp:2186
substr to_substr(substr s) noexcept
neutral version for use in generic code
Definition: substr.hpp:2184
bool operator!=(const char(&s)[N], basic_substring< C > const that) noexcept
Definition: substr.hpp:2224
bool operator>(const char(&s)[N], basic_substring< C > const that) noexcept
Definition: substr.hpp:2226
bool operator>=(const char(&s)[N], basic_substring< C > const that) noexcept
Definition: substr.hpp:2228
bool operator<=(const char(&s)[N], basic_substring< C > const that) noexcept
Definition: substr.hpp:2227
bool operator==(const char(&s)[N], basic_substring< C > const that) noexcept
Definition: substr.hpp:2223
bool operator<(const char(&s)[N], basic_substring< C > const that) noexcept
Definition: substr.hpp:2225
OStream & operator<<(OStream &os, basic_substring< C > s)
output the string to a stream
Definition: substr.hpp:2258
@ npos
a null string position
Definition: common.hpp:267
@ NONE
an index to none
Definition: common.hpp:260
Definition: common.cpp:12
split_iterator_impl(split_proxy_impl const *proxy, size_t pos, C sep)
Definition: substr.hpp:1702
a non-owning string-view, consisting of a character pointer and a length.
Definition: substr.hpp:73
basic_substring _first_real_span_hex(size_t pos) const noexcept
Definition: substr.hpp:1378
void reverse()
reverse in place
Definition: substr.hpp:2026
basic_substring first_uint_span() const
get the first span which can be interpreted as an unsigned integer
Definition: substr.hpp:1126
first_of_any_result first_of_any_iter(It first_span, It last_span) const
Definition: substr.hpp:636
int compare(ro_substr const that) const noexcept
Definition: substr.hpp:261
basic_substring(U s_) noexcept
Construct from a C-string (zero-terminated string)
Definition: substr.hpp:152
size_t first_not_of(ro_substr chars) const
Definition: substr.hpp:886
basic_substring gpop_right(C sep=C('/'), bool skip_empty=false) const
greedy pop right.
Definition: substr.hpp:1904
basic_substring trim(const C c) const
trim the character c left and right
Definition: substr.hpp:494
C const & front() const noexcept
Definition: substr.hpp:213
size_t count(const C c, size_t pos=0) const
count the number of occurrences of c
Definition: substr.hpp:561
basic_substring _first_real_span_oct(size_t pos) const noexcept
Definition: substr.hpp:1558
bool begins_with(const C c) const
true if the first character of the string is c
Definition: substr.hpp:667
first_of_any_result first_of_any(ro_substr s0, ro_substr s1) const
Definition: substr.hpp:611
basic_substring sub(size_t first, size_t num) const noexcept
return [first,first+num[.
Definition: substr.hpp:326
basic_substring pair_range(CC open, CC close) const
get the range delimited by an open-close pair of characters.
Definition: substr.hpp:984
basic_substring pop_right(C sep=C('/'), bool skip_empty=false) const
pop right: return the first split from the right.
Definition: substr.hpp:1774
basic_substring range(size_t first, size_t last=npos) const noexcept
return [first,last[.
Definition: substr.hpp:336
int compare(C const c) const noexcept
Definition: substr.hpp:225
C const & back() const noexcept
Definition: substr.hpp:216
size_t first_not_of(const C c) const
Definition: substr.hpp:842
const_iterator begin() const noexcept
Definition: substr.hpp:203
size_t last_not_of(const C c, size_t start) const
Definition: substr.hpp:873
basic_substring left_of(ro_substr const subs) const noexcept
given subs a substring of the current string, get the portion of the current string to the left of it
Definition: substr.hpp:412
basic_substring triml(const C c) const
trim left
Definition: substr.hpp:446
bool ends_with(const C c) const
true if the last character of the string is c
Definition: substr.hpp:724
size_t last_of(const C c, size_t start=npos) const
Definition: substr.hpp:795
bool is_integer() const
Definition: substr.hpp:1091
void tolower()
convert the string to lower-case
Definition: substr.hpp:1985
basic_substring trimr(ro_substr chars) const
trim right ANY of the characters
Definition: substr.hpp:482
basic_substring(basic_substring const &) noexcept=default
basic_substring _first_real_span_bin(size_t pos) const noexcept
Definition: substr.hpp:1468
void toupper()
convert the string to upper-case
Definition: substr.hpp:1975
first_of_any_result first_of_any(ro_substr s0, ro_substr s1, ro_substr s2, ro_substr s3, ro_substr s4) const
Definition: substr.hpp:629
basic_substring offs(size_t left, size_t right) const noexcept
offset from the ends: return [left,len-right[ ; ie, trim a number of characters from the left and rig...
Definition: substr.hpp:364
basic_substring first_real_span() const
get the first span which can be interpreted as a real (floating-point) number
Definition: substr.hpp:1213
basic_substring unquoted() const
Definition: substr.hpp:1043
C & front() noexcept
Definition: substr.hpp:212
split_proxy_impl split_proxy
Definition: substr.hpp:1758
basic_substring first_int_span() const
get the first span which can be interpreted as a signed integer
Definition: substr.hpp:1138
basic_substring select(ro_substr pattern, size_t pos=0) const
get the substr consisting of the first occurrence of pattern after pos, or an empty substr if none oc...
Definition: substr.hpp:596
basic_substring pair_range_esc(CC open_close, CC escape=CC('\\'))
get the range delimited by a single open-close character (eg, quotes).
Definition: substr.hpp:999
size_t replace_all(rw_substr dst, ro_substr pattern, ro_substr repl, size_t pos=0) const
replace pattern with repl, and write the result into dst.
Definition: substr.hpp:2121
size_t count(ro_substr c, size_t pos=0) const
count the number of occurrences of s
Definition: substr.hpp:575
basic_substring name_wo_extshort() const
Definition: substr.hpp:1946
split_proxy split(C sep, size_t start_pos=0) const
a view into the splits
Definition: substr.hpp:1761
basic_substring name_wo_extlong() const
Definition: substr.hpp:1951
basic_substring erase(ro_substr sub)
erase a part of the string.
Definition: substr.hpp:2075
C & back() noexcept
Definition: substr.hpp:215
size_t last_not_of(ro_substr chars) const
Definition: substr.hpp:929
bool is_real() const
Definition: substr.hpp:1080
constexpr basic_substring() noexcept
Definition: substr.hpp:114
basic_substring triml(ro_substr chars) const
trim left ANY of the characters.
Definition: substr.hpp:458
size_t len
the length of the substring
Definition: substr.hpp:79
basic_substring left_of(size_t pos, bool include_pos) const noexcept
return [0, pos+include_pos[ .
Definition: substr.hpp:382
size_t last_not_of(ro_substr chars, size_t start) const
Definition: substr.hpp:950
basic_substring last(size_t num) const noexcept
return the last num elements: [len-num,len[
Definition: substr.hpp:353
size_t first_of(const C c, size_t start=0) const
Definition: substr.hpp:783
basic_substring stripl(ro_substr pattern) const
remove a pattern from the left
Definition: substr.hpp:507
bool ends_with(ro_substr pattern) const
true if the string ends with the given pattern
Definition: substr.hpp:747
basic_substring right_of(size_t pos, bool include_pos) const noexcept
return [pos+!include_pos, len[
Definition: substr.hpp:400
size_t find(const C c, size_t start_pos=0) const
Definition: substr.hpp:530
basic_substring trim(ro_substr const chars) const
trim left and right ANY of the characters
Definition: substr.hpp:500
const_iterator end() const noexcept
Definition: substr.hpp:204
typename std::add_const< C >::type CC
CC=const char.
Definition: substr.hpp:86
basic_substring(C *s_, size_t len_) noexcept
Construct from a pointer and length.
Definition: substr.hpp:140
basic_substring erase_range(size_t first, size_t last)
Definition: substr.hpp:2066
basic_substring extshort() const
Definition: substr.hpp:1956
void assign(U s_) noexcept
Assign from a C-string (zero-terminated string)
Definition: substr.hpp:173
basic_substring stripr(ro_substr pattern) const
remove a pattern from the right
Definition: substr.hpp:516
size_t first_of(ro_substr chars, size_t start=0) const
Definition: substr.hpp:809
size_t find(ro_substr pattern, size_t start_pos=0) const
Definition: substr.hpp:534
void assign(C *s_, size_t len_) noexcept
Assign from a pointer and length.
Definition: substr.hpp:161
iterator begin() noexcept
Definition: substr.hpp:200
void assign(C(&s_)[N]) noexcept
Assign from an array.
Definition: substr.hpp:158
bool ends_with_any(ro_substr chars) const
true if the last character of the string is any of the given chars
Definition: substr.hpp:764
void fill(C val)
fill the entire contents with the given val
Definition: substr.hpp:1997
size_t size() const noexcept
Definition: substr.hpp:198
basic_substring basename(C sep=C('/')) const
Definition: substr.hpp:1932
basic_substring(C *beg_, C *end_) noexcept
Construct from two pointers.
Definition: substr.hpp:144
bool is_unsigned_integer() const
Definition: substr.hpp:1104
bool overlaps(ro_substr const that) const noexcept
true if there is overlap of at least one element between that and *this
Definition: substr.hpp:310
basic_substring _first_integral_span(size_t skip_start) const
Definition: substr.hpp:1147
iterator end() noexcept
Definition: substr.hpp:201
bool not_empty() const noexcept
Definition: substr.hpp:197
basic_substring first(size_t num) const noexcept
return the first num elements: [0,num[
Definition: substr.hpp:346
basic_substring left_of(size_t pos) const noexcept
return [0, pos[ .
Definition: substr.hpp:373
void reverse_range(size_t ifirst, size_t ilast)
revert a range in place
Definition: substr.hpp:2044
basic_substring pair_range_nested(CC open, CC close) const
get the range delimited by an open-close pair of characters, with possibly nested occurrences.
Definition: substr.hpp:1020
C const * data() const noexcept
Definition: substr.hpp:207
size_t replace(C value, C repl, size_t pos=0)
replace every occurrence of character value with the character repl
Definition: substr.hpp:2087
bool has_str() const noexcept
Definition: substr.hpp:195
bool begins_with(ro_substr pattern) const
true if the string begins with the given pattern
Definition: substr.hpp:690
bool is_number() const
Definition: substr.hpp:1065
basic_substring sub(size_t first) const noexcept
return [first,len[
Definition: substr.hpp:319
basic_substring _first_real_span_dec(size_t pos) const noexcept
Definition: substr.hpp:1291
first_of_any_result first_of_any(ro_substr s0, ro_substr s1, ro_substr s2) const
Definition: substr.hpp:617
bool begins_with_any(ro_substr chars) const
true if the first character of the string is any of the given chars
Definition: substr.hpp:707
bool next_split(C sep, size_t *start_pos, basic_substring *out) const
returns true if the string has not been exhausted yet, meaning it's ok to call next_split() again.
Definition: substr.hpp:1658
basic_substring(basic_substring &&) noexcept=default
bool empty() const noexcept
Definition: substr.hpp:196
basic_substring right_of(ro_substr const subs) const noexcept
given subs a substring of the current string, get the portion of the current string to the right of i...
Definition: substr.hpp:426
size_t first_not_of(const C c, size_t start) const
Definition: substr.hpp:852
void assign(C *beg_, C *end_) noexcept
Assign from two pointers.
Definition: substr.hpp:165
basic_substring trimr(const C c) const
trim the character c from the right
Definition: substr.hpp:470
first_of_any_result first_of_any(ro_substr s0, ro_substr s1, ro_substr s2, ro_substr s3) const
Definition: substr.hpp:623
int compare(const char *that, size_t sz) const noexcept
Definition: substr.hpp:234
constexpr basic_substring(C(&s_)[N]) noexcept
Construct from an array.
Definition: substr.hpp:137
basic_substring dirname(C sep=C('/')) const
Definition: substr.hpp:1939
size_t replace(ro_substr chars, C repl, size_t pos=0)
replace every occurrence of each character in value with the character repl.
Definition: substr.hpp:2103
bool is_super(ro_substr const that) const noexcept
true if that is a substring of *this (ie, from the same buffer)
Definition: substr.hpp:301
size_t last_not_of(const C c) const
Definition: substr.hpp:863
bool ends_with(const C c, size_t num) const
true if the last num characters of the string are c
Definition: substr.hpp:730
bool begins_with(const C c, size_t num) const
true if the first num characters of the string are c
Definition: substr.hpp:673
static constexpr C4_CONST bool _is_hex_char(char c) noexcept
true if the character is in [0-9a-fA-F]
Definition: substr.hpp:1276
basic_substring erase(size_t pos, size_t num)
erase part of the string.
Definition: substr.hpp:2057
C * data() noexcept
Definition: substr.hpp:206
basic_substring _word_follows(size_t pos, csubstr word) const noexcept
Definition: substr.hpp:1281
void reverse_sub(size_t ifirst, size_t num)
revert a subpart in place
Definition: substr.hpp:2034
basic_substring gpop_left(C sep=C('/'), bool skip_empty=false) const
greedy pop left.
Definition: substr.hpp:1882
size_t first_not_of(ro_substr chars, size_t start) const
Definition: substr.hpp:907
void clear() noexcept
Definition: substr.hpp:124
size_t last_of(ro_substr chars, size_t start=npos) const
Definition: substr.hpp:824
void copy_from(ro_substr that, size_t ifirst=0, size_t num=npos)
set the current substring to a copy of the given csubstr
Definition: substr.hpp:2009
typename std::remove_const< C >::type NCC_
NCC_=non const char.
Definition: substr.hpp:87
basic_substring first_non_empty_span() const
get the first span consisting exclusively of non-empty characters
Definition: substr.hpp:1114
C * str
a restricted pointer to the first character of the substring
Definition: substr.hpp:77
basic_substring right_of(size_t pos) const noexcept
return [pos+1, len[
Definition: substr.hpp:391
basic_substring pop_left(C sep=C('/'), bool skip_empty=false) const
return the first split from the left.
Definition: substr.hpp:1827
static constexpr C4_CONST bool _is_delim_char(char c) noexcept
true if the character is a delimiter character at the end
Definition: substr.hpp:1268
bool is_sub(ro_substr const that) const noexcept
true if *this is a substring of that (ie, from the same buffer)
Definition: substr.hpp:295
basic_substring select(const C c, size_t pos=0) const
get the substr consisting of the first occurrence of c after pos, or an empty substr if none occurs
Definition: substr.hpp:589
basic_substring extlong() const
Definition: substr.hpp:1961
#define _c4append(first, last)