Sane C++ Libraries
C++ Platform Abstraction Libraries
Loading...
Searching...
No Matches
StringIterator.h
1// Copyright (c) Stefano Cristiano
2// SPDX-License-Identifier: MIT
3#pragma once
4#include "../Foundation/Assert.h" //Assert::unreachable
5#include "../Foundation/Span.h"
6#include "../Foundation/StringSpan.h"
7#include "StringsExport.h"
8
9namespace SC
10{
13
16
21constexpr bool StringEncodingAreBinaryCompatible(StringEncoding encoding1, StringEncoding encoding2)
22{
23 return (encoding1 == encoding2) or (encoding2 == StringEncoding::Ascii and encoding1 == StringEncoding::Utf8) or
24 (encoding2 == StringEncoding::Utf8 and encoding1 == StringEncoding::Ascii);
25}
26
/// A position inside a fixed range [start, end) of UTF code points.
/// The CharIterator template argument supplies the encoding-specific static helpers used below
/// (getEncoding, getNextOf, getPreviousOf, decode).
/// NOTE(review): this is a rendered listing — each line starts with its listing number, and listing lines
/// 72, 84 and 129-132 are absent from the extract. Confirm against the original header before relying on it.
35template <typename CharIterator>
36struct SC_STRINGS_EXPORT StringIterator
37{
    /// Encoding reported by the CharIterator implementation.
38 static constexpr StringEncoding getEncoding() { return CharIterator::getEncoding(); }
39
40 using CodeUnit = char;
41 using CodePoint = StringCodePoint;
42
    /// Rewind current position to start of iterator range.
44 constexpr void setToStart() { it = start; }
45
    /// Set current position to end of iterator range.
47 constexpr void setToEnd() { it = end; }
48
    /// Check if current position is at (or past) end of iterator range.
51 [[nodiscard]] constexpr bool isAtEnd() const { return it >= end; }
52
    /// Check if current position is at (or before) start of iterator range.
55 [[nodiscard]] constexpr bool isAtStart() const { return it <= start; }
56
    /// Advances position towards end until it matches CodePoint c or position == end.
60 [[nodiscard]] constexpr bool advanceUntilMatches(CodePoint c);
61
    /// Moves position towards start until CodePoint c is found (definition is not part of this header).
65 [[nodiscard]] bool reverseAdvanceUntilMatches(CodePoint c);
66
    /// Forwards to advanceBeforeOrAfterFinding with after == true.
    /// NOTE(review): the signature line (listing line 72) is absent here; the member index of this page lists it as
    /// `bool advanceAfterFinding(StringIterator<OtherIterator> other)`.
71 template <typename OtherIterator>
73 {
74 return advanceBeforeOrAfterFinding<OtherIterator, true>(other);
75 }
76
    /// Same-encoding overload: forwards to advanceAfterFindingSameIterator.
77 [[nodiscard]] bool advanceAfterFinding(StringIterator other) { return advanceAfterFindingSameIterator(other); }
78
    /// Forwards to advanceBeforeOrAfterFinding with after == false.
    /// NOTE(review): the signature line (listing line 84) is absent here; the member index of this page lists it as
    /// `bool advanceBeforeFinding(StringIterator<OtherIterator> other)`.
83 template <typename OtherIterator>
85 {
86 return advanceBeforeOrAfterFinding<OtherIterator, false>(other);
87 }
88
    /// Same-encoding overload: forwards to advanceBeforeFindingSameIterator.
89 [[nodiscard]] bool advanceBeforeFinding(StringIterator other) { return advanceBeforeFindingSameIterator(other); }
90
    /// Advances by the number of code units between other's current position and other's end.
94 [[nodiscard]] bool advanceByLengthOf(StringIterator other) { return advanceOfBytes(other.end - other.it); }
95
    /// Advances position until any CodePoint in the given Span is found.
100 [[nodiscard]] bool advanceUntilMatchesAny(Span<const CodePoint> items, CodePoint& matched);
101
    /// Moves position towards start until any CodePoint in the given Span is found.
106 [[nodiscard]] bool reverseAdvanceUntilMatchesAny(Span<const CodePoint> items, CodePoint& matched);
107
    /// Advances position until a code point different from c is found or end is reached.
112 [[nodiscard]] bool advanceUntilDifferentFrom(CodePoint c, CodePoint* optionalReadChar = nullptr);
113
    /// Advance position only if the code point at the current position matches c.
117 [[nodiscard]] constexpr bool advanceIfMatches(CodePoint c);
118
    /// Move position by one code point towards start if previous code point matches c.
122 [[nodiscard]] bool advanceBackwardIfMatches(CodePoint c);
123
    /// NOTE(review): listing lines 129-132 are absent here; the member index of this page lists
    /// `bool advanceIfMatchesAny(Span<const CodePoint> items)`, which has no visible declaration — presumably it was here.
128
    /// Advance position if any code point in the range [first, last] is matched.
133 [[nodiscard]] bool advanceIfMatchesRange(CodePoint first, CodePoint last);
134
    /// True when not at end and the code point decoded at the current position equals c. Does not move the position.
139 [[nodiscard]] bool match(CodePoint c) { return it < end and CharIterator::decode(it, end) == c; }
140
    /// Decode the code point at current position into c and advance past it.
144 [[nodiscard]] constexpr bool advanceRead(CodePoint& c);
145
    /// Read at current position into c (definition is not part of this header).
149 [[nodiscard]] bool read(CodePoint& c);
150
    /// Move to previous position and read into c (definition is not part of this header).
154 [[nodiscard]] bool advanceBackwardRead(CodePoint& c);
155
    /// Move position to next code point; false when already at end.
158 [[nodiscard]] constexpr bool stepForward();
159
    /// Move position to previous code point; false when already at start.
162 [[nodiscard]] constexpr bool stepBackward();
163
    /// Move position forward (towards end) by variable number of code points.
167 [[nodiscard]] constexpr bool advanceCodePoints(size_t numCodePoints);
168
    /// Move position backwards (towards start) by variable number of code points.
172 [[nodiscard]] bool reverseAdvanceCodePoints(size_t numCodePoints);
173
    /// Check if next code point is c.
177 [[nodiscard]] constexpr bool isFollowedBy(CodePoint c);
178
    /// Check if previous code point is c.
182 [[nodiscard]] constexpr bool isPrecededBy(CodePoint c);
183
    /// Returns another StringIterator range ending at otherPoint's current position.
187 [[nodiscard]] constexpr StringIterator sliceFromStartUntil(StringIterator otherPoint) const;
188
    /// Get distance in bytes from current position to another StringIterator current position.
192 [[nodiscard]] constexpr ssize_t bytesDistanceFrom(StringIterator other) const;
193
    /// Check if this Iterator ends with any code point in the given span.
197 [[nodiscard]] bool endsWithAnyOf(Span<const CodePoint> codePoints) const;
198
    /// Check if this Iterator starts with any code point in the given span.
202 [[nodiscard]] bool startsWithAnyOf(Span<const CodePoint> codePoints) const;
203
    /// Check if this Iterator at its end matches entirely another Iterator's range.
207 template <typename IteratorType>
208 [[nodiscard]] bool endsWith(IteratorType other) const;
209
    /// Check if this Iterator at its start matches entirely another Iterator's range.
213 template <typename IteratorType>
214 [[nodiscard]] bool startsWith(IteratorType other) const;
215
216 protected:
    // Shared implementation behind the templated advanceAfterFinding / advanceBeforeFinding; `after` selects which.
217 template <typename OtherIterator, bool after>
218 [[nodiscard]] bool advanceBeforeOrAfterFinding(StringIterator<OtherIterator> other);
    // Targets of the same-encoding overloads above.
219 [[nodiscard]] bool advanceAfterFindingSameIterator(StringIterator other);
220 [[nodiscard]] bool advanceBeforeFindingSameIterator(StringIterator other);
    // Used by advanceByLengthOf to move by a raw code-unit count.
221 [[nodiscard]] bool advanceOfBytes(ssize_t bytesLength);
222
223 friend struct StringView;
    // Thin forwarders to the encoding-specific stepping routines of CharIterator.
224 static constexpr const CodeUnit* getNextOf(const CodeUnit* src, const char* end)
225 {
226 return CharIterator::getNextOf(src, end);
227 }
228 static constexpr const CodeUnit* getPreviousOf(const CodeUnit* src, const char* start)
229 {
230 return CharIterator::getPreviousOf(src, start);
231 }
    // The range start is fixed to the initial position passed in as `it`.
232 constexpr StringIterator(const CodeUnit* it, const CodeUnit* end) : it(it), start(it), end(end) {}
233 const CodeUnit* it;    // current position
234 const CodeUnit* start; // lower bound of the range
235 const CodeUnit* end;   // upper bound of the range (compared with it >= end)
236};
237
/// A string iterator for ASCII strings.
/// Passes itself as the CharIterator argument of StringIterator and provides the static helpers that base expects.
239struct SC_STRINGS_EXPORT StringIteratorASCII : public StringIterator<StringIteratorASCII>
240{
    /// ASCII-specific declaration with the same name as the base-class member; its definition later in this file
    /// uses the base implementation during constant evaluation and advanceUntilMatchesNonConstexpr otherwise.
241 [[nodiscard]] constexpr bool advanceUntilMatches(CodePoint c);
242
243 private:
    // Run-time search routine; not constexpr and defined outside this header.
244 [[nodiscard]] bool advanceUntilMatchesNonConstexpr(CodePoint c);
245 using StringIterator::StringIterator;
246 constexpr StringIteratorASCII(const CodeUnit* it, const CodeUnit* end) : StringIterator(it, end) {}
    // NOTE(review): listing line 247 is absent from this extract and `Parent` is not declared on any visible line —
    // presumably an alias for the base class was declared there; confirm against the original header.
248 friend Parent;
249 friend struct StringView;
250
251 [[nodiscard]] static constexpr StringEncoding getEncoding() { return StringEncoding::Ascii; }
252
    // One code unit per character: stepping is plain pointer arithmetic and the range bound argument is ignored.
253 [[nodiscard]] static constexpr const char* getNextOf(const char* src, const char*) { return src + 1; }
254 [[nodiscard]] static constexpr const char* getPreviousOf(const char* src, const char*) { return src - 1; }
    // Reads the single char at src and converts it to CodePoint; the second argument is ignored.
    // NOTE(review): where plain char is signed, a byte >= 0x80 converts to a very large CodePoint value —
    // confirm callers only pass 7-bit data.
255 [[nodiscard]] static constexpr CodePoint decode(const char* src, const char*)
256 {
257 return static_cast<CodePoint>(*src);
258 }
259};
260
/// A string iterator for UTF16 strings.
/// Passes itself as the CharIterator argument of StringIterator; the stepping and decoding helpers are only
/// declared here and are defined outside this header.
262struct SC_STRINGS_EXPORT StringIteratorUTF16 : public StringIterator<StringIteratorUTF16>
263{
264 private:
265 using StringIterator::StringIterator;
266 constexpr StringIteratorUTF16(const CodeUnit* it, const CodeUnit* end) : StringIterator(it, end) {}
    // NOTE(review): listing line 267 is absent from this extract and `Parent` is not declared on any visible line —
    // presumably an alias for the base class was declared there; confirm against the original header.
268 friend Parent;
269 friend struct StringView;
270
    // Unlike the ASCII variant, this getEncoding is not declared constexpr.
271 [[nodiscard]] static StringEncoding getEncoding() { return StringEncoding::Utf16; }
272
    // Positions are passed as const char* even though the encoding is UTF16.
273 [[nodiscard]] static const char* getNextOf(const char* bytes, const char* end);
274 [[nodiscard]] static const char* getPreviousOf(const char* bytes, const char* start);
275
276 [[nodiscard]] static uint32_t decode(const char* bytes, const char* end);
277};
278
/// A string iterator for UTF8 strings.
/// Passes itself as the CharIterator argument of StringIterator; the stepping and decoding helpers are only
/// declared here and are defined outside this header.
280struct SC_STRINGS_EXPORT StringIteratorUTF8 : public StringIterator<StringIteratorUTF8>
281{
282 private:
    // NOTE(review): listing line 283 is absent from this extract and `Parent` is not declared on any visible line —
    // presumably an alias for the base class was declared there; confirm against the original header.
284 friend Parent;
285 friend struct StringView;
286 using StringIterator::StringIterator;
287 constexpr StringIteratorUTF8(const CodeUnit* it, const CodeUnit* end) : StringIterator(it, end) {}
288
    // Unlike the ASCII variant, this getEncoding is not declared constexpr.
289 [[nodiscard]] static StringEncoding getEncoding() { return StringEncoding::Utf8; }
290
291 [[nodiscard]] static const char* getNextOf(const char* src, const char* end);
292 [[nodiscard]] static const char* getPreviousOf(const char* src, const char* start);
293
294 [[nodiscard]] static uint32_t decode(const char* src, const char* end);
295};
296
// Table of 256 boolean flags; the loop below sets the flag indexed by each element of `chars`.
// NOTE(review): listing lines 298 and 301 are absent from this extract — presumably the type header and the
// constructor signature that introduces `chars`; confirm against the original header.
299{
300 bool matches[256] = {false};
302 {
303 for (auto c : chars)
304 {
// NOTE(review): if `c` is a plain char and char is signed on the target, a byte >= 0x80 converts to a negative
// int and indexes before the array — confirm inputs are 7-bit, or index through unsigned char.
305 matches[static_cast<int>(c)] = true;
306 }
307 }
308};
310
311//-----------------------------------------------------------------------------------------------------------------------
312// Implementations Details
313//-----------------------------------------------------------------------------------------------------------------------
// Advances position towards end until the decoded code point equals c or the range is exhausted.
// NOTE(review): the signature line (listing line 315) is absent from this extract; the member index of this page
// places `constexpr bool advanceUntilMatches(CodePoint c)` of StringIterator there.
314template <typename CharIterator>
316{
317 while (it < end)
318 {
// On a match the position stays on the matching code point (it is not consumed).
319 if (CharIterator::decode(it, end) == c)
320 return true;
321 it = getNextOf(it, end);
322 }
// No match: the loop only exits here once `it < end` no longer holds.
323 return false;
324}
325
// Consumes one code point when the code point at the current position equals c; otherwise leaves the position as is.
// NOTE(review): the signature line (listing line 327) is absent from this extract; the member index of this page
// places `constexpr bool advanceIfMatches(CodePoint c)` of StringIterator there.
326template <typename CharIterator>
328{
// The bounds check comes first so decode is never invoked at end.
329 if (it < end and CharIterator::decode(it, end) == c)
330 {
331 it = getNextOf(it, end);
332 return true;
333 }
334 return false;
335}
336
// Decodes the code point at the current position into c and moves past it.
// Returns false, leaving both c and the position untouched, when already at end.
// NOTE(review): the signature line (listing line 338) is absent from this extract; the member index of this page
// places `constexpr bool advanceRead(CodePoint& c)` of StringIterator there.
337template <typename CharIterator>
339{
340 if (it < end)
341 {
342 c = CharIterator::decode(it, end);
343 it = getNextOf(it, end);
344 return true;
345 }
346 return false;
347}
348
// Moves position to the next code point; returns false without moving when already at end.
// NOTE(review): the signature line (listing line 350) is absent from this extract; the member index of this page
// places `constexpr bool stepForward()` of StringIterator there.
349template <typename CharIterator>
351{
352 if (it < end)
353 {
354 it = getNextOf(it, end);
355 return true;
356 }
357 return false;
358}
359
// Moves position to the previous code point; returns false without moving when already at start.
// NOTE(review): the signature line (listing line 361) is absent from this extract; the member index of this page
// places `constexpr bool stepBackward()` of StringIterator there.
360template <typename CharIterator>
362{
363 if (it > start)
364 {
365 it = getPreviousOf(it, start);
366 return true;
367 }
368 return false;
369}
370
371template <typename CharIterator>
372constexpr bool StringIterator<CharIterator>::advanceCodePoints(size_t numCodePoints)
373{
374 while (numCodePoints > 0)
375 {
376 numCodePoints -= 1;
377 if (it >= end)
378 {
379 return false;
380 }
381 it = getNextOf(it, end);
382 }
383 return true;
384}
385
// Checks whether the code point after the one at the current position equals c; false when already at end.
// NOTE(review): the signature line (listing line 387) is absent from this extract; the member index of this page
// places `constexpr bool isFollowedBy(CodePoint c)` of StringIterator there.
386template <typename CharIterator>
388{
// NOTE(review): only `it < end` is checked. When the current code point is the last one, getNextOf can yield
// `end` (StringIteratorASCII returns src + 1) and decode is then invoked at `end`; the ASCII decode dereferences
// that pointer. Confirm the range is always followed by a readable terminator, or guard the decoded position.
389 return it < end ? CharIterator::decode(getNextOf(it, end), end) == c : false;
390}
391
// Checks whether the code point just before the current position equals c; false when already at start.
// NOTE(review): the signature line (listing line 393) is absent from this extract; the member index of this page
// places `constexpr bool isPrecededBy(CodePoint c)` of StringIterator there.
392template <typename CharIterator>
394{
// The current position is passed as the upper bound of the decode call.
395 return it > start ? CharIterator::decode(getPreviousOf(it, start), it) == c : false;
396}
397
// Builds a new StringIterator covering [it, otherPoint.it): from this iterator's current position up to
// otherPoint's current position. Asserts (release assert) that otherPoint is not behind this iterator.
// NOTE(review): the first signature line (listing line 399) is absent from this extract. Also, the member index of
// this page describes the result as starting "from start", while the code uses the current position `it`; the two
// coincide only while this iterator has not been moved — confirm which is intended.
398template <typename CharIterator>
400 StringIterator otherPoint) const
401{
402 SC_ASSERT_RELEASE(it <= otherPoint.it);
403 return StringIterator(it, otherPoint.it);
404}
405
// Signed distance in bytes between this iterator's current position and other's current position;
// positive when this iterator is further towards end than other.
// NOTE(review): the signature line (listing line 407) is absent from this extract; the member index of this page
// places `constexpr ssize_t bytesDistanceFrom(StringIterator other) const` there.
406template <typename CharIterator>
408{
// CodeUnit is declared as char in StringIterator, so the pointer difference is scaled by sizeof(char).
409 return (it - other.it) * static_cast<ssize_t>(sizeof(CodeUnit));
410}
411
412// StringIteratorASCII
413[[nodiscard]] constexpr bool StringIteratorASCII::advanceUntilMatches(CodePoint c)
414{
415#if defined(__clang__)
416#pragma clang diagnostic push
417#pragma clang diagnostic ignored "-Wunreachable-code"
418#endif
419 return __builtin_is_constant_evaluated() ? StringIterator::advanceUntilMatches(c)
420 : advanceUntilMatchesNonConstexpr(c);
421#if defined(__clang__)
422#pragma clang diagnostic pop
423#endif
424}
425
426} // namespace SC
#define SC_ASSERT_RELEASE(e)
Assert expression e to be true.
Definition Assert.h:48
decltype(static_cast< char * >(nullptr) - static_cast< char * >(nullptr)) ssize_t
Platform independent signed size type.
Definition PrimitiveTypes.h:46
unsigned int uint32_t
Platform independent (4) bytes unsigned int.
Definition PrimitiveTypes.h:29
constexpr bool StringEncodingAreBinaryCompatible(StringEncoding encoding1, StringEncoding encoding2)
Checks if two encodings have the same utf unit size.
Definition StringIterator.h:21
uint32_t StringCodePoint
UTF code point (32 bit)
Definition StringIterator.h:15
View over a contiguous sequence of items (pointer + size in elements).
Definition Span.h:29
A string iterator for ASCII strings.
Definition StringIterator.h:240
Builds a constexpr bool skip table of 256 entries used in some parsers.
Definition StringIterator.h:299
A string iterator for UTF16 strings.
Definition StringIterator.h:263
A string iterator for UTF8 strings.
Definition StringIterator.h:281
A position inside a fixed range [start, end) of UTF code points.
Definition StringIterator.h:37
constexpr void setToStart()
Rewind current position to start of iterator range.
Definition StringIterator.h:44
constexpr ssize_t bytesDistanceFrom(StringIterator other) const
Get distance in bytes from current position to another StringIterator current position.
Definition StringIterator.h:407
constexpr void setToEnd()
Set current position to end of iterator range.
Definition StringIterator.h:47
bool advanceUntilMatchesAny(Span< const CodePoint > items, CodePoint &matched)
Advances position until any CodePoint in the given Span is found.
bool advanceBackwardIfMatches(CodePoint c)
Move position by one code point towards start if previous code point matches c
constexpr bool advanceIfMatches(CodePoint c)
Advance position only if next code point matches c.
Definition StringIterator.h:327
bool advanceBeforeFinding(StringIterator< OtherIterator > other)
Advances position towards end until a matching range of character equal to other[it,...
Definition StringIterator.h:84
constexpr bool isAtStart() const
Check if current position is at start of iterator range.
Definition StringIterator.h:55
bool reverseAdvanceUntilMatches(CodePoint c)
Moves position towards start until CodePoint c is found or position == start
bool advanceAfterFinding(StringIterator< OtherIterator > other)
Advances position towards end until a matching range of character equal to other[it,...
Definition StringIterator.h:72
bool advanceUntilDifferentFrom(CodePoint c, CodePoint *optionalReadChar=nullptr)
Advances position until a code point different from c is found or end is reached.
bool startsWith(IteratorType other) const
Check if this Iterator at its start matches entirely another Iterator's range.
bool reverseAdvanceCodePoints(size_t numCodePoints)
Move position backwards (towards start) by variable number of code points.
constexpr bool stepBackward()
Move position to previous code point.
Definition StringIterator.h:361
constexpr bool advanceCodePoints(size_t numCodePoints)
Move position forward (towards end) by variable number of code points.
Definition StringIterator.h:372
bool reverseAdvanceUntilMatchesAny(Span< const CodePoint > items, CodePoint &matched)
Moves position towards start until any CodePoint in the given Span is found.
constexpr bool stepForward()
Move position to next code point.
Definition StringIterator.h:350
bool endsWithAnyOf(Span< const CodePoint > codePoints) const
Check if this Iterator ends with any code point in the given span.
bool advanceIfMatchesAny(Span< const CodePoint > items)
Advance position only if any of the code points in given Span is matched.
constexpr bool isFollowedBy(CodePoint c)
Check if next code point is c
Definition StringIterator.h:387
bool advanceBackwardRead(CodePoint &c)
Move to previous position and read code unit.
bool startsWithAnyOf(Span< const CodePoint > codePoints) const
Check if this Iterator starts with any code point in the given span.
constexpr StringIterator sliceFromStartUntil(StringIterator otherPoint) const
Returns another StringIterator range, starting from start to otherPoint position.
Definition StringIterator.h:399
constexpr bool isPrecededBy(CodePoint c)
Check if previous code point is c
Definition StringIterator.h:393
constexpr bool advanceUntilMatches(CodePoint c)
Advances position towards end until it matches CodePoint c or position == end
Definition StringIterator.h:315
bool read(CodePoint &c)
Read code unit at current position.
constexpr bool advanceRead(CodePoint &c)
Decode code point at current position and advance.
Definition StringIterator.h:338
bool match(CodePoint c)
Check if code point at current position matches CodePoint c
Definition StringIterator.h:139
constexpr bool isAtEnd() const
Check if current position is at end of iterator range.
Definition StringIterator.h:51
bool advanceIfMatchesRange(CodePoint first, CodePoint last)
Advance position if any code point in the range [first, last] is matched.
bool endsWith(IteratorType other) const
Check if this Iterator at its end matches entirely another Iterator's range.
bool advanceByLengthOf(StringIterator other)
Advances position by the number of bytes remaining in other (from its current position to its end).
Definition StringIterator.h:94
Non-owning view over a range of characters with UTF Encoding.
Definition StringView.h:47