Sane C++ Libraries
C++ Platform Abstraction Libraries
StringIterator.h
1// Copyright (c) Stefano Cristiano
2// SPDX-License-Identifier: MIT
3#pragma once
4#include "../Foundation/Assert.h" //Assert::unreachable
5#include "../Foundation/Span.h"
6
7namespace SC
8{
11
14
/// String Encoding (Ascii, Utf8, Utf16).
/// NOTE(review): the enum's declaration line is not present in this listing; the name `StringEncoding`
/// is inferred from its uses (e.g. `StringEncoding::Utf16`) — confirm against the original header.
17{
18 Ascii = 0, ///< Encoding is ASCII
19 Utf8 = 1, ///< Encoding is UTF8
20 Utf16 = 2, ///< Encoding is UTF16-LE
21#if SC_PLATFORM_WINDOWS
22 Native = Utf16, ///< When SC_PLATFORM_WINDOWS is set, Native aliases Utf16
23 Wide = Utf16 ///< Alias of Utf16, only defined when SC_PLATFORM_WINDOWS is set
24#else
25 Native = Utf8 ///< Otherwise Native aliases Utf8
26#endif
27};
28
34{
35 return (encoding1 == encoding2) or (encoding2 == StringEncoding::Ascii and encoding1 == StringEncoding::Utf8) or
36 (encoding2 == StringEncoding::Utf8 and encoding1 == StringEncoding::Ascii);
37}
38
43{
44 switch (encoding)
45 {
46 case StringEncoding::Utf16: return 2;
47 case StringEncoding::Ascii: return 1;
48 case StringEncoding::Utf8: return 1;
49 }
50 Assert::unreachable();
51}
52
/// A position inside a fixed range [start, end) of UTF code points.
/// Decoding and stepping are delegated to the `CharIterator` template parameter
/// (through its static `getEncoding`, `getNextOf`, `getPreviousOf` and `decode`).
/// NOTE(review): the declaration line naming this type is not present in this listing — confirm against the header.
61template <typename CharIterator>
63{
    /// Encoding reported by the CharIterator
64 static constexpr StringEncoding getEncoding() { return CharIterator::getEncoding(); }
65
66 using CodeUnit = char;
67 using CodePoint = StringCodePoint;
68
    /// Rewind current position to start of iterator range
70 constexpr void setToStart() { it = start; }
71
    /// Set current position to end of iterator range
73 constexpr void setToEnd() { it = end; }
74
    /// Check if current position is at end of iterator range
77 [[nodiscard]] constexpr bool isAtEnd() const { return it >= end; }
78
    /// Check if current position is at start of iterator range
81 [[nodiscard]] constexpr bool isAtStart() const { return it <= start; }
82
    /// Advances position towards `end` until it matches CodePoint `c` or `position == end`
86 [[nodiscard]] constexpr bool advanceUntilMatches(CodePoint c);
87
    /// Moves position towards start until CodePoint `c` is found (defined outside this listing)
91 [[nodiscard]] bool reverseAdvanceUntilMatches(CodePoint c);
92
    /// Advances position towards end until a range of characters equal to `other` is matched.
    /// NOTE(review): presumably leaves the position after the match — confirm against the implementation.
97 [[nodiscard]] bool advanceAfterFinding(StringIterator other);
98
    /// Advances position towards end until a range of characters equal to `other` is matched.
    /// NOTE(review): presumably leaves the position before the match — confirm against the implementation.
103 [[nodiscard]] bool advanceBeforeFinding(StringIterator other);
104
    /// Advances position by the distance between `other`'s current position and its end (`other.end - other.it`)
108 [[nodiscard]] bool advanceByLengthOf(StringIterator other) { return advanceOfBytes(other.end - other.it); }
109
    /// Advances position until any CodePoint in the given Span is found.
    /// NOTE(review): `matched` presumably receives the code point that was found — confirm.
114 [[nodiscard]] bool advanceUntilMatchesAny(Span<const CodePoint> items, CodePoint& matched);
115
    /// Moves position towards start until any CodePoint in the given Span is found.
    /// NOTE(review): `matched` presumably receives the code point that was found — confirm.
120 [[nodiscard]] bool reverseAdvanceUntilMatchesAny(Span<const CodePoint> items, CodePoint& matched);
121
    /// Advances position until a code point different from `c` is found or end is reached
126 [[nodiscard]] bool advanceUntilDifferentFrom(CodePoint c, CodePoint* optionalReadChar = nullptr);
127
    /// Advance position only if next code point matches `c`
131 [[nodiscard]] constexpr bool advanceIfMatches(CodePoint c);
132
    /// Move position by one code point towards start if previous code point matches `c`
136 [[nodiscard]] bool advanceBackwardIfMatches(CodePoint c);
137
    // NOTE(review): the glossary lists `bool advanceIfMatchesAny(Span<const CodePoint> items)`, but no
    // declaration for it is visible here — it may have been on the line below; confirm against the header.
142
    /// Advance position if any code point in the range [first, last] is matched
147 [[nodiscard]] bool advanceIfMatchesRange(CodePoint first, CodePoint last);
148
    /// Check if code unit at current position matches CodePoint `c` (false when at end)
153 [[nodiscard]] bool match(CodePoint c) { return it < end and CharIterator::decode(it) == c; }
154
    /// Decode code unit at current position and advance
158 [[nodiscard]] constexpr bool advanceRead(CodePoint& c);
159
    /// Read code unit at current position
163 [[nodiscard]] bool read(CodePoint& c);
164
    /// Move to previous position and read code unit
168 [[nodiscard]] bool advanceBackwardRead(CodePoint& c);
169
    /// Move position to next code point
172 [[nodiscard]] constexpr bool stepForward();
173
    /// Move position to previous code point
176 [[nodiscard]] constexpr bool stepBackward();
177
    /// Move position forward (towards end) by variable number of code points
181 [[nodiscard]] constexpr bool advanceCodePoints(size_t numCodePoints);
182
    /// Move position backwards (towards start) by variable number of code points
186 [[nodiscard]] bool reverseAdvanceCodePoints(size_t numCodePoints);
187
    /// Check if next code point is `c`
191 [[nodiscard]] constexpr bool isFollowedBy(CodePoint c);
192
    /// Check if previous code point is `c`
196 [[nodiscard]] constexpr bool isPrecededBy(CodePoint c);
197
    /// Returns another StringIterator range ending at `otherPoint` position
201 [[nodiscard]] constexpr StringIterator sliceFromStartUntil(StringIterator otherPoint) const;
202
    /// Get distance in bytes from current position to another StringIterator current position
206 [[nodiscard]] constexpr ssize_t bytesDistanceFrom(StringIterator other) const;
207
    /// Check if this Iterator ends with any code point in the given span
211 [[nodiscard]] bool endsWithAnyOf(Span<const CodePoint> codePoints) const;
212
    /// Check if this Iterator starts with any code point in the given span
216 [[nodiscard]] bool startsWithAnyOf(Span<const CodePoint> codePoints) const;
217
    /// Check if this Iterator at its end matches entirely another Iterator's range
221 template <typename IteratorType>
222 [[nodiscard]] bool endsWith(IteratorType other) const;
223
    /// Check if this Iterator at its start matches entirely another Iterator's range
227 template <typename IteratorType>
228 [[nodiscard]] bool startsWith(IteratorType other) const;
229
230 protected:
    /// Byte-based advance used by advanceByLengthOf (defined outside this listing)
231 [[nodiscard]] bool advanceOfBytes(ssize_t bytesLength);
232
233 friend struct StringView;
    // Thin forwarders to the CharIterator stepping primitives
234 static constexpr const CodeUnit* getNextOf(const CodeUnit* src) { return CharIterator::getNextOf(src); }
235 static constexpr const CodeUnit* getPreviousOf(const CodeUnit* src) { return CharIterator::getPreviousOf(src); }
    /// Builds an iterator over [it, end); both the current position and `start` are set to `it`
236 constexpr StringIterator(const CodeUnit* it, const CodeUnit* end) : it(it), start(it), end(end) {}
237 const CodeUnit* it; // current position
238 const CodeUnit* start; // first code unit of the range
239 const CodeUnit* end; // one past the last code unit of the range
240};
241
243struct SC_COMPILER_EXPORT StringIteratorASCII : public StringIterator<StringIteratorASCII>
244{
245 [[nodiscard]] constexpr bool advanceUntilMatches(CodePoint c);
246
247 private:
248 [[nodiscard]] bool advanceUntilMatchesNonConstexpr(CodePoint c);
249 using StringIterator::StringIterator;
250 constexpr StringIteratorASCII(const CodeUnit* it, const CodeUnit* end) : StringIterator(it, end) {}
252 friend Parent;
253 friend struct StringView;
254
255 [[nodiscard]] static constexpr StringEncoding getEncoding() { return StringEncoding::Ascii; }
256
257 [[nodiscard]] static constexpr const char* getNextOf(const char* src) { return src + 1; }
258 [[nodiscard]] static constexpr const char* getPreviousOf(const char* src) { return src - 1; }
259 [[nodiscard]] static constexpr CodePoint decode(const char* src) { return static_cast<CodePoint>(*src); }
260};
261
263struct SC_COMPILER_EXPORT StringIteratorUTF16 : public StringIterator<StringIteratorUTF16>
264{
265 private:
266 using StringIterator::StringIterator;
267 constexpr StringIteratorUTF16(const CodeUnit* it, const CodeUnit* end) : StringIterator(it, end) {}
269 friend Parent;
270 friend struct StringView;
271
272 [[nodiscard]] static StringEncoding getEncoding() { return StringEncoding::Utf16; }
273
274 [[nodiscard]] static const char* getNextOf(const char* bytes);
275
276 [[nodiscard]] static const char* getPreviousOf(const char* bytes);
277
278 [[nodiscard]] static uint32_t decode(const char* bytes);
279};
280
282struct SC_COMPILER_EXPORT StringIteratorUTF8 : public StringIterator<StringIteratorUTF8>
283{
284 private:
286 friend Parent;
287 friend struct StringView;
288 using StringIterator::StringIterator;
289 constexpr StringIteratorUTF8(const CodeUnit* it, const CodeUnit* end) : StringIterator(it, end) {}
290
291 [[nodiscard]] static StringEncoding getEncoding() { return StringEncoding::Utf8; }
292
293 [[nodiscard]] static const char* getNextOf(const char* src);
294
295 [[nodiscard]] static const char* getPreviousOf(const char* src);
296
297 [[nodiscard]] static uint32_t decode(const char* src);
298};
299
/// Builds a constexpr bool skip table of 256 entries used in some parsers.
/// NOTE(review): the struct header and the constructor signature are not present in this listing;
/// the type of `chars` is therefore unknown here — confirm against the original header.
302{
303 bool matches[256] = {false}; // every entry starts as false
305 {
    // Flag each character of `chars` in the table
306 for (auto c : chars)
307 {
    // NOTE(review): if `c` is a signed char holding a negative value this index would be negative — confirm
308 matches[static_cast<int>(c)] = true;
309 }
310 }
311};
313
314//-----------------------------------------------------------------------------------------------------------------------
315// Implementations Details
316//-----------------------------------------------------------------------------------------------------------------------
317template <typename CharIterator>
319{
320 while (it < end)
321 {
322 if (CharIterator::decode(it) == c)
323 return true;
324 it = getNextOf(it);
325 }
326 return false;
327}
328
329template <typename CharIterator>
331{
332 if (it < end and CharIterator::decode(it) == c)
333 {
334 it = getNextOf(it);
335 return true;
336 }
337 return false;
338}
339
340template <typename CharIterator>
342{
343 if (it < end)
344 {
345 c = CharIterator::decode(it);
346 it = getNextOf(it);
347 return true;
348 }
349 return false;
350}
351
352template <typename CharIterator>
354{
355 if (it < end)
356 {
357 it = getNextOf(it);
358 return true;
359 }
360 return false;
361}
362
363template <typename CharIterator>
365{
366 if (it > start)
367 {
368 it = getPreviousOf(it);
369 return true;
370 }
371 return false;
372}
373
374template <typename CharIterator>
375constexpr bool StringIterator<CharIterator>::advanceCodePoints(size_t numCodePoints)
376{
377 while (numCodePoints > 0)
378 {
379 numCodePoints -= 1;
380 if (it >= end)
381 {
382 return false;
383 }
384 it = getNextOf(it);
385 }
386 return true;
387}
388
389template <typename CharIterator>
391{
392 return it < end ? CharIterator::decode(getNextOf(it)) == c : false;
393}
394
395template <typename CharIterator>
397{
398 return it > start ? CharIterator::decode(getPreviousOf(it)) == c : false;
399}
400
401template <typename CharIterator>
403 StringIterator otherPoint) const
404{
405 SC_ASSERT_RELEASE(it <= otherPoint.it);
406 return StringIterator(it, otherPoint.it);
407}
408
409template <typename CharIterator>
411{
412 return (it - other.it) * static_cast<ssize_t>(sizeof(CodeUnit));
413}
414
415// StringIteratorASCII
416[[nodiscard]] constexpr bool StringIteratorASCII::advanceUntilMatches(CodePoint c)
417{
418 return __builtin_is_constant_evaluated() ? StringIterator::advanceUntilMatches(c)
419 : advanceUntilMatchesNonConstexpr(c);
420}
421
422} // namespace SC
#define SC_COMPILER_EXPORT
Macro for symbol visibility in non-MSVC compilers.
Definition: Compiler.h:78
#define SC_ASSERT_RELEASE(e)
Assert expression e to be true.
Definition: Assert.h:66
unsigned char uint8_t
Platform independent (1) byte unsigned int.
Definition: PrimitiveTypes.h:36
unsigned int uint32_t
Platform independent (4) bytes unsigned int.
Definition: PrimitiveTypes.h:38
signed long ssize_t
Platform independent signed size type.
Definition: PrimitiveTypes.h:57
uint32_t StringCodePoint
UTF code point (32 bit)
Definition: StringIterator.h:13
constexpr bool StringEncodingAreBinaryCompatible(StringEncoding encoding1, StringEncoding encoding2)
Checks if two encodings have the same utf unit size.
Definition: StringIterator.h:33
StringEncoding
String Encoding (Ascii, Utf8, Utf16)
Definition: StringIterator.h:17
constexpr uint32_t StringEncodingGetSize(StringEncoding encoding)
Returns the number of bytes needed to represent a UTF code unit in the given encoding.
Definition: StringIterator.h:42
@ Ascii
Encoding is ASCII.
@ Utf8
Encoding is UTF8.
@ Native
Encoding is UTF8.
@ Utf16
Encoding is UTF16-LE.
View over a contiguous sequence of items (pointer + size in elements).
Definition: Span.h:32
A string iterator for ASCII strings.
Definition: StringIterator.h:244
A position inside a fixed range [start, end) of UTF code points.
Definition: StringIterator.h:63
constexpr void setToStart()
Rewind current position to start of iterator range.
Definition: StringIterator.h:70
constexpr ssize_t bytesDistanceFrom(StringIterator other) const
Get distance in bytes from current position to another StringIterator current position.
Definition: StringIterator.h:410
constexpr void setToEnd()
Set current position to end of iterator range.
Definition: StringIterator.h:73
bool advanceUntilMatchesAny(Span< const CodePoint > items, CodePoint &matched)
Advances position until any CodePoint in the given Span is found.
bool advanceBackwardIfMatches(CodePoint c)
Move position by one code point towards start if previous code point matches c
constexpr bool advanceIfMatches(CodePoint c)
Advance position only if next code point matches c.
Definition: StringIterator.h:330
constexpr bool isAtStart() const
Check if current position is at start of iterator range.
Definition: StringIterator.h:81
bool reverseAdvanceUntilMatches(CodePoint c)
Moves position towards start until CodePoint c is found or position == start
bool advanceBeforeFinding(StringIterator other)
Advances position towards end until a matching range of character equal to other[it,...
bool advanceUntilDifferentFrom(CodePoint c, CodePoint *optionalReadChar=nullptr)
Advances position until a code point different from c is found or end is reached.
bool advanceAfterFinding(StringIterator other)
Advances position towards end until a matching range of character equal to other[it,...
bool startsWith(IteratorType other) const
Check if this Iterator at its start matches entirely another Iterator's range.
bool reverseAdvanceCodePoints(size_t numCodePoints)
Move position backwards (towards start) by variable number of code points.
constexpr bool stepBackward()
Move position to previous code point.
Definition: StringIterator.h:364
constexpr bool advanceCodePoints(size_t numCodePoints)
Move position forward (towards end) by variable number of code points.
Definition: StringIterator.h:375
bool reverseAdvanceUntilMatchesAny(Span< const CodePoint > items, CodePoint &matched)
Moves position towards start until any CodePoint in the given Span is found.
constexpr bool stepForward()
Move position to next code point.
Definition: StringIterator.h:353
bool endsWithAnyOf(Span< const CodePoint > codePoints) const
Check if this Iterator ends with any code point in the given span.
bool advanceIfMatchesAny(Span< const CodePoint > items)
Advance position only if any of the code points in given Span is matched.
constexpr bool isFollowedBy(CodePoint c)
Check if next code point is c
Definition: StringIterator.h:390
bool advanceBackwardRead(CodePoint &c)
Move to previous position and read code unit.
bool startsWithAnyOf(Span< const CodePoint > codePoints) const
Check if this Iterator starts with any code point in the given span.
constexpr StringIterator sliceFromStartUntil(StringIterator otherPoint) const
Returns another StringIterator range, starting from start to otherPoint position.
Definition: StringIterator.h:402
constexpr bool isPrecededBy(CodePoint c)
Check if previous code point is c
Definition: StringIterator.h:396
constexpr bool advanceUntilMatches(CodePoint c)
Advances position towards end until it matches CodePoint c or position == end
Definition: StringIterator.h:318
bool read(CodePoint &c)
Read code unit at current position.
constexpr bool advanceRead(CodePoint &c)
Decode code unit at current position and advance.
Definition: StringIterator.h:341
bool match(CodePoint c)
Check if code unit at current position matches CodePoint c
Definition: StringIterator.h:153
constexpr bool isAtEnd() const
Check if current position is at end of iterator range.
Definition: StringIterator.h:77
bool advanceIfMatchesRange(CodePoint first, CodePoint last)
Advance position if any code point in the range [first, last] is matched.
bool endsWith(IteratorType other) const
Check if this Iterator at its end matches entirely another Iterator's range.
bool advanceByLengthOf(StringIterator other)
Advances position by the number of bytes between other's current position and its end.
Definition: StringIterator.h:108
Builds a constexpr bool skip table of 256 entries used in some parsers.
Definition: StringIterator.h:302
A string iterator for UTF16 strings.
Definition: StringIterator.h:264
A string iterator for UTF8 strings.
Definition: StringIterator.h:283
Non-owning view over a range of characters with UTF Encoding.
Definition: StringView.h:47