Sane C++ Libraries
C++ Platform Abstraction Libraries
Loading...
Searching...
No Matches
StringIterator.h
1// Copyright (c) Stefano Cristiano
2// SPDX-License-Identifier: MIT
3#pragma once
4#include "../Foundation/Assert.h" //Assert::unreachable
5#include "../Foundation/Span.h"
6#include "../Foundation/StringViewData.h"
7
8namespace SC
9{
12
15
20constexpr bool StringEncodingAreBinaryCompatible(StringEncoding encoding1, StringEncoding encoding2)
21{
22 return (encoding1 == encoding2) or (encoding2 == StringEncoding::Ascii and encoding1 == StringEncoding::Utf8) or
23 (encoding2 == StringEncoding::Utf8 and encoding1 == StringEncoding::Ascii);
24}
25
29constexpr uint32_t StringEncodingGetSize(StringEncoding encoding)
30{
31 switch (encoding)
32 {
33 case StringEncoding::Utf16: return 2;
34 case StringEncoding::Ascii: return 1;
35 case StringEncoding::Utf8: return 1;
36 }
37 Assert::unreachable();
38}
39
48template <typename CharIterator>
50{
51 static constexpr StringEncoding getEncoding() { return CharIterator::getEncoding(); }
52
53 using CodeUnit = char;
54 using CodePoint = StringCodePoint;
55
57 constexpr void setToStart() { it = start; }
58
60 constexpr void setToEnd() { it = end; }
61
64 [[nodiscard]] constexpr bool isAtEnd() const { return it >= end; }
65
68 [[nodiscard]] constexpr bool isAtStart() const { return it <= start; }
69
73 [[nodiscard]] constexpr bool advanceUntilMatches(CodePoint c);
74
78 [[nodiscard]] bool reverseAdvanceUntilMatches(CodePoint c);
79
84 [[nodiscard]] bool advanceAfterFinding(StringIterator other);
85
90 [[nodiscard]] bool advanceBeforeFinding(StringIterator other);
91
95 [[nodiscard]] bool advanceByLengthOf(StringIterator other) { return advanceOfBytes(other.end - other.it); }
96
101 [[nodiscard]] bool advanceUntilMatchesAny(Span<const CodePoint> items, CodePoint& matched);
102
107 [[nodiscard]] bool reverseAdvanceUntilMatchesAny(Span<const CodePoint> items, CodePoint& matched);
108
113 [[nodiscard]] bool advanceUntilDifferentFrom(CodePoint c, CodePoint* optionalReadChar = nullptr);
114
118 [[nodiscard]] constexpr bool advanceIfMatches(CodePoint c);
119
123 [[nodiscard]] bool advanceBackwardIfMatches(CodePoint c);
124
129
134 [[nodiscard]] bool advanceIfMatchesRange(CodePoint first, CodePoint last);
135
140 [[nodiscard]] bool match(CodePoint c) { return it < end and CharIterator::decode(it) == c; }
141
145 [[nodiscard]] constexpr bool advanceRead(CodePoint& c);
146
150 [[nodiscard]] bool read(CodePoint& c);
151
155 [[nodiscard]] bool advanceBackwardRead(CodePoint& c);
156
159 [[nodiscard]] constexpr bool stepForward();
160
163 [[nodiscard]] constexpr bool stepBackward();
164
168 [[nodiscard]] constexpr bool advanceCodePoints(size_t numCodePoints);
169
173 [[nodiscard]] bool reverseAdvanceCodePoints(size_t numCodePoints);
174
178 [[nodiscard]] constexpr bool isFollowedBy(CodePoint c);
179
183 [[nodiscard]] constexpr bool isPrecededBy(CodePoint c);
184
188 [[nodiscard]] constexpr StringIterator sliceFromStartUntil(StringIterator otherPoint) const;
189
193 [[nodiscard]] constexpr ssize_t bytesDistanceFrom(StringIterator other) const;
194
198 [[nodiscard]] bool endsWithAnyOf(Span<const CodePoint> codePoints) const;
199
203 [[nodiscard]] bool startsWithAnyOf(Span<const CodePoint> codePoints) const;
204
208 template <typename IteratorType>
209 [[nodiscard]] bool endsWith(IteratorType other) const;
210
214 template <typename IteratorType>
215 [[nodiscard]] bool startsWith(IteratorType other) const;
216
217 protected:
218 [[nodiscard]] bool advanceOfBytes(ssize_t bytesLength);
219
220 friend struct StringView;
221 static constexpr const CodeUnit* getNextOf(const CodeUnit* src) { return CharIterator::getNextOf(src); }
222 static constexpr const CodeUnit* getPreviousOf(const CodeUnit* src) { return CharIterator::getPreviousOf(src); }
223 constexpr StringIterator(const CodeUnit* it, const CodeUnit* end) : it(it), start(it), end(end) {}
224 const CodeUnit* it;
225 const CodeUnit* start;
226 const CodeUnit* end;
227};
228
230struct SC_COMPILER_EXPORT StringIteratorASCII : public StringIterator<StringIteratorASCII>
231{
232 [[nodiscard]] constexpr bool advanceUntilMatches(CodePoint c);
233
234 private:
235 [[nodiscard]] bool advanceUntilMatchesNonConstexpr(CodePoint c);
236 using StringIterator::StringIterator;
237 constexpr StringIteratorASCII(const CodeUnit* it, const CodeUnit* end) : StringIterator(it, end) {}
239 friend Parent;
240 friend struct StringView;
241
242 [[nodiscard]] static constexpr StringEncoding getEncoding() { return StringEncoding::Ascii; }
243
244 [[nodiscard]] static constexpr const char* getNextOf(const char* src) { return src + 1; }
245 [[nodiscard]] static constexpr const char* getPreviousOf(const char* src) { return src - 1; }
246 [[nodiscard]] static constexpr CodePoint decode(const char* src) { return static_cast<CodePoint>(*src); }
247};
248
250struct SC_COMPILER_EXPORT StringIteratorUTF16 : public StringIterator<StringIteratorUTF16>
251{
252 private:
253 using StringIterator::StringIterator;
254 constexpr StringIteratorUTF16(const CodeUnit* it, const CodeUnit* end) : StringIterator(it, end) {}
256 friend Parent;
257 friend struct StringView;
258
259 [[nodiscard]] static StringEncoding getEncoding() { return StringEncoding::Utf16; }
260
261 [[nodiscard]] static const char* getNextOf(const char* bytes);
262
263 [[nodiscard]] static const char* getPreviousOf(const char* bytes);
264
265 [[nodiscard]] static uint32_t decode(const char* bytes);
266};
267
269struct SC_COMPILER_EXPORT StringIteratorUTF8 : public StringIterator<StringIteratorUTF8>
270{
271 private:
273 friend Parent;
274 friend struct StringView;
275 using StringIterator::StringIterator;
276 constexpr StringIteratorUTF8(const CodeUnit* it, const CodeUnit* end) : StringIterator(it, end) {}
277
278 [[nodiscard]] static StringEncoding getEncoding() { return StringEncoding::Utf8; }
279
280 [[nodiscard]] static const char* getNextOf(const char* src);
281
282 [[nodiscard]] static const char* getPreviousOf(const char* src);
283
284 [[nodiscard]] static uint32_t decode(const char* src);
285};
286
// Skip table of 256 bool entries: matches[v] is set to true for every value v found in `chars`.
// NOTE(review): the struct declaration (listing line 288) and the constructor signature (listing line 291)
// are missing from this extract, so the struct name and the type of `chars` cannot be confirmed from here.
289{
// One flag per possible 8-bit value; all entries start as false.
290 bool matches[256] = {false};
292 {
// Mark each element of `chars` as a match.
293 for (auto c : chars)
294 {
// NOTE(review): if the element type of `chars` is a signed character type, values above 127 would convert
// to a negative int and index before the array — confirm the parameter type / cast through unsigned char.
295 matches[static_cast<int>(c)] = true;
296 }
297 }
298};
300
301//-----------------------------------------------------------------------------------------------------------------------
302// Implementations Details
303//-----------------------------------------------------------------------------------------------------------------------
304template <typename CharIterator>
306{
307 while (it < end)
308 {
309 if (CharIterator::decode(it) == c)
310 return true;
311 it = getNextOf(it);
312 }
313 return false;
314}
315
316template <typename CharIterator>
318{
319 if (it < end and CharIterator::decode(it) == c)
320 {
321 it = getNextOf(it);
322 return true;
323 }
324 return false;
325}
326
327template <typename CharIterator>
329{
330 if (it < end)
331 {
332 c = CharIterator::decode(it);
333 it = getNextOf(it);
334 return true;
335 }
336 return false;
337}
338
339template <typename CharIterator>
341{
342 if (it < end)
343 {
344 it = getNextOf(it);
345 return true;
346 }
347 return false;
348}
349
350template <typename CharIterator>
352{
353 if (it > start)
354 {
355 it = getPreviousOf(it);
356 return true;
357 }
358 return false;
359}
360
361template <typename CharIterator>
362constexpr bool StringIterator<CharIterator>::advanceCodePoints(size_t numCodePoints)
363{
364 while (numCodePoints > 0)
365 {
366 numCodePoints -= 1;
367 if (it >= end)
368 {
369 return false;
370 }
371 it = getNextOf(it);
372 }
373 return true;
374}
375
376template <typename CharIterator>
378{
379 return it < end ? CharIterator::decode(getNextOf(it)) == c : false;
380}
381
382template <typename CharIterator>
384{
385 return it > start ? CharIterator::decode(getPreviousOf(it)) == c : false;
386}
387
388template <typename CharIterator>
390 StringIterator otherPoint) const
391{
392 SC_ASSERT_RELEASE(it <= otherPoint.it);
393 return StringIterator(it, otherPoint.it);
394}
395
396template <typename CharIterator>
398{
399 return (it - other.it) * static_cast<ssize_t>(sizeof(CodeUnit));
400}
401
402// StringIteratorASCII
403[[nodiscard]] constexpr bool StringIteratorASCII::advanceUntilMatches(CodePoint c)
404{
405#if defined(__clang__)
406#pragma clang diagnostic push
407#pragma clang diagnostic ignored "-Wunreachable-code"
408#endif
409 return __builtin_is_constant_evaluated() ? StringIterator::advanceUntilMatches(c)
410 : advanceUntilMatchesNonConstexpr(c);
411#if defined(__clang__)
412#pragma clang diagnostic pop
413#endif
414}
415
416} // namespace SC
#define SC_COMPILER_EXPORT
Macro for symbol visibility in non-MSVC compilers.
Definition Compiler.h:78
#define SC_ASSERT_RELEASE(e)
Assert expression e to be true.
Definition Assert.h:66
unsigned int uint32_t
Platform independent (4) bytes unsigned int.
Definition PrimitiveTypes.h:38
signed long ssize_t
Platform independent signed size type.
Definition PrimitiveTypes.h:57
constexpr bool StringEncodingAreBinaryCompatible(StringEncoding encoding1, StringEncoding encoding2)
Checks if two encodings have the same utf unit size.
Definition StringIterator.h:20
constexpr uint32_t StringEncodingGetSize(StringEncoding encoding)
Returns the number of bytes needed to represent one UTF code unit in the given encoding.
Definition StringIterator.h:29
uint32_t StringCodePoint
UTF code point (32 bit)
Definition StringIterator.h:14
View over a contiguous sequence of items (pointer + size in elements).
Definition Span.h:29
A string iterator for ASCII strings.
Definition StringIterator.h:231
Builds a constexpr bool skip table of 256 entries used in some parsers.
Definition StringIterator.h:289
A string iterator for UTF16 strings.
Definition StringIterator.h:251
A string iterator for UTF8 strings.
Definition StringIterator.h:270
A position inside a fixed range [start, end) of UTF code points.
Definition StringIterator.h:50
constexpr void setToStart()
Rewind current position to start of iterator range.
Definition StringIterator.h:57
constexpr ssize_t bytesDistanceFrom(StringIterator other) const
Get distance in bytes from current position to another StringIterator current position.
Definition StringIterator.h:397
constexpr void setToEnd()
Set current position to end of iterator range.
Definition StringIterator.h:60
bool advanceUntilMatchesAny(Span< const CodePoint > items, CodePoint &matched)
Advances position until any CodePoint in the given Span is found.
bool advanceBackwardIfMatches(CodePoint c)
Move position by one code point towards start if previous code point matches c
constexpr bool advanceIfMatches(CodePoint c)
Advance position only if next code point matches c.
Definition StringIterator.h:317
constexpr bool isAtStart() const
Check if current position is at start of iterator range.
Definition StringIterator.h:68
bool reverseAdvanceUntilMatches(CodePoint c)
Moves position towards start until CodePoint c is found or position == start
bool advanceBeforeFinding(StringIterator other)
Advances position towards end until a matching range of character equal to other[it,...
bool advanceUntilDifferentFrom(CodePoint c, CodePoint *optionalReadChar=nullptr)
Advances position until a code point different from c is found or end is reached.
bool advanceAfterFinding(StringIterator other)
Advances position towards end until a matching range of character equal to other[it,...
bool startsWith(IteratorType other) const
Check if this Iterator at its start matches entirely another Iterator's range.
bool reverseAdvanceCodePoints(size_t numCodePoints)
Move position backwards (towards start) by variable number of code points.
constexpr bool stepBackward()
Move position to previous code point.
Definition StringIterator.h:351
constexpr bool advanceCodePoints(size_t numCodePoints)
Move position forward (towards end) by variable number of code points.
Definition StringIterator.h:362
bool reverseAdvanceUntilMatchesAny(Span< const CodePoint > items, CodePoint &matched)
Moves position towards start until any CodePoint in the given Span is found.
constexpr bool stepForward()
Move position to next code point.
Definition StringIterator.h:340
bool endsWithAnyOf(Span< const CodePoint > codePoints) const
Check if this Iterator ends with any code point in the given span.
bool advanceIfMatchesAny(Span< const CodePoint > items)
Advance position only if any of the code points in given Span is matched.
constexpr bool isFollowedBy(CodePoint c)
Check if next code point is c
Definition StringIterator.h:377
bool advanceBackwardRead(CodePoint &c)
Move to previous position and read code unit.
bool startsWithAnyOf(Span< const CodePoint > codePoints) const
Check if this Iterator starts with any code point in the given span.
constexpr StringIterator sliceFromStartUntil(StringIterator otherPoint) const
Returns another StringIterator range, starting from start to otherPoint position.
Definition StringIterator.h:389
constexpr bool isPrecededBy(CodePoint c)
Check if previous code point is c
Definition StringIterator.h:383
constexpr bool advanceUntilMatches(CodePoint c)
Advances position towards end until it matches CodePoint c or position == end
Definition StringIterator.h:305
bool read(CodePoint &c)
Read code unit at current position.
constexpr bool advanceRead(CodePoint &c)
Decode code unit at current position and advance.
Definition StringIterator.h:328
bool match(CodePoint c)
Check if code unit at current position matches CodePoint c
Definition StringIterator.h:140
constexpr bool isAtEnd() const
Check if current position is at end of iterator range.
Definition StringIterator.h:64
bool advanceIfMatchesRange(CodePoint first, CodePoint last)
Advance position if any code point in the range [first, last] is matched.
bool endsWith(IteratorType other) const
Check if this Iterator at its end matches entirely another Iterator's range.
bool advanceByLengthOf(StringIterator other)
Advances position by the number of bytes remaining in other (from its current position to its end).
Definition StringIterator.h:95
Non-owning view over a range of characters with UTF Encoding.
Definition StringView.h:48