Sane C++ Libraries
C++ Platform Abstraction Libraries
Loading...
Searching...
No Matches
StringIterator.h
1// Copyright (c) Stefano Cristiano
2// SPDX-License-Identifier: MIT
3#pragma once
4#include "../Foundation/Assert.h" //Assert::unreachable
5#include "../Foundation/Span.h"
6#include "../Foundation/StringSpan.h"
7
8namespace SC
9{
12
15
20constexpr bool StringEncodingAreBinaryCompatible(StringEncoding encoding1, StringEncoding encoding2)
21{
22 return (encoding1 == encoding2) or (encoding2 == StringEncoding::Ascii and encoding1 == StringEncoding::Utf8) or
23 (encoding2 == StringEncoding::Utf8 and encoding1 == StringEncoding::Ascii);
24}
25
/// @brief A position inside a fixed range [start, end) of UTF code points.
///
/// Encoding specific primitives (`getEncoding`, `getNextOf`, `getPreviousOf`, `decode`) are forwarded to static
/// functions of the `CharIterator` template parameter.
/// @tparam CharIterator Type providing the static encoding primitives used by this iterator
34template <typename CharIterator>
35struct SC_COMPILER_EXPORT StringIterator
36{
    /// @brief Returns the encoding reported by `CharIterator::getEncoding()`
37 static constexpr StringEncoding getEncoding() { return CharIterator::getEncoding(); }
38
    // Positions are stored as char pointers; code points are exposed as StringCodePoint.
39 using CodeUnit = char;
40 using CodePoint = StringCodePoint;
41
    /// @brief Rewind current position to start of iterator range.
43 constexpr void setToStart() { it = start; }
44
    /// @brief Set current position to end of iterator range.
46 constexpr void setToEnd() { it = end; }
47
    /// @brief Check if current position is at end of iterator range (`it >= end`).
50 [[nodiscard]] constexpr bool isAtEnd() const { return it >= end; }
51
    /// @brief Check if current position is at start of iterator range (`it <= start`).
54 [[nodiscard]] constexpr bool isAtStart() const { return it <= start; }
55
    /// @brief Advances position towards end until it matches CodePoint `c` or position == end
59 [[nodiscard]] constexpr bool advanceUntilMatches(CodePoint c);
60
    /// @brief Moves position towards start until CodePoint `c` is found.
    /// NOTE(review): defined outside this header; presumably stops at start when `c` is not found - confirm.
64 [[nodiscard]] bool reverseAdvanceUntilMatches(CodePoint c);
65
    /// @brief Advances position towards end searching for a range of characters equal to `other`.
    /// Forwards to `advanceBeforeOrAfterFinding` with `after == true`.
    /// NOTE(review): the declarator of this member (`advanceAfterFinding(StringIterator<OtherIterator> other)`,
    /// listing line 71) is absent from this extract.
70 template <typename OtherIterator>
72 {
73 return advanceBeforeOrAfterFinding<OtherIterator, true>(other);
74 }
75
    /// @brief Same-encoding overload of `advanceAfterFinding`, forwards to `advanceAfterFindingSameIterator`.
76 [[nodiscard]] bool advanceAfterFinding(StringIterator other) { return advanceAfterFindingSameIterator(other); }
77
    /// @brief Advances position towards end searching for a range of characters equal to `other`.
    /// Forwards to `advanceBeforeOrAfterFinding` with `after == false`.
    /// NOTE(review): the declarator of this member (`advanceBeforeFinding(StringIterator<OtherIterator> other)`,
    /// listing line 83) is absent from this extract.
82 template <typename OtherIterator>
84 {
85 return advanceBeforeOrAfterFinding<OtherIterator, false>(other);
86 }
87
    /// @brief Same-encoding overload of `advanceBeforeFinding`, forwards to `advanceBeforeFindingSameIterator`.
88 [[nodiscard]] bool advanceBeforeFinding(StringIterator other) { return advanceBeforeFindingSameIterator(other); }
89
    /// @brief Advances position by the byte length of the remaining range of `other` (`other.end - other.it`).
93 [[nodiscard]] bool advanceByLengthOf(StringIterator other) { return advanceOfBytes(other.end - other.it); }
94
    /// @brief Advances position until any CodePoint in the given Span is found.
    /// NOTE(review): `matched` presumably receives the code point that was found - confirm in the implementation.
99 [[nodiscard]] bool advanceUntilMatchesAny(Span<const CodePoint> items, CodePoint& matched);
100
    /// @brief Moves position towards start until any CodePoint in the given Span is found.
105 [[nodiscard]] bool reverseAdvanceUntilMatchesAny(Span<const CodePoint> items, CodePoint& matched);
106
    /// @brief Advances position until a code point different from `c` is found or end is reached.
    /// @param optionalReadChar Optional output pointer, defaults to `nullptr`
111 [[nodiscard]] bool advanceUntilDifferentFrom(CodePoint c, CodePoint* optionalReadChar = nullptr);
112
    /// @brief Advance position only if next code point matches `c`.
116 [[nodiscard]] constexpr bool advanceIfMatches(CodePoint c);
117
    /// @brief Move position by one code point towards start if previous code point matches `c`
121 [[nodiscard]] bool advanceBackwardIfMatches(CodePoint c);
122
    // NOTE(review): listing line 127 is empty in this extract; a declaration (possibly `advanceIfMatchesAny`)
    // appears to have been dropped here - confirm against the original header.
127
    /// @brief Advance position if any code point in the range [first, last] is matched.
132 [[nodiscard]] bool advanceIfMatchesRange(CodePoint first, CodePoint last);
133
    /// @brief Check if the code point decoded at current position equals `c` (false when `it` is not before end).
138 [[nodiscard]] bool match(CodePoint c) { return it < end and CharIterator::decode(it) == c; }
139
    /// @brief Decode code point at current position and advance.
143 [[nodiscard]] constexpr bool advanceRead(CodePoint& c);
144
    /// @brief Read code point at current position.
148 [[nodiscard]] bool read(CodePoint& c);
149
    /// @brief Move to previous position and read code point.
153 [[nodiscard]] bool advanceBackwardRead(CodePoint& c);
154
    /// @brief Move position to next code point.
157 [[nodiscard]] constexpr bool stepForward();
158
    /// @brief Move position to previous code point.
161 [[nodiscard]] constexpr bool stepBackward();
162
    /// @brief Move position forward (towards end) by variable number of code points.
166 [[nodiscard]] constexpr bool advanceCodePoints(size_t numCodePoints);
167
    /// @brief Move position backwards (towards start) by variable number of code points.
171 [[nodiscard]] bool reverseAdvanceCodePoints(size_t numCodePoints);
172
    /// @brief Check if next code point is `c`
176 [[nodiscard]] constexpr bool isFollowedBy(CodePoint c);
177
    /// @brief Check if previous code point is `c`
181 [[nodiscard]] constexpr bool isPrecededBy(CodePoint c);
182
    /// @brief Returns another StringIterator range ending at the position of `otherPoint`.
186 [[nodiscard]] constexpr StringIterator sliceFromStartUntil(StringIterator otherPoint) const;
187
    /// @brief Get distance in bytes from current position to another StringIterator current position.
191 [[nodiscard]] constexpr ssize_t bytesDistanceFrom(StringIterator other) const;
192
    /// @brief Check if this Iterator ends with any code point in the given span.
196 [[nodiscard]] bool endsWithAnyOf(Span<const CodePoint> codePoints) const;
197
    /// @brief Check if this Iterator starts with any code point in the given span.
201 [[nodiscard]] bool startsWithAnyOf(Span<const CodePoint> codePoints) const;
202
    /// @brief Check if this Iterator at its end matches entirely another Iterator's range.
206 template <typename IteratorType>
207 [[nodiscard]] bool endsWith(IteratorType other) const;
208
    /// @brief Check if this Iterator at its start matches entirely another Iterator's range.
212 template <typename IteratorType>
213 [[nodiscard]] bool startsWith(IteratorType other) const;
214
215 protected:
    // Shared search routine behind advanceAfterFinding / advanceBeforeFinding; `after` is the flag passed
    // as `true` by the former and `false` by the latter.
216 template <typename OtherIterator, bool after>
217 [[nodiscard]] bool advanceBeforeOrAfterFinding(StringIterator<OtherIterator> other);
218 [[nodiscard]] bool advanceAfterFindingSameIterator(StringIterator other);
219 [[nodiscard]] bool advanceBeforeFindingSameIterator(StringIterator other);
    // Used by advanceByLengthOf with a byte count.
220 [[nodiscard]] bool advanceOfBytes(ssize_t bytesLength);
221
222 friend struct StringView;
    // Encoding specific stepping, forwarded to CharIterator.
223 static constexpr const CodeUnit* getNextOf(const CodeUnit* src) { return CharIterator::getNextOf(src); }
224 static constexpr const CodeUnit* getPreviousOf(const CodeUnit* src) { return CharIterator::getPreviousOf(src); }
    // Builds an iterator over [it, end): both the current position and `start` are initialized to `it`.
225 constexpr StringIterator(const CodeUnit* it, const CodeUnit* end) : it(it), start(it), end(end) {}
226 const CodeUnit* it;    // current position
227 const CodeUnit* start; // first position of the range
228 const CodeUnit* end;   // one past the last position of the range
229};
230
/// @brief A string iterator for ASCII strings.
232struct SC_COMPILER_EXPORT StringIteratorASCII : public StringIterator<StringIteratorASCII>
233{
    /// @brief ASCII specific `advanceUntilMatches`, declared again here so it hides the generic base version.
234 [[nodiscard]] constexpr bool advanceUntilMatches(CodePoint c);
235
236 private:
    // Non-constexpr search used by advanceUntilMatches when not evaluated at compile time (defined elsewhere).
237 [[nodiscard]] bool advanceUntilMatchesNonConstexpr(CodePoint c);
    // Inherit the base class constructors.
238 using StringIterator::StringIterator;
239 constexpr StringIteratorASCII(const CodeUnit* it, const CodeUnit* end) : StringIterator(it, end) {}
    // NOTE(review): the declaration of `Parent` (probably listing line 240) is absent from this extract.
241 friend Parent;
242 friend struct StringView;
243
244 [[nodiscard]] static constexpr StringEncoding getEncoding() { return StringEncoding::Ascii; }
245
    // Every code point occupies exactly one char, so stepping is plain pointer arithmetic.
246 [[nodiscard]] static constexpr const char* getNextOf(const char* src) { return src + 1; }
247 [[nodiscard]] static constexpr const char* getPreviousOf(const char* src) { return src - 1; }
    // NOTE(review): where `char` is signed, a byte with the high bit set converts to a large CodePoint value.
248 [[nodiscard]] static constexpr CodePoint decode(const char* src) { return static_cast<CodePoint>(*src); }
249};
250
252struct SC_COMPILER_EXPORT StringIteratorUTF16 : public StringIterator<StringIteratorUTF16>
253{
254 private:
255 using StringIterator::StringIterator;
256 constexpr StringIteratorUTF16(const CodeUnit* it, const CodeUnit* end) : StringIterator(it, end) {}
258 friend Parent;
259 friend struct StringView;
260
261 [[nodiscard]] static StringEncoding getEncoding() { return StringEncoding::Utf16; }
262
263 [[nodiscard]] static const char* getNextOf(const char* bytes);
264
265 [[nodiscard]] static const char* getPreviousOf(const char* bytes);
266
267 [[nodiscard]] static uint32_t decode(const char* bytes);
268};
269
271struct SC_COMPILER_EXPORT StringIteratorUTF8 : public StringIterator<StringIteratorUTF8>
272{
273 private:
275 friend Parent;
276 friend struct StringView;
277 using StringIterator::StringIterator;
278 constexpr StringIteratorUTF8(const CodeUnit* it, const CodeUnit* end) : StringIterator(it, end) {}
279
280 [[nodiscard]] static StringEncoding getEncoding() { return StringEncoding::Utf8; }
281
282 [[nodiscard]] static const char* getNextOf(const char* src);
283
284 [[nodiscard]] static const char* getPreviousOf(const char* src);
285
286 [[nodiscard]] static uint32_t decode(const char* src);
287};
288
// Builds a constexpr bool skip table of 256 entries used in some parsers.
// NOTE(review): the struct header (listing line 290) and the constructor declarator (listing line 293)
// are absent from this extract, so the type of `chars` cannot be seen here.
291{
    // One flag per table slot; every slot starts as false.
292 bool matches[256] = {false};
294 {
    // Flag the slot of every character passed in.
295 for (auto c : chars)
296 {
    // NOTE(review): if the elements of `chars` are plain `char` and `char` is signed, a byte with the high bit
    // set produces a negative index here - confirm element type / expected input range.
297 matches[static_cast<int>(c)] = true;
298 }
299 }
300};
302
303//-----------------------------------------------------------------------------------------------------------------------
304// Implementation Details
305//-----------------------------------------------------------------------------------------------------------------------
// StringIterator<CharIterator>::advanceUntilMatches(CodePoint c)
// Advances position towards end until it matches CodePoint c or position == end.
// Returns true with the position left on the matching code point, false once the end has been reached.
// NOTE(review): the declarator line (listing line 307) is absent from this extract.
306template <typename CharIterator>
308{
309 while (it < end)
310 {
    // Stop without consuming the matching code point.
311 if (CharIterator::decode(it) == c)
312 return true;
313 it = getNextOf(it);
314 }
315 return false;
316}
317
// StringIterator<CharIterator>::advanceIfMatches(CodePoint c)
// Advance position only if the code point at the current position matches c.
// Returns true (and moves past that code point) on match; otherwise returns false and leaves the position unchanged.
// NOTE(review): the declarator line (listing line 319) is absent from this extract.
318template <typename CharIterator>
320{
    // The bounds check comes first so that decode is only called on a position before end.
321 if (it < end and CharIterator::decode(it) == c)
322 {
323 it = getNextOf(it);
324 return true;
325 }
326 return false;
327}
328
// StringIterator<CharIterator>::advanceRead(CodePoint& c)
// Decode the code point at the current position into c and advance past it.
// Returns false (leaving c and the position untouched) when the position is not before end.
// NOTE(review): the declarator line (listing line 330) is absent from this extract.
329template <typename CharIterator>
331{
332 if (it < end)
333 {
    // Decode first, then step: c receives the code point that is being consumed.
334 c = CharIterator::decode(it);
335 it = getNextOf(it);
336 return true;
337 }
338 return false;
339}
340
// StringIterator<CharIterator>::stepForward()
// Move position to next code point. Returns false, without moving, when the position is not before end.
// NOTE(review): the declarator line (listing line 342) is absent from this extract.
341template <typename CharIterator>
343{
344 if (it < end)
345 {
346 it = getNextOf(it);
347 return true;
348 }
349 return false;
350}
351
// StringIterator<CharIterator>::stepBackward()
// Move position to previous code point. Returns false, without moving, when the position is not after start.
// NOTE(review): the declarator line (listing line 353) is absent from this extract.
352template <typename CharIterator>
354{
355 if (it > start)
356 {
357 it = getPreviousOf(it);
358 return true;
359 }
360 return false;
361}
362
363template <typename CharIterator>
364constexpr bool StringIterator<CharIterator>::advanceCodePoints(size_t numCodePoints)
365{
366 while (numCodePoints > 0)
367 {
368 numCodePoints -= 1;
369 if (it >= end)
370 {
371 return false;
372 }
373 it = getNextOf(it);
374 }
375 return true;
376}
377
378template <typename CharIterator>
380{
381 return it < end ? CharIterator::decode(getNextOf(it)) == c : false;
382}
383
// StringIterator<CharIterator>::isPrecededBy(CodePoint c)
// Check if previous code point is c. Returns false when the position is not after start.
// NOTE(review): the declarator line (listing line 385) is absent from this extract.
384template <typename CharIterator>
386{
    // Only step back and decode when there is something before the current position.
387 return it > start ? CharIterator::decode(getPreviousOf(it)) == c : false;
388}
389
// StringIterator<CharIterator>::sliceFromStartUntil(StringIterator otherPoint) const
// Returns another StringIterator range that ends at the current position of otherPoint.
// NOTE(review): the first declarator line (listing line 391) is absent from this extract.
// NOTE(review): the returned range begins at this iterator's current position `it`, not at `start`;
// the name suggests `start` - confirm which one is intended.
390template <typename CharIterator>
392 StringIterator otherPoint) const
393{
    // otherPoint must not be positioned before this iterator, otherwise the slice would have negative length.
394 SC_ASSERT_RELEASE(it <= otherPoint.it);
395 return StringIterator(it, otherPoint.it);
396}
397
// StringIterator<CharIterator>::bytesDistanceFrom(StringIterator other) const
// Get distance in bytes between the current position of this iterator and the current position of other.
// The result is `it - other.it`, so it is negative when this iterator is positioned before other.
// NOTE(review): the declarator line (listing line 399) is absent from this extract.
398template <typename CharIterator>
400{
    // Pointer difference counts CodeUnit elements; scale by the element size to express it in bytes.
401 return (it - other.it) * static_cast<ssize_t>(sizeof(CodeUnit));
402}
403
404// StringIteratorASCII
405[[nodiscard]] constexpr bool StringIteratorASCII::advanceUntilMatches(CodePoint c)
406{
407#if defined(__clang__)
408#pragma clang diagnostic push
409#pragma clang diagnostic ignored "-Wunreachable-code"
410#endif
411 return __builtin_is_constant_evaluated() ? StringIterator::advanceUntilMatches(c)
412 : advanceUntilMatchesNonConstexpr(c);
413#if defined(__clang__)
414#pragma clang diagnostic pop
415#endif
416}
417
418} // namespace SC
#define SC_ASSERT_RELEASE(e)
Assert expression e to be true.
Definition Assert.h:42
unsigned int uint32_t
Platform independent (4) bytes unsigned int.
Definition PrimitiveTypes.h:38
signed long ssize_t
Platform independent signed size type.
Definition PrimitiveTypes.h:57
constexpr bool StringEncodingAreBinaryCompatible(StringEncoding encoding1, StringEncoding encoding2)
Checks if two encodings have the same utf unit size.
Definition StringIterator.h:20
uint32_t StringCodePoint
UTF code point (32 bit)
Definition StringIterator.h:14
View over a contiguous sequence of items (pointer + size in elements).
Definition Span.h:29
A string iterator for ASCII strings.
Definition StringIterator.h:233
Builds a constexpr bool skip table of 256 entries used in some parsers.
Definition StringIterator.h:291
A string iterator for UTF16 strings.
Definition StringIterator.h:253
A string iterator for UTF8 strings.
Definition StringIterator.h:272
A position inside a fixed range [start, end) of UTF code points.
Definition StringIterator.h:36
constexpr void setToStart()
Rewind current position to start of iterator range.
Definition StringIterator.h:43
constexpr ssize_t bytesDistanceFrom(StringIterator other) const
Get distance in bytes from current position to another StringIterator current position.
Definition StringIterator.h:399
constexpr void setToEnd()
Set current position to end of iterator range.
Definition StringIterator.h:46
bool advanceUntilMatchesAny(Span< const CodePoint > items, CodePoint &matched)
Advances position until any CodePoint in the given Span is found.
bool advanceBackwardIfMatches(CodePoint c)
Move position by one code point towards start if previous code point matches c
constexpr bool advanceIfMatches(CodePoint c)
Advance position only if next code point matches c.
Definition StringIterator.h:319
bool advanceBeforeFinding(StringIterator< OtherIterator > other)
Advances position towards end until a range of characters equal to other[it, end) is found, stopping before the match.
Definition StringIterator.h:83
constexpr bool isAtStart() const
Check if current position is at start of iterator range.
Definition StringIterator.h:54
bool reverseAdvanceUntilMatches(CodePoint c)
Moves position towards start until CodePoint c is found or position == start
bool advanceAfterFinding(StringIterator< OtherIterator > other)
Advances position towards end until a range of characters equal to other[it, end) is found, stopping after the match.
Definition StringIterator.h:71
bool advanceUntilDifferentFrom(CodePoint c, CodePoint *optionalReadChar=nullptr)
Advances position until a code point different from c is found or end is reached.
bool startsWith(IteratorType other) const
Check if this Iterator at its start matches entirely another Iterator's range.
bool reverseAdvanceCodePoints(size_t numCodePoints)
Move position backwards (towards start) by variable number of code points.
constexpr bool stepBackward()
Move position to previous code point.
Definition StringIterator.h:353
constexpr bool advanceCodePoints(size_t numCodePoints)
Move position forward (towards end) by variable number of code points.
Definition StringIterator.h:364
bool reverseAdvanceUntilMatchesAny(Span< const CodePoint > items, CodePoint &matched)
Moves position towards start until any CodePoint in the given Span is found.
constexpr bool stepForward()
Move position to next code point.
Definition StringIterator.h:342
bool endsWithAnyOf(Span< const CodePoint > codePoints) const
Check if this Iterator ends with any code point in the given span.
bool advanceIfMatchesAny(Span< const CodePoint > items)
Advance position only if any of the code points in given Span is matched.
constexpr bool isFollowedBy(CodePoint c)
Check if next code point is c
Definition StringIterator.h:379
bool advanceBackwardRead(CodePoint &c)
Move to previous position and read code unit.
bool startsWithAnyOf(Span< const CodePoint > codePoints) const
Check if this Iterator starts with any code point in the given span.
constexpr StringIterator sliceFromStartUntil(StringIterator otherPoint) const
Returns another StringIterator range, starting from start to otherPoint position.
Definition StringIterator.h:391
constexpr bool isPrecededBy(CodePoint c)
Check if previous code point is c
Definition StringIterator.h:385
constexpr bool advanceUntilMatches(CodePoint c)
Advances position towards end until it matches CodePoint c or position == end
Definition StringIterator.h:307
bool read(CodePoint &c)
Read code unit at current position.
constexpr bool advanceRead(CodePoint &c)
Decode code unit at current position and advance.
Definition StringIterator.h:330
bool match(CodePoint c)
Check if code unit at current position matches CodePoint c
Definition StringIterator.h:138
constexpr bool isAtEnd() const
Check if current position is at end of iterator range.
Definition StringIterator.h:50
bool advanceIfMatchesRange(CodePoint first, CodePoint last)
Advance position if any code point in the range [first, last] is matched.
bool endsWith(IteratorType other) const
Check if this Iterator at its end matches entirely another Iterator's range.
bool advanceByLengthOf(StringIterator other)
Advances position by the same number of bytes as the remaining range [it, end) of other.
Definition StringIterator.h:93
Non-owning view over a range of characters with UTF Encoding.
Definition StringView.h:46