Files
sbox-public/engine/ThirdParty/Topten.RichTextKit/Utils/Utf32Buffer.cs
s&box team 71f266059a Open source release
This commit imports the C# engine code and game files, excluding C++ source code.

[Source-Commit: ceb3d758046e50faa6258bc3b658a30c97743268]
2025-11-24 09:05:18 +00:00

295 lines
8.3 KiB
C#

// RichTextKit
// Copyright © 2019-2020 Topten Software. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may
// not use this product except in compliance with the License. You may obtain
// a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations
// under the License.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Topten.RichTextKit.Utils;
namespace Topten.RichTextKit.Utils
{
/// <summary>
/// Represents a buffer of UTF-32 encoded code point data
/// </summary>
public class Utf32Buffer : Buffer<int>
{
/// <summary>
/// Creates an empty Utf32Buffer
/// </summary>
public Utf32Buffer()
    : base()
{
}
/// <summary>
/// Creates a Utf32Buffer whose initial content is the supplied text
/// </summary>
/// <param name="str">The UTF-16 text to place in the new buffer</param>
public Utf32Buffer( string str )
    : base()
{
    // Appending is an insert at the current end of the buffer
    Insert( Length, str );
}
/// <summary>
/// Removes all content from this buffer.
/// </summary>
public new void Clear()
{
    // The cached surrogate positions no longer describe the content
    _surrogatePositionsValid = false;

    base.Clear();
}
/// <summary>
/// Appends already decoded UTF-32 code points to the end of this buffer
/// </summary>
/// <param name="data">The UTF-32 code points to append</param>
/// <returns>A slice representing the added UTF-32 data.</returns>
public new Slice<int> Add( Slice<int> data )
{
    // The cached surrogate positions no longer describe the content
    _surrogatePositionsValid = false;

    var added = base.Add( data );
    return added;
}
/// <summary>
/// Appends text to the end of this buffer, converting from UTF-16 to UTF-32
/// </summary>
/// <param name="str">The string of text to be appended</param>
/// <returns>A slice representing the added UTF-32 data.</returns>
public Slice<int> Add( string str ) => Insert( Length, str );
/// <summary>
/// Appends a span of UTF-16 characters to the end of this buffer,
/// converting them to UTF-32
/// </summary>
/// <param name="str">The UTF-16 characters to be appended</param>
/// <returns>A slice representing the added UTF-32 data.</returns>
public Slice<int> Add( ReadOnlySpan<char> str )
{
    // Appending is an insert at the current end of the buffer
    int endOfBuffer = Length;
    return Insert( endOfBuffer, str );
}
/// <summary>
/// Inserts already decoded UTF-32 code points into this buffer
/// </summary>
/// <param name="position">The code point index at which to insert the data</param>
/// <param name="data">The UTF-32 code points to insert</param>
/// <returns>A slice representing the inserted UTF-32 data.</returns>
public new Slice<int> Insert( int position, Slice<int> data )
{
    // The cached surrogate positions no longer describe the content
    _surrogatePositionsValid = false;

    var inserted = base.Insert( position, data );
    return inserted;
}
/// <summary>
/// Inserts a string into this buffer, converting from UTF-16 to UTF-32
/// </summary>
/// <param name="position">The code point index at which to insert the text</param>
/// <param name="str">The string of text to be inserted</param>
/// <returns>A slice representing the inserted UTF-32 data.</returns>
public Slice<int> Insert( int position, string str )
{
    // Hand the characters to the span based overload, which does the decoding
    ReadOnlySpan<char> chars = str.AsSpan();
    return Insert( position, chars );
}
/// <summary>
/// Inserts text into this buffer, converting from UTF-16 to UTF-32
/// </summary>
/// <remarks>
/// A well-formed surrogate pair is decoded to a single supplementary code
/// point. An unpaired surrogate (a high surrogate that is not followed by a
/// low surrogate, or a low surrogate on its own) is stored as U+FFFD. This
/// keeps every code point at or above 0x10000 in the buffer equivalent to
/// exactly two UTF-16 code units, which is what the surrogate position
/// tracking used by the offset conversion methods counts.
/// </remarks>
/// <param name="position">The code point index at which to insert the text</param>
/// <param name="str">The UTF-16 text to be inserted</param>
/// <returns>A slice representing the inserted UTF-32 data.</returns>
public Slice<int> Insert( int position, ReadOnlySpan<char> str )
{
    // The cached surrogate positions no longer describe the content
    _surrogatePositionsValid = false;

    // Decode UTF-16 directly into the internal code point buffer instead of going
    // through Encoding.UTF32, which would need intermediate arrays. One slot is
    // reserved per UTF-16 code unit (the worst case) and any excess is removed
    // once the real code point count is known.
    Slice<int> codePointBuffer = base.Insert( position, str.Length );
    int convertedLength;
    unsafe
    {
        fixed ( int* pDestBuf = codePointBuffer.Underlying )
        fixed ( char* pSrcBuf = str )
        {
            int* pDestStart = pDestBuf + codePointBuffer.Start;
            int* pDest = pDestStart;
            char* pSrc = pSrcBuf;
            char* pSrcEnd = pSrcBuf + str.Length;
            while ( pSrc < pSrcEnd )
            {
                char ch = *pSrc++;
                if ( ch < 0xD800 || ch > 0xDFFF )
                {
                    // Not a surrogate: the code unit is the code point
                    *pDest++ = ch;
                }
                else if ( ch <= 0xDBFF && pSrc < pSrcEnd && *pSrc >= 0xDC00 && *pSrc <= 0xDFFF )
                {
                    // High surrogate followed by a low surrogate. The parts must be
                    // added to 0x10000, not OR-ed: for high surrogates of 0xD840 and
                    // above the shifted value already has bit 16 set, so an OR would
                    // lose the 0x10000 offset (U+20000 would decode as U+10000).
                    char chL = *pSrc++;
                    *pDest++ = 0x10000 + ((ch - 0xD800) << 10) + (chL - 0xDC00);
                }
                else
                {
                    // Unpaired surrogate: emit the replacement character and leave
                    // the following code unit (if any) to be decoded on its own
                    *pDest++ = 0xFFFD;
                }
            }

            // Work out the converted length
            convertedLength = (int)(pDest - pDestStart);
        }
    }

    // Each surrogate pair collapses two code units into one code point, so
    // remove the slots that were reserved but not used
    if ( convertedLength < str.Length )
    {
        base.Delete( position + convertedLength, str.Length - convertedLength );
    }

    // Return the encapsulating slice
    return SubSlice( position, convertedLength );
}
/// <summary>
/// Removes a range of code points from the buffer
/// </summary>
/// <param name="from">The code point index of the first element to delete</param>
/// <param name="length">The number of code points to delete</param>
public new void Delete( int from, int length )
{
    // The cached surrogate positions no longer describe the content
    _surrogatePositionsValid = false;

    base.Delete( from, length );
}
/// <summary>
/// Converts an offset into this buffer to a UTF-16 offset in the originally
/// added string.
/// </summary>
/// <remarks>
/// This function assumes the text was added to the buffer as UTF-16
/// and hasn't been modified in any way since.
/// </remarks>
/// <param name="utf32Offset">The UTF-32 offset to convert</param>
/// <returns>The converted UTF-16 character offset</returns>
public int Utf32OffsetToUtf16Offset( int utf32Offset )
{
    // Make sure the surrogate positions are up to date
    BuildSurrogatePositions();

    // BinarySearch returns either the index of an exact match or the bitwise
    // complement of the insertion point. Both are the number of recorded
    // surrogate positions that lie before the requested offset.
    int searchResult = _surrogatePositions.BinarySearch( utf32Offset );
    int pairsBefore = searchResult >= 0 ? searchResult : ~searchResult;

    // Each of those code points occupied one extra UTF-16 code unit
    return utf32Offset + pairsBefore;
}
/// <summary>
/// Converts an offset in the original UTF-16 string to a code point index into
/// this UTF-32 buffer.
/// </summary>
/// <param name="utf16Offset">The UTF-16 character index</param>
/// <returns>The UTF-32 code point index</returns>
public int Utf16OffsetToUtf32Offset( int utf16Offset )
{
    // Make sure the surrogate positions are up to date
    BuildSurrogatePositions();

    // Start from the UTF-16 offset and step back once for every recorded
    // surrogate position that lies before the running result
    int codePointIndex = utf16Offset;
    foreach ( int surrogateIndex in _surrogatePositions )
    {
        if ( surrogateIndex < codePointIndex )
        {
            codePointIndex--;
        }

        // Positions are recorded in ascending order, so nothing further
        // along the list can affect the result
        if ( surrogateIndex > codePointIndex )
        {
            break;
        }
    }

    return codePointIndex;
}
/// <summary>
/// Gets the entire buffer's content as a string.
/// </summary>
/// <returns>The result of converting the whole buffer with Utf32Utils.FromUtf32</returns>
public override string ToString() => Utf32Utils.FromUtf32( AsSlice() );
/// <summary>
/// Gets a range of the buffer as a string.
/// </summary>
/// <param name="start">The UTF-32 code point index of the first character to retrieve</param>
/// <param name="length">The number of code points to retrieve</param>
/// <returns>A string equivalent to the specified code point range.</returns>
public string GetString( int start, int length )
{
    var codePoints = SubSlice( start, length );
    return Utf32Utils.FromUtf32( codePoints );
}
/// <summary>
/// Indices of all code points in the buffer whose value is 0x10000 or
/// greater, i.e. those that need a surrogate pair when written as UTF-16.
/// Rebuilt on demand by BuildSurrogatePositions.
/// </summary>
List<int> _surrogatePositions = new List<int>();
/// <summary>
/// True while _surrogatePositions matches the current buffer content. The
/// mutating methods of this class reset it to false.
/// </summary>
bool _surrogatePositionsValid = false;
/// <summary>
/// Rebuilds the list of indices of all code points that require a
/// surrogate pair when converted to UTF-16. Does nothing if the list
/// is already valid.
/// </summary>
void BuildSurrogatePositions()
{
    // Nothing to do while the cached list still matches the buffer
    if ( _surrogatePositionsValid )
    {
        return;
    }

    _surrogatePositionsValid = true;
    _surrogatePositions.Clear();

    unsafe
    {
        fixed ( int* pCodePoints = this.Underlying )
        {
            int count = this.Length;
            for ( int index = 0; index < count; index++ )
            {
                // Anything outside the basic multilingual plane takes two UTF-16 code units
                if ( pCodePoints[index] >= 0x10000 )
                {
                    _surrogatePositions.Add( index );
                }
            }
        }
    }
}
}
}