using System;
using System.Collections.Generic;
using System.Text;
using Topten.RichTextKit.Utils;

namespace Topten.RichTextKit
{
    /// <summary>
    /// Finds grapheme cluster boundaries in a buffer of code points by classifying the
    /// code point on each side of a position and looking the pair up in a break table,
    /// with extra look-behind for ZWJ and regional indicator sequences.
    /// </summary>
    static class GraphemeClusterAlgorithm
    {
        /// <summary>
        /// Given a sequence of code points, return its grapheme cluster boundaries.
        /// </summary>
        /// <remarks>
        /// Every position from 0 to <c>codePoints.Length</c> (inclusive) is tested with
        /// <see cref="IsBoundary"/>. An empty buffer therefore yields nothing, because
        /// <see cref="IsBoundary"/> returns false for empty input.
        /// </remarks>
        /// <param name="codePoints">The code points</param>
        /// <returns>An enumerable of grapheme cluster boundaries (positions in the buffer)</returns>
        // NOTE(review): the parameter type is written here exactly as found. If `Slice` is a
        // generic type in Topten.RichTextKit.Utils, it presumably needs a type argument as
        // well (the return type had lost its own) - confirm against the Slice declaration.
        public static IEnumerable<int> GetBoundaries(Slice codePoints)
        {
            for (int position = 0; position <= codePoints.Length; position++)
            {
                if (IsBoundary(codePoints, position))
                {
                    yield return position;
                }
            }
        }

        /// <summary>
        /// Check if a position in a code point buffer is a grapheme cluster boundary.
        /// </summary>
        /// <remarks>
        /// The position is not range-checked here. A position of 0 or less is treated as
        /// start of text on the left-hand side, and a position at or beyond the length is
        /// treated as end of text on the right-hand side; any other out-of-range access
        /// is left to the <c>Slice</c> indexer.
        /// </remarks>
        /// <param name="codePoints">The code points</param>
        /// <param name="position">The position to check</param>
        /// <returns>
        /// True if there is a boundary immediately before <paramref name="position"/>;
        /// always false when the buffer is empty.
        /// </returns>
        public static bool IsBoundary(Slice codePoints, int position)
        {
            // An empty buffer has no boundaries at all
            if (codePoints.Length == 0)
            {
                return false;
            }

            // Get the grapheme cluster class of the character on each side.
            // 'a' is the class before the position, 'b' the class after it.
            var a = position <= 0
                ? GraphemeClusterClass.SOT
                : UnicodeClasses.GraphemeClusterClass(codePoints[position - 1]);
            var b = position < codePoints.Length
                ? UnicodeClasses.GraphemeClusterClass(codePoints[position])
                : GraphemeClusterClass.EOT;

            // Rule 11 - Special handling for ZWJ in extended pictograph
            if (a == GraphemeClusterClass.ZWJ)
            {
                // Skip backwards over any run of Extend characters preceding the ZWJ
                var scan = position - 2;
                while (scan >= 0 && UnicodeClasses.GraphemeClusterClass(codePoints[scan]) == GraphemeClusterClass.Extend)
                {
                    scan--;
                }

                // If that run is headed by an ExtPict, promote the ZWJ to the
                // dedicated ExtPictZwg class so the table can tell the two cases apart
                if (scan >= 0 && UnicodeClasses.GraphemeClusterClass(codePoints[scan]) == GraphemeClusterClass.ExtPict)
                {
                    a = GraphemeClusterClass.ExtPictZwg;
                }
            }

            // Special handling for regional indicator
            // Rule 12 and 13
            if (a == GraphemeClusterClass.Regional_Indicator)
            {
                // Count how many consecutive regional indicators come before
                // the one at position - 1 (that one itself is not counted)
                int count = 0;
                for (int scan = position - 1; scan > 0; scan--)
                {
                    if (UnicodeClasses.GraphemeClusterClass(codePoints[scan - 1]) != GraphemeClusterClass.Regional_Indicator)
                    {
                        break;
                    }

                    count++;
                }

                // If odd, switch from RI to Any: the indicator at position - 1 then
                // completes a pair, so it must not join with a following indicator
                if ((count % 2) != 0)
                {
                    a = GraphemeClusterClass.Any;
                }
            }

            // A non-zero table entry means there is a boundary between a and b
            return pairTable[(int)b][(int)a] != 0;
        }

        // Break table, indexed as pairTable[(int)b][(int)a]: the row is the class after the
        // position, the column is the class before it. A non-zero entry marks a boundary.
        // NOTE(review): this relies on the numeric values of GraphemeClusterClass following
        // the column order listed below - confirm against the enum declaration.
        private static readonly byte[][] pairTable = new byte[][]
        {
            // Columns, left to right:
            //   Any, CR, LF, Control, Extend, Regional_Indicator, Prepend, SpacingMark,
            //   L, V, T, LV, LVT, ExtPict, ZWJ, SOT, EOT, ExtPictZwg
            new byte[] { 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, }, // Any
            new byte[] { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, }, // CR
            new byte[] { 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, }, // LF
            new byte[] { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, }, // Control
            new byte[] { 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, }, // Extend
            new byte[] { 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, }, // Regional_Indicator
            new byte[] { 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, }, // Prepend
            new byte[] { 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, }, // SpacingMark
            new byte[] { 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, }, // L
            new byte[] { 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, }, // V
            new byte[] { 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, }, // T
            new byte[] { 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, }, // LV
            new byte[] { 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, }, // LVT
            new byte[] { 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, }, // ExtPict
            new byte[] { 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, }, // ZWJ
            new byte[] { 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, }, // SOT
            new byte[] { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, }, // EOT
            new byte[] { 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, }, // ExtPictZwg
        };
    }
}