package displaywidth

import (
	"strings"
	"unicode/utf8"

	"github.com/clipperhouse/uax29/v2/graphemes"
)

// Options controls how display width is calculated: the treatment of
// ambiguous East Asian characters and of ANSI escape sequences. The zero
// value has both settings off, which matches [DefaultOptions].
type Options struct {
	// EastAsianWidth specifies whether to treat ambiguous East Asian characters
	// as width 1 or 2. When false (default), ambiguous East Asian characters
	// are treated as width 1. When true, they are width 2.
	EastAsianWidth bool

	// ControlSequences specifies whether to ignore ECMA-48 (ANSI) escape
	// sequences when calculating the display width. When false (default),
	// escape sequences are treated as just a series of characters. When true,
	// each sequence is treated as a single zero-width unit.
	//
	// Note that this option is about *sequences*. Individual control characters
	// are already treated as zero-width. With this option, ANSI sequences such as
	// "\x1b[31m" and "\x1b[0m" do not count towards the width of a string.
	ControlSequences bool
}

// DefaultOptions holds the settings used by the package-level functions:
// ambiguous East Asian characters are narrow (EastAsianWidth false) and ANSI
// escape sequences get no special handling (ControlSequences false).
var DefaultOptions = Options{
	EastAsianWidth:   false,
	ControlSequences: false,
}

// String calculates the display width of a string, by iterating over
// grapheme clusters in the string and summing their widths.
//
// It measures with [DefaultOptions]; use [Options.String] to measure with
// other settings.
func String(s string) int {
	return DefaultOptions.String(s)
}

// String returns the display width of s under the given options: the sum of
// the widths of its grapheme clusters.
//
// Runs of printable ASCII are counted directly, one column per byte. Anything
// else is handed to the grapheme iterator, which is left again as soon as the
// next byte looks like printable ASCII.
func (options Options) String(s string) int {
	total := 0

	for i := 0; i < len(s); {
		// Fast path: a run of printable ASCII is one column per byte.
		if n := printableASCIILength(s[i:]); n > 0 {
			total += n
			i += n
			continue
		}

		// Slow path: segment into grapheme clusters from the current offset.
		iter := graphemes.FromString(s[i:])
		iter.AnsiEscapeSequences = options.ControlSequences

		before := i
		for iter.Next() {
			cluster := iter.Value()
			total += graphemeWidth(cluster, options)
			i += len(cluster)

			// If the next byte may begin a printable ASCII run, hand
			// control back to the outer loop so the fast path gets a try.
			if i < len(s) {
				if c := s[i]; c >= 0x20 && c <= 0x7E {
					break
				}
			}
		}

		// Defensive, should not happen: if the iterator consumed nothing,
		// step over one byte so the outer loop always terminates.
		if i == before {
			i++
		}
	}

	return total
}

// Bytes calculates the display width of a []byte, by iterating over
// grapheme clusters in the byte slice and summing their widths.
//
// It measures with [DefaultOptions]; use [Options.Bytes] to measure with
// other settings.
func Bytes(s []byte) int {
	return DefaultOptions.Bytes(s)
}

// Bytes returns the display width of s under the given options: the sum of
// the widths of its grapheme clusters.
//
// Runs of printable ASCII are counted directly, one column per byte. Anything
// else is handed to the grapheme iterator, which is left again as soon as the
// next byte looks like printable ASCII.
func (options Options) Bytes(s []byte) int {
	total := 0

	for i := 0; i < len(s); {
		// Fast path: a run of printable ASCII is one column per byte.
		if n := printableASCIILength(s[i:]); n > 0 {
			total += n
			i += n
			continue
		}

		// Slow path: segment into grapheme clusters from the current offset.
		iter := graphemes.FromBytes(s[i:])
		iter.AnsiEscapeSequences = options.ControlSequences

		before := i
		for iter.Next() {
			cluster := iter.Value()
			total += graphemeWidth(cluster, options)
			i += len(cluster)

			// If the next byte may begin a printable ASCII run, hand
			// control back to the outer loop so the fast path gets a try.
			if i < len(s) {
				if c := s[i]; c >= 0x20 && c <= 0x7E {
					break
				}
			}
		}

		// Defensive, should not happen: if the iterator consumed nothing,
		// step over one byte so the outer loop always terminates.
		if i == before {
			i++
		}
	}

	return total
}

// Rune calculates the display width of a rune, using [DefaultOptions]. You
// should almost certainly use [String] or [Bytes] for most purposes.
//
// The smallest unit of display width is a grapheme
// cluster, not a rune. Iterating over runes to measure
// width is incorrect in many cases.
func Rune(r rune) int {
	return DefaultOptions.Rune(r)
}

// Rune calculates the display width of a rune, for the given options.
//
// You should almost certainly use [String] or [Bytes] for most purposes.
//
// The smallest unit of display width is a grapheme cluster, not a rune.
// Iterating over runes to measure width is incorrect in many cases.
//
// Surrogates (U+D800-U+DFFF) have width 0. Any other rune that is not a
// valid code point, whether negative or above [utf8.MaxRune], is measured as
// whatever [utf8.EncodeRune] writes for it, which is the encoding of
// [utf8.RuneError].
func (options Options) Rune(r rune) int {
	// ASCII fast path. The lower bound matters: rune is signed, and without
	// it a negative value would be truncated to an arbitrary byte.
	if r >= 0 && r < utf8.RuneSelf {
		return asciiWidth(byte(r))
	}

	// Surrogates (U+D800-U+DFFF) are invalid UTF-8.
	if r >= 0xD800 && r <= 0xDFFF {
		return 0
	}

	var buf [4]byte
	n := utf8.EncodeRune(buf[:], r)

	// A single rune is treated as its own cluster, so the grapheme iterator
	// is skipped.
	return graphemeWidth(buf[:n], options)
}

// _Default is the zero property. It indexes the width-1 entry of
// propertyWidths and is the fallback graphemeWidth uses for any property
// above upperBound.
const _Default = property(0)

// TruncateString shortens s so that it fits in maxWidth display columns,
// appending tail when anything had to be cut. If s already fits, it is
// returned unchanged and tail is not used.
//
// The kept prefix of s is chosen so that its width plus the width of tail is
// at most maxWidth. When tail alone is wider than maxWidth, nothing of s is
// kept ahead of tail.
//
// When [Options.ControlSequences] is true, ANSI escape sequences that appear
// after the truncation point are preserved in the output, following tail.
// This ensures that escape sequences such as SGR resets are not lost,
// preventing color bleed in terminal output.
func (options Options) TruncateString(s string, maxWidth int, tail string) string {
	// Columns available to s itself once tail has been accounted for.
	budget := maxWidth - options.String(tail)

	iter := graphemes.FromString(s)
	iter.AnsiEscapeSequences = options.ControlSequences

	cut, used := 0, 0
	for iter.Next() {
		used += graphemeWidth(iter.Value(), options)

		// Remember the last cluster boundary that still leaves room for tail.
		if used <= budget {
			cut = iter.End()
		}
		if used <= maxWidth {
			continue
		}

		// s does not fit: truncate at the remembered boundary.
		if !options.ControlSequences {
			return s[:cut] + tail
		}

		// Re-scan what is being dropped and keep only the escape
		// sequences, so that trailing resets survive the cut.
		var out strings.Builder
		out.Grow(len(s) + len(tail)) // at most original + tail
		out.WriteString(s[:cut])
		out.WriteString(tail)

		rest := graphemes.FromString(s[cut:])
		rest.AnsiEscapeSequences = true
		for rest.Next() {
			if seq := rest.Value(); len(seq) > 0 && seq[0] == 0x1B {
				out.WriteString(seq)
			}
		}
		return out.String()
	}

	// No truncation
	return s
}

// TruncateString truncates a string to the given maxWidth, and appends the
// given tail if the string is truncated. A string that already fits is
// returned unchanged.
//
// It ensures the total width, including the width of the tail, is less than or
// equal to maxWidth.
//
// It measures with [DefaultOptions]; use [Options.TruncateString] to truncate
// with other settings.
func TruncateString(s string, maxWidth int, tail string) string {
	return DefaultOptions.TruncateString(s, maxWidth, tail)
}

// TruncateBytes shortens s so that it fits in maxWidth display columns,
// appending tail when anything had to be cut. If s already fits, s itself is
// returned and tail is not used; otherwise the result is a newly allocated
// slice.
//
// The kept prefix of s is chosen so that its width plus the width of tail is
// at most maxWidth. When tail alone is wider than maxWidth, nothing of s is
// kept ahead of tail.
//
// When [Options.ControlSequences] is true, ANSI escape sequences that appear
// after the truncation point are preserved in the output, following tail.
// This ensures that escape sequences such as SGR resets are not lost,
// preventing color bleed in terminal output.
func (options Options) TruncateBytes(s []byte, maxWidth int, tail []byte) []byte {
	// Columns available to s itself once tail has been accounted for.
	budget := maxWidth - options.Bytes(tail)

	iter := graphemes.FromBytes(s)
	iter.AnsiEscapeSequences = options.ControlSequences

	cut, used := 0, 0
	for iter.Next() {
		used += graphemeWidth(iter.Value(), options)

		// Remember the last cluster boundary that still leaves room for tail.
		if used <= budget {
			cut = iter.End()
		}
		if used <= maxWidth {
			continue
		}

		// s does not fit: truncate at the remembered boundary.
		if !options.ControlSequences {
			out := make([]byte, 0, cut+len(tail))
			out = append(out, s[:cut]...)
			return append(out, tail...)
		}

		// Re-scan what is being dropped and keep only the escape
		// sequences, so that trailing resets survive the cut.
		out := make([]byte, 0, len(s)+len(tail)) // at most original + tail
		out = append(out, s[:cut]...)
		out = append(out, tail...)

		rest := graphemes.FromBytes(s[cut:])
		rest.AnsiEscapeSequences = true
		for rest.Next() {
			if seq := rest.Value(); len(seq) > 0 && seq[0] == 0x1B {
				out = append(out, seq...)
			}
		}
		return out
	}

	// No truncation
	return s
}

// TruncateBytes truncates a []byte to the given maxWidth, and appends the
// given tail if the []byte is truncated. A []byte that already fits is
// returned as is.
//
// It ensures the total width, including the width of the tail, is less than or
// equal to maxWidth.
//
// It measures with [DefaultOptions]; use [Options.TruncateBytes] to truncate
// with other settings.
func TruncateBytes(s []byte, maxWidth int, tail []byte) []byte {
	return DefaultOptions.TruncateBytes(s, maxWidth, tail)
}

// graphemeWidth returns the display width of a grapheme cluster.
// The passed value must be a single grapheme cluster.
//
// The width comes from the property that lookup reports for the start of
// the cluster, adjusted for a following VS16 and for the EastAsianWidth
// option.
func graphemeWidth[T ~string | []byte](s T, options Options) int {
	// Optimization: empty and single-byte clusters need no property lookup.
	if len(s) == 0 {
		return 0
	}
	if len(s) == 1 {
		return asciiWidth(s[0])
	}

	// Multi-byte grapheme clusters led by a C0 control (0x00-0x1F)
	// occupy no columns.
	if s[0] <= 0x1F {
		return 0
	}

	raw, size := lookup(s)
	prop := property(raw)

	// Variation Selector 16 (VS16, U+FE0F) directly after the looked-up
	// prefix requests emoji presentation, which is wide.
	//
	// VS15 (U+FE0E) requests text presentation but is deliberately not
	// handled: per the original author's reading of Unicode TR51 it does not
	// affect width, so the base property is kept.
	if prop != _Wide && size > 0 && len(s) >= size+3 && isVS16(s[size:size+3]) {
		prop = _Wide
	}

	if prop == _East_Asian_Ambiguous && options.EastAsianWidth {
		prop = _Wide
	}

	// Anything outside the table falls back to the default width.
	if prop > upperBound {
		return propertyWidths[_Default]
	}

	return propertyWidths[prop]
}

// asciiWidth returns the display width of a single byte: 0 for the C0
// controls (0x00-0x1F) and DEL (0x7F), and 1 for every other byte value.
func asciiWidth(b byte) int {
	switch {
	case b < 0x20, b == 0x7F:
		return 0
	default:
		return 1
	}
}

// printableASCIILength returns the length of the run of printable ASCII bytes
// (0x20-0x7E, space through tilde) at the start of s.
//
// When the run is ended by a non-ASCII byte (>= 0x80), the result is one
// less, because the grapheme parser may group the last ASCII byte with the
// non-ASCII bytes that follow it, such as combining marks.
func printableASCIILength[T string | []byte](s T) int {
	for i := 0; i < len(s); i++ {
		c := s[i]
		if c >= 0x20 && c <= 0x7E {
			continue
		}

		// The run stops at s[i]. Give back the last ASCII byte when a
		// non-ASCII byte follows it.
		if c >= 0x80 && i > 0 {
			return i - 1
		}
		return i
	}

	// Every byte was printable ASCII.
	return len(s)
}

// isVS16 reports whether s begins with the UTF-8 encoding of VS16 (U+FE0F),
// which is the byte sequence EF B8 8F. It assumes len(s) >= 3.
func isVS16[T ~string | []byte](s T) bool {
	if s[0] != 0xEF {
		return false
	}
	if s[1] != 0xB8 {
		return false
	}
	return s[2] == 0x8F
}

// propertyWidths maps each property to its display width. It is a jump table
// of sorts, indexed by property, used instead of a switch.
var propertyWidths = [4]int{
	_Default:              1,
	_Zero_Width:           0,
	_Wide:                 2,
	_East_Asian_Ambiguous: 1,
}

// upperBound is the largest property that is a valid index into
// propertyWidths.
const upperBound = property(len(propertyWidths) - 1)