namespace Sandbox.Rendering; using System.Buffers; using System.Runtime.InteropServices; /// /// This object renders every sprite registered to it in a single draw call. It takes care of sorting, sampling, and the whole pipeline regarding sprites. /// The SceneSpriteSystem is responsible for pushing sprites into this object depending on its properties. /// internal sealed class SpriteBatchSceneObject : SceneCustomObject { internal readonly record struct SpriteGroup( SpriteData[] SharedSprites, int Offset, int Count, BBox Bounds ); public bool Sorted { get; set; } = false; public bool Filtered { get; set; } = false; public bool Additive { get; set; } = false; public bool Opaque { get; set; } = false; internal Dictionary Components = new(); private static ComputeShader SpriteComputeShader = new( "sprite/sprite_cs" ); private static ComputeShader SortComputeShader = new( "sort_cs" ); private readonly RenderAttributes SortComputeShaderAttributes = new(); private readonly GpuBuffer SpriteAtomicCounter; private static Material SpriteMaterial = Material.FromShader( "sprite/sprite_ps.shader" ); internal Dictionary SpriteGroups = []; internal readonly SamplerState sampler = new() { AddressModeU = TextureAddressMode.Clamp, AddressModeV = TextureAddressMode.Clamp, }; /// /// Flags for sprite rendering, must match SpriteFlags in sprite_ps.shader /// [Flags] public enum SpriteFlags { None = 0x0, CastShadows = 0x1, FlipX = 0x2, FlipY = 0x4, SnapToFrame = 0x8 } // GPU Resident representation of a sprite [StructLayout( LayoutKind.Sequential, Pack = 16 )] public struct SpriteData { public Vector3 Position; public Vector3 Rotation; public Vector2 Scale; public uint TintColor; // Packed RGBA8 public uint OverlayColor; // Packed RGBA8 public int TextureHandle; public int RenderFlags; public uint BillboardMode; public uint FogStrengthCutout; // Lower 16 bits: fog, upper 16 bits: alpha cutout public uint Lighting; public float DepthFeather; public int SamplerIndex; public int Splots = 0; public int Sequence = 0; public float SequenceTime = 0; public float RotationOffset = -1.0f; public Vector4 MotionBlur; public Vector3 Velocity = Vector3.Zero; public Vector4 BlendSheetUV; public Vector2 Offset; public SpriteData() { } // Helper method to pack Color to RGBA8 internal static uint PackColor( Color color ) { byte r = (byte)(color.r * 255f); byte g = (byte)(color.g * 255f); byte b = (byte)(color.b * 255f); byte a = (byte)(color.a * 255f); return (uint)(r | (g << 8) | (b << 16) | (a << 24)); } // Pack fog strength and alpha cutout into a single uint internal static uint PackFogAndAlphaCutout( float fogStrength, float alphaCutout ) { ushort fogPacked = (ushort)(fogStrength.Clamp( 0f, 1f ) * 65535f); ushort alphaPacked = (ushort)(alphaCutout.Clamp( 0f, 1f ) * 65535f); return (uint)(fogPacked | (alphaPacked << 16)); } } struct SpriteVertex { public Vector4 position; public Vector4 normal; public Vector2 uv; public SpriteVertex( Vector3 pos, Vector3 norm, Vector2 inUv ) { position = new Vector4( pos, 1.0f ); normal = new Vector4( norm, 0.0f ); uv = inUv; } } const int DefaultBufferSize = 16; int CurrentBufferSize = DefaultBufferSize; int _splotCount = 0; int SplotCount { get { if ( _splotCount != 0 ) { return _splotCount; } // Use precomputed splot counts to avoid iteration int splotCount = 0; foreach ( var group in SpriteGroups ) { if ( _precomputedSplotCounts.TryGetValue( group.Key, out int precomputedCount ) ) { splotCount += precomputedCount; } else { // Fallback var spriteGroup = group.Value; for ( int i = 0; i < spriteGroup.Count; i++ ) { int index = spriteGroup.Offset + i; int leading = spriteGroup.SharedSprites[index].MotionBlur.x > 1 ? 2 : 1; splotCount += spriteGroup.SharedSprites[index].Splots * leading; } } } _splotCount = splotCount; return _splotCount; } } int SpriteCount { get { int sum = Components.Count; foreach ( var group in SpriteGroups ) { sum += group.Value.Count; } return sum; } } bool GPUUploadQueued = false; // Set by UploadOnHost after building the staging buffer, // consumed by RenderSceneObject to do the actual GPU upload within a valid render context. bool _gpuUploadPending = false; int _pendingComponentCount = 0; int _pendingSpriteCount = 0; int _pendingSplotCount = 0; GpuBuffer SpriteBuffer; GpuBuffer SpriteBufferOut; GpuBuffer VertexBuffer; GpuBuffer IndexBuffer; GpuBuffer GPUSortingBuffer; GpuBuffer GPUDistanceBuffer; SpriteData[] SpriteDataBuffer = null!; bool SpriteDataBufferRented = false; public SpriteBatchSceneObject( Scene scene ) : base( scene.SceneWorld ) { InitializeSpriteMesh(); // GPU buffers SpriteBuffer = new( CurrentBufferSize ); SpriteBufferOut = new( CurrentBufferSize ); GPUSortingBuffer = new( CurrentBufferSize ); GPUDistanceBuffer = new( CurrentBufferSize ); SpriteAtomicCounter = new( 1 ); } /// /// Create the initialize sprite mesh that will be instanced /// private void InitializeSpriteMesh() { // Vertex pulling buffer const float spriteSize = 10f; SpriteVertex[] vertices = [ new ( new ( -spriteSize, -spriteSize, 0 ), Vector3.Forward, new ( 0, 0 ) ), new ( new ( spriteSize, -spriteSize, 0 ), Vector3.Forward, new ( 1, 0 ) ), new ( new ( spriteSize, spriteSize, 0 ), Vector3.Forward, new ( 1, 1 ) ), new ( new ( -spriteSize, spriteSize, 0 ), Vector3.Forward, new ( 0, 1 ) ), ]; VertexBuffer = new( 4 ); VertexBuffer.SetData( vertices.AsSpan() ); // Index buffer int[] indices = { 0, 1, 2, 0, 2, 3 }; IndexBuffer = new( 6, GpuBuffer.UsageFlags.Index ); IndexBuffer.SetData( indices ); } ~SpriteBatchSceneObject() { if ( SpriteDataBufferRented ) { ArrayPool.Shared.Return( SpriteDataBuffer, clearArray: false ); } } /// /// Resizes GPU buffers to the nearest power of 2 /// public void ResizeBuffers() { int allocationSize = SplotCount + SpriteCount; if ( allocationSize <= CurrentBufferSize ) { return; } ResizeBuffers( allocationSize ); } /// /// Resizes GPU buffers to accommodate the specified allocation size /// private void ResizeBuffers( int allocationSize ) { CurrentBufferSize = (int)System.Numerics.BitOperations.RoundUpToPowerOf2( (uint)allocationSize ); SpriteBuffer?.Dispose(); GPUSortingBuffer?.Dispose(); GPUDistanceBuffer?.Dispose(); SpriteBuffer = new( CurrentBufferSize ); SpriteBufferOut = new( CurrentBufferSize ); GPUSortingBuffer = new( CurrentBufferSize ); GPUDistanceBuffer = new( CurrentBufferSize ); } private readonly Dictionary _precomputedSplotCounts = []; // Pre-allocated buffer to avoid GC allocations in hot path private SpriteRenderer[] _componentBuffer = new SpriteRenderer[16]; private readonly object _boundsLock = new(); public void RegisterSprite( Guid ownerId, SpriteData[] sharedSprites, int offset, int count, int splotCount, BBox bounds ) { SpriteGroups[ownerId] = new( sharedSprites, offset, count, bounds ); _precomputedSplotCounts[ownerId] = splotCount; OnChanged(); } public void RegisterSprite( Guid id, SpriteRenderer component ) { Components[id] = component; OnChanged(); } public void UnregisterSprite( Guid id ) { Components.Remove( id ); OnChanged(); } public void UnregisterSpriteGroup( Guid ownerId ) { if ( SpriteGroups.Remove( ownerId ) ) { // Clean up precomputed splot count _precomputedSplotCounts.Remove( ownerId ); OnChanged(); } } public void UpdateSprite( Guid id, SpriteRenderer component ) { if ( Components.ContainsKey( id ) ) { Components[id] = component; OnChanged(); } } public bool ContainsSprite( Guid id ) { return Components.ContainsKey( id ); } public void OnChanged() { int requiredSize = SplotCount + SpriteCount; // Clear cached splot count to force recalculation _splotCount = 0; // Only resize if we actually need more space if ( requiredSize > CurrentBufferSize ) { ResizeBuffers( requiredSize ); } GPUUploadQueued = true; } /// /// Copy host buffers onto GPU /// public void UploadOnHost() { if ( !GPUUploadQueued && !Sorted ) { return; } int spriteCount = SpriteCount; int componentCount = Components.Count; // Staging buffer only needs to hold component sprites — particle groups // are uploaded directly from SharedSprites in RenderSceneObject. if ( componentCount > 0 && (SpriteDataBuffer == null || SpriteDataBuffer.Length < componentCount) ) { if ( SpriteDataBufferRented ) { ArrayPool.Shared.Return( SpriteDataBuffer, clearArray: false ); } SpriteDataBuffer = ArrayPool.Shared.Rent( componentCount ); SpriteDataBufferRented = true; } var boundsMin = new Vector3( float.MaxValue, float.MaxValue, float.MaxValue ); var boundsMax = new Vector3( float.MinValue, float.MinValue, float.MinValue ); // Upload sprites if ( componentCount > 0 ) { // Use pre-allocated buffer to avoid GC allocation if ( _componentBuffer.Length < componentCount ) { _componentBuffer = new SpriteRenderer[componentCount * 2]; } int index = 0; foreach ( var component in Components.Values ) { _componentBuffer[index++] = component; } object boundsLock = _boundsLock; Parallel.For<(Vector3 mins, Vector3 maxs)>( 0, componentCount, () => (new Vector3( float.MaxValue, float.MaxValue, float.MaxValue ), new Vector3( float.MinValue, float.MinValue, float.MinValue )), ( i, _, local ) => { var c = _componentBuffer[i]; var transform = c.WorldTransform; var spriteSize = c.Size; var rotation = c.WorldRotation.Angles().AsVector3(); if ( c.Billboard == SpriteRenderer.BillboardMode.Always || c.Billboard == SpriteRenderer.BillboardMode.YOnly ) { // We only care about roll in this case rotation.x = 0; rotation.y = 0; } spriteSize = spriteSize.Abs(); // Adjust for aspect ratio var aspectRatio = (c.Texture?.Width ?? 1) / (float)(c.Texture?.Height ?? 1); var size = spriteSize / 2f; var pos = transform.Position; var scale = new Vector3( transform.Scale.x * size.x, transform.Scale.y, transform.Scale.z * size.y ); if ( aspectRatio < 1f ) scale *= new Vector3( aspectRatio, 1f, 1f ); else scale *= new Vector3( 1f, 1f, 1f / aspectRatio ); pos = pos.RotateAround( transform.Position, transform.Rotation ); transform = transform.WithScale( scale ).WithPosition( pos ); var renderFlags = SpriteFlags.None; if ( c.FlipHorizontal ) renderFlags |= SpriteFlags.FlipX; if ( c.FlipVertical ) renderFlags |= SpriteFlags.FlipY; var rgbe = c.Color.ToRgbe(); var alpha = (byte)(c.Color.a.Clamp( 0.0f, 1.0f ) * 255.0f); var tintColor = new Color32( rgbe.r, rgbe.g, rgbe.b, alpha ); var overlayRgbe = c.OverlayColor.ToRgbe(); var overlayAlpha = (byte)(c.OverlayColor.a.Clamp( 0.0f, 1.0f ) * 255.0f); var overlayColor = new Color32( overlayRgbe.r, overlayRgbe.g, overlayRgbe.b, overlayAlpha ); int lightingFlag = c.Lighting ? 1 : 0; uint packedExponent = (uint)(((byte)lightingFlag) | rgbe.a << 16); uint packedFogAndAlpha = SpriteData.PackFogAndAlphaCutout( c.FogStrength, c.AlphaCutoff ); var spritePos = transform.Position; var spriteScale = new Vector2( transform.Scale.x, transform.Scale.z ); SpriteDataBuffer[i] = new SpriteData { Position = spritePos, Rotation = new( rotation.x, rotation.y, rotation.z ), Scale = spriteScale, TextureHandle = c.Texture is null ? Texture.Invalid.Index : c.Texture.Index, TintColor = tintColor.RawInt, OverlayColor = overlayColor.RawInt, RenderFlags = (int)renderFlags, BillboardMode = (uint)c.Billboard, FogStrengthCutout = packedFogAndAlpha, Lighting = packedExponent, DepthFeather = c.DepthFeather, SamplerIndex = SamplerState.GetBindlessIndex( sampler with { Filter = c.TextureFilter } ), Offset = c.Pivot }; var pivot = c.Pivot; float halfSize = MathF.Max( MathF.Max( pivot.x, 1f - pivot.x ) * 2f * spriteScale.x, MathF.Max( pivot.y, 1f - pivot.y ) * 2f * spriteScale.y ); var expand = new Vector3( halfSize, halfSize, halfSize ); return (Vector3.Min( local.mins, spritePos - expand ), Vector3.Max( local.maxs, spritePos + expand )); }, local => { lock ( boundsLock ) { boundsMin = Vector3.Min( boundsMin, local.mins ); boundsMax = Vector3.Max( boundsMax, local.maxs ); } } ); } foreach ( var spriteGroup in SpriteGroups.Values ) { boundsMin = Vector3.Min( boundsMin, spriteGroup.Bounds.Mins ); boundsMax = Vector3.Max( boundsMax, spriteGroup.Bounds.Maxs ); } // Use a degenerate zero-size bounds for empty batches so they can be frustum-culled. Bounds = spriteCount > 0 ? new BBox( boundsMin, boundsMax ) : default; // Defer the actual GPU upload to RenderSceneObject where we have a valid render context, // avoiding expensive per-call CRenderContextPtr creation. _pendingComponentCount = componentCount; _pendingSpriteCount = spriteCount; _pendingSplotCount = SplotCount; _gpuUploadPending = true; GPUUploadQueued = false; } private const int GroupSize = 256; private const int MaxDimGroups = 1024; private const int MaxDimThreads = GroupSize * MaxDimGroups; private void PreSort() { if ( _pendingSpriteCount < 2 ) return; // First we clear the buffers to prepare for sorting SortComputeShaderAttributes.SetCombo( "D_CLEAR", 1 ); SortComputeShaderAttributes.Set( "SortBuffer", GPUSortingBuffer ); SortComputeShaderAttributes.Set( "DistanceBuffer", GPUDistanceBuffer ); SortComputeShaderAttributes.Set( "Count", CurrentBufferSize ); SortComputeShader.DispatchWithAttributes( SortComputeShaderAttributes, CurrentBufferSize, 1, 1 ); Graphics.ResourceBarrierTransition( GPUSortingBuffer, ResourceState.UnorderedAccess, ResourceState.UnorderedAccess ); Graphics.ResourceBarrierTransition( GPUDistanceBuffer, ResourceState.UnorderedAccess, ResourceState.UnorderedAccess ); } /// /// Performs a GPU bitonic sort /// private void Sort() { // Distance buffer is already filled by GPU compute shader, no need to update from CPU Graphics.ResourceBarrierTransition( GPUDistanceBuffer, Sandbox.Rendering.ResourceState.Common ); // Sort SortComputeShaderAttributes.SetCombo( "D_CLEAR", 0 ); var x = Math.Min( CurrentBufferSize, MaxDimThreads ); var y = (CurrentBufferSize + MaxDimThreads - 1) / MaxDimThreads; var z = 1; for ( var dim = 2; dim <= CurrentBufferSize; dim <<= 1 ) { SortComputeShaderAttributes.Set( "Dim", dim ); for ( var block = dim >> 1; block > 0; block >>= 1 ) { SortComputeShaderAttributes.Set( "Block", block ); SortComputeShader.DispatchWithAttributes( SortComputeShaderAttributes, x, y, z ); // Make sure sort buffer is ready to use Graphics.ResourceBarrierTransition( GPUSortingBuffer, ResourceState.UnorderedAccess, ResourceState.UnorderedAccess ); Graphics.ResourceBarrierTransition( GPUDistanceBuffer, ResourceState.UnorderedAccess, ResourceState.UnorderedAccess ); } } } /// /// Rendering logic of the sprites /// public override void RenderSceneObject() { base.RenderSceneObject(); int spriteCount = _pendingSpriteCount; int splotCount = _pendingSplotCount; if ( spriteCount == 0 ) { return; } // Upload sprite data to GPU once per frame, reusing the view's render context. // All SetData calls here go through the existing renderContext->SetGPUBufferData() // directly, avoiding expensive CRenderContextPtr creation per call. if ( _gpuUploadPending ) { if ( _pendingComponentCount > 0 ) { SpriteBuffer.SetData( SpriteDataBuffer.AsSpan( 0, _pendingComponentCount ) ); } int currentOffset = _pendingComponentCount; foreach ( var group in SpriteGroups.Values ) { SpriteBuffer.SetData( group.SharedSprites.AsSpan( group.Offset, group.Count ), currentOffset ); currentOffset += group.Count; } _gpuUploadPending = false; } if ( Sorted ) { PreSort(); } // Generate trails and UVs (this is mainly for particles) SpriteAtomicCounter.SetData( [0] ); // Reset atomic counter Graphics.ResourceBarrierTransition( SpriteAtomicCounter, ResourceState.Common ); Graphics.ResourceBarrierTransition( SpriteBuffer, ResourceState.Common ); Graphics.ResourceBarrierTransition( SpriteBufferOut, ResourceState.Common ); Graphics.ResourceBarrierTransition( GPUDistanceBuffer, ResourceState.Common ); var attributes = RenderAttributes.Pool.Get(); attributes.Set( "Sprites", SpriteBuffer ); attributes.Set( "SpriteBufferOut", SpriteBufferOut ); attributes.Set( "SpriteCount", spriteCount ); attributes.Set( "AtomicCounter", SpriteAtomicCounter ); // Sorting attributes.Set( "DistanceBuffer", GPUDistanceBuffer ); attributes.Set( "CameraPosition", Graphics.CameraPosition ); SpriteComputeShader.DispatchWithAttributes( attributes, spriteCount, 1, 1 ); RenderAttributes.Pool.Return( attributes ); // Barried for the new sprites generated Graphics.ResourceBarrierTransition( SpriteAtomicCounter, ResourceState.Common ); Graphics.ResourceBarrierTransition( SpriteBufferOut, ResourceState.Common ); Graphics.Attributes.SetCombo( "D_BLEND", Additive ? 1 : 0 ); Graphics.Attributes.SetCombo( "D_OPAQUE", Opaque ? 1 : 0 ); // Sort if ( Sorted ) { Sort(); } // Draw the sprites Graphics.Attributes.Set( "IsSorted", Sorted ? 1 : 0 ); Graphics.Attributes.Set( "SpriteCount", spriteCount + splotCount ); Graphics.Attributes.Set( "Filtered", Filtered ); Graphics.Attributes.Set( "Sprites", SpriteBufferOut ); Graphics.Attributes.Set( "SortLUT", GPUSortingBuffer ); // Always bind even if not used // Vertex Pulling Graphics.Attributes.Set( "Vertices", VertexBuffer ); Graphics.Attributes.Set( "g_bNonDirectionalDiffuseLighting", true ); Graphics.DrawIndexedInstanced( IndexBuffer, SpriteMaterial, spriteCount + splotCount ); } }