Files
WoWee/src/rendering/m2_renderer.cpp
Pavel Okhlopkov b79d9b8fea feat(rendering): implement spell visual effects with bone-tracked ribbons and particles
Add complete spell visual pipeline resolving the DBC chain
(Spell → SpellVisual → SpellVisualKit → SpellVisualEffectName → M2)
with precast/cast/impact phases, bone-attached positioning, and
automatic dual-hand mirroring.

Ribbon rendering fixes:
- Parse visibility track as uint8 (was read as float, suppressing
  all ribbon edges due to ~1.4e-45 failing the >0.5 check)
- Filter garbage emitters with bone=UINT_MAX unconditionally
- Guard against NaN spine positions from corrupt bone data
- Resolve ribbon textures via direct index, not textureLookup table
- Fall back to bone 0 when ribbon bone index is out of range

Particle rendering fixes:
- Reduce spell particle scale from 5x to 1.5x (was oversized)
- Exempt spell effect instances from position-based deduplication

Spell handler integration:
- Trigger precast visuals on SMSG_SPELL_START with server castTimeMs
- Trigger cast/impact visuals on SMSG_SPELL_GO
- Cancel precast visuals on cast interrupt/failure/movement

M2 classifier expansion:
- Add AmbientEmitterType enum for sound system integration
- Add 20+ foliage tokens, 4 spell effect tokens, isSmallFoliage flag
- Add markModelAsSpellEffect() to override disableAnimation

DBC layouts:
- Add SpellVisualID field to Spell.dbc for all expansion configs

Signed-off-by: Pavel Okhlopkov <pavel.okhlopkov@flant.com>
2026-04-07 11:27:59 +03:00

1727 lines
82 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#include "rendering/m2_renderer.hpp"
#include "rendering/m2_renderer_internal.h"
#include "rendering/m2_model_classifier.hpp"
#include "rendering/vk_context.hpp"
#include "rendering/vk_buffer.hpp"
#include "rendering/vk_texture.hpp"
#include "rendering/vk_pipeline.hpp"
#include "rendering/vk_shader.hpp"
#include "rendering/vk_utils.hpp"
#include "rendering/vk_frame_data.hpp"
#include "rendering/camera.hpp"
#include "rendering/frustum.hpp"
#include "pipeline/asset_manager.hpp"
#include "pipeline/blp_loader.hpp"
#include "core/logger.hpp"
#include "core/profiler.hpp"
#include <chrono>
#include <cctype>
#include <glm/gtc/matrix_transform.hpp>
#include <glm/gtc/type_ptr.hpp>
#include <glm/gtx/quaternion.hpp>
#include <unordered_set>
#include <functional>
#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <random>
#include <limits>
#include <future>
#include <thread>
namespace wowee {
namespace rendering {
namespace {
// Interprets an environment variable as a boolean flag.
// Unset or empty -> defaultValue. The (case-insensitive) values
// "0", "false", "off", "no" disable the flag; anything else enables it.
bool envFlagEnabled(const char* key, bool defaultValue) {
    const char* value = std::getenv(key);
    if (value == nullptr || value[0] == '\0') {
        return defaultValue;
    }
    std::string lowered(value);
    for (char& ch : lowered) {
        ch = static_cast<char>(std::tolower(static_cast<unsigned char>(ch)));
    }
    return lowered != "0" && lowered != "false" && lowered != "off" && lowered != "no";
}
} // namespace
void M2Instance::updateModelMatrix() {
modelMatrix = glm::mat4(1.0f);
modelMatrix = glm::translate(modelMatrix, position);
// Rotation in radians
modelMatrix = glm::rotate(modelMatrix, rotation.x, glm::vec3(1.0f, 0.0f, 0.0f));
modelMatrix = glm::rotate(modelMatrix, rotation.y, glm::vec3(0.0f, 1.0f, 0.0f));
modelMatrix = glm::rotate(modelMatrix, rotation.z, glm::vec3(0.0f, 0.0f, 1.0f));
modelMatrix = glm::scale(modelMatrix, glm::vec3(scale));
invModelMatrix = glm::inverse(modelMatrix);
}
// Default construction does no Vulkan work; all setup happens in initialize().
M2Renderer::M2Renderer() {
}
// Ensures GPU resources are released even if shutdown() was never called
// explicitly by the owner.
M2Renderer::~M2Renderer() {
shutdown();
}
/// @brief One-time setup of all Vulkan state owned by the M2 renderer:
/// descriptor set layouts/pools, bone and per-instance SSBOs, GPU frustum/HiZ
/// cull compute pipelines, graphics pipelines (model/particle/smoke/ribbon),
/// dynamic CPU-written vertex buffers, and fallback/glow textures.
/// @param ctx            Vulkan context (device, VMA allocator, render pass, pipeline cache).
/// @param perFrameLayout Descriptor set layout bound as set 0 by the frame loop.
/// @param assets         Asset manager used later for model/texture loading.
/// @return false only when the required m2.vert/m2.frag shaders are missing;
///         optional features (particles, smoke, ribbons, GPU/HiZ culling)
///         degrade gracefully when their shaders fail to load.
/// Calling again after a successful init only re-binds the asset manager.
bool M2Renderer::initialize(VkContext* ctx, VkDescriptorSetLayout perFrameLayout,
pipeline::AssetManager* assets) {
if (initialized_) { assetManager = assets; return true; }
vkCtx_ = ctx;
assetManager = assets;
// Size the animation thread pool from hardware concurrency, overridable
// via WOWEE_M2_ANIM_THREADS.
const unsigned hc = std::thread::hardware_concurrency();
const size_t availableCores = (hc > 1u) ? static_cast<size_t>(hc - 1u) : 1ull;
// Keep headroom for other frame tasks: M2 gets about half of non-main cores by default.
const size_t defaultAnimThreads = std::max<size_t>(1, availableCores / 2);
numAnimThreads_ = static_cast<uint32_t>(std::max<size_t>(
1, envSizeOrDefault("WOWEE_M2_ANIM_THREADS", defaultAnimThreads)));
LOG_INFO("Initializing M2 renderer (Vulkan, ", numAnimThreads_, " anim threads)...");
VkDevice device = vkCtx_->getDevice();
// --- Descriptor set layouts ---
// Material set layout (set 1): binding 0 = sampler2D, binding 2 = M2Material UBO
// (M2Params moved to push constants alongside model matrix)
{
VkDescriptorSetLayoutBinding bindings[2] = {};
bindings[0].binding = 0;
bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
bindings[0].descriptorCount = 1;
bindings[0].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
bindings[1].binding = 2;
bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
bindings[1].descriptorCount = 1;
bindings[1].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
VkDescriptorSetLayoutCreateInfo ci{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO};
ci.bindingCount = 2;
ci.pBindings = bindings;
vkCreateDescriptorSetLayout(device, &ci, nullptr, &materialSetLayout_);
}
// Bone set layout (set 2): binding 0 = STORAGE_BUFFER (bone matrices)
{
VkDescriptorSetLayoutBinding binding{};
binding.binding = 0;
binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
binding.descriptorCount = 1;
binding.stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
VkDescriptorSetLayoutCreateInfo ci{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO};
ci.bindingCount = 1;
ci.pBindings = &binding;
vkCreateDescriptorSetLayout(device, &ci, nullptr, &boneSetLayout_);
}
// Instance data set layout (set 3): binding 0 = STORAGE_BUFFER (per-instance data)
{
VkDescriptorSetLayoutBinding binding{};
binding.binding = 0;
binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
binding.descriptorCount = 1;
binding.stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
VkDescriptorSetLayoutCreateInfo ci{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO};
ci.bindingCount = 1;
ci.pBindings = &binding;
vkCreateDescriptorSetLayout(device, &ci, nullptr, &instanceSetLayout_);
}
// Particle texture set layout (set 1 for particles): binding 0 = sampler2D
{
VkDescriptorSetLayoutBinding binding{};
binding.binding = 0;
binding.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
binding.descriptorCount = 1;
binding.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
VkDescriptorSetLayoutCreateInfo ci{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO};
ci.bindingCount = 1;
ci.pBindings = &binding;
vkCreateDescriptorSetLayout(device, &ci, nullptr, &particleTexLayout_);
}
// --- Descriptor pools ---
// Material pool: +256 slots of headroom beyond MAX_MATERIAL_SETS for
// ad-hoc sets (e.g. the glow texture set allocated below).
{
VkDescriptorPoolSize sizes[] = {
{VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, MAX_MATERIAL_SETS + 256},
{VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, MAX_MATERIAL_SETS + 256},
};
VkDescriptorPoolCreateInfo ci{VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO};
ci.maxSets = MAX_MATERIAL_SETS + 256;
ci.poolSizeCount = 2;
ci.pPoolSizes = sizes;
ci.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT;
vkCreateDescriptorPool(device, &ci, nullptr, &materialDescPool_);
}
{
VkDescriptorPoolSize sizes[] = {
{VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, MAX_BONE_SETS},
};
VkDescriptorPoolCreateInfo ci{VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO};
ci.maxSets = MAX_BONE_SETS;
ci.poolSizeCount = 1;
ci.pPoolSizes = sizes;
ci.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT;
vkCreateDescriptorPool(device, &ci, nullptr, &boneDescPool_);
}
// Create a small identity-bone SSBO + descriptor set so that non-animated
// draws always have a valid set 2 bound. The Intel ANV driver segfaults
// on vkCmdDrawIndexed when a declared descriptor set slot is unbound.
{
// Single identity matrix (bone 0 = identity)
glm::mat4 identity(1.0f);
VkBufferCreateInfo bci{VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO};
bci.size = sizeof(glm::mat4);
bci.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
VmaAllocationCreateInfo aci{};
aci.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
aci.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
VmaAllocationInfo allocInfo{};
vmaCreateBuffer(ctx->getAllocator(), &bci, &aci,
&dummyBoneBuffer_, &dummyBoneAlloc_, &allocInfo);
if (allocInfo.pMappedData) {
memcpy(allocInfo.pMappedData, &identity, sizeof(identity));
}
dummyBoneSet_ = allocateBoneSet();
if (dummyBoneSet_) {
VkDescriptorBufferInfo bufInfo{};
bufInfo.buffer = dummyBoneBuffer_;
bufInfo.offset = 0;
bufInfo.range = sizeof(glm::mat4);
VkWriteDescriptorSet write{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
write.dstSet = dummyBoneSet_;
write.dstBinding = 0;
write.descriptorCount = 1;
write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
write.pBufferInfo = &bufInfo;
vkUpdateDescriptorSets(device, 1, &write, 0, nullptr);
}
}
// Mega bone SSBO — consolidates all animated instance bones into one buffer per frame.
// Slot 0 = identity matrix (for non-animated instances), slots 1..N = animated instances.
// Double-buffered (index i in 0..1) so the CPU can write one copy while the
// GPU reads the other.
{
const VkDeviceSize megaSize = MEGA_BONE_MAX_INSTANCES * MAX_BONES_PER_INSTANCE * sizeof(glm::mat4);
glm::mat4 identity(1.0f);
for (int i = 0; i < 2; i++) {
VkBufferCreateInfo bci{VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO};
bci.size = megaSize;
bci.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
VmaAllocationCreateInfo aci{};
aci.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
aci.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
VmaAllocationInfo allocInfo{};
vmaCreateBuffer(ctx->getAllocator(), &bci, &aci,
&megaBoneBuffer_[i], &megaBoneAlloc_[i], &allocInfo);
megaBoneMapped_[i] = allocInfo.pMappedData;
// Slot 0: identity matrix (for non-animated instances)
if (megaBoneMapped_[i]) {
memcpy(megaBoneMapped_[i], &identity, sizeof(identity));
}
megaBoneSet_[i] = allocateBoneSet();
if (megaBoneSet_[i]) {
VkDescriptorBufferInfo bufInfo{};
bufInfo.buffer = megaBoneBuffer_[i];
bufInfo.offset = 0;
bufInfo.range = megaSize;
VkWriteDescriptorSet write{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
write.dstSet = megaBoneSet_[i];
write.dstBinding = 0;
write.descriptorCount = 1;
write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
write.pBufferInfo = &bufInfo;
vkUpdateDescriptorSets(device, 1, &write, 0, nullptr);
}
}
}
// Instance data SSBO — per-frame buffer holding per-instance transforms, fade, bones.
// Shader reads instanceData[push.instanceDataOffset + gl_InstanceIndex].
{
static_assert(sizeof(M2InstanceGPU) == 96, "M2InstanceGPU must be 96 bytes (std430)");
const VkDeviceSize instBufSize = MAX_INSTANCE_DATA * sizeof(M2InstanceGPU);
// Descriptor pool for 2 sets (double-buffered)
VkDescriptorPoolSize poolSize{VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2};
VkDescriptorPoolCreateInfo poolCi{VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO};
poolCi.maxSets = 2;
poolCi.poolSizeCount = 1;
poolCi.pPoolSizes = &poolSize;
vkCreateDescriptorPool(device, &poolCi, nullptr, &instanceDescPool_);
for (int i = 0; i < 2; i++) {
VkBufferCreateInfo bci{VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO};
bci.size = instBufSize;
bci.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
VmaAllocationCreateInfo aci{};
aci.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
aci.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
VmaAllocationInfo allocInfo{};
vmaCreateBuffer(ctx->getAllocator(), &bci, &aci,
&instanceBuffer_[i], &instanceAlloc_[i], &allocInfo);
instanceMapped_[i] = allocInfo.pMappedData;
VkDescriptorSetAllocateInfo setAi{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO};
setAi.descriptorPool = instanceDescPool_;
setAi.descriptorSetCount = 1;
setAi.pSetLayouts = &instanceSetLayout_;
vkAllocateDescriptorSets(device, &setAi, &instanceSet_[i]);
VkDescriptorBufferInfo bufInfo{};
bufInfo.buffer = instanceBuffer_[i];
bufInfo.offset = 0;
bufInfo.range = instBufSize;
VkWriteDescriptorSet write{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
write.dstSet = instanceSet_[i];
write.dstBinding = 0;
write.descriptorCount = 1;
write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
write.pBufferInfo = &bufInfo;
vkUpdateDescriptorSets(device, 1, &write, 0, nullptr);
}
}
// GPU frustum culling — compute pipeline, buffers, descriptors.
// Compute shader tests each instance bounding sphere against 6 frustum planes + distance.
// Output: uint visibility[] read back by CPU to skip culled instances in sortedVisible_ build.
{
static_assert(sizeof(CullInstanceGPU) == 32, "CullInstanceGPU must be 32 bytes (std430)");
static_assert(sizeof(CullUniformsGPU) == 272, "CullUniformsGPU must be 272 bytes (std140)");
// Descriptor set layout: binding 0 = UBO (frustum+camera), 1 = SSBO (input), 2 = SSBO (output)
VkDescriptorSetLayoutBinding bindings[3] = {};
bindings[0].binding = 0;
bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
bindings[0].descriptorCount = 1;
bindings[0].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
bindings[1].binding = 1;
bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
bindings[1].descriptorCount = 1;
bindings[1].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
bindings[2].binding = 2;
bindings[2].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
bindings[2].descriptorCount = 1;
bindings[2].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
VkDescriptorSetLayoutCreateInfo layoutCi{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO};
layoutCi.bindingCount = 3;
layoutCi.pBindings = bindings;
vkCreateDescriptorSetLayout(device, &layoutCi, nullptr, &cullSetLayout_);
// Pipeline layout (no push constants — everything via UBO)
VkPipelineLayoutCreateInfo plCi{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO};
plCi.setLayoutCount = 1;
plCi.pSetLayouts = &cullSetLayout_;
vkCreatePipelineLayout(device, &plCi, nullptr, &cullPipelineLayout_);
// Load compute shader
rendering::VkShaderModule cullComp;
if (!cullComp.loadFromFile(device, "assets/shaders/m2_cull.comp.spv")) {
LOG_ERROR("M2Renderer: failed to load m2_cull.comp.spv — GPU culling disabled");
} else {
VkComputePipelineCreateInfo cpCi{VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO};
cpCi.stage = cullComp.stageInfo(VK_SHADER_STAGE_COMPUTE_BIT);
cpCi.layout = cullPipelineLayout_;
if (vkCreateComputePipelines(device, VK_NULL_HANDLE, 1, &cpCi, nullptr, &cullPipeline_) != VK_SUCCESS) {
LOG_ERROR("M2Renderer: failed to create cull compute pipeline");
cullPipeline_ = VK_NULL_HANDLE;
}
cullComp.destroy();
}
// HiZ-aware cull pipeline (Phase 6.3 Option B)
// Uses set 0 (same as frustum-only) + set 1 (HiZ pyramid sampler from HiZSystem).
// The HiZ descriptor set layout is created lazily when hizSystem_ is set, but the
// pipeline layout and shader are created now if the shader is available.
rendering::VkShaderModule cullHiZComp;
if (cullHiZComp.loadFromFile(device, "assets/shaders/m2_cull_hiz.comp.spv")) {
// HiZ cull set 1 layout: single combined image sampler (the HiZ pyramid)
VkDescriptorSetLayoutBinding hizBinding{};
hizBinding.binding = 0;
hizBinding.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
hizBinding.descriptorCount = 1;
hizBinding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
VkDescriptorSetLayout hizSamplerLayout = VK_NULL_HANDLE;
VkDescriptorSetLayoutCreateInfo hizLayoutCi{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO};
hizLayoutCi.bindingCount = 1;
hizLayoutCi.pBindings = &hizBinding;
vkCreateDescriptorSetLayout(device, &hizLayoutCi, nullptr, &hizSamplerLayout);
VkDescriptorSetLayout hizSetLayouts[2] = {cullSetLayout_, hizSamplerLayout};
VkPipelineLayoutCreateInfo hizPlCi{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO};
hizPlCi.setLayoutCount = 2;
hizPlCi.pSetLayouts = hizSetLayouts;
vkCreatePipelineLayout(device, &hizPlCi, nullptr, &cullHiZPipelineLayout_);
VkComputePipelineCreateInfo hizCpCi{VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO};
hizCpCi.stage = cullHiZComp.stageInfo(VK_SHADER_STAGE_COMPUTE_BIT);
hizCpCi.layout = cullHiZPipelineLayout_;
if (vkCreateComputePipelines(device, VK_NULL_HANDLE, 1, &hizCpCi, nullptr, &cullHiZPipeline_) != VK_SUCCESS) {
LOG_WARNING("M2Renderer: failed to create HiZ cull compute pipeline — HiZ disabled");
cullHiZPipeline_ = VK_NULL_HANDLE;
vkDestroyPipelineLayout(device, cullHiZPipelineLayout_, nullptr);
cullHiZPipelineLayout_ = VK_NULL_HANDLE;
} else {
LOG_INFO("M2Renderer: HiZ occlusion cull pipeline created");
}
// Per the Vulkan spec, a descriptor set layout may be destroyed as soon as
// the pipeline layout created from it exists; the layout handle is only
// needed again for allocating descriptor sets. NOTE(review): this assumes
// HiZSystem allocates the actual HiZ sampler set with its own compatible
// layout — confirm against HiZSystem.
vkDestroyDescriptorSetLayout(device, hizSamplerLayout, nullptr);
cullHiZComp.destroy();
} else {
LOG_INFO("M2Renderer: m2_cull_hiz.comp.spv not found — HiZ occlusion culling not available");
}
// Descriptor pool: 2 sets × 3 descriptors each (1 UBO + 2 SSBO)
VkDescriptorPoolSize poolSizes[2] = {};
poolSizes[0] = {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 2};
poolSizes[1] = {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 4}; // 2 input + 2 output
VkDescriptorPoolCreateInfo poolCi{VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO};
poolCi.maxSets = 2;
poolCi.poolSizeCount = 2;
poolCi.pPoolSizes = poolSizes;
vkCreateDescriptorPool(device, &poolCi, nullptr, &cullDescPool_);
const VkDeviceSize uniformSize = sizeof(CullUniformsGPU);
const VkDeviceSize inputSize = MAX_CULL_INSTANCES * sizeof(CullInstanceGPU);
const VkDeviceSize outputSize = MAX_CULL_INSTANCES * sizeof(uint32_t);
for (int i = 0; i < 2; i++) {
// Uniform buffer (frustum planes + camera)
{
VkBufferCreateInfo bci{VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO};
bci.size = uniformSize;
bci.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
VmaAllocationCreateInfo aci{};
aci.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
aci.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
VmaAllocationInfo ai{};
vmaCreateBuffer(ctx->getAllocator(), &bci, &aci,
&cullUniformBuffer_[i], &cullUniformAlloc_[i], &ai);
cullUniformMapped_[i] = ai.pMappedData;
}
// Input SSBO (per-instance cull data)
{
VkBufferCreateInfo bci{VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO};
bci.size = inputSize;
bci.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
VmaAllocationCreateInfo aci{};
aci.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
aci.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
VmaAllocationInfo ai{};
vmaCreateBuffer(ctx->getAllocator(), &bci, &aci,
&cullInputBuffer_[i], &cullInputAlloc_[i], &ai);
cullInputMapped_[i] = ai.pMappedData;
}
// Output SSBO (visibility flags — GPU writes, CPU reads)
{
VkBufferCreateInfo bci{VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO};
bci.size = outputSize;
bci.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
VmaAllocationCreateInfo aci{};
aci.usage = VMA_MEMORY_USAGE_GPU_TO_CPU;
aci.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
VmaAllocationInfo ai{};
vmaCreateBuffer(ctx->getAllocator(), &bci, &aci,
&cullOutputBuffer_[i], &cullOutputAlloc_[i], &ai);
cullOutputMapped_[i] = ai.pMappedData;
}
// Allocate and write descriptor set
VkDescriptorSetAllocateInfo setAi{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO};
setAi.descriptorPool = cullDescPool_;
setAi.descriptorSetCount = 1;
setAi.pSetLayouts = &cullSetLayout_;
vkAllocateDescriptorSets(device, &setAi, &cullSet_[i]);
VkDescriptorBufferInfo uboInfo{cullUniformBuffer_[i], 0, uniformSize};
VkDescriptorBufferInfo inputInfo{cullInputBuffer_[i], 0, inputSize};
VkDescriptorBufferInfo outputInfo{cullOutputBuffer_[i], 0, outputSize};
VkWriteDescriptorSet writes[3] = {};
writes[0] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
writes[0].dstSet = cullSet_[i];
writes[0].dstBinding = 0;
writes[0].descriptorCount = 1;
writes[0].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
writes[0].pBufferInfo = &uboInfo;
writes[1] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
writes[1].dstSet = cullSet_[i];
writes[1].dstBinding = 1;
writes[1].descriptorCount = 1;
writes[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
writes[1].pBufferInfo = &inputInfo;
writes[2] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
writes[2].dstSet = cullSet_[i];
writes[2].dstBinding = 2;
writes[2].descriptorCount = 1;
writes[2].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
writes[2].pBufferInfo = &outputInfo;
vkUpdateDescriptorSets(device, 3, writes, 0, nullptr);
}
}
// --- Pipeline layouts ---
// Main M2 pipeline layout: set 0 = perFrame, set 1 = material, set 2 = bones, set 3 = instances
// Push constant: int texCoordSet + int isFoliage + int instanceDataOffset (12 bytes)
{
VkDescriptorSetLayout setLayouts[] = {perFrameLayout, materialSetLayout_, boneSetLayout_, instanceSetLayout_};
VkPushConstantRange pushRange{};
pushRange.stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
pushRange.offset = 0;
pushRange.size = 12; // int texCoordSet + int isFoliage + int instanceDataOffset
VkPipelineLayoutCreateInfo ci{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO};
ci.setLayoutCount = 4;
ci.pSetLayouts = setLayouts;
ci.pushConstantRangeCount = 1;
ci.pPushConstantRanges = &pushRange;
vkCreatePipelineLayout(device, &ci, nullptr, &pipelineLayout_);
}
// Particle pipeline layout: set 0 = perFrame, set 1 = particleTex
// Push constant: vec2 tileCount + int alphaKey (12 bytes)
{
VkDescriptorSetLayout setLayouts[] = {perFrameLayout, particleTexLayout_};
VkPushConstantRange pushRange{};
pushRange.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
pushRange.offset = 0;
pushRange.size = 12; // vec2 + int
VkPipelineLayoutCreateInfo ci{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO};
ci.setLayoutCount = 2;
ci.pSetLayouts = setLayouts;
ci.pushConstantRangeCount = 1;
ci.pPushConstantRanges = &pushRange;
vkCreatePipelineLayout(device, &ci, nullptr, &particlePipelineLayout_);
}
// Smoke pipeline layout: set 0 = perFrame
// Push constant: float screenHeight (4 bytes)
{
VkDescriptorSetLayout setLayouts[] = {perFrameLayout};
VkPushConstantRange pushRange{};
pushRange.stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
pushRange.offset = 0;
pushRange.size = 4;
VkPipelineLayoutCreateInfo ci{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO};
ci.setLayoutCount = 1;
ci.pSetLayouts = setLayouts;
ci.pushConstantRangeCount = 1;
ci.pPushConstantRanges = &pushRange;
vkCreatePipelineLayout(device, &ci, nullptr, &smokePipelineLayout_);
}
// --- Load shaders ---
// Only m2.vert/m2.frag are mandatory; the rest enable optional effects.
rendering::VkShaderModule m2Vert, m2Frag;
rendering::VkShaderModule particleVert, particleFrag;
rendering::VkShaderModule smokeVert, smokeFrag;
(void)m2Vert.loadFromFile(device, "assets/shaders/m2.vert.spv");
(void)m2Frag.loadFromFile(device, "assets/shaders/m2.frag.spv");
(void)particleVert.loadFromFile(device, "assets/shaders/m2_particle.vert.spv");
(void)particleFrag.loadFromFile(device, "assets/shaders/m2_particle.frag.spv");
(void)smokeVert.loadFromFile(device, "assets/shaders/m2_smoke.vert.spv");
(void)smokeFrag.loadFromFile(device, "assets/shaders/m2_smoke.frag.spv");
if (!m2Vert.isValid() || !m2Frag.isValid()) {
LOG_ERROR("M2: Missing required shaders, cannot initialize");
return false;
}
VkRenderPass mainPass = vkCtx_->getImGuiRenderPass();
// --- Build M2 model pipelines ---
// Vertex input: 18 floats = 72 bytes stride
// loc 0: vec3 pos (0), loc 1: vec3 normal (12), loc 2: vec2 uv0 (24),
// loc 5: vec2 uv1 (32), loc 3: vec4 boneWeights (40), loc 4: vec4 boneIndices (56)
VkVertexInputBindingDescription m2Binding{};
m2Binding.binding = 0;
m2Binding.stride = 18 * sizeof(float);
m2Binding.inputRate = VK_VERTEX_INPUT_RATE_VERTEX;
std::vector<VkVertexInputAttributeDescription> m2Attrs = {
{0, 0, VK_FORMAT_R32G32B32_SFLOAT, 0}, // position
{1, 0, VK_FORMAT_R32G32B32_SFLOAT, 3 * sizeof(float)}, // normal
{2, 0, VK_FORMAT_R32G32_SFLOAT, 6 * sizeof(float)}, // texCoord0
{5, 0, VK_FORMAT_R32G32_SFLOAT, 8 * sizeof(float)}, // texCoord1
{3, 0, VK_FORMAT_R32G32B32A32_SFLOAT, 10 * sizeof(float)}, // boneWeights
{4, 0, VK_FORMAT_R32G32B32A32_SFLOAT, 14 * sizeof(float)}, // boneIndices (float)
};
// Pipeline derivatives — opaque is the base, others derive from it for shared state optimization
auto buildM2Pipeline = [&](VkPipelineColorBlendAttachmentState blendState, bool depthWrite,
VkPipelineCreateFlags flags = 0, VkPipeline basePipeline = VK_NULL_HANDLE) -> VkPipeline {
return PipelineBuilder()
.setShaders(m2Vert.stageInfo(VK_SHADER_STAGE_VERTEX_BIT),
m2Frag.stageInfo(VK_SHADER_STAGE_FRAGMENT_BIT))
.setVertexInput({m2Binding}, m2Attrs)
.setTopology(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST)
.setRasterization(VK_POLYGON_MODE_FILL, VK_CULL_MODE_NONE)
.setDepthTest(true, depthWrite, VK_COMPARE_OP_LESS_OR_EQUAL)
.setColorBlendAttachment(blendState)
.setMultisample(vkCtx_->getMsaaSamples())
.setLayout(pipelineLayout_)
.setRenderPass(mainPass)
.setDynamicStates({VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR})
.setFlags(flags)
.setBasePipeline(basePipeline)
.build(device, vkCtx_->getPipelineCache());
};
opaquePipeline_ = buildM2Pipeline(PipelineBuilder::blendDisabled(), true,
VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT);
alphaTestPipeline_ = buildM2Pipeline(PipelineBuilder::blendAlpha(), true,
VK_PIPELINE_CREATE_DERIVATIVE_BIT, opaquePipeline_);
alphaPipeline_ = buildM2Pipeline(PipelineBuilder::blendAlpha(), false,
VK_PIPELINE_CREATE_DERIVATIVE_BIT, opaquePipeline_);
additivePipeline_ = buildM2Pipeline(PipelineBuilder::blendAdditive(), false,
VK_PIPELINE_CREATE_DERIVATIVE_BIT, opaquePipeline_);
// --- Build particle pipelines ---
if (particleVert.isValid() && particleFrag.isValid()) {
VkVertexInputBindingDescription pBind{};
pBind.binding = 0;
pBind.stride = 9 * sizeof(float); // pos3 + color4 + size1 + tile1
pBind.inputRate = VK_VERTEX_INPUT_RATE_VERTEX;
std::vector<VkVertexInputAttributeDescription> pAttrs = {
{0, 0, VK_FORMAT_R32G32B32_SFLOAT, 0}, // position
{1, 0, VK_FORMAT_R32G32B32A32_SFLOAT, 3 * sizeof(float)}, // color
{2, 0, VK_FORMAT_R32_SFLOAT, 7 * sizeof(float)}, // size
{3, 0, VK_FORMAT_R32_SFLOAT, 8 * sizeof(float)}, // tile
};
auto buildParticlePipeline = [&](VkPipelineColorBlendAttachmentState blend) -> VkPipeline {
return PipelineBuilder()
.setShaders(particleVert.stageInfo(VK_SHADER_STAGE_VERTEX_BIT),
particleFrag.stageInfo(VK_SHADER_STAGE_FRAGMENT_BIT))
.setVertexInput({pBind}, pAttrs)
.setTopology(VK_PRIMITIVE_TOPOLOGY_POINT_LIST)
.setRasterization(VK_POLYGON_MODE_FILL, VK_CULL_MODE_NONE)
.setDepthTest(true, false, VK_COMPARE_OP_LESS_OR_EQUAL)
.setColorBlendAttachment(blend)
.setMultisample(vkCtx_->getMsaaSamples())
.setLayout(particlePipelineLayout_)
.setRenderPass(mainPass)
.setDynamicStates({VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR})
.build(device, vkCtx_->getPipelineCache());
};
particlePipeline_ = buildParticlePipeline(PipelineBuilder::blendAlpha());
particleAdditivePipeline_ = buildParticlePipeline(PipelineBuilder::blendAdditive());
}
// --- Build smoke pipeline ---
if (smokeVert.isValid() && smokeFrag.isValid()) {
VkVertexInputBindingDescription sBind{};
sBind.binding = 0;
sBind.stride = 6 * sizeof(float); // pos3 + lifeRatio1 + size1 + isSpark1
sBind.inputRate = VK_VERTEX_INPUT_RATE_VERTEX;
std::vector<VkVertexInputAttributeDescription> sAttrs = {
{0, 0, VK_FORMAT_R32G32B32_SFLOAT, 0}, // position
{1, 0, VK_FORMAT_R32_SFLOAT, 3 * sizeof(float)}, // lifeRatio
{2, 0, VK_FORMAT_R32_SFLOAT, 4 * sizeof(float)}, // size
{3, 0, VK_FORMAT_R32_SFLOAT, 5 * sizeof(float)}, // isSpark
};
smokePipeline_ = PipelineBuilder()
.setShaders(smokeVert.stageInfo(VK_SHADER_STAGE_VERTEX_BIT),
smokeFrag.stageInfo(VK_SHADER_STAGE_FRAGMENT_BIT))
.setVertexInput({sBind}, sAttrs)
.setTopology(VK_PRIMITIVE_TOPOLOGY_POINT_LIST)
.setRasterization(VK_POLYGON_MODE_FILL, VK_CULL_MODE_NONE)
.setDepthTest(true, false, VK_COMPARE_OP_LESS_OR_EQUAL)
.setColorBlendAttachment(PipelineBuilder::blendAlpha())
.setMultisample(vkCtx_->getMsaaSamples())
.setLayout(smokePipelineLayout_)
.setRenderPass(mainPass)
.setDynamicStates({VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR})
.build(device, vkCtx_->getPipelineCache());
}
// --- Build ribbon pipelines ---
// Vertex format: pos(3) + color(3) + alpha(1) + uv(2) = 9 floats = 36 bytes
{
rendering::VkShaderModule ribVert, ribFrag;
(void)ribVert.loadFromFile(device, "assets/shaders/m2_ribbon.vert.spv");
(void)ribFrag.loadFromFile(device, "assets/shaders/m2_ribbon.frag.spv");
if (ribVert.isValid() && ribFrag.isValid()) {
// Reuse particleTexLayout_ for set 1 (single texture sampler)
VkDescriptorSetLayout ribLayouts[] = {perFrameLayout, particleTexLayout_};
VkPipelineLayoutCreateInfo lci{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO};
lci.setLayoutCount = 2;
lci.pSetLayouts = ribLayouts;
vkCreatePipelineLayout(device, &lci, nullptr, &ribbonPipelineLayout_);
VkVertexInputBindingDescription rBind{};
rBind.binding = 0;
rBind.stride = 9 * sizeof(float);
rBind.inputRate = VK_VERTEX_INPUT_RATE_VERTEX;
std::vector<VkVertexInputAttributeDescription> rAttrs = {
{0, 0, VK_FORMAT_R32G32B32_SFLOAT, 0}, // pos
{1, 0, VK_FORMAT_R32G32B32_SFLOAT, 3 * sizeof(float)}, // color
{2, 0, VK_FORMAT_R32_SFLOAT, 6 * sizeof(float)}, // alpha
{3, 0, VK_FORMAT_R32G32_SFLOAT, 7 * sizeof(float)}, // uv
};
auto buildRibbonPipeline = [&](VkPipelineColorBlendAttachmentState blend) -> VkPipeline {
return PipelineBuilder()
.setShaders(ribVert.stageInfo(VK_SHADER_STAGE_VERTEX_BIT),
ribFrag.stageInfo(VK_SHADER_STAGE_FRAGMENT_BIT))
.setVertexInput({rBind}, rAttrs)
.setTopology(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP)
.setRasterization(VK_POLYGON_MODE_FILL, VK_CULL_MODE_NONE)
.setDepthTest(true, false, VK_COMPARE_OP_LESS_OR_EQUAL)
.setColorBlendAttachment(blend)
.setMultisample(vkCtx_->getMsaaSamples())
.setLayout(ribbonPipelineLayout_)
.setRenderPass(mainPass)
.setDynamicStates({VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR})
.build(device, vkCtx_->getPipelineCache());
};
ribbonPipeline_ = buildRibbonPipeline(PipelineBuilder::blendAlpha());
ribbonAdditivePipeline_ = buildRibbonPipeline(PipelineBuilder::blendAdditive());
}
ribVert.destroy(); ribFrag.destroy();
}
// Clean up shader modules
m2Vert.destroy(); m2Frag.destroy();
particleVert.destroy(); particleFrag.destroy();
smokeVert.destroy(); smokeFrag.destroy();
// --- Create dynamic particle buffers (mapped for CPU writes) ---
{
VkBufferCreateInfo bci{VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO};
bci.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
VmaAllocationCreateInfo aci{};
aci.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
aci.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
VmaAllocationInfo allocInfo{};
// Smoke particle buffer
bci.size = MAX_SMOKE_PARTICLES * 6 * sizeof(float);
vmaCreateBuffer(vkCtx_->getAllocator(), &bci, &aci, &smokeVB_, &smokeVBAlloc_, &allocInfo);
smokeVBMapped_ = allocInfo.pMappedData;
// M2 particle buffer
bci.size = MAX_M2_PARTICLES * 9 * sizeof(float);
vmaCreateBuffer(vkCtx_->getAllocator(), &bci, &aci, &m2ParticleVB_, &m2ParticleVBAlloc_, &allocInfo);
m2ParticleVBMapped_ = allocInfo.pMappedData;
// Dedicated glow sprite buffer (separate from particle VB to avoid data race)
bci.size = MAX_GLOW_SPRITES * 9 * sizeof(float);
vmaCreateBuffer(vkCtx_->getAllocator(), &bci, &aci, &glowVB_, &glowVBAlloc_, &allocInfo);
glowVBMapped_ = allocInfo.pMappedData;
// Ribbon vertex buffer — triangle strip: pos(3)+color(3)+alpha(1)+uv(2)=9 floats/vert
bci.size = MAX_RIBBON_VERTS * 9 * sizeof(float);
vmaCreateBuffer(vkCtx_->getAllocator(), &bci, &aci, &ribbonVB_, &ribbonVBAlloc_, &allocInfo);
ribbonVBMapped_ = allocInfo.pMappedData;
}
// --- Create white fallback texture ---
// 1x1 opaque white pixel bound whenever a real texture is unavailable.
{
uint8_t white[] = {255, 255, 255, 255};
whiteTexture_ = std::make_unique<VkTexture>();
whiteTexture_->upload(*vkCtx_, white, 1, 1, VK_FORMAT_R8G8B8A8_UNORM);
whiteTexture_->createSampler(device, VK_FILTER_LINEAR, VK_FILTER_LINEAR, VK_SAMPLER_ADDRESS_MODE_REPEAT);
}
// --- Generate soft radial gradient glow texture ---
// White RGB with alpha = (1 - r)^2 from the center — a procedural soft dot.
{
static constexpr int SZ = 64;
std::vector<uint8_t> px(SZ * SZ * 4);
float half = SZ / 2.0f;
for (int y = 0; y < SZ; y++) {
for (int x = 0; x < SZ; x++) {
float dx = (x + 0.5f - half) / half;
float dy = (y + 0.5f - half) / half;
float r = std::sqrt(dx * dx + dy * dy);
float a = std::max(0.0f, 1.0f - r);
a = a * a; // Quadratic falloff
int idx = (y * SZ + x) * 4;
px[idx + 0] = 255;
px[idx + 1] = 255;
px[idx + 2] = 255;
px[idx + 3] = static_cast<uint8_t>(a * 255);
}
}
glowTexture_ = std::make_unique<VkTexture>();
glowTexture_->upload(*vkCtx_, px.data(), SZ, SZ, VK_FORMAT_R8G8B8A8_UNORM);
glowTexture_->createSampler(device, VK_FILTER_LINEAR, VK_FILTER_LINEAR, VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE);
// Pre-allocate glow texture descriptor set (reused every frame)
if (particleTexLayout_ && materialDescPool_) {
VkDescriptorSetAllocateInfo ai{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO};
ai.descriptorPool = materialDescPool_;
ai.descriptorSetCount = 1;
ai.pSetLayouts = &particleTexLayout_;
if (vkAllocateDescriptorSets(device, &ai, &glowTexDescSet_) == VK_SUCCESS) {
VkDescriptorImageInfo imgInfo = glowTexture_->descriptorInfo();
VkWriteDescriptorSet write{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
write.dstSet = glowTexDescSet_;
write.dstBinding = 0;
write.descriptorCount = 1;
write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
write.pImageInfo = &imgInfo;
vkUpdateDescriptorSets(device, 1, &write, 0, nullptr);
}
}
}
// Cache budgets, overridable via environment variables.
textureCacheBudgetBytes_ =
envSizeMBOrDefault("WOWEE_M2_TEX_CACHE_MB", 4096) * 1024ull * 1024ull;
modelCacheLimit_ = envSizeMBOrDefault("WOWEE_M2_MODEL_LIMIT", 6000);
LOG_INFO("M2 texture cache budget: ", textureCacheBudgetBytes_ / (1024 * 1024), " MB");
LOG_INFO("M2 model cache limit: ", modelCacheLimit_);
LOG_INFO("M2 renderer initialized (Vulkan)");
initialized_ = true;
return true;
}
void M2Renderer::invalidateCullOutput(uint32_t frameIndex) {
// On non-HOST_COHERENT memory, VMA-mapped GPU→CPU buffers need explicit
// invalidation so the CPU cache sees the latest GPU writes.
if (frameIndex < 2 && cullOutputAlloc_[frameIndex]) {
vmaInvalidateAllocation(vkCtx_->getAllocator(), cullOutputAlloc_[frameIndex], 0, VK_WHOLE_SIZE);
}
}
void M2Renderer::shutdown() {
    // Tears down every GPU resource owned by the renderer in dependency-safe
    // order: models/instances first, then texture caches, dynamic vertex
    // buffers, pipelines/layouts, and finally the descriptor pools and set
    // layouts that earlier frees depended on. Each handle is null-checked and
    // reset, so a partially-initialized renderer shuts down cleanly.
    LOG_INFO("Shutting down M2 renderer...");
    if (!vkCtx_) return;
    // Block until the GPU is fully idle — everything below uses the
    // immediate (non-deferred) destruction path, which is only legal once no
    // command buffer still references these resources.
    vkDeviceWaitIdle(vkCtx_->getDevice());
    VkDevice device = vkCtx_->getDevice();
    VmaAllocator alloc = vkCtx_->getAllocator();
    // Delete model GPU resources (geometry buffers, material sets, etc.)
    for (auto& [id, model] : models) {
        destroyModelGPU(model);
    }
    models.clear();
    // Destroy instance bone buffers — immediate path, device is idle here
    for (auto& inst : instances) {
        destroyInstanceBones(inst);
    }
    instances.clear();
    spatialGrid.clear();
    instanceIndexById.clear();
    instanceDedupMap_.clear();
    // Delete cached textures and reset all texture-cache bookkeeping
    textureCache.clear();
    textureCacheBytes_ = 0;
    textureCacheCounter_ = 0;
    texturePropsByPtr_.clear();
    failedTextureCache_.clear();
    failedTextureRetryAt_.clear();
    loggedTextureLoadFails_.clear();
    textureLookupSerial_ = 0;
    textureBudgetRejectWarnings_ = 0;
    whiteTexture_.reset();
    glowTexture_.reset();
    // Clean up particle/ribbon buffers (persistently mapped dynamic VBs)
    if (smokeVB_) { vmaDestroyBuffer(alloc, smokeVB_, smokeVBAlloc_); smokeVB_ = VK_NULL_HANDLE; }
    if (m2ParticleVB_) { vmaDestroyBuffer(alloc, m2ParticleVB_, m2ParticleVBAlloc_); m2ParticleVB_ = VK_NULL_HANDLE; }
    if (glowVB_) { vmaDestroyBuffer(alloc, glowVB_, glowVBAlloc_); glowVB_ = VK_NULL_HANDLE; }
    if (ribbonVB_) { vmaDestroyBuffer(alloc, ribbonVB_, ribbonVBAlloc_); ribbonVB_ = VK_NULL_HANDLE; }
    smokeParticles.clear();
    // Destroy pipelines (helper reused again below for the shadow pipeline)
    auto destroyPipeline = [&](VkPipeline& p) { if (p) { vkDestroyPipeline(device, p, nullptr); p = VK_NULL_HANDLE; } };
    destroyPipeline(opaquePipeline_);
    destroyPipeline(alphaTestPipeline_);
    destroyPipeline(alphaPipeline_);
    destroyPipeline(additivePipeline_);
    destroyPipeline(particlePipeline_);
    destroyPipeline(particleAdditivePipeline_);
    destroyPipeline(smokePipeline_);
    destroyPipeline(ribbonPipeline_);
    destroyPipeline(ribbonAdditivePipeline_);
    if (pipelineLayout_) { vkDestroyPipelineLayout(device, pipelineLayout_, nullptr); pipelineLayout_ = VK_NULL_HANDLE; }
    if (particlePipelineLayout_) { vkDestroyPipelineLayout(device, particlePipelineLayout_, nullptr); particlePipelineLayout_ = VK_NULL_HANDLE; }
    if (smokePipelineLayout_) { vkDestroyPipelineLayout(device, smokePipelineLayout_, nullptr); smokePipelineLayout_ = VK_NULL_HANDLE; }
    if (ribbonPipelineLayout_) { vkDestroyPipelineLayout(device, ribbonPipelineLayout_, nullptr); ribbonPipelineLayout_ = VK_NULL_HANDLE; }
    // Destroy descriptor pools and layouts
    if (dummyBoneBuffer_) { vmaDestroyBuffer(alloc, dummyBoneBuffer_, dummyBoneAlloc_); dummyBoneBuffer_ = VK_NULL_HANDLE; }
    // dummyBoneSet_ is freed implicitly when boneDescPool_ is destroyed
    dummyBoneSet_ = VK_NULL_HANDLE;
    // Mega bone SSBO cleanup (sets freed implicitly with boneDescPool_)
    for (int i = 0; i < 2; i++) {
        if (megaBoneBuffer_[i]) { vmaDestroyBuffer(alloc, megaBoneBuffer_[i], megaBoneAlloc_[i]); megaBoneBuffer_[i] = VK_NULL_HANDLE; }
        megaBoneMapped_[i] = nullptr;
        megaBoneSet_[i] = VK_NULL_HANDLE;
    }
    if (materialDescPool_) { vkDestroyDescriptorPool(device, materialDescPool_, nullptr); materialDescPool_ = VK_NULL_HANDLE; }
    if (boneDescPool_) { vkDestroyDescriptorPool(device, boneDescPool_, nullptr); boneDescPool_ = VK_NULL_HANDLE; }
    // Instance data SSBO cleanup (sets freed with instanceDescPool_)
    for (int i = 0; i < 2; i++) {
        if (instanceBuffer_[i]) { vmaDestroyBuffer(alloc, instanceBuffer_[i], instanceAlloc_[i]); instanceBuffer_[i] = VK_NULL_HANDLE; }
        instanceMapped_[i] = nullptr;
        instanceSet_[i] = VK_NULL_HANDLE;
    }
    if (instanceDescPool_) { vkDestroyDescriptorPool(device, instanceDescPool_, nullptr); instanceDescPool_ = VK_NULL_HANDLE; }
    // GPU frustum culling compute pipeline + buffers cleanup
    if (cullHiZPipeline_) { vkDestroyPipeline(device, cullHiZPipeline_, nullptr); cullHiZPipeline_ = VK_NULL_HANDLE; }
    if (cullHiZPipelineLayout_) { vkDestroyPipelineLayout(device, cullHiZPipelineLayout_, nullptr); cullHiZPipelineLayout_ = VK_NULL_HANDLE; }
    if (cullPipeline_) { vkDestroyPipeline(device, cullPipeline_, nullptr); cullPipeline_ = VK_NULL_HANDLE; }
    if (cullPipelineLayout_) { vkDestroyPipelineLayout(device, cullPipelineLayout_, nullptr); cullPipelineLayout_ = VK_NULL_HANDLE; }
    for (int i = 0; i < 2; i++) {
        if (cullUniformBuffer_[i]) { vmaDestroyBuffer(alloc, cullUniformBuffer_[i], cullUniformAlloc_[i]); cullUniformBuffer_[i] = VK_NULL_HANDLE; }
        if (cullInputBuffer_[i]) { vmaDestroyBuffer(alloc, cullInputBuffer_[i], cullInputAlloc_[i]); cullInputBuffer_[i] = VK_NULL_HANDLE; }
        if (cullOutputBuffer_[i]) { vmaDestroyBuffer(alloc, cullOutputBuffer_[i], cullOutputAlloc_[i]); cullOutputBuffer_[i] = VK_NULL_HANDLE; }
        cullUniformMapped_[i] = cullInputMapped_[i] = cullOutputMapped_[i] = nullptr;
        cullSet_[i] = VK_NULL_HANDLE;
    }
    if (cullDescPool_) { vkDestroyDescriptorPool(device, cullDescPool_, nullptr); cullDescPool_ = VK_NULL_HANDLE; }
    // Set layouts go last — pools above referenced them
    if (cullSetLayout_) { vkDestroyDescriptorSetLayout(device, cullSetLayout_, nullptr); cullSetLayout_ = VK_NULL_HANDLE; }
    if (materialSetLayout_) { vkDestroyDescriptorSetLayout(device, materialSetLayout_, nullptr); materialSetLayout_ = VK_NULL_HANDLE; }
    if (boneSetLayout_) { vkDestroyDescriptorSetLayout(device, boneSetLayout_, nullptr); boneSetLayout_ = VK_NULL_HANDLE; }
    if (instanceSetLayout_) { vkDestroyDescriptorSetLayout(device, instanceSetLayout_, nullptr); instanceSetLayout_ = VK_NULL_HANDLE; }
    if (particleTexLayout_) { vkDestroyDescriptorSetLayout(device, particleTexLayout_, nullptr); particleTexLayout_ = VK_NULL_HANDLE; }
    // Destroy shadow resources
    destroyPipeline(shadowPipeline_);
    if (shadowPipelineLayout_) { vkDestroyPipelineLayout(device, shadowPipelineLayout_, nullptr); shadowPipelineLayout_ = VK_NULL_HANDLE; }
    for (auto& pool : shadowTexPool_) { if (pool) { vkDestroyDescriptorPool(device, pool, nullptr); pool = VK_NULL_HANDLE; } }
    if (shadowParamsPool_) { vkDestroyDescriptorPool(device, shadowParamsPool_, nullptr); shadowParamsPool_ = VK_NULL_HANDLE; }
    if (shadowParamsLayout_) { vkDestroyDescriptorSetLayout(device, shadowParamsLayout_, nullptr); shadowParamsLayout_ = VK_NULL_HANDLE; }
    if (shadowParamsUBO_) { vmaDestroyBuffer(alloc, shadowParamsUBO_, shadowParamsAlloc_); shadowParamsUBO_ = VK_NULL_HANDLE; }
    initialized_ = false;
}
void M2Renderer::destroyModelGPU(M2ModelGPU& model) {
    // Releases every GPU resource a cached model owns: geometry buffers,
    // per-batch material UBOs + descriptor sets, and the particle/ribbon
    // texture descriptor sets pre-allocated at load time.
    if (!vkCtx_) return;
    VmaAllocator alloc = vkCtx_->getAllocator();
    VkDevice device = vkCtx_->getDevice();
    // Destroy a VMA buffer and null its handle.
    auto releaseBuffer = [&](::VkBuffer& buf, VmaAllocation& bufAlloc) {
        if (buf) {
            vmaDestroyBuffer(alloc, buf, bufAlloc);
            buf = VK_NULL_HANDLE;
        }
    };
    // Return one descriptor set to the shared material pool and null it.
    auto releaseSet = [&](VkDescriptorSet& set) {
        if (set) {
            vkFreeDescriptorSets(device, materialDescPool_, 1, &set);
            set = VK_NULL_HANDLE;
        }
    };
    releaseBuffer(model.vertexBuffer, model.vertexAlloc);
    releaseBuffer(model.indexBuffer, model.indexAlloc);
    for (auto& batch : model.batches) {
        releaseSet(batch.materialSet);
        releaseBuffer(batch.materialUBO, batch.materialUBOAlloc);
    }
    // Free pre-allocated particle texture descriptor sets
    for (auto& pSet : model.particleTexSets) {
        releaseSet(pSet);
    }
    model.particleTexSets.clear();
    // Free ribbon texture descriptor sets
    for (auto& rSet : model.ribbonTexSets) {
        releaseSet(rSet);
    }
    model.ribbonTexSets.clear();
}
void M2Renderer::destroyInstanceBones(M2Instance& inst, bool defer) {
    // Releases an instance's double-buffered bone SSBOs and their descriptor
    // sets. With defer=false resources are freed immediately (only safe once
    // the device is idle, e.g. from shutdown()); with defer=true the free is
    // queued until every in-flight frame fence has signaled.
    if (!vkCtx_) return;
    VkDevice device = vkCtx_->getDevice();
    VmaAllocator alloc = vkCtx_->getAllocator();
    for (int i = 0; i < 2; i++) {  // one slot per frame-in-flight
        // Snapshot handles before clearing the instance — needed for both
        // immediate and deferred paths.
        VkDescriptorSet boneSet = inst.boneSet[i];
        ::VkBuffer boneBuf = inst.boneBuffer[i];
        VmaAllocation boneAlloc = inst.boneAlloc[i];
        inst.boneSet[i] = VK_NULL_HANDLE;
        inst.boneBuffer[i] = VK_NULL_HANDLE;
        inst.boneMapped[i] = nullptr;
        if (!defer) {
            // Immediate destruction (safe after vkDeviceWaitIdle)
            if (boneSet != VK_NULL_HANDLE) {
                vkFreeDescriptorSets(device, boneDescPool_, 1, &boneSet);
            }
            if (boneBuf) {
                vmaDestroyBuffer(alloc, boneBuf, boneAlloc);
            }
        } else if (boneSet != VK_NULL_HANDLE || boneBuf) {
            // Deferred destruction — the loop destroys bone sets for ALL frame
            // slots, so the other slot's command buffer may still be in flight.
            // Must wait for all fences, not just the current frame's.
            // Capture the pool by value so the callback stays valid even if
            // the member were to change before the fences signal.
            VkDescriptorPool pool = boneDescPool_;
            vkCtx_->deferAfterAllFrameFences([device, alloc, pool, boneSet, boneBuf, boneAlloc]() {
                if (boneSet != VK_NULL_HANDLE) {
                    // vkFreeDescriptorSets wants a writable handle array.
                    VkDescriptorSet s = boneSet;
                    vkFreeDescriptorSets(device, pool, 1, &s);
                }
                if (boneBuf) {
                    vmaDestroyBuffer(alloc, boneBuf, boneAlloc);
                }
            });
        }
    }
}
VkDescriptorSet M2Renderer::allocateMaterialSet() {
    // Allocates one descriptor set with the material layout from the shared
    // material pool. Returns VK_NULL_HANDLE (after logging) on failure,
    // e.g. pool exhaustion.
    VkDescriptorSet set = VK_NULL_HANDLE;
    VkDescriptorSetAllocateInfo allocInfo{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO};
    allocInfo.descriptorPool = materialDescPool_;
    allocInfo.descriptorSetCount = 1;
    allocInfo.pSetLayouts = &materialSetLayout_;
    const VkResult rc = vkAllocateDescriptorSets(vkCtx_->getDevice(), &allocInfo, &set);
    if (rc == VK_SUCCESS) {
        return set;
    }
    LOG_ERROR("M2Renderer: material descriptor set allocation failed (", rc, ")");
    return VK_NULL_HANDLE;
}
VkDescriptorSet M2Renderer::allocateBoneSet() {
    // Allocates one descriptor set with the bone-SSBO layout from the bone
    // pool. Returns VK_NULL_HANDLE (after logging) on failure.
    VkDescriptorSet set = VK_NULL_HANDLE;
    VkDescriptorSetAllocateInfo allocInfo{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO};
    allocInfo.descriptorPool = boneDescPool_;
    allocInfo.descriptorSetCount = 1;
    allocInfo.pSetLayouts = &boneSetLayout_;
    const VkResult rc = vkAllocateDescriptorSets(vkCtx_->getDevice(), &allocInfo, &set);
    if (rc == VK_SUCCESS) {
        return set;
    }
    LOG_ERROR("M2Renderer: bone descriptor set allocation failed (", rc, ")");
    return VK_NULL_HANDLE;
}
// ---------------------------------------------------------------------------
// M2 collision mesh: build spatial grid + classify triangles
// ---------------------------------------------------------------------------
void M2ModelGPU::CollisionMesh::build() {
if (indices.size() < 3 || vertices.empty()) return;
triCount = static_cast<uint32_t>(indices.size() / 3);
// Bounding box for grid
glm::vec3 bmin(std::numeric_limits<float>::max());
glm::vec3 bmax(-std::numeric_limits<float>::max());
for (const auto& v : vertices) {
bmin = glm::min(bmin, v);
bmax = glm::max(bmax, v);
}
gridOrigin = glm::vec2(bmin.x, bmin.y);
gridCellsX = std::max(1, std::min(32, static_cast<int>(std::ceil((bmax.x - bmin.x) / CELL_SIZE))));
gridCellsY = std::max(1, std::min(32, static_cast<int>(std::ceil((bmax.y - bmin.y) / CELL_SIZE))));
cellFloorTris.resize(gridCellsX * gridCellsY);
cellWallTris.resize(gridCellsX * gridCellsY);
triBounds.resize(triCount);
for (uint32_t ti = 0; ti < triCount; ti++) {
uint16_t i0 = indices[ti * 3];
uint16_t i1 = indices[ti * 3 + 1];
uint16_t i2 = indices[ti * 3 + 2];
if (i0 >= vertices.size() || i1 >= vertices.size() || i2 >= vertices.size()) continue;
const auto& v0 = vertices[i0];
const auto& v1 = vertices[i1];
const auto& v2 = vertices[i2];
triBounds[ti].minZ = std::min({v0.z, v1.z, v2.z});
triBounds[ti].maxZ = std::max({v0.z, v1.z, v2.z});
glm::vec3 normal = glm::cross(v1 - v0, v2 - v0);
float normalLen = glm::length(normal);
float absNz = (normalLen > 0.001f) ? std::abs(normal.z / normalLen) : 0.0f;
bool isFloor = (absNz >= 0.35f); // ~70° max slope (relaxed for steep stairs)
bool isWall = (absNz < 0.65f);
float triMinX = std::min({v0.x, v1.x, v2.x});
float triMaxX = std::max({v0.x, v1.x, v2.x});
float triMinY = std::min({v0.y, v1.y, v2.y});
float triMaxY = std::max({v0.y, v1.y, v2.y});
int cxMin = std::clamp(static_cast<int>((triMinX - gridOrigin.x) / CELL_SIZE), 0, gridCellsX - 1);
int cxMax = std::clamp(static_cast<int>((triMaxX - gridOrigin.x) / CELL_SIZE), 0, gridCellsX - 1);
int cyMin = std::clamp(static_cast<int>((triMinY - gridOrigin.y) / CELL_SIZE), 0, gridCellsY - 1);
int cyMax = std::clamp(static_cast<int>((triMaxY - gridOrigin.y) / CELL_SIZE), 0, gridCellsY - 1);
for (int cy = cyMin; cy <= cyMax; cy++) {
for (int cx = cxMin; cx <= cxMax; cx++) {
int ci = cy * gridCellsX + cx;
if (isFloor) cellFloorTris[ci].push_back(ti);
if (isWall) cellWallTris[ci].push_back(ti);
}
}
}
}
void M2ModelGPU::CollisionMesh::getFloorTrisInRange(
    float minX, float minY, float maxX, float maxY,
    std::vector<uint32_t>& out) const {
    // Collects the sorted, deduplicated floor-triangle indices from every
    // grid cell overlapping the query rectangle [minX,maxX]x[minY,maxY].
    out.clear();
    if (gridCellsX == 0 || gridCellsY == 0) return;
    const auto cellX = [&](float x) {
        return std::clamp(static_cast<int>((x - gridOrigin.x) / CELL_SIZE), 0, gridCellsX - 1);
    };
    const auto cellY = [&](float y) {
        return std::clamp(static_cast<int>((y - gridOrigin.y) / CELL_SIZE), 0, gridCellsY - 1);
    };
    const int cx0 = cellX(minX);
    const int cx1 = cellX(maxX);
    const int cy0 = cellY(minY);
    const int cy1 = cellY(maxY);
    const size_t touchedCells = static_cast<size_t>(cx1 - cx0 + 1) *
                                static_cast<size_t>(cy1 - cy0 + 1);
    out.reserve(touchedCells * 8);
    for (int cy = cy0; cy <= cy1; cy++) {
        for (int cx = cx0; cx <= cx1; cx++) {
            const auto& bucket = cellFloorTris[cy * gridCellsX + cx];
            out.insert(out.end(), bucket.begin(), bucket.end());
        }
    }
    // A triangle spanning several cells appears once per cell — dedupe.
    std::sort(out.begin(), out.end());
    out.erase(std::unique(out.begin(), out.end()), out.end());
}
void M2ModelGPU::CollisionMesh::getWallTrisInRange(
    float minX, float minY, float maxX, float maxY,
    std::vector<uint32_t>& out) const {
    // Collects the sorted, deduplicated wall-triangle indices from every
    // grid cell overlapping the query rectangle [minX,maxX]x[minY,maxY].
    out.clear();
    if (gridCellsX == 0 || gridCellsY == 0) return;
    const auto cellX = [&](float x) {
        return std::clamp(static_cast<int>((x - gridOrigin.x) / CELL_SIZE), 0, gridCellsX - 1);
    };
    const auto cellY = [&](float y) {
        return std::clamp(static_cast<int>((y - gridOrigin.y) / CELL_SIZE), 0, gridCellsY - 1);
    };
    const int cx0 = cellX(minX);
    const int cx1 = cellX(maxX);
    const int cy0 = cellY(minY);
    const int cy1 = cellY(maxY);
    const size_t touchedCells = static_cast<size_t>(cx1 - cx0 + 1) *
                                static_cast<size_t>(cy1 - cy0 + 1);
    out.reserve(touchedCells * 8);
    for (int cy = cy0; cy <= cy1; cy++) {
        for (int cx = cx0; cx <= cx1; cx++) {
            const auto& bucket = cellWallTris[cy * gridCellsX + cx];
            out.insert(out.end(), bucket.begin(), bucket.end());
        }
    }
    // A triangle spanning several cells appears once per cell — dedupe.
    std::sort(out.begin(), out.end());
    out.erase(std::unique(out.begin(), out.end()), out.end());
}
bool M2Renderer::hasModel(uint32_t modelId) const {
    // True when a GPU model with this id is resident in the cache.
    return models.count(modelId) != 0;
}
void M2Renderer::markModelAsSpellEffect(uint32_t modelId) {
    // Flags a cached model as a spell visual effect; no-op if not loaded.
    auto found = models.find(modelId);
    if (found == models.end()) return;
    auto& gpu = found->second;
    gpu.isSpellEffect = true;
    // Spell effects MUST have bone animation for ribbons/particles to work.
    // The classifier may have set disableAnimation=true based on name tokens
    // (e.g. "chest" in HolySmite_Low_Chest.m2) — override that for spell effects.
    if (gpu.hasAnimation && gpu.disableAnimation) {
        gpu.disableAnimation = false;
        LOG_INFO("SpellEffect: re-enabled animation for '", gpu.name, "'");
    }
}
bool M2Renderer::loadModel(const pipeline::M2Model& model, uint32_t modelId) {
if (models.find(modelId) != models.end()) {
// Already loaded
return true;
}
if (models.size() >= modelCacheLimit_) {
if (modelLimitRejectWarnings_ < 3) {
LOG_WARNING("M2 model cache full (", models.size(), "/", modelCacheLimit_,
"), skipping model load: id=", modelId, " name=", model.name);
}
++modelLimitRejectWarnings_;
return false;
}
bool hasGeometry = !model.vertices.empty() && !model.indices.empty();
bool hasParticles = !model.particleEmitters.empty();
bool hasRibbons = !model.ribbonEmitters.empty();
if (!hasGeometry && !hasParticles && !hasRibbons) {
LOG_WARNING("M2 model has no renderable content: ", model.name);
return false;
}
M2ModelGPU gpuModel;
gpuModel.name = model.name;
// Use tight bounds from actual vertices for collision/camera occlusion.
// Header bounds in some M2s are overly conservative.
glm::vec3 tightMin(0.0f);
glm::vec3 tightMax(0.0f);
if (hasGeometry) {
tightMin = glm::vec3(std::numeric_limits<float>::max());
tightMax = glm::vec3(-std::numeric_limits<float>::max());
for (const auto& v : model.vertices) {
tightMin = glm::min(tightMin, v.position);
tightMax = glm::max(tightMax, v.position);
}
}
// Classify model from name and geometry — pure function, no GPU dependencies.
auto cls = classifyM2Model(model.name, tightMin, tightMax,
model.vertices.size(),
model.particleEmitters.size());
const bool isInvisibleTrap = cls.isInvisibleTrap;
const bool groundDetailModel = cls.isGroundDetail;
if (isInvisibleTrap) {
LOG_INFO("Loading InvisibleTrap model: ", model.name, " (will be invisible, no collision)");
}
gpuModel.isInvisibleTrap = cls.isInvisibleTrap;
gpuModel.collisionSteppedFountain = cls.collisionSteppedFountain;
gpuModel.collisionSteppedLowPlatform = cls.collisionSteppedLowPlatform;
gpuModel.collisionBridge = cls.collisionBridge;
gpuModel.collisionPlanter = cls.collisionPlanter;
gpuModel.collisionStatue = cls.collisionStatue;
gpuModel.collisionTreeTrunk = cls.collisionTreeTrunk;
gpuModel.collisionNarrowVerticalProp = cls.collisionNarrowVerticalProp;
gpuModel.collisionSmallSolidProp = cls.collisionSmallSolidProp;
gpuModel.collisionNoBlock = cls.collisionNoBlock;
gpuModel.isGroundDetail = cls.isGroundDetail;
gpuModel.isFoliageLike = cls.isFoliageLike;
gpuModel.disableAnimation = cls.disableAnimation;
gpuModel.shadowWindFoliage = cls.shadowWindFoliage;
gpuModel.isFireflyEffect = cls.isFireflyEffect;
gpuModel.isSmallFoliage = cls.isSmallFoliage;
gpuModel.isSmoke = cls.isSmoke;
gpuModel.isSpellEffect = cls.isSpellEffect;
gpuModel.isLavaModel = cls.isLavaModel;
gpuModel.isInstancePortal = cls.isInstancePortal;
gpuModel.isWaterVegetation = cls.isWaterVegetation;
gpuModel.isElvenLike = cls.isElvenLike;
gpuModel.isLanternLike = cls.isLanternLike;
gpuModel.isKoboldFlame = cls.isKoboldFlame;
gpuModel.isWaterfall = cls.isWaterfall;
gpuModel.isBrazierOrFire = cls.isBrazierOrFire;
gpuModel.isTorch = cls.isTorch;
gpuModel.ambientEmitterType = cls.ambientEmitterType;
gpuModel.boundMin = tightMin;
gpuModel.boundMax = tightMax;
gpuModel.boundRadius = model.boundRadius;
// Fallback: compute bound radius from vertex extents when M2 header reports 0
if (gpuModel.boundRadius < 0.01f && !model.vertices.empty()) {
glm::vec3 extent = tightMax - tightMin;
gpuModel.boundRadius = glm::length(extent) * 0.5f;
}
gpuModel.indexCount = static_cast<uint32_t>(model.indices.size());
gpuModel.vertexCount = static_cast<uint32_t>(model.vertices.size());
// Store bone/sequence data for animation
gpuModel.bones = model.bones;
gpuModel.sequences = model.sequences;
gpuModel.globalSequenceDurations = model.globalSequenceDurations;
gpuModel.hasAnimation = false;
for (const auto& bone : model.bones) {
if (bone.translation.hasData() || bone.rotation.hasData() || bone.scale.hasData()) {
gpuModel.hasAnimation = true;
break;
}
}
// Build collision mesh + spatial grid from M2 bounding geometry
gpuModel.collision.vertices = model.collisionVertices;
gpuModel.collision.indices = model.collisionIndices;
gpuModel.collision.build();
if (gpuModel.collision.valid()) {
core::Logger::getInstance().debug(" M2 collision mesh: ", gpuModel.collision.triCount,
" tris, grid ", gpuModel.collision.gridCellsX, "x", gpuModel.collision.gridCellsY);
}
// Identify idle variation sequences (animation ID 0 = Stand)
for (int i = 0; i < static_cast<int>(model.sequences.size()); i++) {
if (model.sequences[i].id == 0 && model.sequences[i].duration > 0) {
gpuModel.idleVariationIndices.push_back(i);
}
}
// Batch all GPU uploads (VB, IB, textures) into a single command buffer
// submission with one fence wait, instead of one fence wait per upload.
vkCtx_->beginUploadBatch();
if (hasGeometry) {
// Create VBO with interleaved vertex data
// Format: position (3), normal (3), texcoord0 (2), texcoord1 (2), boneWeights (4), boneIndices (4 as float)
const size_t floatsPerVertex = 18;
std::vector<float> vertexData;
vertexData.reserve(model.vertices.size() * floatsPerVertex);
for (const auto& v : model.vertices) {
vertexData.push_back(v.position.x);
vertexData.push_back(v.position.y);
vertexData.push_back(v.position.z);
vertexData.push_back(v.normal.x);
vertexData.push_back(v.normal.y);
vertexData.push_back(v.normal.z);
vertexData.push_back(v.texCoords[0].x);
vertexData.push_back(v.texCoords[0].y);
vertexData.push_back(v.texCoords[1].x);
vertexData.push_back(v.texCoords[1].y);
float w0 = v.boneWeights[0] / 255.0f;
float w1 = v.boneWeights[1] / 255.0f;
float w2 = v.boneWeights[2] / 255.0f;
float w3 = v.boneWeights[3] / 255.0f;
vertexData.push_back(w0);
vertexData.push_back(w1);
vertexData.push_back(w2);
vertexData.push_back(w3);
vertexData.push_back(static_cast<float>(std::min(v.boneIndices[0], uint8_t(127))));
vertexData.push_back(static_cast<float>(std::min(v.boneIndices[1], uint8_t(127))));
vertexData.push_back(static_cast<float>(std::min(v.boneIndices[2], uint8_t(127))));
vertexData.push_back(static_cast<float>(std::min(v.boneIndices[3], uint8_t(127))));
}
// Upload vertex buffer to GPU
{
auto buf = uploadBuffer(*vkCtx_,
vertexData.data(), vertexData.size() * sizeof(float),
VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
gpuModel.vertexBuffer = buf.buffer;
gpuModel.vertexAlloc = buf.allocation;
}
// Upload index buffer to GPU
{
auto buf = uploadBuffer(*vkCtx_,
model.indices.data(), model.indices.size() * sizeof(uint16_t),
VK_BUFFER_USAGE_INDEX_BUFFER_BIT);
gpuModel.indexBuffer = buf.buffer;
gpuModel.indexAlloc = buf.allocation;
}
if (!gpuModel.vertexBuffer || !gpuModel.indexBuffer) {
LOG_ERROR("M2Renderer::loadModel: GPU buffer upload failed for model ", modelId);
}
}
// Load ALL textures from the model into a local vector.
// textureLoadFailed[i] is true if texture[i] had a named path that failed to load.
// Such batches are hidden (batchOpacity=0) rather than rendered white.
std::vector<VkTexture*> allTextures;
std::vector<bool> textureLoadFailed;
std::vector<std::string> textureKeysLower;
if (assetManager) {
for (size_t ti = 0; ti < model.textures.size(); ti++) {
const auto& tex = model.textures[ti];
std::string texPath = tex.filename;
// Some extracted M2 texture strings contain embedded NUL + garbage suffix.
// Truncate at first NUL so valid paths like "...foo.blp\0junk" still resolve.
size_t nul = texPath.find('\0');
if (nul != std::string::npos) {
texPath.resize(nul);
}
if (!texPath.empty()) {
std::string texKey = texPath;
std::replace(texKey.begin(), texKey.end(), '/', '\\');
std::transform(texKey.begin(), texKey.end(), texKey.begin(),
[](unsigned char c) { return static_cast<char>(std::tolower(c)); });
VkTexture* texPtr = loadTexture(texPath, tex.flags);
bool failed = (texPtr == whiteTexture_.get());
if (failed) {
static uint32_t loggedModelTextureFails = 0;
static bool loggedModelTextureFailSuppressed = false;
if (loggedModelTextureFails < 250) {
LOG_WARNING("M2 model ", model.name, " texture[", ti, "] failed to load: ", texPath);
++loggedModelTextureFails;
} else if (!loggedModelTextureFailSuppressed) {
LOG_WARNING("M2 model texture-failure warnings suppressed after ",
loggedModelTextureFails, " entries");
loggedModelTextureFailSuppressed = true;
}
}
if (isInvisibleTrap) {
LOG_INFO(" InvisibleTrap texture[", ti, "]: ", texPath, " -> ", (failed ? "WHITE" : "OK"));
}
allTextures.push_back(texPtr);
textureLoadFailed.push_back(failed);
textureKeysLower.push_back(std::move(texKey));
} else {
if (isInvisibleTrap) {
LOG_INFO(" InvisibleTrap texture[", ti, "]: EMPTY (using white fallback)");
}
allTextures.push_back(whiteTexture_.get());
textureLoadFailed.push_back(false); // Empty filename = intentional white (type!=0)
textureKeysLower.emplace_back();
}
}
}
static const bool kGlowDiag = envFlagEnabled("WOWEE_M2_GLOW_DIAG", false);
if (kGlowDiag) {
if (gpuModel.isLanternLike) {
for (size_t ti = 0; ti < model.textures.size(); ++ti) {
const std::string key = (ti < textureKeysLower.size()) ? textureKeysLower[ti] : std::string();
LOG_DEBUG("M2 GLOW TEX '", model.name, "' tex[", ti, "]='", key, "' flags=0x",
std::hex, model.textures[ti].flags, std::dec);
}
}
}
// Copy particle emitter data and resolve textures
gpuModel.particleEmitters = model.particleEmitters;
gpuModel.particleTextures.resize(model.particleEmitters.size(), whiteTexture_.get());
for (size_t ei = 0; ei < model.particleEmitters.size(); ei++) {
uint16_t texIdx = model.particleEmitters[ei].texture;
if (texIdx < allTextures.size() && allTextures[texIdx] != nullptr) {
gpuModel.particleTextures[ei] = allTextures[texIdx];
} else {
LOG_WARNING("M2 '", model.name, "' particle emitter[", ei,
"] texture index ", texIdx, " out of range (", allTextures.size(),
" textures) — using white fallback");
}
}
// Pre-allocate one stable descriptor set per particle emitter to avoid per-frame allocation.
// This prevents materialDescPool_ exhaustion when many emitters are active each frame.
if (particleTexLayout_ && materialDescPool_ && !model.particleEmitters.empty()) {
VkDevice device = vkCtx_->getDevice();
gpuModel.particleTexSets.resize(model.particleEmitters.size(), VK_NULL_HANDLE);
for (size_t ei = 0; ei < model.particleEmitters.size(); ei++) {
VkDescriptorSetAllocateInfo ai{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO};
ai.descriptorPool = materialDescPool_;
ai.descriptorSetCount = 1;
ai.pSetLayouts = &particleTexLayout_;
if (vkAllocateDescriptorSets(device, &ai, &gpuModel.particleTexSets[ei]) == VK_SUCCESS) {
VkTexture* tex = gpuModel.particleTextures[ei];
VkDescriptorImageInfo imgInfo = tex->descriptorInfo();
VkWriteDescriptorSet write{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
write.dstSet = gpuModel.particleTexSets[ei];
write.dstBinding = 0;
write.descriptorCount = 1;
write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
write.pImageInfo = &imgInfo;
vkUpdateDescriptorSets(device, 1, &write, 0, nullptr);
}
}
}
// Copy ribbon emitter data and resolve textures
gpuModel.ribbonEmitters = model.ribbonEmitters;
if (!model.ribbonEmitters.empty()) {
VkDevice device = vkCtx_->getDevice();
gpuModel.ribbonTextures.resize(model.ribbonEmitters.size(), whiteTexture_.get());
gpuModel.ribbonTexSets.resize(model.ribbonEmitters.size(), VK_NULL_HANDLE);
for (size_t ri = 0; ri < model.ribbonEmitters.size(); ri++) {
// Resolve texture: ribbon textureIndex is a direct index into the
// model's texture array (NOT through the textureLookup table).
uint16_t texDirect = model.ribbonEmitters[ri].textureIndex;
if (texDirect < allTextures.size() && allTextures[texDirect] != nullptr) {
gpuModel.ribbonTextures[ri] = allTextures[texDirect];
} else {
// Fallback: try through textureLookup table
uint32_t texIdx = (texDirect < model.textureLookup.size())
? model.textureLookup[texDirect] : UINT32_MAX;
if (texIdx < allTextures.size() && allTextures[texIdx] != nullptr) {
gpuModel.ribbonTextures[ri] = allTextures[texIdx];
} else {
LOG_WARNING("M2 '", model.name, "' ribbon emitter[", ri,
"] texIndex=", texDirect, " lookup failed"
" (direct=", (texDirect < allTextures.size() ? "yes" : "OOB"),
" lookup=", texIdx,
" textures=", allTextures.size(),
") — using white fallback");
}
}
// Allocate descriptor set (reuse particleTexLayout_ = single sampler)
if (particleTexLayout_ && materialDescPool_) {
VkDescriptorSetAllocateInfo ai{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO};
ai.descriptorPool = materialDescPool_;
ai.descriptorSetCount = 1;
ai.pSetLayouts = &particleTexLayout_;
if (vkAllocateDescriptorSets(device, &ai, &gpuModel.ribbonTexSets[ri]) == VK_SUCCESS) {
VkTexture* tex = gpuModel.ribbonTextures[ri];
VkDescriptorImageInfo imgInfo = tex->descriptorInfo();
VkWriteDescriptorSet write{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
write.dstSet = gpuModel.ribbonTexSets[ri];
write.dstBinding = 0;
write.descriptorCount = 1;
write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
write.pImageInfo = &imgInfo;
vkUpdateDescriptorSets(device, 1, &write, 0, nullptr);
}
}
}
LOG_DEBUG(" Ribbon emitters loaded: ", model.ribbonEmitters.size());
}
// Copy texture transform data for UV animation
gpuModel.textureTransforms = model.textureTransforms;
gpuModel.textureTransformLookup = model.textureTransformLookup;
gpuModel.hasTextureAnimation = false;
// Build per-batch GPU entries
if (!model.batches.empty()) {
for (const auto& batch : model.batches) {
M2ModelGPU::BatchGPU bgpu;
bgpu.indexStart = batch.indexStart;
bgpu.indexCount = batch.indexCount;
// Store texture animation index from batch
bgpu.textureAnimIndex = batch.textureAnimIndex;
if (bgpu.textureAnimIndex != 0xFFFF) {
gpuModel.hasTextureAnimation = true;
}
// Store blend mode and flags from material
if (batch.materialIndex < model.materials.size()) {
bgpu.blendMode = model.materials[batch.materialIndex].blendMode;
bgpu.materialFlags = model.materials[batch.materialIndex].flags;
if (bgpu.blendMode >= 2) gpuModel.hasTransparentBatches = true;
}
// Copy LOD level from batch
// ---- Tail of the M2 model load/upload routine (entry point begins above this
// ---- view): per-batch texture resolution and glow classification, opacity
// ---- baking, emitter-volume detection, per-batch GPU resource allocation
// ---- (material UBO + descriptor set), LOD mask, and final registration. ----
bgpu.submeshLevel = batch.submeshLevel;
// Resolve texture: batch.textureIndex → textureLookup → allTextures
VkTexture* tex = whiteTexture_.get();
bool texFailed = false;
std::string batchTexKeyLower;
if (batch.textureIndex < model.textureLookup.size()) {
uint16_t texIdx = model.textureLookup[batch.textureIndex];
if (texIdx < allTextures.size()) {
tex = allTextures[texIdx];
// A texture can resolve but have failed to load; track that separately so
// the batch can be hidden (or substituted) below.
texFailed = (texIdx < textureLoadFailed.size()) && textureLoadFailed[texIdx];
if (texIdx < textureKeysLower.size()) {
batchTexKeyLower = textureKeysLower[texIdx];
}
}
// texIdx is still in scope here: forward the low two bits of the M2 texture
// flags to the shader. NOTE(review): presumably the wrap-U/wrap-V bits —
// confirm against the M2 texture-flags definition.
if (texIdx < model.textures.size()) {
bgpu.texFlags = static_cast<uint8_t>(model.textures[texIdx].flags & 0x3);
}
} else if (!allTextures.empty()) {
// Out-of-range lookup index (corrupt or unusual model data): warn once and
// fall back to the model's first texture rather than rendering white.
LOG_WARNING("M2 '", model.name, "' batch textureIndex ", batch.textureIndex,
" out of range (textureLookup size=", model.textureLookup.size(),
") — falling back to texture[0]");
tex = allTextures[0];
texFailed = !textureLoadFailed.empty() && textureLoadFailed[0];
if (!textureKeysLower.empty()) {
batchTexKeyLower = textureKeysLower[0];
}
}
// Ground-detail doodads whose texture failed to load get a known-good stock
// detail texture instead of being hidden entirely.
if (texFailed && groundDetailModel) {
static const std::string kDetailFallbackTexture = "World\\NoDXT\\Detail\\8des_detaildoodads01.blp";
VkTexture* fallbackTex = loadTexture(kDetailFallbackTexture, 0);
if (fallbackTex != nullptr && fallbackTex != whiteTexture_.get()) {
tex = fallbackTex;
texFailed = false;
}
}
bgpu.texture = tex;
// Classify the texture key (lantern/glow/flame tokens) to decide whether this
// batch should get the lantern glow-sprite treatment at draw time.
const auto tcls = classifyBatchTexture(batchTexKeyLower);
const bool modelLanternFamily = gpuModel.isLanternLike;
bgpu.lanternGlowHint =
tcls.exactLanternGlowTex ||
((tcls.hasGlowToken || (modelLanternFamily && tcls.hasFlameToken)) &&
(tcls.lanternFamily || modelLanternFamily) &&
(!tcls.likelyFlame || modelLanternFamily));
bgpu.glowCardLike = bgpu.lanternGlowHint && tcls.hasGlowCardToken;
bgpu.glowTint = tcls.glowTint;
// Pull per-texture properties (alpha channel present, black-color-key heuristic)
// cached when the texture was loaded.
if (tex != nullptr && tex != whiteTexture_.get()) {
auto pit = texturePropsByPtr_.find(tex);
if (pit != texturePropsByPtr_.end()) {
bgpu.hasAlpha = pit->second.hasAlpha;
bgpu.colorKeyBlack = pit->second.colorKeyBlack;
}
}
// textureCoordIndex is an index into a texture coord combo table, not directly
// a UV set selector. Most batches have index=0 (UV set 0). We always use UV set 0
// since we don't have the full combo table — dual-UV effects are rare edge cases.
bgpu.textureUnit = 0;
// Start at full opacity; hide only if texture failed to load.
// (Ground-detail models are exempt — they received a fallback texture above.)
bgpu.batchOpacity = (texFailed && !groundDetailModel) ? 0.0f : 1.0f;
// Apply at-rest transparency and color alpha from the M2 animation tracks.
// These provide per-batch opacity for ghosts, ethereal effects, fading doodads, etc.
// Skip zero values: some animated tracks start at 0 and animate up, and baking
// that first keyframe would make the entire batch permanently invisible.
if (bgpu.batchOpacity > 0.0f) {
float animAlpha = 1.0f;
if (batch.colorIndex < model.colorAlphas.size()) {
float ca = model.colorAlphas[batch.colorIndex];
if (ca > 0.001f) animAlpha *= ca;
}
if (batch.transparencyIndex < model.textureWeights.size()) {
float tw = model.textureWeights[batch.transparencyIndex];
if (tw > 0.001f) animAlpha *= tw;
}
bgpu.batchOpacity *= animAlpha;
}
// Compute batch center and radius for glow sprite positioning.
// Only done for additive-ish blend modes (>=3) or color-keyed batches, which
// are the candidates for the glow-sprite pass; two passes over the index range:
// first the centroid, then the max distance from it (bounding radius).
if ((bgpu.blendMode >= 3 || bgpu.colorKeyBlack) && batch.indexCount > 0) {
glm::vec3 sum(0.0f);
uint32_t counted = 0;
for (uint32_t j = batch.indexStart; j < batch.indexStart + batch.indexCount; j++) {
if (j < model.indices.size()) {
uint16_t vi = model.indices[j];
if (vi < model.vertices.size()) {
sum += model.vertices[vi].position;
counted++;
}
}
}
if (counted > 0) {
bgpu.center = sum / static_cast<float>(counted);
float maxDist = 0.0f;
for (uint32_t j = batch.indexStart; j < batch.indexStart + batch.indexCount; j++) {
if (j < model.indices.size()) {
uint16_t vi = model.indices[j];
if (vi < model.vertices.size()) {
float d = glm::length(model.vertices[vi].position - bgpu.center);
maxDist = std::max(maxDist, d);
}
}
}
// Clamp the sprite size to a minimum so tiny glow meshes remain visible.
bgpu.glowSize = std::max(maxDist, 0.5f);
}
}
// Optional diagnostics for glow/light batches (disabled by default).
if (kGlowDiag && gpuModel.isLanternLike) {
LOG_DEBUG("M2 GLOW DIAG '", model.name, "' batch ", gpuModel.batches.size(),
": blend=", bgpu.blendMode, " matFlags=0x",
std::hex, bgpu.materialFlags, std::dec,
" colorKey=", bgpu.colorKeyBlack ? "Y" : "N",
" hasAlpha=", bgpu.hasAlpha ? "Y" : "N",
" unlit=", (bgpu.materialFlags & 0x01) ? "Y" : "N",
" lanternHint=", bgpu.lanternGlowHint ? "Y" : "N",
" glowSize=", bgpu.glowSize,
" tex=", bgpu.texture,
" idxCount=", bgpu.indexCount);
}
gpuModel.batches.push_back(bgpu);
}
} else {
// Fallback: single batch covering all indices with first texture
// (taken when the model has no batch table — see the branch head above this view).
M2ModelGPU::BatchGPU bgpu;
bgpu.indexStart = 0;
bgpu.indexCount = gpuModel.indexCount;
bgpu.texture = allTextures.empty() ? whiteTexture_.get() : allTextures[0];
if (bgpu.texture != nullptr && bgpu.texture != whiteTexture_.get()) {
auto pit = texturePropsByPtr_.find(bgpu.texture);
if (pit != texturePropsByPtr_.end()) {
bgpu.hasAlpha = pit->second.hasAlpha;
bgpu.colorKeyBlack = pit->second.colorKeyBlack;
}
}
gpuModel.batches.push_back(bgpu);
}
// Detect particle emitter volume models: box mesh (24 verts, 36 indices)
// with disproportionately large bounds. These are invisible bounding volumes
// that only exist to spawn particles — their mesh should never be rendered.
if (!isInvisibleTrap && !groundDetailModel &&
gpuModel.vertexCount <= 24 && gpuModel.indexCount <= 36
&& !model.particleEmitters.empty()) {
glm::vec3 size = gpuModel.boundMax - gpuModel.boundMin;
float maxDim = std::max({size.x, size.y, size.z});
if (maxDim > 5.0f) {
gpuModel.isInvisibleTrap = true;
LOG_DEBUG("M2 emitter volume hidden: '", model.name, "' size=(",
size.x, " x ", size.y, " x ", size.z, ")");
}
}
// Close the staged upload batch opened earlier in this function (not visible
// in this view) — buffer/texture uploads are finalized before we start
// touching descriptors.
vkCtx_->endUploadBatch();
// Allocate Vulkan descriptor sets and UBOs for each batch
for (auto& bgpu : gpuModel.batches) {
// Create combined UBO for M2Params (binding 1) + M2Material (binding 2)
// We allocate them as separate buffers for clarity
VmaAllocationInfo matAllocInfo{};
{
VkBufferCreateInfo bci{VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO};
bci.size = sizeof(M2MaterialUBO);
bci.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
VmaAllocationCreateInfo aci{};
aci.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
// Persistently mapped: matAllocInfo.pMappedData stays valid for the
// buffer's lifetime, so draw-time updates can memcpy directly.
aci.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
vmaCreateBuffer(vkCtx_->getAllocator(), &bci, &aci, &bgpu.materialUBO, &bgpu.materialUBOAlloc, &matAllocInfo);
// Write initial material data (static per-batch — fadeAlpha/interiorDarken updated at draw time)
M2MaterialUBO mat{};
mat.hasTexture = (bgpu.texture != nullptr && bgpu.texture != whiteTexture_.get()) ? 1 : 0;
// Alpha-test when blendMode==1 (alpha-key) or for blendMode>=2 batches
// whose texture carries no alpha channel.
mat.alphaTest = (bgpu.blendMode == 1 || (bgpu.blendMode >= 2 && !bgpu.hasAlpha)) ? 1 : 0;
mat.colorKeyBlack = bgpu.colorKeyBlack ? 1 : 0;
mat.colorKeyThreshold = 0.08f;
mat.unlit = (bgpu.materialFlags & 0x01) ? 1 : 0;
mat.blendMode = bgpu.blendMode;
mat.fadeAlpha = 1.0f;
mat.interiorDarken = 0.0f;
mat.specularIntensity = 0.5f;
memcpy(matAllocInfo.pMappedData, &mat, sizeof(mat));
// Keep the mapped pointer for per-frame fadeAlpha/interiorDarken writes.
bgpu.materialUBOMapped = matAllocInfo.pMappedData;
}
// Allocate descriptor set and write all bindings.
// NOTE(review): only bindings 0 (texture) and 2 (material UBO) are written
// here; binding 1 (M2Params) is presumably bound elsewhere — confirm.
bgpu.materialSet = allocateMaterialSet();
if (bgpu.materialSet) {
VkTexture* batchTex = bgpu.texture ? bgpu.texture : whiteTexture_.get();
VkDescriptorImageInfo imgInfo = batchTex->descriptorInfo();
VkDescriptorBufferInfo matBufInfo{};
matBufInfo.buffer = bgpu.materialUBO;
matBufInfo.offset = 0;
matBufInfo.range = sizeof(M2MaterialUBO);
VkWriteDescriptorSet writes[2] = {};
// binding 0: texture
writes[0].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
writes[0].dstSet = bgpu.materialSet;
writes[0].dstBinding = 0;
writes[0].descriptorCount = 1;
writes[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
writes[0].pImageInfo = &imgInfo;
// binding 2: M2Material UBO
writes[1].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
writes[1].dstSet = bgpu.materialSet;
writes[1].dstBinding = 2;
writes[1].descriptorCount = 1;
writes[1].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
writes[1].pBufferInfo = &matBufInfo;
vkUpdateDescriptorSets(vkCtx_->getDevice(), 2, writes, 0, nullptr);
}
}
// Pre-compute available LOD levels to avoid per-instance batch iteration:
// bit i of availableLODs is set when some batch exists at submesh level i (< 8).
gpuModel.availableLODs = 0;
for (const auto& b : gpuModel.batches) {
if (b.submeshLevel < 8) gpuModel.availableLODs |= (1u << b.submeshLevel);
}
// Register the finished model. gpuModel is moved-from past this point;
// subsequent reads go through models[modelId].
models[modelId] = std::move(gpuModel);
spatialIndexDirty_ = true; // Map may have rehashed — refresh cachedModel pointers
LOG_DEBUG("Loaded M2 model: ", model.name, " (", models[modelId].vertexCount, " vertices, ",
models[modelId].indexCount / 3, " triangles, ", models[modelId].batches.size(), " batches)");
return true;
}
} // namespace rendering
} // namespace wowee