diff --git a/llama/patches/0024-GPU-discovery-enhancements.patch b/llama/patches/0024-GPU-discovery-enhancements.patch
index 11106f4e7..6e4ef2394 100644
--- a/llama/patches/0024-GPU-discovery-enhancements.patch
+++ b/llama/patches/0024-GPU-discovery-enhancements.patch
@@ -20,10 +20,10 @@ fix vulkan PCI ID and ID handling
  ggml/src/ggml-cuda/vendors/hip.h     |   3 +
  ggml/src/ggml-impl.h                 |   8 +
  ggml/src/ggml-metal/ggml-metal.cpp   |   2 +
- ggml/src/ggml-vulkan/ggml-vulkan.cpp | 169 ++++++++-
- ggml/src/mem_hip.cpp                 | 529 +++++++++++++++++++++++++++
- ggml/src/mem_nvml.cpp                | 209 +++++++++
- 9 files changed, 976 insertions(+), 17 deletions(-)
+ ggml/src/ggml-vulkan/ggml-vulkan.cpp | 169 +++++++-
+ ggml/src/mem_hip.cpp                 | 558 +++++++++++++++++++++++++++
+ ggml/src/mem_nvml.cpp                | 209 ++++++++++
+ 9 files changed, 1005 insertions(+), 17 deletions(-)
  create mode 100644 ggml/src/mem_hip.cpp
  create mode 100644 ggml/src/mem_nvml.cpp

@@ -58,7 +58,7 @@ index d55aed348..99ae293cc 100644

  set_target_properties(ggml-base PROPERTIES
 diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
-index 6852d2e20..48cdb1dcf 100644
+index 6852d2e20..334a30135 100644
 --- a/ggml/src/ggml-cuda/ggml-cuda.cu
 +++ b/ggml/src/ggml-cuda/ggml-cuda.cu
 @@ -267,6 +267,16 @@ static ggml_cuda_device_info ggml_cuda_init() {
@@ -109,7 +109,7 @@ index 6852d2e20..48cdb1dcf 100644
 +
 +#if defined(GGML_USE_HIP)
 +    if (ggml_hip_mgmt_init() == 0) {
-+        int status = ggml_hip_get_device_memory(ctx->pci_bus_id.c_str(), free, total);
++        int status = ggml_hip_get_device_memory(ctx->pci_bus_id.c_str(), free, total, ctx->integrated != 0);
 +        if (status == 0) {
 +            GGML_LOG_DEBUG("%s device %s utilizing AMD specific memory reporting free: %zu total: %zu\n", __func__, ctx->pci_bus_id.c_str(), *free, *total);
 +            ggml_hip_mgmt_release();
@@ -204,7 +204,7 @@ index 4e162258d..d89e35a8e 100644
  #define cudaErrorPeerAccessAlreadyEnabled hipErrorPeerAccessAlreadyEnabled
  #define cudaErrorPeerAccessNotEnabled hipErrorPeerAccessNotEnabled
 diff --git a/ggml/src/ggml-impl.h b/ggml/src/ggml-impl.h
-index fe57d4c58..1c07e767a 100644
+index fe57d4c58..dba8f4695 100644
 --- a/ggml/src/ggml-impl.h
 +++ b/ggml/src/ggml-impl.h
 @@ -677,6 +677,14 @@ static inline bool ggml_can_fuse_subgraph(const struct ggml_cgraph * cgraph,
 +GGML_API int ggml_nvml_get_device_memory(const char *uuid, size_t *free, size_t *total);
 +GGML_API void ggml_nvml_release();
 +GGML_API int ggml_hip_mgmt_init();
-+GGML_API int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total);
++GGML_API int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total, bool is_integrated_gpu);
 +GGML_API void ggml_hip_mgmt_release();
 +
  #ifdef __cplusplus
@@ -243,7 +243,7 @@ index ba95b4acc..f6f8f7a10 100644
           /* .async                = */ true,
           /* .host_buffer          = */ false,
 diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
-index 5349bce24..d43d46d1d 100644
+index 5349bce24..0103fd03a 100644
 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
 +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
 @@ -236,6 +236,7 @@ class vk_memory_logger;
@@ -334,7 +334,7 @@ index 5349bce24..d43d46d1d 100644
 +    switch (props2.properties.vendorID) {
 +    case VK_VENDOR_ID_AMD:
 +        if (ggml_hip_mgmt_init() == 0) {
-+            int status = ggml_hip_get_device_memory(ctx->pci_id != "" ? ctx->pci_id.c_str() : ctx->uuid.c_str(), free, total);
++            int status = ggml_hip_get_device_memory(ctx->pci_id != "" ? ctx->pci_id.c_str() : ctx->uuid.c_str(), free, total, ctx->is_integrated_gpu);
 +            if (status == 0) {
 +                GGML_LOG_DEBUG("%s device %s utilizing AMD specific memory reporting free: %zu total: %zu\n", __func__, ctx->pci_id != "" ? ctx->pci_id.c_str() : ctx->uuid.c_str(), *free, *total);
 +                ggml_hip_mgmt_release();
@@ -505,10 +505,10 @@ index 5349bce24..d43d46d1d 100644
  }
 diff --git a/ggml/src/mem_hip.cpp b/ggml/src/mem_hip.cpp
 new file mode 100644
-index 000000000..c1949b899
+index 000000000..23c765806
 --- /dev/null
 +++ b/ggml/src/mem_hip.cpp
-@@ -0,0 +1,529 @@
+@@ -0,0 +1,558 @@
 +#include "ggml.h"
 +#include "ggml-impl.h"
 +
@@ -842,7 +842,7 @@ index 000000000..c1949b899
 +    if (gpus != NULL) gpus->pVtbl->Release(gpus); \
 +    if (gpu != NULL) gpu->pVtbl->Release(gpu)
 +
-+int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total) {
++int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total, bool is_integrated_gpu) {
 +    std::lock_guard lock(ggml_adlx_lock);
 +    if (adlx.handle == NULL) {
 +        GGML_LOG_INFO("%s ADLX was not initialized\n", __func__);
@@ -966,13 +966,16 @@ index 000000000..c1949b899
 +    return 0;
 +}
 +void ggml_hip_mgmt_release() {}
-+int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total) {
++int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total, bool is_integrated_gpu) {
 +    GGML_LOG_INFO("%s searching for device %s\n", __func__, id);
 +    const std::string drmDeviceGlob = "/sys/class/drm/card*/device/uevent";
 +    const std::string drmTotalMemoryFile = "mem_info_vram_total";
 +    const std::string drmUsedMemoryFile = "mem_info_vram_used";
++    const std::string drmGTTTotalMemoryFile = "mem_info_gtt_total";
++    const std::string drmGTTUsedMemoryFile = "mem_info_gtt_used";
 +    const std::string drmUeventPCISlotLabel = "PCI_SLOT_NAME=";
 +
++
 +    glob_t glob_result;
 +    glob(drmDeviceGlob.c_str(), GLOB_NOSORT, NULL, &glob_result);
 +
@@ -1006,7 +1009,6 @@ index 000000000..c1949b899
 +
 +    uint64_t memory;
 +    totalFileStream >> memory;
-+    *total = memory;
 +
 +    std::string usedFile = dir + "/" + drmUsedMemoryFile;
 +    std::ifstream usedFileStream(usedFile.c_str());
@@ -1019,6 +1021,33 @@ index 000000000..c1949b899
 +
 +    uint64_t memoryUsed;
 +    usedFileStream >> memoryUsed;
++
++    if (is_integrated_gpu) {
++        std::string totalFile = dir + "/" + drmGTTTotalMemoryFile;
++        std::ifstream totalFileStream(totalFile.c_str());
++        if (!totalFileStream.is_open()) {
++            GGML_LOG_DEBUG("%s Failed to read sysfs node %s\n", __func__, totalFile.c_str());
++            file.close();
++            globfree(&glob_result);
++            return 1;
++        }
++        uint64_t gtt;
++        totalFileStream >> gtt;
++        std::string usedFile = dir + "/" + drmGTTUsedMemoryFile;
++        std::ifstream usedFileStream(usedFile.c_str());
++        if (!usedFileStream.is_open()) {
++            GGML_LOG_DEBUG("%s Failed to read sysfs node %s\n", __func__, usedFile.c_str());
++            file.close();
++            globfree(&glob_result);
++            return 1;
++        }
++        uint64_t gttUsed;
++        usedFileStream >> gttUsed;
++        memory += gtt;
++        memoryUsed += gttUsed;
++    }
++
++    *total = memory;
 +    *free = memory - memoryUsed;
 +
 +    file.close();
diff --git a/llama/patches/0028-Add-memory-detection-using-DXGI-PDH.patch b/llama/patches/0028-Add-memory-detection-using-DXGI-PDH.patch
index d45e4ec75..e7bca2de0 100644
--- a/llama/patches/0028-Add-memory-detection-using-DXGI-PDH.patch
+++ b/llama/patches/0028-Add-memory-detection-using-DXGI-PDH.patch
@@ -24,12 +24,12 @@ index 99ae293cc..9a134b7af 100644

  set_target_properties(ggml-base PROPERTIES
 diff --git a/ggml/src/ggml-impl.h b/ggml/src/ggml-impl.h
-index 1c07e767a..0da3e065b 100644
+index dba8f4695..7e17032c7 100644
 --- a/ggml/src/ggml-impl.h
 +++ b/ggml/src/ggml-impl.h
 @@ -684,6 +684,9 @@ GGML_API void ggml_nvml_release();
  GGML_API int ggml_hip_mgmt_init();
- GGML_API int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total);
+ GGML_API int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total, bool is_integrated_gpu);
  GGML_API void ggml_hip_mgmt_release();
 +GGML_API int ggml_dxgi_pdh_init();
 +GGML_API int ggml_dxgi_pdh_get_device_memory(const char* luid, size_t *free, size_t *total, bool is_integrated_gpu);
@@ -38,7 +38,7 @@ index 1c07e767a..0da3e065b 100644
  #ifdef __cplusplus
  }
 diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
-index d43d46d1d..df79f9f79 100644
+index 0103fd03a..9cc4ebdef 100644
 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
 +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
 @@ -74,6 +74,7 @@ DispatchLoaderDynamic & ggml_vk_default_dispatcher();
diff --git a/llama/patches/0029-ggml-cuda-skip-large-batches.patch b/llama/patches/0029-ggml-cuda-skip-large-batches.patch
index a1005c580..483c56537 100644
--- a/llama/patches/0029-ggml-cuda-skip-large-batches.patch
+++ b/llama/patches/0029-ggml-cuda-skip-large-batches.patch
@@ -10,7 +10,7 @@ fallback to cpu
  1 file changed, 3 insertions(+)

 diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
-index 48cdb1dcf..3102d7ea7 100644
+index 334a30135..5c9dfd032 100644
 --- a/ggml/src/ggml-cuda/ggml-cuda.cu
 +++ b/ggml/src/ggml-cuda/ggml-cuda.cu
 @@ -4633,6 +4633,9 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
diff --git a/llama/patches/0031-fix-bakllava-regression.patch b/llama/patches/0030-fix-bakllava-regression.patch
similarity index 100%
rename from llama/patches/0031-fix-bakllava-regression.patch
rename to llama/patches/0030-fix-bakllava-regression.patch
diff --git a/llama/patches/0030-win-exit-instead-of-abort.patch b/llama/patches/0031-win-exit-instead-of-abort.patch
similarity index 100%
rename from llama/patches/0030-win-exit-instead-of-abort.patch
rename to llama/patches/0031-win-exit-instead-of-abort.patch
diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu b/ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu
index 3102d7ea7..5c9dfd032 100644
--- a/ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu
+++ b/ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu
@@ -4436,7 +4436,7 @@ static void ggml_backend_cuda_device_get_memory(ggml_backend_dev_t dev, size_t *

 #if defined(GGML_USE_HIP)
     if (ggml_hip_mgmt_init() == 0) {
-        int status = ggml_hip_get_device_memory(ctx->pci_bus_id.c_str(), free, total);
+        int status = ggml_hip_get_device_memory(ctx->pci_bus_id.c_str(), free, total, ctx->integrated != 0);
         if (status == 0) {
             GGML_LOG_DEBUG("%s device %s utilizing AMD specific memory reporting free: %zu total: %zu\n", __func__, ctx->pci_bus_id.c_str(), *free, *total);
             ggml_hip_mgmt_release();
diff --git a/ml/backend/ggml/ggml/src/ggml-impl.h b/ml/backend/ggml/ggml/src/ggml-impl.h
index 0da3e065b..7e17032c7 100644
--- a/ml/backend/ggml/ggml/src/ggml-impl.h
+++ b/ml/backend/ggml/ggml/src/ggml-impl.h
@@ -682,7 +682,7 @@ GGML_API int ggml_nvml_init();
 GGML_API int ggml_nvml_get_device_memory(const char *uuid, size_t *free, size_t *total);
 GGML_API void ggml_nvml_release();
 GGML_API int ggml_hip_mgmt_init();
-GGML_API int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total);
+GGML_API int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total, bool is_integrated_gpu);
 GGML_API void ggml_hip_mgmt_release();
 GGML_API int ggml_dxgi_pdh_init();
 GGML_API int ggml_dxgi_pdh_get_device_memory(const char* luid, size_t *free, size_t *total, bool is_integrated_gpu);
diff --git a/ml/backend/ggml/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ml/backend/ggml/ggml/src/ggml-vulkan/ggml-vulkan.cpp
index df79f9f79..9cc4ebdef 100644
--- a/ml/backend/ggml/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ml/backend/ggml/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -13710,7 +13710,7 @@ void ggml_backend_vk_get_device_memory(ggml_backend_vk_device_context *ctx, size
     switch (props2.properties.vendorID) {
     case VK_VENDOR_ID_AMD:
         if (ggml_hip_mgmt_init() == 0) {
-            int status = ggml_hip_get_device_memory(ctx->pci_id != "" ? ctx->pci_id.c_str() : ctx->uuid.c_str(), free, total);
+            int status = ggml_hip_get_device_memory(ctx->pci_id != "" ? ctx->pci_id.c_str() : ctx->uuid.c_str(), free, total, ctx->is_integrated_gpu);
             if (status == 0) {
                 GGML_LOG_DEBUG("%s device %s utilizing AMD specific memory reporting free: %zu total: %zu\n", __func__, ctx->pci_id != "" ? ctx->pci_id.c_str() : ctx->uuid.c_str(), *free, *total);
                 ggml_hip_mgmt_release();
diff --git a/ml/backend/ggml/ggml/src/mem_hip.cpp b/ml/backend/ggml/ggml/src/mem_hip.cpp
index c1949b899..23c765806 100644
--- a/ml/backend/ggml/ggml/src/mem_hip.cpp
+++ b/ml/backend/ggml/ggml/src/mem_hip.cpp
@@ -331,7 +331,7 @@ void ggml_hip_mgmt_release() {
     if (gpus != NULL) gpus->pVtbl->Release(gpus); \
     if (gpu != NULL) gpu->pVtbl->Release(gpu)

-int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total) {
+int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total, bool is_integrated_gpu) {
     std::lock_guard lock(ggml_adlx_lock);
     if (adlx.handle == NULL) {
         GGML_LOG_INFO("%s ADLX was not initialized\n", __func__);
@@ -455,13 +455,16 @@ int ggml_hip_mgmt_init() {
     return 0;
 }
 void ggml_hip_mgmt_release() {}
-int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total) {
+int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total, bool is_integrated_gpu) {
     GGML_LOG_INFO("%s searching for device %s\n", __func__, id);
     const std::string drmDeviceGlob = "/sys/class/drm/card*/device/uevent";
     const std::string drmTotalMemoryFile = "mem_info_vram_total";
     const std::string drmUsedMemoryFile = "mem_info_vram_used";
+    const std::string drmGTTTotalMemoryFile = "mem_info_gtt_total";
+    const std::string drmGTTUsedMemoryFile = "mem_info_gtt_used";
     const std::string drmUeventPCISlotLabel = "PCI_SLOT_NAME=";

+
     glob_t glob_result;
     glob(drmDeviceGlob.c_str(), GLOB_NOSORT, NULL, &glob_result);

@@ -495,7 +498,6 @@ int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total) {

     uint64_t memory;
     totalFileStream >> memory;
-    *total = memory;

     std::string usedFile = dir + "/" + drmUsedMemoryFile;
     std::ifstream usedFileStream(usedFile.c_str());
@@ -508,6 +510,33 @@ int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total) {

     uint64_t memoryUsed;
     usedFileStream >> memoryUsed;
+
+    if (is_integrated_gpu) {
+        std::string totalFile = dir + "/" + drmGTTTotalMemoryFile;
+        std::ifstream totalFileStream(totalFile.c_str());
+        if (!totalFileStream.is_open()) {
+            GGML_LOG_DEBUG("%s Failed to read sysfs node %s\n", __func__, totalFile.c_str());
+            file.close();
+            globfree(&glob_result);
+            return 1;
+        }
+        uint64_t gtt;
+        totalFileStream >> gtt;
+        std::string usedFile = dir + "/" + drmGTTUsedMemoryFile;
+        std::ifstream usedFileStream(usedFile.c_str());
+        if (!usedFileStream.is_open()) {
+            GGML_LOG_DEBUG("%s Failed to read sysfs node %s\n", __func__, usedFile.c_str());
+            file.close();
+            globfree(&glob_result);
+            return 1;
+        }
+        uint64_t gttUsed;
+        usedFileStream >> gttUsed;
+        memory += gtt;
+        memoryUsed += gttUsed;
+    }
+
+    *total = memory;
     *free = memory - memoryUsed;

     file.close();
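Note on the Linux sysfs fallback above: on integrated AMD GPUs the dedicated VRAM carve-out reported by mem_info_vram_total is typically small, so when is_integrated_gpu is set the change also folds the GTT pool (mem_info_gtt_total / mem_info_gtt_used, i.e. system memory the GPU can map) into the totals before free is computed as total minus used. Below is a minimal standalone sketch of the same accounting, assuming the amdgpu sysfs layout used by the patch; the read_sysfs_u64 helper, the default card0 path, and the argument handling are illustrative and not part of the patch.

// Standalone sketch: VRAM+GTT accounting for integrated AMD GPUs,
// mirroring the mem_hip.cpp fallback above (helper names are hypothetical).
#include <cstdint>
#include <fstream>
#include <iostream>
#include <string>

// Read one integer value from an amdgpu sysfs node; false on any failure.
static bool read_sysfs_u64(const std::string &path, uint64_t &out) {
    std::ifstream f(path);
    return static_cast<bool>(f >> out);
}

int main(int argc, char **argv) {
    // e.g. /sys/class/drm/card0/device
    const std::string dev = argc > 1 ? argv[1] : "/sys/class/drm/card0/device";
    const bool is_integrated_gpu = argc > 2;  // pass any extra arg to treat as iGPU

    uint64_t total = 0, used = 0;
    if (!read_sysfs_u64(dev + "/mem_info_vram_total", total) ||
        !read_sysfs_u64(dev + "/mem_info_vram_used", used)) {
        std::cerr << "failed to read VRAM sysfs nodes under " << dev << "\n";
        return 1;
    }

    if (is_integrated_gpu) {
        // GTT is GPU-mappable system memory; adding it keeps an iGPU's
        // reported capacity from being limited to its small VRAM carve-out.
        uint64_t gtt_total = 0, gtt_used = 0;
        if (!read_sysfs_u64(dev + "/mem_info_gtt_total", gtt_total) ||
            !read_sysfs_u64(dev + "/mem_info_gtt_used", gtt_used)) {
            std::cerr << "failed to read GTT sysfs nodes under " << dev << "\n";
            return 1;
        }
        total += gtt_total;
        used += gtt_used;
    }

    std::cout << "total: " << total << " free: " << (total - used) << "\n";
    return 0;
}

For example, running it as ./vram_gtt /sys/class/drm/card1/device igpu prints the combined VRAM+GTT total and free figures for that card, matching what the patched ggml_hip_get_device_memory reports for an integrated device.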