Compare commits

..

3 Commits

Author SHA1 Message Date
Ryuichi Leo Takashige
00fc33f260 add some logs for david 2026-02-24 12:15:26 +00:00
Ryuichi Leo Takashige
93a0f5b7d5 add some logs for david 2026-02-24 12:12:38 +00:00
Ryuichi Leo Takashige
6e86e0386e add some logs for david 2026-02-24 12:08:08 +00:00
2 changed files with 30 additions and 2 deletions

View File

@@ -95,7 +95,7 @@
{#if showHome}
<button
onclick={handleHome}
class="text-sm text-white/70 hover:text-exo-yellow transition-colors tracking-wider uppercase flex items-center gap-2 cursor-pointer"
class="text-sm text-exo-light-gray hover:text-exo-yellow transition-colors tracking-wider uppercase flex items-center gap-2 cursor-pointer"
title="Back to topology view"
>
<svg
@@ -116,7 +116,7 @@
{/if}
<a
href="/#/downloads"
class="text-sm text-white/70 hover:text-exo-yellow transition-colors tracking-wider uppercase flex items-center gap-2 cursor-pointer"
class="text-sm text-exo-light-gray hover:text-exo-yellow transition-colors tracking-wider uppercase flex items-center gap-2 cursor-pointer"
title="View downloads overview"
>
{#if downloadProgress}

View File

@@ -620,6 +620,34 @@ class DeepSeekShardingStrategy(TensorParallelShardingStrategy):
if on_layer_loaded is not None:
on_layer_loaded(i, total)
def log_info(stuff: nn.Module, name: str):
    """Log the shape and dtype of a module's weight, scales and biases.

    Args:
        stuff: Module to inspect. It must expose a ``weight`` attribute with
            ``shape`` and ``dtype``; ``scales`` and ``biases`` are optional.
        name: Human-readable label written in the log header line.

    An optional attribute that is missing *or* set to ``None`` is reported as
    ``"<Label>: None"`` instead of raising, so this debug helper can never
    abort the surrounding load.
    """
    logger.info(f"Info for {name}:")
    weights = stuff.weight
    logger.info(f"Weights: {weights.shape} {weights.dtype}")
    # NOTE(review): presumably these are the quantization parameters of a
    # quantized layer, and ``biases`` can exist but be None for some
    # quantization modes -- confirm against the layer implementation.
    for label, attr in (("Scales", "scales"), ("Biases", "biases")):
        # getattr-with-default covers both "attribute absent" and
        # "attribute present but None"; hasattr() only covered the former.
        tensor = getattr(stuff, attr, None)
        if tensor is None:
            logger.info(f"{label}: None")
        else:
            logger.info(f"{label}: {tensor.shape} {tensor.dtype}")
if i == 9:
if getattr(layer.mlp, "shared_experts", None) is not None:
log_info(
layer.mlp.shared_experts.down_proj, "Shared experts Down Proj"
)
log_info(layer.mlp.switch_mlp.gate_proj, "Switch MLP Gate Proj")
log_info(layer.self_attn.o_proj, "Self Attn O_proj")
return model