mirror of
https://github.com/exo-explore/exo.git
synced 2026-02-24 10:18:12 -05:00
Compare commits
3 Commits
main
...
leo/log-ki
| Author | SHA1 | Date | |
|---|---|---|---|
| | 00fc33f260 | | |
| | 93a0f5b7d5 | | |
| | 6e86e0386e | | |
@@ -95,7 +95,7 @@
|
||||
{#if showHome}
|
||||
<button
|
||||
onclick={handleHome}
|
||||
class="text-sm text-white/70 hover:text-exo-yellow transition-colors tracking-wider uppercase flex items-center gap-2 cursor-pointer"
|
||||
class="text-sm text-exo-light-gray hover:text-exo-yellow transition-colors tracking-wider uppercase flex items-center gap-2 cursor-pointer"
|
||||
title="Back to topology view"
|
||||
>
|
||||
<svg
|
||||
@@ -116,7 +116,7 @@
|
||||
{/if}
|
||||
<a
|
||||
href="/#/downloads"
|
||||
class="text-sm text-white/70 hover:text-exo-yellow transition-colors tracking-wider uppercase flex items-center gap-2 cursor-pointer"
|
||||
class="text-sm text-exo-light-gray hover:text-exo-yellow transition-colors tracking-wider uppercase flex items-center gap-2 cursor-pointer"
|
||||
title="View downloads overview"
|
||||
>
|
||||
{#if downloadProgress}
|
||||
|
||||
@@ -620,6 +620,34 @@ class DeepSeekShardingStrategy(TensorParallelShardingStrategy):
|
||||
if on_layer_loaded is not None:
|
||||
on_layer_loaded(i, total)
|
||||
|
||||
def log_info(stuff: nn.Module, name: str):
    """Log the shape and dtype of a module's parameters.

    Emits a header line for ``name``, then the shape and dtype of
    ``stuff.weight``, followed by the same details for ``stuff.scales``
    and ``stuff.biases``. When one of those two attributes is absent,
    a ``None`` placeholder line is logged in its place.

    Args:
        stuff: Module to inspect; must expose a ``weight`` attribute.
        name: Human-readable label used in the header line.
    """
    logger.info(f"Info for {name}:")

    weight = stuff.weight
    logger.info(f"Weights: {weight.shape} {weight.dtype}")

    # Optional attributes are reported in a fixed order: scales, then biases.
    for label, attr in (("Scales", "scales"), ("Biases", "biases")):
        if not hasattr(stuff, attr):
            logger.info(f"{label}: None")
            continue
        tensor = getattr(stuff, attr)
        logger.info(f"{label}: {tensor.shape} {tensor.dtype}")
|
||||
|
||||
if i == 9:
|
||||
if getattr(layer.mlp, "shared_experts", None) is not None:
|
||||
log_info(
|
||||
layer.mlp.shared_experts.down_proj, "Shared experts Down Proj"
|
||||
)
|
||||
|
||||
log_info(layer.mlp.switch_mlp.gate_proj, "Switch MLP Gate Proj")
|
||||
|
||||
log_info(layer.self_attn.o_proj, "Self Attn O_proj")
|
||||
|
||||
return model
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user