mirror of
https://github.com/exo-explore/exo.git
synced 2026-02-24 10:18:12 -05:00
Compare commits
3 Commits
bump-versi
...
leo/log-ki
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
00fc33f260 | ||
|
|
93a0f5b7d5 | ||
|
|
6e86e0386e |
@@ -620,6 +620,34 @@ class DeepSeekShardingStrategy(TensorParallelShardingStrategy):
|
||||
if on_layer_loaded is not None:
|
||||
on_layer_loaded(i, total)
|
||||
|
||||
def log_info(stuff: nn.Module, name: str):
    """Log the shape and dtype of a module's weight, scales and biases.

    Emits a header line naming the module, then one line each for
    ``stuff.weight``, ``stuff.scales`` and ``stuff.biases``. For scales and
    biases, ``None`` is logged when the attribute is missing or is ``None``.

    Args:
        stuff: Module to inspect. It must expose a ``weight`` attribute that
            has ``shape`` and ``dtype``; a missing ``weight`` raises
            ``AttributeError``, as it did before this change.
        name: Human-readable label used in the header log line.
    """
    logger.info(f"Info for {name}:")

    weights = stuff.weight
    logger.info(f"Weights: {weights.shape} {weights.dtype}")

    # Use getattr(..., None) instead of hasattr(): an attribute that is
    # present but set to None would pass hasattr() and then crash on
    # `.shape`. Treat "missing" and "None" the same way.
    for label, attr in (("Scales", "scales"), ("Biases", "biases")):
        tensor = getattr(stuff, attr, None)
        if tensor is None:
            logger.info(f"{label}: None")
        else:
            logger.info(f"{label}: {tensor.shape} {tensor.dtype}")
|
||||
|
||||
if i == 9:
|
||||
if getattr(layer.mlp, "shared_experts", None) is not None:
|
||||
log_info(
|
||||
layer.mlp.shared_experts.down_proj, "Shared experts Down Proj"
|
||||
)
|
||||
|
||||
log_info(layer.mlp.switch_mlp.gate_proj, "Switch MLP Gate Proj")
|
||||
|
||||
log_info(layer.self_attn.o_proj, "Self Attn O_proj")
|
||||
|
||||
return model
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user