migrate model cards to .toml files

dashboard: add peer: true to package lock (#1162)
This happens every time I run npm install, so let's upstream it. ## Testing: the dashboard builds and renders.
2026-01-16 01:51:03 -05:00 · 2026-01-15 17:07:48 +00:00 · 2026-01-15 17:01:43 +00:00 · 2026-01-15 16:51:46 +00:00 · 2026-01-15 13:21:58 +00:00
45 changed files with 1309 additions and 997 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -4340,25 +4340,6 @@ dependencies = [
 "libc",
 ]

-[[package]]
-name = "system_custodian"
-version = "0.0.1"
-dependencies = [
- "delegate",
- "derive_more",
- "either",
- "extend",
- "futures",
- "futures-timer",
- "impl-trait-for-tuples",
- "keccak-const",
- "log",
- "thiserror 2.0.17",
- "tokio",
- "tracing-subscriber",
- "util",
-]
-
 [[package]]
 name = "tagptr"
 version = "0.2.0"
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -3,7 +3,6 @@ resolver = "3"
 members = [
    "rust/networking",
    "rust/exo_pyo3_bindings",
-    "rust/system_custodian",
    "rust/util",
 ]

@@ -25,7 +24,6 @@ opt-level = 3
 [workspace.dependencies]
 ## Crate members as common dependencies
 networking = { path = "rust/networking" }
-system_custodian = { path = "rust/system_custodian" }
 util = { path = "rust/util" }

 # Proc-macro authoring tools
--- a/dashboard/package-lock.json
+++ b/dashboard/package-lock.json
@@ -863,6 +863,7 @@
 			"integrity": "sha512-oH8tXw7EZnie8FdOWYrF7Yn4IKrqTFHhXvl8YxXxbKwTMcD/5NNCryUSEXRk2ZR4ojnub0P8rNrsVGHXWqIDtA==",
 			"dev": true,
 			"license": "MIT",
+			"peer": true,
 			"dependencies": {
 				"@standard-schema/spec": "^1.0.0",
 				"@sveltejs/acorn-typescript": "^1.0.5",
@@ -902,6 +903,7 @@
 			"integrity": "sha512-Y1Cs7hhTc+a5E9Va/xwKlAJoariQyHY+5zBgCZg4PFWNYQ1nMN9sjK1zhw1gK69DuqVP++sht/1GZg1aRwmAXQ==",
 			"dev": true,
 			"license": "MIT",
+			"peer": true,
 			"dependencies": {
 				"@sveltejs/vite-plugin-svelte-inspector": "^4.0.1",
 				"debug": "^4.4.1",
@@ -1518,6 +1520,7 @@
 			"integrity": "sha512-LCCV0HdSZZZb34qifBsyWlUmok6W7ouER+oQIGBScS8EsZsQbrtFTUrDX4hOl+CS6p7cnNC4td+qrSVGSCTUfQ==",
 			"dev": true,
 			"license": "MIT",
+			"peer": true,
 			"dependencies": {
 				"undici-types": "~6.21.0"
 			}
@@ -1527,6 +1530,7 @@
 			"resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz",
 			"integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==",
 			"license": "MIT",
+			"peer": true,
 			"bin": {
 				"acorn": "bin/acorn"
 			},
@@ -1939,6 +1943,7 @@
 			"integrity": "sha512-fmTRWbNMmsmWq6xJV8D19U/gw/bwrHfNXxrIN+HfZgnzqTHp9jOmKMhsTUjXOJnZOdZY9Q28y4yebKzqDKlxlQ==",
 			"dev": true,
 			"license": "ISC",
+			"peer": true,
 			"engines": {
 				"node": ">=12"
 			}
@@ -2646,6 +2651,7 @@
 			"integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==",
 			"dev": true,
 			"license": "MIT",
+			"peer": true,
 			"engines": {
 				"node": ">=12"
 			},
@@ -2833,6 +2839,7 @@
 			"resolved": "https://registry.npmjs.org/svelte/-/svelte-5.45.3.tgz",
 			"integrity": "sha512-ngKXNhNvwPzF43QqEhDOue7TQTrG09em1sd4HBxVF0Wr2gopAmdEWan+rgbdgK4fhBtSOTJO8bYU4chUG7VXZQ==",
 			"license": "MIT",
+			"peer": true,
 			"dependencies": {
 				"@jridgewell/remapping": "^2.3.4",
 				"@jridgewell/sourcemap-codec": "^1.5.0",
@@ -2977,6 +2984,7 @@
 			"integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==",
 			"dev": true,
 			"license": "Apache-2.0",
+			"peer": true,
 			"bin": {
 				"tsc": "bin/tsc",
 				"tsserver": "bin/tsserver"
@@ -2998,6 +3006,7 @@
 			"integrity": "sha512-+Oxm7q9hDoLMyJOYfUYBuHQo+dkAloi33apOPP56pzj+vsdJDzr+j1NISE5pyaAuKL4A3UD34qd0lx5+kfKp2g==",
 			"dev": true,
 			"license": "MIT",
+			"peer": true,
 			"dependencies": {
 				"esbuild": "^0.25.0",
 				"fdir": "^6.4.4",
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -23,6 +23,7 @@ dependencies = [
    "tiktoken>=0.12.0", # required for kimi k2 tokenizer
    "hypercorn>=0.18.0",
    "openai-harmony>=0.0.8",
+    "tomlkit>=0.14.0",
 ]

 [project.scripts]
--- a/resources/model_cards/deepseek-v3.1-4bit.toml
+++ b/resources/model_cards/deepseek-v3.1-4bit.toml
@@ -0,0 +1,15 @@
+short_id = "deepseek-v3.1-4bit"
+model_id = "mlx-community/DeepSeek-V3.1-4bit"
+name = "DeepSeek V3.1 (4-bit)"
+description = "DeepSeek V3.1 is a large language model trained on the DeepSeek V3.1 dataset."
+tags = []
+
+[metadata]
+model_id = "mlx-community/DeepSeek-V3.1-4bit"
+pretty_name = "DeepSeek V3.1 (4-bit)"
+n_layers = 61
+hidden_size = 7168
+supports_tensor = true
+
+[metadata.storage_size]
+in_bytes = 405874409472
--- a/resources/model_cards/deepseek-v3.1-8bit.toml
+++ b/resources/model_cards/deepseek-v3.1-8bit.toml
@@ -0,0 +1,15 @@
+short_id = "deepseek-v3.1-8bit"
+model_id = "mlx-community/DeepSeek-V3.1-8bit"
+name = "DeepSeek V3.1 (8-bit)"
+description = "DeepSeek V3.1 is a large language model trained on the DeepSeek V3.1 dataset."
+tags = []
+
+[metadata]
+model_id = "mlx-community/DeepSeek-V3.1-8bit"
+pretty_name = "DeepSeek V3.1 (8-bit)"
+n_layers = 61
+hidden_size = 7168
+supports_tensor = true
+
+[metadata.storage_size]
+in_bytes = 765577920512
--- a/resources/model_cards/glm-4.5-air-8bit.toml
+++ b/resources/model_cards/glm-4.5-air-8bit.toml
@@ -0,0 +1,15 @@
+short_id = "glm-4.5-air-8bit"
+model_id = "mlx-community/GLM-4.5-Air-8bit"
+name = "GLM 4.5 Air 8bit"
+description = "GLM 4.5 Air 8bit"
+tags = []
+
+[metadata]
+model_id = "mlx-community/GLM-4.5-Air-8bit"
+pretty_name = "GLM 4.5 Air 8bit"
+n_layers = 46
+hidden_size = 4096
+supports_tensor = false
+
+[metadata.storage_size]
+in_bytes = 122406567936
--- a/resources/model_cards/glm-4.5-air-bf16.toml
+++ b/resources/model_cards/glm-4.5-air-bf16.toml
@@ -0,0 +1,15 @@
+short_id = "glm-4.5-air-bf16"
+model_id = "mlx-community/GLM-4.5-Air-bf16"
+name = "GLM 4.5 Air bf16"
+description = "GLM 4.5 Air bf16"
+tags = []
+
+[metadata]
+model_id = "mlx-community/GLM-4.5-Air-bf16"
+pretty_name = "GLM 4.5 Air bf16"
+n_layers = 46
+hidden_size = 4096
+supports_tensor = true
+
+[metadata.storage_size]
+in_bytes = 229780750336
--- a/resources/model_cards/glm-4.7-4bit.toml
+++ b/resources/model_cards/glm-4.7-4bit.toml
@@ -0,0 +1,15 @@
+short_id = "glm-4.7-4bit"
+model_id = "mlx-community/GLM-4.7-4bit"
+name = "GLM 4.7 4bit"
+description = "GLM 4.7 4bit"
+tags = []
+
+[metadata]
+model_id = "mlx-community/GLM-4.7-4bit"
+pretty_name = "GLM 4.7 4bit"
+n_layers = 91
+hidden_size = 5120
+supports_tensor = true
+
+[metadata.storage_size]
+in_bytes = 198556925568
--- a/resources/model_cards/glm-4.7-6bit.toml
+++ b/resources/model_cards/glm-4.7-6bit.toml
@@ -0,0 +1,15 @@
+short_id = "glm-4.7-6bit"
+model_id = "mlx-community/GLM-4.7-6bit"
+name = "GLM 4.7 6bit"
+description = "GLM 4.7 6bit"
+tags = []
+
+[metadata]
+model_id = "mlx-community/GLM-4.7-6bit"
+pretty_name = "GLM 4.7 6bit"
+n_layers = 91
+hidden_size = 5120
+supports_tensor = true
+
+[metadata.storage_size]
+in_bytes = 286737579648
--- a/resources/model_cards/glm-4.7-8bit-gs32.toml
+++ b/resources/model_cards/glm-4.7-8bit-gs32.toml
@@ -0,0 +1,15 @@
+short_id = "glm-4.7-8bit-gs32"
+model_id = "mlx-community/GLM-4.7-8bit-gs32"
+name = "GLM 4.7 8bit (gs32)"
+description = "GLM 4.7 8bit (gs32)"
+tags = []
+
+[metadata]
+model_id = "mlx-community/GLM-4.7-8bit-gs32"
+pretty_name = "GLM 4.7 8bit (gs32)"
+n_layers = 91
+hidden_size = 5120
+supports_tensor = true
+
+[metadata.storage_size]
+in_bytes = 396963397248
--- a/resources/model_cards/gpt-oss-120b-MXFP4-Q8.toml
+++ b/resources/model_cards/gpt-oss-120b-MXFP4-Q8.toml
@@ -0,0 +1,15 @@
+short_id = "gpt-oss-120b-MXFP4-Q8"
+model_id = "mlx-community/gpt-oss-120b-MXFP4-Q8"
+name = "GPT-OSS 120B (MXFP4-Q8, MLX)"
+description = "OpenAI's GPT-OSS 120B is a 117B-parameter Mixture-of-Experts model designed for high-reasoning and general-purpose use; this variant is a 4-bit MLX conversion for Apple Silicon."
+tags = []
+
+[metadata]
+model_id = "mlx-community/gpt-oss-120b-MXFP4-Q8"
+pretty_name = "GPT-OSS 120B (MXFP4-Q8, MLX)"
+n_layers = 36
+hidden_size = 2880
+supports_tensor = true
+
+[metadata.storage_size]
+in_bytes = 70652212224
--- a/resources/model_cards/gpt-oss-20b-4bit.toml
+++ b/resources/model_cards/gpt-oss-20b-4bit.toml
@@ -0,0 +1,15 @@
+short_id = "gpt-oss-20b-4bit"
+model_id = "mlx-community/gpt-oss-20b-MXFP4-Q4"
+name = "GPT-OSS 20B (MXFP4-Q4, MLX)"
+description = "OpenAI's GPT-OSS 20B is a medium-sized MoE model for lower-latency and local or specialized use cases; this MLX variant uses MXFP4 4-bit quantization."
+tags = []
+
+[metadata]
+model_id = "mlx-community/gpt-oss-20b-MXFP4-Q4"
+pretty_name = "GPT-OSS 20B (MXFP4-Q4, MLX)"
+n_layers = 24
+hidden_size = 2880
+supports_tensor = true
+
+[metadata.storage_size]
+in_bytes = 12025908224
--- a/resources/model_cards/kimi-k2-instruct-4bit.toml
+++ b/resources/model_cards/kimi-k2-instruct-4bit.toml
@@ -0,0 +1,15 @@
+short_id = "kimi-k2-instruct-4bit"
+model_id = "mlx-community/Kimi-K2-Instruct-4bit"
+name = "Kimi K2 Instruct (4-bit)"
+description = "Kimi K2 is a large language model trained on the Kimi K2 dataset."
+tags = []
+
+[metadata]
+model_id = "mlx-community/Kimi-K2-Instruct-4bit"
+pretty_name = "Kimi K2 Instruct (4-bit)"
+n_layers = 61
+hidden_size = 7168
+supports_tensor = true
+
+[metadata.storage_size]
+in_bytes = 620622774272
--- a/resources/model_cards/kimi-k2-thinking.toml
+++ b/resources/model_cards/kimi-k2-thinking.toml
@@ -0,0 +1,15 @@
+short_id = "kimi-k2-thinking"
+model_id = "mlx-community/Kimi-K2-Thinking"
+name = "Kimi K2 Thinking (4-bit)"
+description = "Kimi K2 Thinking is the latest, most capable version of open-source thinking model."
+tags = []
+
+[metadata]
+model_id = "mlx-community/Kimi-K2-Thinking"
+pretty_name = "Kimi K2 Thinking (4-bit)"
+n_layers = 61
+hidden_size = 7168
+supports_tensor = true
+
+[metadata.storage_size]
+in_bytes = 706522120192
--- a/resources/model_cards/llama-3.1-70b.toml
+++ b/resources/model_cards/llama-3.1-70b.toml
@@ -0,0 +1,15 @@
+short_id = "llama-3.1-70b"
+model_id = "mlx-community/Meta-Llama-3.1-70B-Instruct-4bit"
+name = "Llama 3.1 70B (4-bit)"
+description = "Llama 3.1 is a large language model trained on the Llama 3.1 dataset."
+tags = []
+
+[metadata]
+model_id = "mlx-community/Meta-Llama-3.1-70B-Instruct-4bit"
+pretty_name = "Llama 3.1 70B (4-bit)"
+n_layers = 80
+hidden_size = 8192
+supports_tensor = true
+
+[metadata.storage_size]
+in_bytes = 40652242944
--- a/resources/model_cards/llama-3.1-8b-8bit.toml
+++ b/resources/model_cards/llama-3.1-8b-8bit.toml
@@ -0,0 +1,15 @@
+short_id = "llama-3.1-8b-8bit"
+model_id = "mlx-community/Meta-Llama-3.1-8B-Instruct-8bit"
+name = "Llama 3.1 8B (8-bit)"
+description = "Llama 3.1 is a large language model trained on the Llama 3.1 dataset."
+tags = []
+
+[metadata]
+model_id = "mlx-community/Meta-Llama-3.1-8B-Instruct-8bit"
+pretty_name = "Llama 3.1 8B (8-bit)"
+n_layers = 32
+hidden_size = 4096
+supports_tensor = true
+
+[metadata.storage_size]
+in_bytes = 8954839040
--- a/resources/model_cards/llama-3.1-8b-bf16.toml
+++ b/resources/model_cards/llama-3.1-8b-bf16.toml
@@ -0,0 +1,15 @@
+short_id = "llama-3.1-8b-bf16"
+model_id = "mlx-community/Meta-Llama-3.1-8B-Instruct-bf16"
+name = "Llama 3.1 8B (BF16)"
+description = "Llama 3.1 is a large language model trained on the Llama 3.1 dataset."
+tags = []
+
+[metadata]
+model_id = "mlx-community/Meta-Llama-3.1-8B-Instruct-bf16"
+pretty_name = "Llama 3.1 8B (BF16)"
+n_layers = 32
+hidden_size = 4096
+supports_tensor = true
+
+[metadata.storage_size]
+in_bytes = 16882073600
--- a/resources/model_cards/llama-3.1-8b.toml
+++ b/resources/model_cards/llama-3.1-8b.toml
@@ -0,0 +1,15 @@
+short_id = "llama-3.1-8b"
+model_id = "mlx-community/Meta-Llama-3.1-8B-Instruct-4bit"
+name = "Llama 3.1 8B (4-bit)"
+description = "Llama 3.1 is a large language model trained on the Llama 3.1 dataset."
+tags = []
+
+[metadata]
+model_id = "mlx-community/Meta-Llama-3.1-8B-Instruct-4bit"
+pretty_name = "Llama 3.1 8B (4-bit)"
+n_layers = 32
+hidden_size = 4096
+supports_tensor = true
+
+[metadata.storage_size]
+in_bytes = 4637851648
--- a/resources/model_cards/llama-3.2-1b.toml
+++ b/resources/model_cards/llama-3.2-1b.toml
@@ -0,0 +1,15 @@
+short_id = "llama-3.2-1b"
+model_id = "mlx-community/Llama-3.2-1B-Instruct-4bit"
+name = "Llama 3.2 1B (4-bit)"
+description = "Llama 3.2 is a large language model trained on the Llama 3.2 dataset."
+tags = []
+
+[metadata]
+model_id = "mlx-community/Llama-3.2-1B-Instruct-4bit"
+pretty_name = "Llama 3.2 1B (4-bit)"
+n_layers = 16
+hidden_size = 2048
+supports_tensor = true
+
+[metadata.storage_size]
+in_bytes = 729808896
--- a/resources/model_cards/llama-3.2-3b-8bit.toml
+++ b/resources/model_cards/llama-3.2-3b-8bit.toml
@@ -0,0 +1,15 @@
+short_id = "llama-3.2-3b-8bit"
+model_id = "mlx-community/Llama-3.2-3B-Instruct-8bit"
+name = "Llama 3.2 3B (8-bit)"
+description = "Llama 3.2 is a large language model trained on the Llama 3.2 dataset."
+tags = []
+
+[metadata]
+model_id = "mlx-community/Llama-3.2-3B-Instruct-8bit"
+pretty_name = "Llama 3.2 3B (8-bit)"
+n_layers = 28
+hidden_size = 3072
+supports_tensor = true
+
+[metadata.storage_size]
+in_bytes = 3501195264
--- a/resources/model_cards/llama-3.2-3b.toml
+++ b/resources/model_cards/llama-3.2-3b.toml
@@ -0,0 +1,15 @@
+short_id = "llama-3.2-3b"
+model_id = "mlx-community/Llama-3.2-3B-Instruct-4bit"
+name = "Llama 3.2 3B (4-bit)"
+description = "Llama 3.2 is a large language model trained on the Llama 3.2 dataset."
+tags = []
+
+[metadata]
+model_id = "mlx-community/Llama-3.2-3B-Instruct-4bit"
+pretty_name = "Llama 3.2 3B (4-bit)"
+n_layers = 28
+hidden_size = 3072
+supports_tensor = true
+
+[metadata.storage_size]
+in_bytes = 1863319552
--- a/resources/model_cards/llama-3.3-70b-8bit.toml
+++ b/resources/model_cards/llama-3.3-70b-8bit.toml
@@ -0,0 +1,15 @@
+short_id = "llama-3.3-70b-8bit"
+model_id = "mlx-community/Llama-3.3-70B-Instruct-8bit"
+name = "Llama 3.3 70B (8-bit)"
+description = "The Meta Llama 3.3 multilingual large language model (LLM) is an instruction tuned generative model in 70B (text in/text out)"
+tags = []
+
+[metadata]
+model_id = "mlx-community/Llama-3.3-70B-Instruct-8bit"
+pretty_name = "Llama 3.3 70B (8-bit)"
+n_layers = 80
+hidden_size = 8192
+supports_tensor = true
+
+[metadata.storage_size]
+in_bytes = 76799803392
--- a/resources/model_cards/llama-3.3-70b-fp16.toml
+++ b/resources/model_cards/llama-3.3-70b-fp16.toml
@@ -0,0 +1,15 @@
+short_id = "llama-3.3-70b-fp16"
+model_id = "mlx-community/llama-3.3-70b-instruct-fp16"
+name = "Llama 3.3 70B (FP16)"
+description = "The Meta Llama 3.3 multilingual large language model (LLM) is an instruction tuned generative model in 70B (text in/text out)"
+tags = []
+
+[metadata]
+model_id = "mlx-community/llama-3.3-70b-instruct-fp16"
+pretty_name = "Llama 3.3 70B (FP16)"
+n_layers = 80
+hidden_size = 8192
+supports_tensor = true
+
+[metadata.storage_size]
+in_bytes = 144383672320
--- a/resources/model_cards/llama-3.3-70b.toml
+++ b/resources/model_cards/llama-3.3-70b.toml
@@ -0,0 +1,15 @@
+short_id = "llama-3.3-70b"
+model_id = "mlx-community/Llama-3.3-70B-Instruct-4bit"
+name = "Llama 3.3 70B (4-bit)"
+description = "The Meta Llama 3.3 multilingual large language model (LLM) is an instruction tuned generative model in 70B (text in/text out)"
+tags = []
+
+[metadata]
+model_id = "mlx-community/Llama-3.3-70B-Instruct-4bit"
+pretty_name = "Llama 3.3 70B (4-bit)"
+n_layers = 80
+hidden_size = 8192
+supports_tensor = true
+
+[metadata.storage_size]
+in_bytes = 40652242944
--- a/resources/model_cards/minimax-m2.1-3bit.toml
+++ b/resources/model_cards/minimax-m2.1-3bit.toml
@@ -0,0 +1,15 @@
+short_id = "minimax-m2.1-3bit"
+model_id = "mlx-community/MiniMax-M2.1-3bit"
+name = "MiniMax M2.1 3bit"
+description = "MiniMax M2.1 3bit"
+tags = []
+
+[metadata]
+model_id = "mlx-community/MiniMax-M2.1-3bit"
+pretty_name = "MiniMax M2.1 3bit"
+n_layers = 61
+hidden_size = 3072
+supports_tensor = true
+
+[metadata.storage_size]
+in_bytes = 100086644736
--- a/resources/model_cards/minimax-m2.1-8bit.toml
+++ b/resources/model_cards/minimax-m2.1-8bit.toml
@@ -0,0 +1,15 @@
+short_id = "minimax-m2.1-8bit"
+model_id = "mlx-community/MiniMax-M2.1-8bit"
+name = "MiniMax M2.1 8bit"
+description = "MiniMax M2.1 8bit"
+tags = []
+
+[metadata]
+model_id = "mlx-community/MiniMax-M2.1-8bit"
+pretty_name = "MiniMax M2.1 8bit"
+n_layers = 61
+hidden_size = 3072
+supports_tensor = true
+
+[metadata.storage_size]
+in_bytes = 242986745856
--- a/resources/model_cards/qwen3-0.6b-8bit.toml
+++ b/resources/model_cards/qwen3-0.6b-8bit.toml
@@ -0,0 +1,15 @@
+short_id = "qwen3-0.6b-8bit"
+model_id = "mlx-community/Qwen3-0.6B-8bit"
+name = "Qwen3 0.6B (8-bit)"
+description = "Qwen3 0.6B is a large language model trained on the Qwen3 0.6B dataset."
+tags = []
+
+[metadata]
+model_id = "mlx-community/Qwen3-0.6B-8bit"
+pretty_name = "Qwen3 0.6B (8-bit)"
+n_layers = 28
+hidden_size = 1024
+supports_tensor = false
+
+[metadata.storage_size]
+in_bytes = 698351616
--- a/resources/model_cards/qwen3-0.6b.toml
+++ b/resources/model_cards/qwen3-0.6b.toml
@@ -0,0 +1,15 @@
+short_id = "qwen3-0.6b"
+model_id = "mlx-community/Qwen3-0.6B-4bit"
+name = "Qwen3 0.6B (4-bit)"
+description = "Qwen3 0.6B is a large language model trained on the Qwen3 0.6B dataset."
+tags = []
+
+[metadata]
+model_id = "mlx-community/Qwen3-0.6B-4bit"
+pretty_name = "Qwen3 0.6B (4-bit)"
+n_layers = 28
+hidden_size = 1024
+supports_tensor = false
+
+[metadata.storage_size]
+in_bytes = 342884352
--- a/resources/model_cards/qwen3-235b-a22b-4bit.toml
+++ b/resources/model_cards/qwen3-235b-a22b-4bit.toml
@@ -0,0 +1,15 @@
+short_id = "qwen3-235b-a22b-4bit"
+model_id = "mlx-community/Qwen3-235B-A22B-Instruct-2507-4bit"
+name = "Qwen3 235B A22B (4-bit)"
+description = "Qwen3 235B (Active 22B) is a large language model trained on the Qwen3 235B dataset."
+tags = []
+
+[metadata]
+model_id = "mlx-community/Qwen3-235B-A22B-Instruct-2507-4bit"
+pretty_name = "Qwen3 235B A22B (4-bit)"
+n_layers = 94
+hidden_size = 4096
+supports_tensor = true
+
+[metadata.storage_size]
+in_bytes = 141733920768
--- a/resources/model_cards/qwen3-235b-a22b-8bit.toml
+++ b/resources/model_cards/qwen3-235b-a22b-8bit.toml
@@ -0,0 +1,15 @@
+short_id = "qwen3-235b-a22b-8bit"
+model_id = "mlx-community/Qwen3-235B-A22B-Instruct-2507-8bit"
+name = "Qwen3 235B A22B (8-bit)"
+description = "Qwen3 235B (Active 22B) is a large language model trained on the Qwen3 235B dataset."
+tags = []
+
+[metadata]
+model_id = "mlx-community/Qwen3-235B-A22B-Instruct-2507-8bit"
+pretty_name = "Qwen3 235B A22B (8-bit)"
+n_layers = 94
+hidden_size = 4096
+supports_tensor = true
+
+[metadata.storage_size]
+in_bytes = 268435456000
--- a/resources/model_cards/qwen3-30b-8bit.toml
+++ b/resources/model_cards/qwen3-30b-8bit.toml
@@ -0,0 +1,15 @@
+short_id = "qwen3-30b-8bit"
+model_id = "mlx-community/Qwen3-30B-A3B-8bit"
+name = "Qwen3 30B A3B (8-bit)"
+description = "Qwen3 30B is a large language model trained on the Qwen3 30B dataset."
+tags = []
+
+[metadata]
+model_id = "mlx-community/Qwen3-30B-A3B-8bit"
+pretty_name = "Qwen3 30B A3B (8-bit)"
+n_layers = 48
+hidden_size = 2048
+supports_tensor = true
+
+[metadata.storage_size]
+in_bytes = 33279705088
--- a/resources/model_cards/qwen3-30b.toml
+++ b/resources/model_cards/qwen3-30b.toml
@@ -0,0 +1,15 @@
+short_id = "qwen3-30b"
+model_id = "mlx-community/Qwen3-30B-A3B-4bit"
+name = "Qwen3 30B A3B (4-bit)"
+description = "Qwen3 30B is a large language model trained on the Qwen3 30B dataset."
+tags = []
+
+[metadata]
+model_id = "mlx-community/Qwen3-30B-A3B-4bit"
+pretty_name = "Qwen3 30B A3B (4-bit)"
+n_layers = 48
+hidden_size = 2048
+supports_tensor = true
+
+[metadata.storage_size]
+in_bytes = 17612931072
--- a/resources/model_cards/qwen3-80b-a3B-4bit.toml
+++ b/resources/model_cards/qwen3-80b-a3B-4bit.toml
@@ -0,0 +1,15 @@
+short_id = "qwen3-80b-a3B-4bit"
+model_id = "mlx-community/Qwen3-Next-80B-A3B-Instruct-4bit"
+name = "Qwen3 80B A3B (4-bit)"
+description = "Qwen3 80B"
+tags = []
+
+[metadata]
+model_id = "mlx-community/Qwen3-Next-80B-A3B-Instruct-4bit"
+pretty_name = "Qwen3 80B A3B (4-bit)"
+n_layers = 48
+hidden_size = 2048
+supports_tensor = true
+
+[metadata.storage_size]
+in_bytes = 46976204800
--- a/resources/model_cards/qwen3-80b-a3B-8bit.toml
+++ b/resources/model_cards/qwen3-80b-a3B-8bit.toml
@@ -0,0 +1,15 @@
+short_id = "qwen3-80b-a3B-8bit"
+model_id = "mlx-community/Qwen3-Next-80B-A3B-Instruct-8bit"
+name = "Qwen3 80B A3B (8-bit)"
+description = "Qwen3 80B"
+tags = []
+
+[metadata]
+model_id = "mlx-community/Qwen3-Next-80B-A3B-Instruct-8bit"
+pretty_name = "Qwen3 80B A3B (8-bit)"
+n_layers = 48
+hidden_size = 2048
+supports_tensor = true
+
+[metadata.storage_size]
+in_bytes = 88814387200
--- a/resources/model_cards/qwen3-80b-a3B-thinking-4bit.toml
+++ b/resources/model_cards/qwen3-80b-a3B-thinking-4bit.toml
@@ -0,0 +1,15 @@
+short_id = "qwen3-80b-a3B-thinking-4bit"
+model_id = "mlx-community/Qwen3-Next-80B-A3B-Thinking-4bit"
+name = "Qwen3 80B A3B Thinking (4-bit)"
+description = "Qwen3 80B Reasoning model"
+tags = []
+
+[metadata]
+model_id = "mlx-community/Qwen3-Next-80B-A3B-Thinking-4bit"
+pretty_name = "Qwen3 80B A3B Thinking (4-bit)"
+n_layers = 48
+hidden_size = 2048
+supports_tensor = true
+
+[metadata.storage_size]
+in_bytes = 88814387200  # NOTE(review): same value as the 8-bit cards; the 4-bit instruct card lists 46976204800 - confirm
--- a/resources/model_cards/qwen3-80b-a3B-thinking-8bit.toml
+++ b/resources/model_cards/qwen3-80b-a3B-thinking-8bit.toml
@@ -0,0 +1,15 @@
+short_id = "qwen3-80b-a3B-thinking-8bit"
+model_id = "mlx-community/Qwen3-Next-80B-A3B-Thinking-8bit"
+name = "Qwen3 80B A3B Thinking (8-bit)"
+description = "Qwen3 80B Reasoning model"
+tags = []
+
+[metadata]
+model_id = "mlx-community/Qwen3-Next-80B-A3B-Thinking-8bit"
+pretty_name = "Qwen3 80B A3B Thinking (8-bit)"
+n_layers = 48
+hidden_size = 2048
+supports_tensor = true
+
+[metadata.storage_size]
+in_bytes = 88814387200
--- a/resources/model_cards/qwen3-coder-480b-a35b-4bit.toml
+++ b/resources/model_cards/qwen3-coder-480b-a35b-4bit.toml
@@ -0,0 +1,15 @@
+short_id = "qwen3-coder-480b-a35b-4bit"
+model_id = "mlx-community/Qwen3-Coder-480B-A35B-Instruct-4bit"
+name = "Qwen3 Coder 480B A35B (4-bit)"
+description = "Qwen3 Coder 480B (Active 35B) is a large language model trained on the Qwen3 Coder 480B dataset."
+tags = []
+
+[metadata]
+model_id = "mlx-community/Qwen3-Coder-480B-A35B-Instruct-4bit"
+pretty_name = "Qwen3 Coder 480B A35B (4-bit)"
+n_layers = 62
+hidden_size = 6144
+supports_tensor = true
+
+[metadata.storage_size]
+in_bytes = 289910292480
--- a/resources/model_cards/qwen3-coder-480b-a35b-8bit.toml
+++ b/resources/model_cards/qwen3-coder-480b-a35b-8bit.toml
@@ -0,0 +1,15 @@
+short_id = "qwen3-coder-480b-a35b-8bit"
+model_id = "mlx-community/Qwen3-Coder-480B-A35B-Instruct-8bit"
+name = "Qwen3 Coder 480B A35B (8-bit)"
+description = "Qwen3 Coder 480B (Active 35B) is a large language model trained on the Qwen3 Coder 480B dataset."
+tags = []
+
+[metadata]
+model_id = "mlx-community/Qwen3-Coder-480B-A35B-Instruct-8bit"
+pretty_name = "Qwen3 Coder 480B A35B (8-bit)"
+n_layers = 62
+hidden_size = 6144
+supports_tensor = true
+
+[metadata.storage_size]
+in_bytes = 579820584960
--- a/rust/parts.nix
+++ b/rust/parts.nix
@@ -81,20 +81,6 @@

      config = {
        packages = {
-          # The system_custodian binary
-          system_custodian = craneLib.buildPackage (
-            commonArgs
-            // {
-              inherit cargoArtifacts;
-              cargoExtraArgs = "-p system_custodian";
-
-              meta = {
-                description = "System custodian daemon for exo";
-                mainProgram = "system_custodian";
-              };
-            }
-          );
-
          # Python bindings wheel via maturin
          exo_pyo3_bindings = craneLib.buildPackage (
            commonArgs
--- a/rust/system_custodian/Cargo.toml
+++ b/rust/system_custodian/Cargo.toml
@@ -1,47 +0,0 @@
-[package]
-name = "system_custodian"
-version = { workspace = true }
-edition = { workspace = true }
-publish = false
-
-[lib]
-doctest = false
-name = "system_custodian"
-path = "src/lib.rs"
-
-[[bin]]
-path = "src/bin/main.rs"
-name = "system_custodian"
-doc = false
-
-[lints]
-workspace = true
-
-[dependencies]
-# datastructures
-either = { workspace = true }
-
-# macro dependencies
-extend = { workspace = true }
-delegate = { workspace = true }
-impl-trait-for-tuples = { workspace = true }
-derive_more = { workspace = true }
-
-# async
-tokio = { workspace = true, features = ["full"] }
-futures = { workspace = true }
-futures-timer = { workspace = true }
-
-# utility dependencies
-util = { workspace = true }
-thiserror = { workspace = true }
-#internment = { workspace = true }
-#recursion = { workspace = true }
-#generativity = { workspace = true }
-#itertools = { workspace = true }
-tracing-subscriber = { version = "0.3.19", features = ["default", "env-filter"] }
-keccak-const = { workspace = true }
-
-# tracing/logging
-log = { workspace = true }
-
--- a/rust/system_custodian/src/bin/main.rs
+++ b/rust/system_custodian/src/bin/main.rs
@@ -1,4 +0,0 @@
-//! TODO: documentation
-//!
-
-fn main() {}
--- a/rust/system_custodian/src/lib.rs
+++ b/rust/system_custodian/src/lib.rs
@@ -1,69 +0,0 @@
-//! This crate defines the logic of, and ways to interact with, Exo's **_System Custodian_** daemon.
-//!
-//! The **_System Custodian_** daemon is supposed to be a long-living process that precedes the
-//! launch of the Exo application, and responsible for ensuring the system (configuration, settings,
-//! etc.) is in an appropriate state to facilitate the running of Exo application.
-//! The **_System Custodian_** daemon shall expose a [D-Bus](https://www.freedesktop.org/wiki/Software/dbus/)
-//! service which Exo application use to _control & query_ it.
-//!
-//! # Lifecycle
-//! When the Exo application starts, it will _wake_ the **_System Custodian_** daemon for the
-//! duration of its lifetime, and after it has terminated the daemon will go back to sleep. When
-//! the daemon wakes up, it will configure the system into a state suitable for the Exo Application;
-//! When the daemon goes to sleep, it will revert those changes as much as it can in case they were
-//! destructive to the user's pre-existing configurations.
-//!
-//! # Responsibilities
-//! TODO: these are purely on MacOS, but change to be more broad
-//! The **_System Custodian_** daemon is responsible for using System Configuration framework to
-//!  1. duplicate the current network set
-//!  2. modify existing services to turn on IPv6 if not there
-//!  3. remove any bridge services & add any missing services that AREN'T bridge
-//! TODO: In the future:
-//!  1. run a dummy AWDL service to [allow for macOS peer-to-peer wireless networking](https://yggdrasil-network.github.io/2019/08/19/awdl.html)
-//!  2. toggle some GPU/memory configurations to speed up GPU (ask Alex what those configurations are)
-//!  3. if we ever decide to provide our **own network interfaces** that abstract over some userland
-//!     logic, this would be the place to spin that up.
-//!
-//! Then it will watch the SCDynamicStore for:
-//!  1. all __actual__ network interfaces -> collect information on them e.g. their BSD name, MAC
-//!     address, MTU, IPv6 addresses, etc. -> and set up watchers/notifiers to inform the DBus
-//!     interface of any changes
-//!  2. watch for any __undesirable__ changes to configuration and revert it
-//!
-//! It should somehow (probably through system sockets and/or BSD interface) trigger IPv6 NDP on
-//! each of the interfaces & also listen to/query for any changes on the OS routing cache??
-//! Basically emulate the `ping6 ff02::1%enX` and `ndp -an` commands BUT BETTER!!!
-//!  1. all that info should coalesce back to the overall state colleted -> should be queryable
-//!     over D-Bus
-//! TODO:
-//!  1. we might potentially add to this step a handshake of some kind...? To ensure that we can
-//!     ACTUALLY communicate with that machine over that link over e.g. TCP, UDP, etc. Will the
-//!     handshake require to know Node ID? Will the handshake require heartbeats? Who knows...
-//!  2. if we ever decide to write proprietary L2/L3 protocols for quicker communication,
-//!     e.g. [AF_NDRV](https://www.zerotier.com/blog/how-zerotier-eliminated-kernel-extensions-on-macos/)
-//!     for raw ethernet frame communication, or even a [custom thunderbolt PCIe driver](https://developer.apple.com/documentation/pcidriverkit/creating-custom-pcie-drivers-for-thunderbolt-devices),
-//!     then this would be the place to carry out discovery and propper handshakes with devices
-//!     on the other end of the link.
-//!
-
-// enable Rust-unstable features for convenience
-#![feature(trait_alias)]
-#![feature(stmt_expr_attributes)]
-#![feature(type_alias_impl_trait)]
-#![feature(specialization)]
-#![feature(unboxed_closures)]
-#![feature(const_trait_impl)]
-#![feature(fn_traits)]
-
-pub(crate) mod private {
-    // sealed traits support
-    pub trait Sealed {}
-    impl<T: ?Sized> Sealed for T {}
-}
-
-/// Namespace for all the type/trait aliases used by this crate.
-pub(crate) mod alias {}
-
-/// Namespace for crate-wide extension traits/methods
-pub(crate) mod ext {}
--- a/src/exo/shared/models/model_cards.py
+++ b/src/exo/shared/models/model_cards.py
@@ -1,3 +1,6 @@
+from anyio import Path, open_file
+import tomlkit 
+
 from exo.shared.types.memory import Memory
 from exo.shared.types.models import ModelId, ModelMetadata
 from exo.utils.pydantic_ext import CamelCaseModel
@@ -11,35 +14,24 @@ class ModelCard(CamelCaseModel):
    tags: list[str]
    metadata: ModelMetadata

+    @staticmethod
+    async def load(path: Path) -> "ModelCard":
+        """Read the TOML file at `path` and validate it into a ModelCard."""
+        # TOML is UTF-8 by specification; do not rely on the locale default.
+        async with await open_file(path, encoding="utf-8") as f:
+            py = tomlkit.loads(await f.read())
+        return ModelCard.model_validate(py)
+
+    async def save(self, path: Path) -> None:
+        """Serialise this card with `model_dump` and write it to `path` as TOML."""
+        data = tomlkit.dumps(self.model_dump())  # pyright: ignore[reportUnknownMemberType]
+        # Dump before opening so a serialisation error cannot truncate `path`.
+        async with await open_file(path, "w", encoding="utf-8") as f:
+            await f.write(data)
+

 MODEL_CARDS: dict[str, ModelCard] = {
    # deepseek v3
-    # "deepseek-v3-0324:4bit": ModelCard(
-    #     short_id="deepseek-v3-0324:4bit",
-    #     model_id="mlx-community/DeepSeek-V3-0324-4bit",
-    #     name="DeepSeek V3 0324 (4-bit)",
-    #     description="""DeepSeek V3 is a large language model trained on the DeepSeek V3 dataset.""",
-    #     tags=[],
-    #     metadata=ModelMetadata(
-    #         model_id=ModelId("mlx-community/DeepSeek-V3-0324-4bit"),
-    #         pretty_name="DeepSeek V3 0324 (4-bit)",
-    #         storage_size=Memory.from_kb(409706307),
-    #         n_layers=61,
-    #     ),
-    # ),
-    # "deepseek-v3-0324": ModelCard(
-    #     short_id="deepseek-v3-0324",
-    #     model_id="mlx-community/DeepSeek-v3-0324-8bit",
-    #     name="DeepSeek V3 0324 (8-bit)",
-    #     description="""DeepSeek V3 is a large language model trained on the DeepSeek V3 dataset.""",
-    #     tags=[],
-    #     metadata=ModelMetadata(
-    #         model_id=ModelId("mlx-community/DeepSeek-v3-0324-8bit"),
-    #         pretty_name="DeepSeek V3 0324 (8-bit)",
-    #         storage_size=Memory.from_kb(754706307),
-    #         n_layers=61,
-    #     ),
-    # ),
    "deepseek-v3.1-4bit": ModelCard(
        short_id="deepseek-v3.1-4bit",
        model_id=ModelId("mlx-community/DeepSeek-V3.1-4bit"),
@@ -70,65 +62,6 @@ MODEL_CARDS: dict[str, ModelCard] = {
            supports_tensor=True,
        ),
    ),
-    # "deepseek-v3.2": ModelCard(
-    #     short_id="deepseek-v3.2",
-    #     model_id=ModelId("mlx-community/DeepSeek-V3.2-8bit"),
-    #     name="DeepSeek V3.2 (8-bit)",
-    #     description="""DeepSeek V3.2 is a large language model trained on the DeepSeek V3.2 dataset.""",
-    #     tags=[],
-    #     metadata=ModelMetadata(
-    #         model_id=ModelId("mlx-community/DeepSeek-V3.2-8bit"),
-    #         pretty_name="DeepSeek V3.2 (8-bit)",
-    #         storage_size=Memory.from_kb(754706307),
-    #         n_layers=61,
-    #         hidden_size=7168,
-    #         supports_tensor=True,
-    #     ),
-    # ),
-    # "deepseek-v3.2-4bit": ModelCard(
-    #     short_id="deepseek-v3.2-4bit",
-    #     model_id=ModelId("mlx-community/DeepSeek-V3.2-4bit"),
-    #     name="DeepSeek V3.2 (4-bit)",
-    #     description="""DeepSeek V3.2 is a large language model trained on the DeepSeek V3.2 dataset.""",
-    #     tags=[],
-    #     metadata=ModelMetadata(
-    #         model_id=ModelId("mlx-community/DeepSeek-V3.2-4bit"),
-    #         pretty_name="DeepSeek V3.2 (4-bit)",
-    #         storage_size=Memory.from_kb(754706307 // 2),  # TODO !!!!!
-    #         n_layers=61,
-    #         hidden_size=7168,
-    #         supports_tensor=True,
-    #     ),
-    # ),
-    # deepseek r1
-    # "deepseek-r1-0528-4bit": ModelCard(
-    #     short_id="deepseek-r1-0528-4bit",
-    #     model_id="mlx-community/DeepSeek-R1-0528-4bit",
-    #     name="DeepSeek-R1-0528 (4-bit)",
-    #     description="""DeepSeek R1 is a large language model trained on the DeepSeek R1 dataset.""",
-    #     tags=[],
-    #     metadata=ModelMetadata(
-    #         model_id=ModelId("mlx-community/DeepSeek-R1-0528-4bit"),
-    #         pretty_name="DeepSeek R1 671B (4-bit)",
-    #         storage_size=Memory.from_kb(409706307),
-    #         n_layers=61,
-    #         hidden_size=7168,
-    #     ),
-    # ),
-    # "deepseek-r1-0528": ModelCard(
-    #     short_id="deepseek-r1-0528",
-    #     model_id="mlx-community/DeepSeek-R1-0528-8bit",
-    #     name="DeepSeek-R1-0528 (8-bit)",
-    #     description="""DeepSeek R1 is a large language model trained on the DeepSeek R1 dataset.""",
-    #     tags=[],
-    #     metadata=ModelMetadata(
-    #         model_id=ModelId("mlx-community/DeepSeek-R1-0528-8bit"),
-    #         pretty_name="DeepSeek R1 671B (8-bit)",
-    #         storage_size=Memory.from_bytes(754998771712),
-    #         n_layers=61,
-    # .       hidden_size=7168,
-    #     ),
-    # ),
    # kimi k2
    "kimi-k2-instruct-4bit": ModelCard(
        short_id="kimi-k2-instruct-4bit",
@@ -525,8 +458,9 @@ MODEL_CARDS: dict[str, ModelCard] = {
            supports_tensor=True,
        ),
    ),
-    # Needs to be quantized g32 or g16.
+    # glm 4.5
    "glm-4.5-air-8bit": ModelCard(
+        # Needs to be quantized g32 or g16 to work with tensor parallel
        short_id="glm-4.5-air-8bit",
        model_id=ModelId("mlx-community/GLM-4.5-Air-8bit"),
        name="GLM 4.5 Air 8bit",
@@ -556,6 +490,7 @@ MODEL_CARDS: dict[str, ModelCard] = {
            supports_tensor=True,
        ),
    ),
+    # glm 4.7
    "glm-4.7-4bit": ModelCard(
        short_id="glm-4.7-4bit",
        model_id=ModelId("mlx-community/GLM-4.7-4bit"),
@@ -601,6 +536,7 @@ MODEL_CARDS: dict[str, ModelCard] = {
            supports_tensor=True,
        ),
    ),
+    # minimax-m2
    "minimax-m2.1-8bit": ModelCard(
        short_id="minimax-m2.1-8bit",
        model_id=ModelId("mlx-community/MiniMax-M2.1-8bit"),
@@ -631,19 +567,4 @@ MODEL_CARDS: dict[str, ModelCard] = {
            supports_tensor=True,
        ),
    ),
-    # "devstral-2-123b-instruct-2512-8bit": ModelCard(
-    #     short_id="devstral-2-123b-instruct-2512-8bit",
-    #     model_id=ModelId("mlx-community/Devstral-2-123B-Instruct-2512-8bit"),
-    #     name="Devstral 2 123B Instruct 2512 (8-bit, MLX)",
-    #     description="""Mistral AI's Devstral 2 123B Instruct (2512) is an agentic coding model.""",
-    #     tags=[],
-    #     metadata=ModelMetadata(
-    #         model_id=ModelId("mlx-community/Devstral-2-123B-Instruct-2512-8bit"),
-    #         pretty_name="Devstral 2 123B Instruct 2512 (8-bit, MLX)",
-    #         storage_size=Memory.from_kb(133_000_000),
-    #         n_layers=88,
-    #         hidden_size=12288,
-    #         supports_tensor=True,
-    #     ),
-    # ),
 }
--- a/uv.lock
+++ b/uv.lock
Author	SHA1	Message	Date
Evan	642b1bb1b4	migrate model cards to .toml files	2026-01-15 17:07:48 +00:00
Evan Quiney	c22dad8a7d	dashboard: add peer: true to package lock (#1162 ) this happens every time i run npm install - lets upstream it ## testing dashboard builds and renders	2026-01-15 17:01:43 +00:00
Evan	4bc4d50685	rust: remove dead code the system custodian has been made unnecessary with the swift app - we can remove it ## testing everything still builds	2026-01-15 16:51:46 +00:00
Jake Hillion	e0aab46fd8	model_cards.py: clean up commented out code Clean up the commented out code and make sure the comments are unified. Carrying around the commented out code means people making changes to model_cards are supposed to update it, but that's not clear and won't be picked up by type checking etc. Drop it for now - it's in the git history. Also make the rest of the comments a bit more uniform, and place comments about a specific model card inside the model card (instead of above) so they don't get lost when code is added/moved around. Test plan: - my eyes	2026-01-15 13:21:58 +00:00