mirror of
https://github.com/exo-explore/exo.git
synced 2026-01-16 01:51:03 -05:00
Compare commits
4 Commits
sami/flash
...
model-card
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
642b1bb1b4 | ||
|
|
c22dad8a7d | ||
|
|
4bc4d50685 | ||
|
|
e0aab46fd8 |
19
Cargo.lock
generated
19
Cargo.lock
generated
@@ -4340,25 +4340,6 @@ dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "system_custodian"
|
||||
version = "0.0.1"
|
||||
dependencies = [
|
||||
"delegate",
|
||||
"derive_more",
|
||||
"either",
|
||||
"extend",
|
||||
"futures",
|
||||
"futures-timer",
|
||||
"impl-trait-for-tuples",
|
||||
"keccak-const",
|
||||
"log",
|
||||
"thiserror 2.0.17",
|
||||
"tokio",
|
||||
"tracing-subscriber",
|
||||
"util",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tagptr"
|
||||
version = "0.2.0"
|
||||
|
||||
@@ -3,7 +3,6 @@ resolver = "3"
|
||||
members = [
|
||||
"rust/networking",
|
||||
"rust/exo_pyo3_bindings",
|
||||
"rust/system_custodian",
|
||||
"rust/util",
|
||||
]
|
||||
|
||||
@@ -25,7 +24,6 @@ opt-level = 3
|
||||
[workspace.dependencies]
|
||||
## Crate members as common dependencies
|
||||
networking = { path = "rust/networking" }
|
||||
system_custodian = { path = "rust/system_custodian" }
|
||||
util = { path = "rust/util" }
|
||||
|
||||
# Proc-macro authoring tools
|
||||
|
||||
9
dashboard/package-lock.json
generated
9
dashboard/package-lock.json
generated
@@ -863,6 +863,7 @@
|
||||
"integrity": "sha512-oH8tXw7EZnie8FdOWYrF7Yn4IKrqTFHhXvl8YxXxbKwTMcD/5NNCryUSEXRk2ZR4ojnub0P8rNrsVGHXWqIDtA==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@standard-schema/spec": "^1.0.0",
|
||||
"@sveltejs/acorn-typescript": "^1.0.5",
|
||||
@@ -902,6 +903,7 @@
|
||||
"integrity": "sha512-Y1Cs7hhTc+a5E9Va/xwKlAJoariQyHY+5zBgCZg4PFWNYQ1nMN9sjK1zhw1gK69DuqVP++sht/1GZg1aRwmAXQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@sveltejs/vite-plugin-svelte-inspector": "^4.0.1",
|
||||
"debug": "^4.4.1",
|
||||
@@ -1518,6 +1520,7 @@
|
||||
"integrity": "sha512-LCCV0HdSZZZb34qifBsyWlUmok6W7ouER+oQIGBScS8EsZsQbrtFTUrDX4hOl+CS6p7cnNC4td+qrSVGSCTUfQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"undici-types": "~6.21.0"
|
||||
}
|
||||
@@ -1527,6 +1530,7 @@
|
||||
"resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz",
|
||||
"integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"bin": {
|
||||
"acorn": "bin/acorn"
|
||||
},
|
||||
@@ -1939,6 +1943,7 @@
|
||||
"integrity": "sha512-fmTRWbNMmsmWq6xJV8D19U/gw/bwrHfNXxrIN+HfZgnzqTHp9jOmKMhsTUjXOJnZOdZY9Q28y4yebKzqDKlxlQ==",
|
||||
"dev": true,
|
||||
"license": "ISC",
|
||||
"peer": true,
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
@@ -2646,6 +2651,7 @@
|
||||
"integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
},
|
||||
@@ -2833,6 +2839,7 @@
|
||||
"resolved": "https://registry.npmjs.org/svelte/-/svelte-5.45.3.tgz",
|
||||
"integrity": "sha512-ngKXNhNvwPzF43QqEhDOue7TQTrG09em1sd4HBxVF0Wr2gopAmdEWan+rgbdgK4fhBtSOTJO8bYU4chUG7VXZQ==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@jridgewell/remapping": "^2.3.4",
|
||||
"@jridgewell/sourcemap-codec": "^1.5.0",
|
||||
@@ -2977,6 +2984,7 @@
|
||||
"integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==",
|
||||
"dev": true,
|
||||
"license": "Apache-2.0",
|
||||
"peer": true,
|
||||
"bin": {
|
||||
"tsc": "bin/tsc",
|
||||
"tsserver": "bin/tsserver"
|
||||
@@ -2998,6 +3006,7 @@
|
||||
"integrity": "sha512-+Oxm7q9hDoLMyJOYfUYBuHQo+dkAloi33apOPP56pzj+vsdJDzr+j1NISE5pyaAuKL4A3UD34qd0lx5+kfKp2g==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"esbuild": "^0.25.0",
|
||||
"fdir": "^6.4.4",
|
||||
|
||||
@@ -23,6 +23,7 @@ dependencies = [
|
||||
"tiktoken>=0.12.0", # required for kimi k2 tokenizer
|
||||
"hypercorn>=0.18.0",
|
||||
"openai-harmony>=0.0.8",
|
||||
"tomlkit>=0.14.0",
|
||||
]
|
||||
|
||||
[project.scripts]
|
||||
|
||||
15
resources/model_cards/deepseek-v3.1-4bit.toml
Normal file
15
resources/model_cards/deepseek-v3.1-4bit.toml
Normal file
@@ -0,0 +1,15 @@
|
||||
short_id = "deepseek-v3.1-4bit"
|
||||
model_id = "mlx-community/DeepSeek-V3.1-4bit"
|
||||
name = "DeepSeek V3.1 (4-bit)"
|
||||
description = "DeepSeek V3.1 is a large language model trained on the DeepSeek V3.1 dataset."
|
||||
tags = []
|
||||
|
||||
[metadata]
|
||||
model_id = "mlx-community/DeepSeek-V3.1-4bit"
|
||||
pretty_name = "DeepSeek V3.1 (4-bit)"
|
||||
n_layers = 61
|
||||
hidden_size = 7168
|
||||
supports_tensor = true
|
||||
|
||||
[metadata.storage_size]
|
||||
in_bytes = 405874409472
|
||||
15
resources/model_cards/deepseek-v3.1-8bit.toml
Normal file
15
resources/model_cards/deepseek-v3.1-8bit.toml
Normal file
@@ -0,0 +1,15 @@
|
||||
short_id = "deepseek-v3.1-8bit"
|
||||
model_id = "mlx-community/DeepSeek-V3.1-8bit"
|
||||
name = "DeepSeek V3.1 (8-bit)"
|
||||
description = "DeepSeek V3.1 is a large language model trained on the DeepSeek V3.1 dataset."
|
||||
tags = []
|
||||
|
||||
[metadata]
|
||||
model_id = "mlx-community/DeepSeek-V3.1-8bit"
|
||||
pretty_name = "DeepSeek V3.1 (8-bit)"
|
||||
n_layers = 61
|
||||
hidden_size = 7168
|
||||
supports_tensor = true
|
||||
|
||||
[metadata.storage_size]
|
||||
in_bytes = 765577920512
|
||||
15
resources/model_cards/glm-4.5-air-8bit.toml
Normal file
15
resources/model_cards/glm-4.5-air-8bit.toml
Normal file
@@ -0,0 +1,15 @@
|
||||
short_id = "glm-4.5-air-8bit"
|
||||
model_id = "mlx-community/GLM-4.5-Air-8bit"
|
||||
name = "GLM 4.5 Air 8bit"
|
||||
description = "GLM 4.5 Air 8bit"
|
||||
tags = []
|
||||
|
||||
[metadata]
|
||||
model_id = "mlx-community/GLM-4.5-Air-8bit"
|
||||
pretty_name = "GLM 4.5 Air 8bit"
|
||||
n_layers = 46
|
||||
hidden_size = 4096
|
||||
supports_tensor = false
|
||||
|
||||
[metadata.storage_size]
|
||||
in_bytes = 122406567936
|
||||
15
resources/model_cards/glm-4.5-air-bf16.toml
Normal file
15
resources/model_cards/glm-4.5-air-bf16.toml
Normal file
@@ -0,0 +1,15 @@
|
||||
short_id = "glm-4.5-air-bf16"
|
||||
model_id = "mlx-community/GLM-4.5-Air-bf16"
|
||||
name = "GLM 4.5 Air bf16"
|
||||
description = "GLM 4.5 Air bf16"
|
||||
tags = []
|
||||
|
||||
[metadata]
|
||||
model_id = "mlx-community/GLM-4.5-Air-bf16"
|
||||
pretty_name = "GLM 4.5 Air bf16"
|
||||
n_layers = 46
|
||||
hidden_size = 4096
|
||||
supports_tensor = true
|
||||
|
||||
[metadata.storage_size]
|
||||
in_bytes = 229780750336
|
||||
15
resources/model_cards/glm-4.7-4bit.toml
Normal file
15
resources/model_cards/glm-4.7-4bit.toml
Normal file
@@ -0,0 +1,15 @@
|
||||
short_id = "glm-4.7-4bit"
|
||||
model_id = "mlx-community/GLM-4.7-4bit"
|
||||
name = "GLM 4.7 4bit"
|
||||
description = "GLM 4.7 4bit"
|
||||
tags = []
|
||||
|
||||
[metadata]
|
||||
model_id = "mlx-community/GLM-4.7-4bit"
|
||||
pretty_name = "GLM 4.7 4bit"
|
||||
n_layers = 91
|
||||
hidden_size = 5120
|
||||
supports_tensor = true
|
||||
|
||||
[metadata.storage_size]
|
||||
in_bytes = 198556925568
|
||||
15
resources/model_cards/glm-4.7-6bit.toml
Normal file
15
resources/model_cards/glm-4.7-6bit.toml
Normal file
@@ -0,0 +1,15 @@
|
||||
short_id = "glm-4.7-6bit"
|
||||
model_id = "mlx-community/GLM-4.7-6bit"
|
||||
name = "GLM 4.7 6bit"
|
||||
description = "GLM 4.7 6bit"
|
||||
tags = []
|
||||
|
||||
[metadata]
|
||||
model_id = "mlx-community/GLM-4.7-6bit"
|
||||
pretty_name = "GLM 4.7 6bit"
|
||||
n_layers = 91
|
||||
hidden_size = 5120
|
||||
supports_tensor = true
|
||||
|
||||
[metadata.storage_size]
|
||||
in_bytes = 286737579648
|
||||
15
resources/model_cards/glm-4.7-8bit-gs32.toml
Normal file
15
resources/model_cards/glm-4.7-8bit-gs32.toml
Normal file
@@ -0,0 +1,15 @@
|
||||
short_id = "glm-4.7-8bit-gs32"
|
||||
model_id = "mlx-community/GLM-4.7-8bit-gs32"
|
||||
name = "GLM 4.7 8bit (gs32)"
|
||||
description = "GLM 4.7 8bit (gs32)"
|
||||
tags = []
|
||||
|
||||
[metadata]
|
||||
model_id = "mlx-community/GLM-4.7-8bit-gs32"
|
||||
pretty_name = "GLM 4.7 8bit (gs32)"
|
||||
n_layers = 91
|
||||
hidden_size = 5120
|
||||
supports_tensor = true
|
||||
|
||||
[metadata.storage_size]
|
||||
in_bytes = 396963397248
|
||||
15
resources/model_cards/gpt-oss-120b-MXFP4-Q8.toml
Normal file
15
resources/model_cards/gpt-oss-120b-MXFP4-Q8.toml
Normal file
@@ -0,0 +1,15 @@
|
||||
short_id = "gpt-oss-120b-MXFP4-Q8"
|
||||
model_id = "mlx-community/gpt-oss-120b-MXFP4-Q8"
|
||||
name = "GPT-OSS 120B (MXFP4-Q8, MLX)"
|
||||
description = "OpenAI's GPT-OSS 120B is a 117B-parameter Mixture-of-Experts model designed for high-reasoning and general-purpose use; this variant is a 4-bit MLX conversion for Apple Silicon."
|
||||
tags = []
|
||||
|
||||
[metadata]
|
||||
model_id = "mlx-community/gpt-oss-120b-MXFP4-Q8"
|
||||
pretty_name = "GPT-OSS 120B (MXFP4-Q8, MLX)"
|
||||
n_layers = 36
|
||||
hidden_size = 2880
|
||||
supports_tensor = true
|
||||
|
||||
[metadata.storage_size]
|
||||
in_bytes = 70652212224
|
||||
15
resources/model_cards/gpt-oss-20b-4bit.toml
Normal file
15
resources/model_cards/gpt-oss-20b-4bit.toml
Normal file
@@ -0,0 +1,15 @@
|
||||
short_id = "gpt-oss-20b-4bit"
|
||||
model_id = "mlx-community/gpt-oss-20b-MXFP4-Q4"
|
||||
name = "GPT-OSS 20B (MXFP4-Q4, MLX)"
|
||||
description = "OpenAI's GPT-OSS 20B is a medium-sized MoE model for lower-latency and local or specialized use cases; this MLX variant uses MXFP4 4-bit quantization."
|
||||
tags = []
|
||||
|
||||
[metadata]
|
||||
model_id = "mlx-community/gpt-oss-20b-MXFP4-Q4"
|
||||
pretty_name = "GPT-OSS 20B (MXFP4-Q4, MLX)"
|
||||
n_layers = 24
|
||||
hidden_size = 2880
|
||||
supports_tensor = true
|
||||
|
||||
[metadata.storage_size]
|
||||
in_bytes = 12025908224
|
||||
15
resources/model_cards/kimi-k2-instruct-4bit.toml
Normal file
15
resources/model_cards/kimi-k2-instruct-4bit.toml
Normal file
@@ -0,0 +1,15 @@
|
||||
short_id = "kimi-k2-instruct-4bit"
|
||||
model_id = "mlx-community/Kimi-K2-Instruct-4bit"
|
||||
name = "Kimi K2 Instruct (4-bit)"
|
||||
description = "Kimi K2 is a large language model trained on the Kimi K2 dataset."
|
||||
tags = []
|
||||
|
||||
[metadata]
|
||||
model_id = "mlx-community/Kimi-K2-Instruct-4bit"
|
||||
pretty_name = "Kimi K2 Instruct (4-bit)"
|
||||
n_layers = 61
|
||||
hidden_size = 7168
|
||||
supports_tensor = true
|
||||
|
||||
[metadata.storage_size]
|
||||
in_bytes = 620622774272
|
||||
15
resources/model_cards/kimi-k2-thinking.toml
Normal file
15
resources/model_cards/kimi-k2-thinking.toml
Normal file
@@ -0,0 +1,15 @@
|
||||
short_id = "kimi-k2-thinking"
|
||||
model_id = "mlx-community/Kimi-K2-Thinking"
|
||||
name = "Kimi K2 Thinking (4-bit)"
|
||||
description = "Kimi K2 Thinking is the latest, most capable version of open-source thinking model."
|
||||
tags = []
|
||||
|
||||
[metadata]
|
||||
model_id = "mlx-community/Kimi-K2-Thinking"
|
||||
pretty_name = "Kimi K2 Thinking (4-bit)"
|
||||
n_layers = 61
|
||||
hidden_size = 7168
|
||||
supports_tensor = true
|
||||
|
||||
[metadata.storage_size]
|
||||
in_bytes = 706522120192
|
||||
15
resources/model_cards/llama-3.1-70b.toml
Normal file
15
resources/model_cards/llama-3.1-70b.toml
Normal file
@@ -0,0 +1,15 @@
|
||||
short_id = "llama-3.1-70b"
|
||||
model_id = "mlx-community/Meta-Llama-3.1-70B-Instruct-4bit"
|
||||
name = "Llama 3.1 70B (4-bit)"
|
||||
description = "Llama 3.1 is a large language model trained on the Llama 3.1 dataset."
|
||||
tags = []
|
||||
|
||||
[metadata]
|
||||
model_id = "mlx-community/Meta-Llama-3.1-70B-Instruct-4bit"
|
||||
pretty_name = "Llama 3.1 70B (4-bit)"
|
||||
n_layers = 80
|
||||
hidden_size = 8192
|
||||
supports_tensor = true
|
||||
|
||||
[metadata.storage_size]
|
||||
in_bytes = 40652242944
|
||||
15
resources/model_cards/llama-3.1-8b-8bit.toml
Normal file
15
resources/model_cards/llama-3.1-8b-8bit.toml
Normal file
@@ -0,0 +1,15 @@
|
||||
short_id = "llama-3.1-8b-8bit"
|
||||
model_id = "mlx-community/Meta-Llama-3.1-8B-Instruct-8bit"
|
||||
name = "Llama 3.1 8B (8-bit)"
|
||||
description = "Llama 3.1 is a large language model trained on the Llama 3.1 dataset."
|
||||
tags = []
|
||||
|
||||
[metadata]
|
||||
model_id = "mlx-community/Meta-Llama-3.1-8B-Instruct-8bit"
|
||||
pretty_name = "Llama 3.1 8B (8-bit)"
|
||||
n_layers = 32
|
||||
hidden_size = 4096
|
||||
supports_tensor = true
|
||||
|
||||
[metadata.storage_size]
|
||||
in_bytes = 8954839040
|
||||
15
resources/model_cards/llama-3.1-8b-bf16.toml
Normal file
15
resources/model_cards/llama-3.1-8b-bf16.toml
Normal file
@@ -0,0 +1,15 @@
|
||||
short_id = "llama-3.1-8b-bf16"
|
||||
model_id = "mlx-community/Meta-Llama-3.1-8B-Instruct-bf16"
|
||||
name = "Llama 3.1 8B (BF16)"
|
||||
description = "Llama 3.1 is a large language model trained on the Llama 3.1 dataset."
|
||||
tags = []
|
||||
|
||||
[metadata]
|
||||
model_id = "mlx-community/Meta-Llama-3.1-8B-Instruct-bf16"
|
||||
pretty_name = "Llama 3.1 8B (BF16)"
|
||||
n_layers = 32
|
||||
hidden_size = 4096
|
||||
supports_tensor = true
|
||||
|
||||
[metadata.storage_size]
|
||||
in_bytes = 16882073600
|
||||
15
resources/model_cards/llama-3.1-8b.toml
Normal file
15
resources/model_cards/llama-3.1-8b.toml
Normal file
@@ -0,0 +1,15 @@
|
||||
short_id = "llama-3.1-8b"
|
||||
model_id = "mlx-community/Meta-Llama-3.1-8B-Instruct-4bit"
|
||||
name = "Llama 3.1 8B (4-bit)"
|
||||
description = "Llama 3.1 is a large language model trained on the Llama 3.1 dataset."
|
||||
tags = []
|
||||
|
||||
[metadata]
|
||||
model_id = "mlx-community/Meta-Llama-3.1-8B-Instruct-4bit"
|
||||
pretty_name = "Llama 3.1 8B (4-bit)"
|
||||
n_layers = 32
|
||||
hidden_size = 4096
|
||||
supports_tensor = true
|
||||
|
||||
[metadata.storage_size]
|
||||
in_bytes = 4637851648
|
||||
15
resources/model_cards/llama-3.2-1b.toml
Normal file
15
resources/model_cards/llama-3.2-1b.toml
Normal file
@@ -0,0 +1,15 @@
|
||||
short_id = "llama-3.2-1b"
|
||||
model_id = "mlx-community/Llama-3.2-1B-Instruct-4bit"
|
||||
name = "Llama 3.2 1B (4-bit)"
|
||||
description = "Llama 3.2 is a large language model trained on the Llama 3.2 dataset."
|
||||
tags = []
|
||||
|
||||
[metadata]
|
||||
model_id = "mlx-community/Llama-3.2-1B-Instruct-4bit"
|
||||
pretty_name = "Llama 3.2 1B (4-bit)"
|
||||
n_layers = 16
|
||||
hidden_size = 2048
|
||||
supports_tensor = true
|
||||
|
||||
[metadata.storage_size]
|
||||
in_bytes = 729808896
|
||||
15
resources/model_cards/llama-3.2-3b-8bit.toml
Normal file
15
resources/model_cards/llama-3.2-3b-8bit.toml
Normal file
@@ -0,0 +1,15 @@
|
||||
short_id = "llama-3.2-3b-8bit"
|
||||
model_id = "mlx-community/Llama-3.2-3B-Instruct-8bit"
|
||||
name = "Llama 3.2 3B (8-bit)"
|
||||
description = "Llama 3.2 is a large language model trained on the Llama 3.2 dataset."
|
||||
tags = []
|
||||
|
||||
[metadata]
|
||||
model_id = "mlx-community/Llama-3.2-3B-Instruct-8bit"
|
||||
pretty_name = "Llama 3.2 3B (8-bit)"
|
||||
n_layers = 28
|
||||
hidden_size = 3072
|
||||
supports_tensor = true
|
||||
|
||||
[metadata.storage_size]
|
||||
in_bytes = 3501195264
|
||||
15
resources/model_cards/llama-3.2-3b.toml
Normal file
15
resources/model_cards/llama-3.2-3b.toml
Normal file
@@ -0,0 +1,15 @@
|
||||
short_id = "llama-3.2-3b"
|
||||
model_id = "mlx-community/Llama-3.2-3B-Instruct-4bit"
|
||||
name = "Llama 3.2 3B (4-bit)"
|
||||
description = "Llama 3.2 is a large language model trained on the Llama 3.2 dataset."
|
||||
tags = []
|
||||
|
||||
[metadata]
|
||||
model_id = "mlx-community/Llama-3.2-3B-Instruct-4bit"
|
||||
pretty_name = "Llama 3.2 3B (4-bit)"
|
||||
n_layers = 28
|
||||
hidden_size = 3072
|
||||
supports_tensor = true
|
||||
|
||||
[metadata.storage_size]
|
||||
in_bytes = 1863319552
|
||||
15
resources/model_cards/llama-3.3-70b-8bit.toml
Normal file
15
resources/model_cards/llama-3.3-70b-8bit.toml
Normal file
@@ -0,0 +1,15 @@
|
||||
short_id = "llama-3.3-70b-8bit"
|
||||
model_id = "mlx-community/Llama-3.3-70B-Instruct-8bit"
|
||||
name = "Llama 3.3 70B (8-bit)"
|
||||
description = "The Meta Llama 3.3 multilingual large language model (LLM) is an instruction tuned generative model in 70B (text in/text out)"
|
||||
tags = []
|
||||
|
||||
[metadata]
|
||||
model_id = "mlx-community/Llama-3.3-70B-Instruct-8bit"
|
||||
pretty_name = "Llama 3.3 70B (8-bit)"
|
||||
n_layers = 80
|
||||
hidden_size = 8192
|
||||
supports_tensor = true
|
||||
|
||||
[metadata.storage_size]
|
||||
in_bytes = 76799803392
|
||||
15
resources/model_cards/llama-3.3-70b-fp16.toml
Normal file
15
resources/model_cards/llama-3.3-70b-fp16.toml
Normal file
@@ -0,0 +1,15 @@
|
||||
short_id = "llama-3.3-70b-fp16"
|
||||
model_id = "mlx-community/llama-3.3-70b-instruct-fp16"
|
||||
name = "Llama 3.3 70B (FP16)"
|
||||
description = "The Meta Llama 3.3 multilingual large language model (LLM) is an instruction tuned generative model in 70B (text in/text out)"
|
||||
tags = []
|
||||
|
||||
[metadata]
|
||||
model_id = "mlx-community/llama-3.3-70b-instruct-fp16"
|
||||
pretty_name = "Llama 3.3 70B (FP16)"
|
||||
n_layers = 80
|
||||
hidden_size = 8192
|
||||
supports_tensor = true
|
||||
|
||||
[metadata.storage_size]
|
||||
in_bytes = 144383672320
|
||||
15
resources/model_cards/llama-3.3-70b.toml
Normal file
15
resources/model_cards/llama-3.3-70b.toml
Normal file
@@ -0,0 +1,15 @@
|
||||
short_id = "llama-3.3-70b"
|
||||
model_id = "mlx-community/Llama-3.3-70B-Instruct-4bit"
|
||||
name = "Llama 3.3 70B (4-bit)"
|
||||
description = "The Meta Llama 3.3 multilingual large language model (LLM) is an instruction tuned generative model in 70B (text in/text out)"
|
||||
tags = []
|
||||
|
||||
[metadata]
|
||||
model_id = "mlx-community/Llama-3.3-70B-Instruct-4bit"
|
||||
pretty_name = "Llama 3.3 70B"
|
||||
n_layers = 80
|
||||
hidden_size = 8192
|
||||
supports_tensor = true
|
||||
|
||||
[metadata.storage_size]
|
||||
in_bytes = 40652242944
|
||||
15
resources/model_cards/minimax-m2.1-3bit.toml
Normal file
15
resources/model_cards/minimax-m2.1-3bit.toml
Normal file
@@ -0,0 +1,15 @@
|
||||
short_id = "minimax-m2.1-3bit"
|
||||
model_id = "mlx-community/MiniMax-M2.1-3bit"
|
||||
name = "MiniMax M2.1 3bit"
|
||||
description = "MiniMax M2.1 3bit"
|
||||
tags = []
|
||||
|
||||
[metadata]
|
||||
model_id = "mlx-community/MiniMax-M2.1-3bit"
|
||||
pretty_name = "MiniMax M2.1 3bit"
|
||||
n_layers = 61
|
||||
hidden_size = 3072
|
||||
supports_tensor = true
|
||||
|
||||
[metadata.storage_size]
|
||||
in_bytes = 100086644736
|
||||
15
resources/model_cards/minimax-m2.1-8bit.toml
Normal file
15
resources/model_cards/minimax-m2.1-8bit.toml
Normal file
@@ -0,0 +1,15 @@
|
||||
short_id = "minimax-m2.1-8bit"
|
||||
model_id = "mlx-community/MiniMax-M2.1-8bit"
|
||||
name = "MiniMax M2.1 8bit"
|
||||
description = "MiniMax M2.1 8bit"
|
||||
tags = []
|
||||
|
||||
[metadata]
|
||||
model_id = "mlx-community/MiniMax-M2.1-8bit"
|
||||
pretty_name = "MiniMax M2.1 8bit"
|
||||
n_layers = 61
|
||||
hidden_size = 3072
|
||||
supports_tensor = true
|
||||
|
||||
[metadata.storage_size]
|
||||
in_bytes = 242986745856
|
||||
15
resources/model_cards/qwen3-0.6b-8bit.toml
Normal file
15
resources/model_cards/qwen3-0.6b-8bit.toml
Normal file
@@ -0,0 +1,15 @@
|
||||
short_id = "qwen3-0.6b-8bit"
|
||||
model_id = "mlx-community/Qwen3-0.6B-8bit"
|
||||
name = "Qwen3 0.6B (8-bit)"
|
||||
description = "Qwen3 0.6B is a large language model trained on the Qwen3 0.6B dataset."
|
||||
tags = []
|
||||
|
||||
[metadata]
|
||||
model_id = "mlx-community/Qwen3-0.6B-8bit"
|
||||
pretty_name = "Qwen3 0.6B (8-bit)"
|
||||
n_layers = 28
|
||||
hidden_size = 1024
|
||||
supports_tensor = false
|
||||
|
||||
[metadata.storage_size]
|
||||
in_bytes = 698351616
|
||||
15
resources/model_cards/qwen3-0.6b.toml
Normal file
15
resources/model_cards/qwen3-0.6b.toml
Normal file
@@ -0,0 +1,15 @@
|
||||
short_id = "qwen3-0.6b"
|
||||
model_id = "mlx-community/Qwen3-0.6B-4bit"
|
||||
name = "Qwen3 0.6B (4-bit)"
|
||||
description = "Qwen3 0.6B is a large language model trained on the Qwen3 0.6B dataset."
|
||||
tags = []
|
||||
|
||||
[metadata]
|
||||
model_id = "mlx-community/Qwen3-0.6B-4bit"
|
||||
pretty_name = "Qwen3 0.6B (4-bit)"
|
||||
n_layers = 28
|
||||
hidden_size = 1024
|
||||
supports_tensor = false
|
||||
|
||||
[metadata.storage_size]
|
||||
in_bytes = 342884352
|
||||
15
resources/model_cards/qwen3-235b-a22b-4bit.toml
Normal file
15
resources/model_cards/qwen3-235b-a22b-4bit.toml
Normal file
@@ -0,0 +1,15 @@
|
||||
short_id = "qwen3-235b-a22b-4bit"
|
||||
model_id = "mlx-community/Qwen3-235B-A22B-Instruct-2507-4bit"
|
||||
name = "Qwen3 235B A22B (4-bit)"
|
||||
description = "Qwen3 235B (Active 22B) is a large language model trained on the Qwen3 235B dataset."
|
||||
tags = []
|
||||
|
||||
[metadata]
|
||||
model_id = "mlx-community/Qwen3-235B-A22B-Instruct-2507-4bit"
|
||||
pretty_name = "Qwen3 235B A22B (4-bit)"
|
||||
n_layers = 94
|
||||
hidden_size = 4096
|
||||
supports_tensor = true
|
||||
|
||||
[metadata.storage_size]
|
||||
in_bytes = 141733920768
|
||||
15
resources/model_cards/qwen3-235b-a22b-8bit.toml
Normal file
15
resources/model_cards/qwen3-235b-a22b-8bit.toml
Normal file
@@ -0,0 +1,15 @@
|
||||
short_id = "qwen3-235b-a22b-8bit"
|
||||
model_id = "mlx-community/Qwen3-235B-A22B-Instruct-2507-8bit"
|
||||
name = "Qwen3 235B A22B (8-bit)"
|
||||
description = "Qwen3 235B (Active 22B) is a large language model trained on the Qwen3 235B dataset."
|
||||
tags = []
|
||||
|
||||
[metadata]
|
||||
model_id = "mlx-community/Qwen3-235B-A22B-Instruct-2507-8bit"
|
||||
pretty_name = "Qwen3 235B A22B (8-bit)"
|
||||
n_layers = 94
|
||||
hidden_size = 4096
|
||||
supports_tensor = true
|
||||
|
||||
[metadata.storage_size]
|
||||
in_bytes = 268435456000
|
||||
15
resources/model_cards/qwen3-30b-8bit.toml
Normal file
15
resources/model_cards/qwen3-30b-8bit.toml
Normal file
@@ -0,0 +1,15 @@
|
||||
short_id = "qwen3-30b-8bit"
|
||||
model_id = "mlx-community/Qwen3-30B-A3B-8bit"
|
||||
name = "Qwen3 30B A3B (8-bit)"
|
||||
description = "Qwen3 30B is a large language model trained on the Qwen3 30B dataset."
|
||||
tags = []
|
||||
|
||||
[metadata]
|
||||
model_id = "mlx-community/Qwen3-30B-A3B-8bit"
|
||||
pretty_name = "Qwen3 30B A3B (8-bit)"
|
||||
n_layers = 48
|
||||
hidden_size = 2048
|
||||
supports_tensor = true
|
||||
|
||||
[metadata.storage_size]
|
||||
in_bytes = 33279705088
|
||||
15
resources/model_cards/qwen3-30b.toml
Normal file
15
resources/model_cards/qwen3-30b.toml
Normal file
@@ -0,0 +1,15 @@
|
||||
short_id = "qwen3-30b"
|
||||
model_id = "mlx-community/Qwen3-30B-A3B-4bit"
|
||||
name = "Qwen3 30B A3B (4-bit)"
|
||||
description = "Qwen3 30B is a large language model trained on the Qwen3 30B dataset."
|
||||
tags = []
|
||||
|
||||
[metadata]
|
||||
model_id = "mlx-community/Qwen3-30B-A3B-4bit"
|
||||
pretty_name = "Qwen3 30B A3B (4-bit)"
|
||||
n_layers = 48
|
||||
hidden_size = 2048
|
||||
supports_tensor = true
|
||||
|
||||
[metadata.storage_size]
|
||||
in_bytes = 17612931072
|
||||
15
resources/model_cards/qwen3-80b-a3B-4bit.toml
Normal file
15
resources/model_cards/qwen3-80b-a3B-4bit.toml
Normal file
@@ -0,0 +1,15 @@
|
||||
short_id = "qwen3-80b-a3B-4bit"
|
||||
model_id = "mlx-community/Qwen3-Next-80B-A3B-Instruct-4bit"
|
||||
name = "Qwen3 80B A3B (4-bit)"
|
||||
description = "Qwen3 80B"
|
||||
tags = []
|
||||
|
||||
[metadata]
|
||||
model_id = "mlx-community/Qwen3-Next-80B-A3B-Instruct-4bit"
|
||||
pretty_name = "Qwen3 80B A3B (4-bit)"
|
||||
n_layers = 48
|
||||
hidden_size = 2048
|
||||
supports_tensor = true
|
||||
|
||||
[metadata.storage_size]
|
||||
in_bytes = 46976204800
|
||||
15
resources/model_cards/qwen3-80b-a3B-8bit.toml
Normal file
15
resources/model_cards/qwen3-80b-a3B-8bit.toml
Normal file
@@ -0,0 +1,15 @@
|
||||
short_id = "qwen3-80b-a3B-8bit"
|
||||
model_id = "mlx-community/Qwen3-Next-80B-A3B-Instruct-8bit"
|
||||
name = "Qwen3 80B A3B (8-bit)"
|
||||
description = "Qwen3 80B"
|
||||
tags = []
|
||||
|
||||
[metadata]
|
||||
model_id = "mlx-community/Qwen3-Next-80B-A3B-Instruct-8bit"
|
||||
pretty_name = "Qwen3 80B A3B (8-bit)"
|
||||
n_layers = 48
|
||||
hidden_size = 2048
|
||||
supports_tensor = true
|
||||
|
||||
[metadata.storage_size]
|
||||
in_bytes = 88814387200
|
||||
15
resources/model_cards/qwen3-80b-a3B-thinking-4bit.toml
Normal file
15
resources/model_cards/qwen3-80b-a3B-thinking-4bit.toml
Normal file
@@ -0,0 +1,15 @@
|
||||
short_id = "qwen3-80b-a3B-thinking-4bit"
|
||||
model_id = "mlx-community/Qwen3-Next-80B-A3B-Thinking-4bit"
|
||||
name = "Qwen3 80B A3B Thinking (4-bit)"
|
||||
description = "Qwen3 80B Reasoning model"
|
||||
tags = []
|
||||
|
||||
[metadata]
|
||||
model_id = "mlx-community/Qwen3-Next-80B-A3B-Thinking-4bit"
|
||||
pretty_name = "Qwen3 80B A3B (4-bit)"
|
||||
n_layers = 48
|
||||
hidden_size = 2048
|
||||
supports_tensor = true
|
||||
|
||||
[metadata.storage_size]
|
||||
in_bytes = 88814387200
|
||||
15
resources/model_cards/qwen3-80b-a3B-thinking-8bit.toml
Normal file
15
resources/model_cards/qwen3-80b-a3B-thinking-8bit.toml
Normal file
@@ -0,0 +1,15 @@
|
||||
short_id = "qwen3-80b-a3B-thinking-8bit"
|
||||
model_id = "mlx-community/Qwen3-Next-80B-A3B-Thinking-8bit"
|
||||
name = "Qwen3 80B A3B Thinking (8-bit)"
|
||||
description = "Qwen3 80B Reasoning model"
|
||||
tags = []
|
||||
|
||||
[metadata]
|
||||
model_id = "mlx-community/Qwen3-Next-80B-A3B-Thinking-8bit"
|
||||
pretty_name = "Qwen3 80B A3B (8-bit)"
|
||||
n_layers = 48
|
||||
hidden_size = 2048
|
||||
supports_tensor = true
|
||||
|
||||
[metadata.storage_size]
|
||||
in_bytes = 88814387200
|
||||
15
resources/model_cards/qwen3-coder-480b-a35b-4bit.toml
Normal file
15
resources/model_cards/qwen3-coder-480b-a35b-4bit.toml
Normal file
@@ -0,0 +1,15 @@
|
||||
short_id = "qwen3-coder-480b-a35b-4bit"
|
||||
model_id = "mlx-community/Qwen3-Coder-480B-A35B-Instruct-4bit"
|
||||
name = "Qwen3 Coder 480B A35B (4-bit)"
|
||||
description = "Qwen3 Coder 480B (Active 35B) is a large language model trained on the Qwen3 Coder 480B dataset."
|
||||
tags = []
|
||||
|
||||
[metadata]
|
||||
model_id = "mlx-community/Qwen3-Coder-480B-A35B-Instruct-4bit"
|
||||
pretty_name = "Qwen3 Coder 480B A35B (4-bit)"
|
||||
n_layers = 62
|
||||
hidden_size = 6144
|
||||
supports_tensor = true
|
||||
|
||||
[metadata.storage_size]
|
||||
in_bytes = 289910292480
|
||||
15
resources/model_cards/qwen3-coder-480b-a35b-8bit.toml
Normal file
15
resources/model_cards/qwen3-coder-480b-a35b-8bit.toml
Normal file
@@ -0,0 +1,15 @@
|
||||
short_id = "qwen3-coder-480b-a35b-8bit"
|
||||
model_id = "mlx-community/Qwen3-Coder-480B-A35B-Instruct-8bit"
|
||||
name = "Qwen3 Coder 480B A35B (8-bit)"
|
||||
description = "Qwen3 Coder 480B (Active 35B) is a large language model trained on the Qwen3 Coder 480B dataset."
|
||||
tags = []
|
||||
|
||||
[metadata]
|
||||
model_id = "mlx-community/Qwen3-Coder-480B-A35B-Instruct-8bit"
|
||||
pretty_name = "Qwen3 Coder 480B A35B (8-bit)"
|
||||
n_layers = 62
|
||||
hidden_size = 6144
|
||||
supports_tensor = true
|
||||
|
||||
[metadata.storage_size]
|
||||
in_bytes = 579820584960
|
||||
@@ -81,20 +81,6 @@
|
||||
|
||||
config = {
|
||||
packages = {
|
||||
# The system_custodian binary
|
||||
system_custodian = craneLib.buildPackage (
|
||||
commonArgs
|
||||
// {
|
||||
inherit cargoArtifacts;
|
||||
cargoExtraArgs = "-p system_custodian";
|
||||
|
||||
meta = {
|
||||
description = "System custodian daemon for exo";
|
||||
mainProgram = "system_custodian";
|
||||
};
|
||||
}
|
||||
);
|
||||
|
||||
# Python bindings wheel via maturin
|
||||
exo_pyo3_bindings = craneLib.buildPackage (
|
||||
commonArgs
|
||||
|
||||
@@ -1,47 +0,0 @@
|
||||
[package]
|
||||
name = "system_custodian"
|
||||
version = { workspace = true }
|
||||
edition = { workspace = true }
|
||||
publish = false
|
||||
|
||||
[lib]
|
||||
doctest = false
|
||||
name = "system_custodian"
|
||||
path = "src/lib.rs"
|
||||
|
||||
[[bin]]
|
||||
path = "src/bin/main.rs"
|
||||
name = "system_custodian"
|
||||
doc = false
|
||||
|
||||
[lints]
|
||||
workspace = true
|
||||
|
||||
[dependencies]
|
||||
# datastructures
|
||||
either = { workspace = true }
|
||||
|
||||
# macro dependencies
|
||||
extend = { workspace = true }
|
||||
delegate = { workspace = true }
|
||||
impl-trait-for-tuples = { workspace = true }
|
||||
derive_more = { workspace = true }
|
||||
|
||||
# async
|
||||
tokio = { workspace = true, features = ["full"] }
|
||||
futures = { workspace = true }
|
||||
futures-timer = { workspace = true }
|
||||
|
||||
# utility dependencies
|
||||
util = { workspace = true }
|
||||
thiserror = { workspace = true }
|
||||
#internment = { workspace = true }
|
||||
#recursion = { workspace = true }
|
||||
#generativity = { workspace = true }
|
||||
#itertools = { workspace = true }
|
||||
tracing-subscriber = { version = "0.3.19", features = ["default", "env-filter"] }
|
||||
keccak-const = { workspace = true }
|
||||
|
||||
# tracing/logging
|
||||
log = { workspace = true }
|
||||
|
||||
@@ -1,4 +0,0 @@
|
||||
//! TODO: documentation
|
||||
//!
|
||||
|
||||
fn main() {}
|
||||
@@ -1,69 +0,0 @@
|
||||
//! This crate defines the logic of, and ways to interact with, Exo's **_System Custodian_** daemon.
|
||||
//!
|
||||
//! The **_System Custodian_** daemon is supposed to be a long-living process that precedes the
|
||||
//! launch of the Exo application, and responsible for ensuring the system (configuration, settings,
|
||||
//! etc.) is in an appropriate state to facilitate the running of Exo application.
|
||||
//! The **_System Custodian_** daemon shall expose a [D-Bus](https://www.freedesktop.org/wiki/Software/dbus/)
|
||||
//! service which Exo application use to _control & query_ it.
|
||||
//!
|
||||
//! # Lifecycle
|
||||
//! When the Exo application starts, it will _wake_ the **_System Custodian_** daemon for the
|
||||
//! duration of its lifetime, and after it has terminated the daemon will go back to sleep. When
|
||||
//! the daemon wakes up, it will configure the system into a state suitable for the Exo Application;
|
||||
//! When the daemon goes to sleep, it will revert those changes as much as it can in case they were
|
||||
//! destructive to the user's pre-existing configurations.
|
||||
//!
|
||||
//! # Responsibilities
|
||||
//! TODO: these are purely on MacOS, but change to be more broad
|
||||
//! The **_System Custodian_** daemon is responsible for using System Configuration framework to
|
||||
//! 1. duplicate the current network set
|
||||
//! 2. modify existing services to turn on IPv6 if not there
|
||||
//! 3. remove any bridge services & add any missing services that AREN'T bridge
|
||||
//! TODO: In the future:
|
||||
//! 1. run a dummy AWDL service to [allow for macOS peer-to-peer wireless networking](https://yggdrasil-network.github.io/2019/08/19/awdl.html)
|
||||
//! 2. toggle some GPU/memory configurations to speed up GPU (ask Alex what those configurations are)
|
||||
//! 3. if we ever decide to provide our **own network interfaces** that abstract over some userland
|
||||
//! logic, this would be the place to spin that up.
|
||||
//!
|
||||
//! Then it will watch the SCDynamicStore for:
|
||||
//! 1. all __actual__ network interfaces -> collect information on them e.g. their BSD name, MAC
|
||||
//! address, MTU, IPv6 addresses, etc. -> and set up watchers/notifiers to inform the DBus
|
||||
//! interface of any changes
|
||||
//! 2. watch for any __undesirable__ changes to configuration and revert it
|
||||
//!
|
||||
//! It should somehow (probably through system sockets and/or BSD interface) trigger IPv6 NDP on
|
||||
//! each of the interfaces & also listen to/query for any changes on the OS routing cache??
|
||||
//! Basically emulate the `ping6 ff02::1%enX` and `ndp -an` commands BUT BETTER!!!
|
||||
//! 1. all that info should coalesce back to the overall state colleted -> should be queryable
|
||||
//! over D-Bus
|
||||
//! TODO:
|
||||
//! 1. we might potentially add to this step a handshake of some kind...? To ensure that we can
|
||||
//! ACTUALLY communicate with that machine over that link over e.g. TCP, UDP, etc. Will the
|
||||
//! handshake require to know Node ID? Will the handshake require heartbeats? Who knows...
|
||||
//! 2. if we ever decide to write proprietary L2/L3 protocols for quicker communication,
|
||||
//! e.g. [AF_NDRV](https://www.zerotier.com/blog/how-zerotier-eliminated-kernel-extensions-on-macos/)
|
||||
//! for raw ethernet frame communication, or even a [custom thunderbolt PCIe driver](https://developer.apple.com/documentation/pcidriverkit/creating-custom-pcie-drivers-for-thunderbolt-devices),
|
||||
//! then this would be the place to carry out discovery and propper handshakes with devices
|
||||
//! on the other end of the link.
|
||||
//!
|
||||
|
||||
// enable Rust-unstable features for convenience
|
||||
#![feature(trait_alias)]
|
||||
#![feature(stmt_expr_attributes)]
|
||||
#![feature(type_alias_impl_trait)]
|
||||
#![feature(specialization)]
|
||||
#![feature(unboxed_closures)]
|
||||
#![feature(const_trait_impl)]
|
||||
#![feature(fn_traits)]
|
||||
|
||||
pub(crate) mod private {
|
||||
// sealed traits support
|
||||
pub trait Sealed {}
|
||||
impl<T: ?Sized> Sealed for T {}
|
||||
}
|
||||
|
||||
/// Namespace for all the type/trait aliases used by this crate.
|
||||
pub(crate) mod alias {}
|
||||
|
||||
/// Namespace for crate-wide extension traits/methods
|
||||
pub(crate) mod ext {}
|
||||
@@ -1,3 +1,6 @@
|
||||
from anyio import Path, open_file
|
||||
import tomlkit
|
||||
|
||||
from exo.shared.types.memory import Memory
|
||||
from exo.shared.types.models import ModelId, ModelMetadata
|
||||
from exo.utils.pydantic_ext import CamelCaseModel
|
||||
@@ -11,35 +14,24 @@ class ModelCard(CamelCaseModel):
|
||||
tags: list[str]
|
||||
metadata: ModelMetadata
|
||||
|
||||
@staticmethod
|
||||
async def load(path: Path) -> "ModelCard":
|
||||
async with await open_file(path) as f:
|
||||
data = await f.read()
|
||||
py = tomlkit.loads(data)
|
||||
return ModelCard.model_validate(py)
|
||||
|
||||
async def save(self, path: Path):
|
||||
async with await open_file(path, "w") as f:
|
||||
py = self.model_dump()
|
||||
data = tomlkit.dumps(py) # pyright: ignore[reportUnknownMemberType]
|
||||
await f.write(data)
|
||||
|
||||
|
||||
|
||||
|
||||
MODEL_CARDS: dict[str, ModelCard] = {
|
||||
# deepseek v3
|
||||
# "deepseek-v3-0324:4bit": ModelCard(
|
||||
# short_id="deepseek-v3-0324:4bit",
|
||||
# model_id="mlx-community/DeepSeek-V3-0324-4bit",
|
||||
# name="DeepSeek V3 0324 (4-bit)",
|
||||
# description="""DeepSeek V3 is a large language model trained on the DeepSeek V3 dataset.""",
|
||||
# tags=[],
|
||||
# metadata=ModelMetadata(
|
||||
# model_id=ModelId("mlx-community/DeepSeek-V3-0324-4bit"),
|
||||
# pretty_name="DeepSeek V3 0324 (4-bit)",
|
||||
# storage_size=Memory.from_kb(409706307),
|
||||
# n_layers=61,
|
||||
# ),
|
||||
# ),
|
||||
# "deepseek-v3-0324": ModelCard(
|
||||
# short_id="deepseek-v3-0324",
|
||||
# model_id="mlx-community/DeepSeek-v3-0324-8bit",
|
||||
# name="DeepSeek V3 0324 (8-bit)",
|
||||
# description="""DeepSeek V3 is a large language model trained on the DeepSeek V3 dataset.""",
|
||||
# tags=[],
|
||||
# metadata=ModelMetadata(
|
||||
# model_id=ModelId("mlx-community/DeepSeek-v3-0324-8bit"),
|
||||
# pretty_name="DeepSeek V3 0324 (8-bit)",
|
||||
# storage_size=Memory.from_kb(754706307),
|
||||
# n_layers=61,
|
||||
# ),
|
||||
# ),
|
||||
"deepseek-v3.1-4bit": ModelCard(
|
||||
short_id="deepseek-v3.1-4bit",
|
||||
model_id=ModelId("mlx-community/DeepSeek-V3.1-4bit"),
|
||||
@@ -70,65 +62,6 @@ MODEL_CARDS: dict[str, ModelCard] = {
|
||||
supports_tensor=True,
|
||||
),
|
||||
),
|
||||
# "deepseek-v3.2": ModelCard(
|
||||
# short_id="deepseek-v3.2",
|
||||
# model_id=ModelId("mlx-community/DeepSeek-V3.2-8bit"),
|
||||
# name="DeepSeek V3.2 (8-bit)",
|
||||
# description="""DeepSeek V3.2 is a large language model trained on the DeepSeek V3.2 dataset.""",
|
||||
# tags=[],
|
||||
# metadata=ModelMetadata(
|
||||
# model_id=ModelId("mlx-community/DeepSeek-V3.2-8bit"),
|
||||
# pretty_name="DeepSeek V3.2 (8-bit)",
|
||||
# storage_size=Memory.from_kb(754706307),
|
||||
# n_layers=61,
|
||||
# hidden_size=7168,
|
||||
# supports_tensor=True,
|
||||
# ),
|
||||
# ),
|
||||
# "deepseek-v3.2-4bit": ModelCard(
|
||||
# short_id="deepseek-v3.2-4bit",
|
||||
# model_id=ModelId("mlx-community/DeepSeek-V3.2-4bit"),
|
||||
# name="DeepSeek V3.2 (4-bit)",
|
||||
# description="""DeepSeek V3.2 is a large language model trained on the DeepSeek V3.2 dataset.""",
|
||||
# tags=[],
|
||||
# metadata=ModelMetadata(
|
||||
# model_id=ModelId("mlx-community/DeepSeek-V3.2-4bit"),
|
||||
# pretty_name="DeepSeek V3.2 (4-bit)",
|
||||
# storage_size=Memory.from_kb(754706307 // 2), # TODO !!!!!
|
||||
# n_layers=61,
|
||||
# hidden_size=7168,
|
||||
# supports_tensor=True,
|
||||
# ),
|
||||
# ),
|
||||
# deepseek r1
|
||||
# "deepseek-r1-0528-4bit": ModelCard(
|
||||
# short_id="deepseek-r1-0528-4bit",
|
||||
# model_id="mlx-community/DeepSeek-R1-0528-4bit",
|
||||
# name="DeepSeek-R1-0528 (4-bit)",
|
||||
# description="""DeepSeek R1 is a large language model trained on the DeepSeek R1 dataset.""",
|
||||
# tags=[],
|
||||
# metadata=ModelMetadata(
|
||||
# model_id=ModelId("mlx-community/DeepSeek-R1-0528-4bit"),
|
||||
# pretty_name="DeepSeek R1 671B (4-bit)",
|
||||
# storage_size=Memory.from_kb(409706307),
|
||||
# n_layers=61,
|
||||
# hidden_size=7168,
|
||||
# ),
|
||||
# ),
|
||||
# "deepseek-r1-0528": ModelCard(
|
||||
# short_id="deepseek-r1-0528",
|
||||
# model_id="mlx-community/DeepSeek-R1-0528-8bit",
|
||||
# name="DeepSeek-R1-0528 (8-bit)",
|
||||
# description="""DeepSeek R1 is a large language model trained on the DeepSeek R1 dataset.""",
|
||||
# tags=[],
|
||||
# metadata=ModelMetadata(
|
||||
# model_id=ModelId("mlx-community/DeepSeek-R1-0528-8bit"),
|
||||
# pretty_name="DeepSeek R1 671B (8-bit)",
|
||||
# storage_size=Memory.from_bytes(754998771712),
|
||||
# n_layers=61,
|
||||
# . hidden_size=7168,
|
||||
# ),
|
||||
# ),
|
||||
# kimi k2
|
||||
"kimi-k2-instruct-4bit": ModelCard(
|
||||
short_id="kimi-k2-instruct-4bit",
|
||||
@@ -525,8 +458,9 @@ MODEL_CARDS: dict[str, ModelCard] = {
|
||||
supports_tensor=True,
|
||||
),
|
||||
),
|
||||
# Needs to be quantized g32 or g16.
|
||||
# glm 4.5
|
||||
"glm-4.5-air-8bit": ModelCard(
|
||||
# Needs to be quantized g32 or g16 to work with tensor parallel
|
||||
short_id="glm-4.5-air-8bit",
|
||||
model_id=ModelId("mlx-community/GLM-4.5-Air-8bit"),
|
||||
name="GLM 4.5 Air 8bit",
|
||||
@@ -556,6 +490,7 @@ MODEL_CARDS: dict[str, ModelCard] = {
|
||||
supports_tensor=True,
|
||||
),
|
||||
),
|
||||
# glm 4.7
|
||||
"glm-4.7-4bit": ModelCard(
|
||||
short_id="glm-4.7-4bit",
|
||||
model_id=ModelId("mlx-community/GLM-4.7-4bit"),
|
||||
@@ -601,6 +536,7 @@ MODEL_CARDS: dict[str, ModelCard] = {
|
||||
supports_tensor=True,
|
||||
),
|
||||
),
|
||||
# minimax-m2
|
||||
"minimax-m2.1-8bit": ModelCard(
|
||||
short_id="minimax-m2.1-8bit",
|
||||
model_id=ModelId("mlx-community/MiniMax-M2.1-8bit"),
|
||||
@@ -631,19 +567,4 @@ MODEL_CARDS: dict[str, ModelCard] = {
|
||||
supports_tensor=True,
|
||||
),
|
||||
),
|
||||
# "devstral-2-123b-instruct-2512-8bit": ModelCard(
|
||||
# short_id="devstral-2-123b-instruct-2512-8bit",
|
||||
# model_id=ModelId("mlx-community/Devstral-2-123B-Instruct-2512-8bit"),
|
||||
# name="Devstral 2 123B Instruct 2512 (8-bit, MLX)",
|
||||
# description="""Mistral AI's Devstral 2 123B Instruct (2512) is an agentic coding model.""",
|
||||
# tags=[],
|
||||
# metadata=ModelMetadata(
|
||||
# model_id=ModelId("mlx-community/Devstral-2-123B-Instruct-2512-8bit"),
|
||||
# pretty_name="Devstral 2 123B Instruct 2512 (8-bit, MLX)",
|
||||
# storage_size=Memory.from_kb(133_000_000),
|
||||
# n_layers=88,
|
||||
# hidden_size=12288,
|
||||
# supports_tensor=True,
|
||||
# ),
|
||||
# ),
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user