mirror of
https://github.com/exo-explore/exo.git
synced 2026-02-27 11:46:14 -05:00
Compare commits
1 Commits
main
...
JakeHillio
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
bdd6594bc8 |
35
.github/renovate.json
vendored
Normal file
35
.github/renovate.json
vendored
Normal file
@@ -0,0 +1,35 @@
|
||||
{
|
||||
"$schema": "https://docs.renovatebot.com/renovate-schema.json",
|
||||
"extends": [
|
||||
"config:recommended"
|
||||
],
|
||||
"dependencyDashboard": true,
|
||||
"customManagers": [
|
||||
{
|
||||
"customType": "regex",
|
||||
"description": "Pin HuggingFace model revisions to commit SHAs",
|
||||
"managerFilePatterns": [
|
||||
"^resources/inference_model_cards/.*\\.toml$",
|
||||
"^resources/image_model_cards/.*\\.toml$"
|
||||
],
|
||||
"matchStrings": [
|
||||
"model_id = \"(?<packageName>[^\"]+)\"(?:\\n|\\r\\n?)revision = \"(?<currentValue>[^\"]+)\""
|
||||
],
|
||||
"datasourceTemplate": "git-refs",
|
||||
"depNameTemplate": "huggingface.co/{{packageName}}",
|
||||
"currentValueTemplate": "main",
|
||||
"versioningTemplate": "git"
|
||||
}
|
||||
],
|
||||
"packageRules": [
|
||||
{
|
||||
"matchDatasources": ["git-refs"],
|
||||
"matchPackageNames": ["huggingface.co/**"],
|
||||
"groupName": null,
|
||||
"automerge": false,
|
||||
"prTitle": "chore: pin {{depName}} to {{newDigest}}",
|
||||
"commitMessageTopic": "{{depName}} revision",
|
||||
"commitMessageExtra": "to {{newDigest}}"
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "exolabs/FLUX.1-Kontext-dev-4bit"
|
||||
revision = "4730d16f5f45143bab61f1cbf963d479f205e360"
|
||||
n_layers = 57
|
||||
hidden_size = 1
|
||||
supports_tensor = false
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "exolabs/FLUX.1-Kontext-dev-8bit"
|
||||
revision = "cbf01164d429932b260d91872d62a7a4fe2634fa"
|
||||
n_layers = 57
|
||||
hidden_size = 1
|
||||
supports_tensor = false
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "exolabs/FLUX.1-Kontext-dev"
|
||||
revision = "76e13736ad51f8dd8259a336dc087f8d3c7b819e"
|
||||
n_layers = 57
|
||||
hidden_size = 1
|
||||
supports_tensor = false
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "exolabs/FLUX.1-Krea-dev-4bit"
|
||||
revision = "880ebf331481b566f2019a88dace35279c4bcce8"
|
||||
n_layers = 57
|
||||
hidden_size = 1
|
||||
supports_tensor = false
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "exolabs/FLUX.1-Krea-dev-8bit"
|
||||
revision = "bb45e8d78959e51d42a1d424f44a112adabd5cbc"
|
||||
n_layers = 57
|
||||
hidden_size = 1
|
||||
supports_tensor = false
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "exolabs/FLUX.1-Krea-dev"
|
||||
revision = "2e3d8c5ebc737af82d1f4c669f99bacc72b09d6b"
|
||||
n_layers = 57
|
||||
hidden_size = 1
|
||||
supports_tensor = false
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "exolabs/FLUX.1-dev-4bit"
|
||||
revision = "b96c650a7b2c57484e2df51d1fafcb4cb31b1060"
|
||||
n_layers = 57
|
||||
hidden_size = 1
|
||||
supports_tensor = false
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "exolabs/FLUX.1-dev-8bit"
|
||||
revision = "aff4dbb9efca28c2cb809b7814a92dc5de0533a4"
|
||||
n_layers = 57
|
||||
hidden_size = 1
|
||||
supports_tensor = false
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "exolabs/FLUX.1-dev"
|
||||
revision = "d5bf931a451025fd4e152be685d0e05f50324388"
|
||||
n_layers = 57
|
||||
hidden_size = 1
|
||||
supports_tensor = false
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "exolabs/FLUX.1-schnell-4bit"
|
||||
revision = "9eaa004ace32efb5b45b17f128d493ac614e8985"
|
||||
n_layers = 57
|
||||
hidden_size = 1
|
||||
supports_tensor = false
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "exolabs/FLUX.1-schnell-8bit"
|
||||
revision = "b829443fca09b0abcca3eb20821c8f54b307d119"
|
||||
n_layers = 57
|
||||
hidden_size = 1
|
||||
supports_tensor = false
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "exolabs/FLUX.1-schnell"
|
||||
revision = "aedc677102a335e26774bea593b317d64c908a83"
|
||||
n_layers = 57
|
||||
hidden_size = 1
|
||||
supports_tensor = false
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "exolabs/Qwen-Image-4bit"
|
||||
revision = "b38178287165b1b4ecc206a41f9a55f4423a21b5"
|
||||
n_layers = 60
|
||||
hidden_size = 1
|
||||
supports_tensor = false
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "exolabs/Qwen-Image-8bit"
|
||||
revision = "617d15c93a317f4b5cebb766d4638775d002b380"
|
||||
n_layers = 60
|
||||
hidden_size = 1
|
||||
supports_tensor = false
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "exolabs/Qwen-Image-Edit-2509-4bit"
|
||||
revision = "ac25e6566c3a94e52d988b017acde9d70945109a"
|
||||
n_layers = 60
|
||||
hidden_size = 1
|
||||
supports_tensor = false
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "exolabs/Qwen-Image-Edit-2509-8bit"
|
||||
revision = "463547f285f8b6e5496347724a2b39a6d514ca76"
|
||||
n_layers = 60
|
||||
hidden_size = 1
|
||||
supports_tensor = false
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "exolabs/Qwen-Image-Edit-2509"
|
||||
revision = "05027449c3ccee1b1c6be3ba85278ae683add9b5"
|
||||
n_layers = 60
|
||||
hidden_size = 1
|
||||
supports_tensor = false
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "exolabs/Qwen-Image"
|
||||
revision = "e7990ef5392a17dc917578b6f4e43aad9ae93e7a"
|
||||
n_layers = 60
|
||||
hidden_size = 1
|
||||
supports_tensor = false
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "mlx-community/DeepSeek-V3.1-4bit"
|
||||
revision = "main"
|
||||
n_layers = 61
|
||||
hidden_size = 7168
|
||||
supports_tensor = true
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "mlx-community/DeepSeek-V3.1-8bit"
|
||||
revision = "cd6c63546a6d33a8cc75158dc60d1746787306ac"
|
||||
n_layers = 61
|
||||
hidden_size = 7168
|
||||
supports_tensor = true
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "mlx-community/GLM-4.5-Air-8bit"
|
||||
revision = "ca44c769a97034e91466f2a524b6ef2c28eb3c1f"
|
||||
n_layers = 46
|
||||
hidden_size = 4096
|
||||
supports_tensor = false
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "mlx-community/GLM-4.5-Air-bf16"
|
||||
revision = "1753b3269c9e3cb62ba3cbaf0ab6433d69784592"
|
||||
n_layers = 46
|
||||
hidden_size = 4096
|
||||
supports_tensor = true
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "mlx-community/GLM-4.7-4bit"
|
||||
revision = "0e9f6c4babaef5d5fd04c9efc8770ef234b6d576"
|
||||
n_layers = 91
|
||||
hidden_size = 5120
|
||||
supports_tensor = true
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "mlx-community/GLM-4.7-6bit"
|
||||
revision = "025456be149d69c6b2805914dcc0e4aa6307caf9"
|
||||
n_layers = 91
|
||||
hidden_size = 5120
|
||||
supports_tensor = true
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "mlx-community/GLM-4.7-8bit-gs32"
|
||||
revision = "65b39750987b4230754dc0becc66e42b4c9da07b"
|
||||
n_layers = 91
|
||||
hidden_size = 5120
|
||||
supports_tensor = true
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "mlx-community/GLM-4.7-Flash-4bit"
|
||||
revision = "1454cffb1a21737e162f508e5bc70be9def89276"
|
||||
n_layers = 47
|
||||
hidden_size = 2048
|
||||
supports_tensor = true
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "mlx-community/GLM-4.7-Flash-5bit"
|
||||
revision = "4b35cbe614a5693f0d8978de8718efdbf06d5706"
|
||||
n_layers = 47
|
||||
hidden_size = 2048
|
||||
supports_tensor = true
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "mlx-community/GLM-4.7-Flash-6bit"
|
||||
revision = "6a4b4e620a3a7c7759227d8905cf8293ab28bc54"
|
||||
n_layers = 47
|
||||
hidden_size = 2048
|
||||
supports_tensor = true
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "mlx-community/GLM-4.7-Flash-8bit"
|
||||
revision = "b3a202c6df57f7297fb351486938952352dcd25a"
|
||||
n_layers = 47
|
||||
hidden_size = 2048
|
||||
supports_tensor = true
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "mlx-community/GLM-5-8bit-MXFP8"
|
||||
revision = "aa833c40d178262d4ac8b92965807ef988e9340e"
|
||||
n_layers = 78
|
||||
hidden_size = 6144
|
||||
supports_tensor = true
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "mlx-community/GLM-5-MXFP4-Q8"
|
||||
revision = "41c00f3f30615c0759475497e919c1b515b3cdc0"
|
||||
n_layers = 78
|
||||
hidden_size = 6144
|
||||
supports_tensor = true
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "mlx-community/GLM-5"
|
||||
revision = "27d3ffdf48b063d00f0c3d49f9f8fab09609c275"
|
||||
n_layers = 78
|
||||
hidden_size = 6144
|
||||
supports_tensor = true
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "mlx-community/Kimi-K2-Instruct-4bit"
|
||||
revision = "91fb4f9fd1de100104925196d62b8ee06fd2ad60"
|
||||
n_layers = 61
|
||||
hidden_size = 7168
|
||||
supports_tensor = true
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "mlx-community/Kimi-K2-Thinking"
|
||||
revision = "035a0cdd221ae0dca6b03120e20704a251a7bc9b"
|
||||
n_layers = 61
|
||||
hidden_size = 7168
|
||||
supports_tensor = true
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "mlx-community/Kimi-K2.5"
|
||||
revision = "351021afd838c866ce1a7374fce51d615773d2a8"
|
||||
n_layers = 61
|
||||
hidden_size = 7168
|
||||
supports_tensor = true
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "mlx-community/Llama-3.2-1B-Instruct-4bit"
|
||||
revision = "08231374eeacb049a0eade7922910865b8fce912"
|
||||
n_layers = 16
|
||||
hidden_size = 2048
|
||||
supports_tensor = true
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "mlx-community/Llama-3.2-3B-Instruct-4bit"
|
||||
revision = "7f0dc925e0d0afb0322d96f9255cfddf2ba5636e"
|
||||
n_layers = 28
|
||||
hidden_size = 3072
|
||||
supports_tensor = true
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "mlx-community/Llama-3.2-3B-Instruct-8bit"
|
||||
revision = "ff054899609078569493def2823f9acd2780c0c9"
|
||||
n_layers = 28
|
||||
hidden_size = 3072
|
||||
supports_tensor = true
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "mlx-community/Llama-3.3-70B-Instruct-4bit"
|
||||
revision = "de2dfaf56839b7d0e834157d2401dee02726874d"
|
||||
n_layers = 80
|
||||
hidden_size = 8192
|
||||
supports_tensor = true
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "mlx-community/Llama-3.3-70B-Instruct-8bit"
|
||||
revision = "c5bfd839cd4cda0e5a39a97e00218d9c56e468af"
|
||||
n_layers = 80
|
||||
hidden_size = 8192
|
||||
supports_tensor = true
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "mlx-community/Meta-Llama-3.1-70B-Instruct-4bit"
|
||||
revision = "7772c93cf077b642f5503dd8d763a4176d7d406c"
|
||||
n_layers = 80
|
||||
hidden_size = 8192
|
||||
supports_tensor = true
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "mlx-community/Meta-Llama-3.1-8B-Instruct-4bit"
|
||||
revision = "241a666dad6cb93c8ff213d39a7f34a36bf26db4"
|
||||
n_layers = 32
|
||||
hidden_size = 4096
|
||||
supports_tensor = true
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "mlx-community/Meta-Llama-3.1-8B-Instruct-8bit"
|
||||
revision = "142d428004044c37c441272c91316251d9aecc58"
|
||||
n_layers = 32
|
||||
hidden_size = 4096
|
||||
supports_tensor = true
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "mlx-community/Meta-Llama-3.1-8B-Instruct-bf16"
|
||||
revision = "f8311090f9ee47782b6f094984a20c856eb841d6"
|
||||
n_layers = 32
|
||||
hidden_size = 4096
|
||||
supports_tensor = true
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "mlx-community/MiniMax-M2.1-3bit"
|
||||
revision = "472cd920149fc1200e6ef2a2efc35db91cf44111"
|
||||
n_layers = 61
|
||||
hidden_size = 3072
|
||||
supports_tensor = true
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "mlx-community/MiniMax-M2.1-8bit"
|
||||
revision = "3d779130c25f54aa9198da1c845d844de7acc086"
|
||||
n_layers = 61
|
||||
hidden_size = 3072
|
||||
supports_tensor = true
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "mlx-community/MiniMax-M2.5-4bit"
|
||||
revision = "36fb6facb4697ac2e6c4e88b600cd8601fb62f08"
|
||||
n_layers = 62
|
||||
hidden_size = 3072
|
||||
supports_tensor = true
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "mlx-community/MiniMax-M2.5-6bit"
|
||||
revision = "6294b58e9eff340c3556dc8aa3ed688e9dc428f8"
|
||||
n_layers = 62
|
||||
hidden_size = 3072
|
||||
supports_tensor = true
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "mlx-community/MiniMax-M2.5-8bit"
|
||||
revision = "26af8b335da2017182616067c9342940a1c1ae73"
|
||||
n_layers = 62
|
||||
hidden_size = 3072
|
||||
supports_tensor = true
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "mlx-community/Qwen3-0.6B-4bit"
|
||||
revision = "73e3e38d981303bc594367cd910ea6eb48349da8"
|
||||
n_layers = 28
|
||||
hidden_size = 1024
|
||||
supports_tensor = false
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "mlx-community/Qwen3-0.6B-8bit"
|
||||
revision = "11de96878523501bcaa86104e3c186de07ff9068"
|
||||
n_layers = 28
|
||||
hidden_size = 1024
|
||||
supports_tensor = false
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "mlx-community/Qwen3-235B-A22B-Instruct-2507-4bit"
|
||||
revision = "4dbf8a62338880825560dff3f58f2e9f0c56210f"
|
||||
n_layers = 94
|
||||
hidden_size = 4096
|
||||
supports_tensor = true
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "mlx-community/Qwen3-235B-A22B-Instruct-2507-8bit"
|
||||
revision = "97042893088decff8468f7729c1076dcad2f251b"
|
||||
n_layers = 94
|
||||
hidden_size = 4096
|
||||
supports_tensor = true
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "mlx-community/Qwen3-30B-A3B-4bit"
|
||||
revision = "d388dead1515f5e085ef7a0431dd8fadf0886c57"
|
||||
n_layers = 48
|
||||
hidden_size = 2048
|
||||
supports_tensor = true
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "mlx-community/Qwen3-30B-A3B-8bit"
|
||||
revision = "7d5b2e500d961076e3c16d6bf957b9c36783b0f5"
|
||||
n_layers = 48
|
||||
hidden_size = 2048
|
||||
supports_tensor = true
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "mlx-community/Qwen3-Coder-480B-A35B-Instruct-4bit"
|
||||
revision = "ca8dbf41071f579fbe3260f20bbe1ab896f79031"
|
||||
n_layers = 62
|
||||
hidden_size = 6144
|
||||
supports_tensor = true
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "mlx-community/Qwen3-Coder-480B-A35B-Instruct-8bit"
|
||||
revision = "b4b2d06d678ac2819da4c41618a36a2dc8eeec03"
|
||||
n_layers = 62
|
||||
hidden_size = 6144
|
||||
supports_tensor = true
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "mlx-community/Qwen3-Coder-Next-4bit"
|
||||
revision = "7b9321eabb85ce79625cac3f61ea691e4ea984b5"
|
||||
n_layers = 48
|
||||
hidden_size = 2048
|
||||
supports_tensor = true
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "mlx-community/Qwen3-Coder-Next-5bit"
|
||||
revision = "1f3e27b1c376095ebf88a8037807c92784c25d66"
|
||||
n_layers = 48
|
||||
hidden_size = 2048
|
||||
supports_tensor = true
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "mlx-community/Qwen3-Coder-Next-6bit"
|
||||
revision = "9d12cc36cc6c386ffd04f7c8f0de6ccb29c5927e"
|
||||
n_layers = 48
|
||||
hidden_size = 2048
|
||||
supports_tensor = true
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "mlx-community/Qwen3-Coder-Next-8bit"
|
||||
revision = "6d3c664dc8539a711783391484fd6784c51fd8fa"
|
||||
n_layers = 48
|
||||
hidden_size = 2048
|
||||
supports_tensor = true
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "mlx-community/Qwen3-Coder-Next-bf16"
|
||||
revision = "83d523e8883faaea659705840ce3560472286d08"
|
||||
n_layers = 48
|
||||
hidden_size = 2048
|
||||
supports_tensor = true
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "mlx-community/Qwen3-Next-80B-A3B-Instruct-4bit"
|
||||
revision = "d8a069bfa8ae87d3d468412e1034acae19b5892b"
|
||||
n_layers = 48
|
||||
hidden_size = 2048
|
||||
supports_tensor = true
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "mlx-community/Qwen3-Next-80B-A3B-Instruct-8bit"
|
||||
revision = "fd52af0cc2a4a37b60904c4b0251255aa7d3dda2"
|
||||
n_layers = 48
|
||||
hidden_size = 2048
|
||||
supports_tensor = true
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "mlx-community/Qwen3-Next-80B-A3B-Thinking-4bit"
|
||||
revision = "9a2b46347bb170cb2924092175fa21554fe585a9"
|
||||
n_layers = 48
|
||||
hidden_size = 2048
|
||||
supports_tensor = true
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "mlx-community/Qwen3-Next-80B-A3B-Thinking-8bit"
|
||||
revision = "d093dbe8233828ca0cc420f75466133c542a1e96"
|
||||
n_layers = 48
|
||||
hidden_size = 2048
|
||||
supports_tensor = true
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "mlx-community/Step-3.5-Flash-4bit"
|
||||
revision = "caad23b0411c27d08aa3967822e61e5efcdd175c"
|
||||
n_layers = 45
|
||||
hidden_size = 4096
|
||||
supports_tensor = true
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "mlx-community/Step-3.5-Flash-6bit"
|
||||
revision = "78bbe35b6d9f0a4fa6425416aedea6021e4c40ec"
|
||||
n_layers = 45
|
||||
hidden_size = 4096
|
||||
supports_tensor = true
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "mlx-community/Step-3.5-Flash-8Bit"
|
||||
revision = "0db40758dfe577e0ce383c3562226c2fbeac1d8e"
|
||||
n_layers = 45
|
||||
hidden_size = 4096
|
||||
supports_tensor = true
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "mlx-community/gpt-oss-120b-MXFP4-Q8"
|
||||
revision = "81e5ac3ad0af6efb1298a8e8c7a10ed2990c137b"
|
||||
n_layers = 36
|
||||
hidden_size = 2880
|
||||
supports_tensor = true
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "mlx-community/gpt-oss-20b-MXFP4-Q8"
|
||||
revision = "9f9d50e7b3418526519c2e21306d1c381e9181b2"
|
||||
n_layers = 24
|
||||
hidden_size = 2880
|
||||
supports_tensor = true
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
model_id = "mlx-community/llama-3.3-70b-instruct-fp16"
|
||||
revision = "8103891b028a8933068e47751bc2acc10bb59aa2"
|
||||
n_layers = 80
|
||||
hidden_size = 8192
|
||||
supports_tensor = true
|
||||
|
||||
@@ -696,7 +696,8 @@ async def resolve_allow_patterns(shard: ShardMetadata) -> list[str]:
|
||||
# (iii) Tensor parallel requires all files.
|
||||
return ["*"]
|
||||
try:
|
||||
weight_map = await get_weight_map(str(shard.model_card.model_id))
|
||||
revision = shard.model_card.revision or "main"
|
||||
weight_map = await get_weight_map(str(shard.model_card.model_id), revision)
|
||||
return get_allow_patterns(weight_map, shard)
|
||||
except Exception:
|
||||
logger.error(f"Error getting weight map for {shard.model_card.model_id=}")
|
||||
@@ -730,7 +731,7 @@ async def download_shard(
|
||||
if not skip_download:
|
||||
logger.debug(f"Downloading {shard.model_card.model_id=}")
|
||||
|
||||
revision = "main"
|
||||
revision = shard.model_card.revision or "main"
|
||||
target_dir = await ensure_models_dir() / str(shard.model_card.model_id).replace(
|
||||
"/", "--"
|
||||
)
|
||||
|
||||
@@ -5,7 +5,7 @@ import aiofiles
|
||||
import aiofiles.os as aios
|
||||
import tomlkit
|
||||
from anyio import Path, open_file
|
||||
from huggingface_hub import model_info
|
||||
from huggingface_hub import model_info, repo_info
|
||||
from loguru import logger
|
||||
from pydantic import (
|
||||
AliasChoices,
|
||||
@@ -79,6 +79,7 @@ class ComponentInfo(CamelCaseModel):
|
||||
|
||||
class ModelCard(CamelCaseModel):
|
||||
model_id: ModelId
|
||||
revision: str | None = None
|
||||
storage_size: Memory
|
||||
n_layers: PositiveInt
|
||||
hidden_size: PositiveInt
|
||||
@@ -127,12 +128,17 @@ class ModelCard(CamelCaseModel):
|
||||
async def fetch_from_hf(model_id: ModelId) -> "ModelCard":
|
||||
"""Fetches storage size and number of layers for a Hugging Face model, returns Pydantic ModelMeta."""
|
||||
# TODO: failure if files do not exist
|
||||
config_data = await fetch_config_data(model_id)
|
||||
# Fetch repo info to get the latest commit SHA
|
||||
repo = repo_info(model_id, repo_type="model")
|
||||
revision = repo.sha
|
||||
|
||||
config_data = await fetch_config_data(model_id, revision)
|
||||
num_layers = config_data.layer_count
|
||||
mem_size_bytes = await fetch_safetensors_size(model_id)
|
||||
mem_size_bytes = await fetch_safetensors_size(model_id, revision)
|
||||
|
||||
mc = ModelCard(
|
||||
model_id=ModelId(model_id),
|
||||
revision=revision,
|
||||
storage_size=mem_size_bytes,
|
||||
n_layers=num_layers,
|
||||
hidden_size=config_data.hidden_size or 0,
|
||||
@@ -219,7 +225,9 @@ class ConfigData(BaseModel):
|
||||
return data
|
||||
|
||||
|
||||
async def fetch_config_data(model_id: ModelId) -> ConfigData:
|
||||
async def fetch_config_data(
|
||||
model_id: ModelId, revision: str | None = None
|
||||
) -> ConfigData:
|
||||
"""Downloads and parses config.json for a model."""
|
||||
from exo.download.download_utils import (
|
||||
download_file_with_retry,
|
||||
@@ -230,7 +238,7 @@ async def fetch_config_data(model_id: ModelId) -> ConfigData:
|
||||
await aios.makedirs(target_dir, exist_ok=True)
|
||||
config_path = await download_file_with_retry(
|
||||
model_id,
|
||||
"main",
|
||||
revision or "main",
|
||||
"config.json",
|
||||
target_dir,
|
||||
lambda curr_bytes, total_bytes, is_renamed: logger.debug(
|
||||
@@ -241,7 +249,9 @@ async def fetch_config_data(model_id: ModelId) -> ConfigData:
|
||||
return ConfigData.model_validate_json(await f.read())
|
||||
|
||||
|
||||
async def fetch_safetensors_size(model_id: ModelId) -> Memory:
|
||||
async def fetch_safetensors_size(
|
||||
model_id: ModelId, revision: str | None = None
|
||||
) -> Memory:
|
||||
"""Gets model size from safetensors index or falls back to HF API."""
|
||||
from exo.download.download_utils import (
|
||||
download_file_with_retry,
|
||||
@@ -253,7 +263,7 @@ async def fetch_safetensors_size(model_id: ModelId) -> Memory:
|
||||
await aios.makedirs(target_dir, exist_ok=True)
|
||||
index_path = await download_file_with_retry(
|
||||
model_id,
|
||||
"main",
|
||||
revision or "main",
|
||||
"model.safetensors.index.json",
|
||||
target_dir,
|
||||
lambda curr_bytes, total_bytes, is_renamed: logger.debug(
|
||||
|
||||
Reference in New Issue
Block a user