mirror of
https://github.com/exo-explore/exo.git
synced 2025-12-23 22:27:50 -05:00
kimi k2 thinking
This commit is contained in:
33
.github/benchmark-dashboard/index.html
vendored
33
.github/benchmark-dashboard/index.html
vendored
@@ -586,8 +586,37 @@
|
|||||||
const modelIds = cluster.model_ids || ['unknown'];
|
const modelIds = cluster.model_ids || ['unknown'];
|
||||||
const modelName = modelIds.length === 1 ? modelIds[0] : `${modelIds.length} models`;
|
const modelName = modelIds.length === 1 ? modelIds[0] : `${modelIds.length} models`;
|
||||||
|
|
||||||
// Get strategy (default to 'N/A' if not specified)
|
// Get strategy (backwards compatible with old format)
|
||||||
const strategy = cluster.strategy || 'N/A';
|
// New format: sharding + instance_meta, e.g. "Pipeline (MLX Ring)"
|
||||||
|
// Old format: strategy field
|
||||||
|
let strategy = 'N/A';
|
||||||
|
if (cluster.strategy) {
|
||||||
|
// Backwards compatibility: use old strategy field
|
||||||
|
strategy = cluster.strategy;
|
||||||
|
} else if (cluster.sharding || cluster.instance_meta) {
|
||||||
|
// New format: combine sharding and instance_meta
|
||||||
|
const sharding = cluster.sharding || '';
|
||||||
|
const instanceMeta = cluster.instance_meta || '';
|
||||||
|
|
||||||
|
// Format instance_meta: convert camelCase/PascalCase to readable format
|
||||||
|
const formatInstanceMeta = (meta) => {
|
||||||
|
if (!meta) return '';
|
||||||
|
// Insert spaces before capital letters and handle common acronyms
|
||||||
|
return meta
|
||||||
|
.replace(/([A-Z])/g, ' $1')
|
||||||
|
.trim()
|
||||||
|
.replace(/\bMlx\b/g, 'MLX')
|
||||||
|
.replace(/\bIbv\b/g, 'IBV');
|
||||||
|
};
|
||||||
|
|
||||||
|
if (sharding && instanceMeta) {
|
||||||
|
strategy = `${sharding} (${formatInstanceMeta(instanceMeta)})`;
|
||||||
|
} else if (sharding) {
|
||||||
|
strategy = sharding;
|
||||||
|
} else if (instanceMeta) {
|
||||||
|
strategy = formatInstanceMeta(instanceMeta);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// For each stage in the configuration, create a row
|
// For each stage in the configuration, create a row
|
||||||
stages.forEach((stageConfig, stageIdx) => {
|
stages.forEach((stageConfig, stageIdx) => {
|
||||||
|
|||||||
79
.github/configs/bench_simple.yaml
vendored
79
.github/configs/bench_simple.yaml
vendored
@@ -4,7 +4,7 @@
|
|||||||
# Hardware configuration - maps runner labels to instance counts
|
# Hardware configuration - maps runner labels to instance counts
|
||||||
hardware_plan:
|
hardware_plan:
|
||||||
puffin4: 1
|
puffin4: 1
|
||||||
# puffin8: 1
|
puffin8: 1
|
||||||
|
|
||||||
# Environment variables to set on each node
|
# Environment variables to set on each node
|
||||||
environment:
|
environment:
|
||||||
@@ -18,14 +18,15 @@ timeout_seconds: 1800
|
|||||||
# Model instances to run concurrently
|
# Model instances to run concurrently
|
||||||
model_ids:
|
model_ids:
|
||||||
# - "mlx-community/DeepSeek-V3.1-8bit"
|
# - "mlx-community/DeepSeek-V3.1-8bit"
|
||||||
- "mlx-community/Kimi-K2-Instruct-4bit"
|
# - "mlx-community/Kimi-K2-Instruct-4bit"
|
||||||
|
- "mlx-community/Kimi-K2-Thinking"
|
||||||
# - "mlx-community/Qwen3-235B-A22B-4bit"
|
# - "mlx-community/Qwen3-235B-A22B-4bit"
|
||||||
# - "mlx-community/Llama-3.3-70B-Instruct-4bit"
|
# - "mlx-community/Llama-3.3-70B-Instruct-4bit"
|
||||||
# - "mlx-community/Llama-3.3-70B-Instruct-8bit"
|
# - "mlx-community/Llama-3.3-70B-Instruct-8bit"
|
||||||
# - "mlx-community/Llama-3.2-1B-Instruct-4bit"
|
# - "mlx-community/Llama-3.2-1B-Instruct-4bit"
|
||||||
|
|
||||||
# Sharding strategy: "Pipeline" or "Tensor"
|
# Sharding strategy: "Pipeline" or "Tensor"
|
||||||
sharding: "Tensor"
|
sharding: "Pipeline"
|
||||||
|
|
||||||
# Instance type: "MlxRing" or "MlxIbv"
|
# Instance type: "MlxRing" or "MlxIbv"
|
||||||
instance_meta: "MlxIbv"
|
instance_meta: "MlxIbv"
|
||||||
@@ -46,62 +47,62 @@ stages:
|
|||||||
prompt_length: 64
|
prompt_length: 64
|
||||||
generation_length: 64
|
generation_length: 64
|
||||||
time_between_requests: 2.0
|
time_between_requests: 2.0
|
||||||
iterations: 10
|
iterations: 5
|
||||||
- name: "pp64_g512"
|
# - name: "pp64_g512"
|
||||||
prompt_length: 64
|
# prompt_length: 64
|
||||||
generation_length: 512
|
# generation_length: 512
|
||||||
time_between_requests: 2.0
|
# time_between_requests: 2.0
|
||||||
iterations: 10
|
# iterations: 10
|
||||||
- name: "pp256_g64"
|
- name: "pp256_g64"
|
||||||
prompt_length: 256
|
prompt_length: 256
|
||||||
generation_length: 64
|
generation_length: 64
|
||||||
time_between_requests: 2.0
|
time_between_requests: 2.0
|
||||||
iterations: 10
|
iterations: 5
|
||||||
- name: "pp256_g512"
|
# - name: "pp256_g512"
|
||||||
prompt_length: 256
|
# prompt_length: 256
|
||||||
generation_length: 512
|
# generation_length: 512
|
||||||
time_between_requests: 2.0
|
# time_between_requests: 2.0
|
||||||
iterations: 10
|
# iterations: 10
|
||||||
- name: "pp1024_g64"
|
- name: "pp1024_g64"
|
||||||
prompt_length: 1024
|
prompt_length: 1024
|
||||||
generation_length: 64
|
generation_length: 64
|
||||||
time_between_requests: 2.0
|
time_between_requests: 2.0
|
||||||
iterations: 10
|
iterations: 5
|
||||||
- name: "pp1024_g512"
|
# - name: "pp1024_g512"
|
||||||
prompt_length: 1024
|
# prompt_length: 1024
|
||||||
generation_length: 512
|
# generation_length: 512
|
||||||
time_between_requests: 2.0
|
# time_between_requests: 2.0
|
||||||
iterations: 10
|
# iterations: 10
|
||||||
- name: "pp2048_g64"
|
- name: "pp2048_g64"
|
||||||
prompt_length: 2048
|
prompt_length: 2048
|
||||||
generation_length: 64
|
generation_length: 64
|
||||||
time_between_requests: 2.0
|
time_between_requests: 2.0
|
||||||
iterations: 10
|
iterations: 5
|
||||||
- name: "pp2048_g512"
|
# - name: "pp2048_g512"
|
||||||
prompt_length: 2048
|
# prompt_length: 2048
|
||||||
generation_length: 512
|
# generation_length: 512
|
||||||
time_between_requests: 2.0
|
# time_between_requests: 2.0
|
||||||
iterations: 10
|
# iterations: 10
|
||||||
- name: "pp4096_g64"
|
- name: "pp4096_g64"
|
||||||
prompt_length: 4096
|
prompt_length: 4096
|
||||||
generation_length: 64
|
generation_length: 64
|
||||||
time_between_requests: 2.0
|
time_between_requests: 2.0
|
||||||
iterations: 10
|
iterations: 5
|
||||||
- name: "pp4096_g512"
|
# - name: "pp4096_g512"
|
||||||
prompt_length: 4096
|
# prompt_length: 4096
|
||||||
generation_length: 512
|
# generation_length: 512
|
||||||
time_between_requests: 2.0
|
# time_between_requests: 2.0
|
||||||
iterations: 10
|
# iterations: 10
|
||||||
- name: "pp8192_g64"
|
- name: "pp8192_g64"
|
||||||
prompt_length: 8192
|
prompt_length: 8192
|
||||||
generation_length: 64
|
generation_length: 64
|
||||||
time_between_requests: 2.0
|
time_between_requests: 2.0
|
||||||
iterations: 10
|
iterations: 5
|
||||||
- name: "pp8192_g512"
|
# - name: "pp8192_g512"
|
||||||
prompt_length: 8192
|
# prompt_length: 8192
|
||||||
generation_length: 512
|
# generation_length: 512
|
||||||
time_between_requests: 2.0
|
# time_between_requests: 2.0
|
||||||
iterations: 10
|
# iterations: 10
|
||||||
# - name: "pp16384_g64"
|
# - name: "pp16384_g64"
|
||||||
# prompt_length: 16384
|
# prompt_length: 16384
|
||||||
# generation_length: 64
|
# generation_length: 64
|
||||||
|
|||||||
1
TODO.md
1
TODO.md
@@ -19,6 +19,7 @@
|
|||||||
21. Make two separate things: tensor or pipeline, and ring or ibv.
|
21. Make two separate things: tensor or pipeline, and ring or ibv.
|
||||||
22. When downloading for the first time, stuff times out and I think the model never ends up actually loading into memory, or something.
|
22. When downloading for the first time, stuff times out and I think the model never ends up actually loading into memory, or something.
|
||||||
23. Do we need cache_limit? We went back and forth on this a lot because we thought it might be causing issues. One problem is that it is set relative to model size, so if multiple models are loaded, cache_limit is taken from the most recently loaded model's size. This is problematic if you launch DeepSeek -> Llama, for example.
|
23. Do we need cache_limit? We went back and forth on this a lot because we thought it might be causing issues. One problem is that it is set relative to model size, so if multiple models are loaded, cache_limit is taken from the most recently loaded model's size. This is problematic if you launch DeepSeek -> Llama, for example.
|
||||||
|
24. Task cancellation. When an API HTTP request is cancelled, the corresponding task should be cancelled too.
|
||||||
|
|
||||||
Potential refactors:
|
Potential refactors:
|
||||||
|
|
||||||
|
|||||||
@@ -36,6 +36,7 @@ dependencies = [
|
|||||||
"bidict>=0.23.1",
|
"bidict>=0.23.1",
|
||||||
"mlx>=0.29.3",
|
"mlx>=0.29.3",
|
||||||
"mlx-lm>=0.28.3",
|
"mlx-lm>=0.28.3",
|
||||||
|
"tiktoken>=0.12.0", # required for kimi k2 tokenizer
|
||||||
]
|
]
|
||||||
|
|
||||||
[project.scripts]
|
[project.scripts]
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ from functools import partial
|
|||||||
from inspect import signature
|
from inspect import signature
|
||||||
from typing import TYPE_CHECKING, Callable, Protocol, cast, override
|
from typing import TYPE_CHECKING, Callable, Protocol, cast, override
|
||||||
|
|
||||||
from mlx_lm.models.cache import KVCache
|
from mlx_lm.models.cache import KVCache, RotatingKVCache
|
||||||
from mlx_lm.models.deepseek_v3 import DeepseekV3MLP
|
from mlx_lm.models.deepseek_v3 import DeepseekV3MLP
|
||||||
from mlx_lm.models.deepseek_v3 import Model as DeepseekV3Model
|
from mlx_lm.models.deepseek_v3 import Model as DeepseekV3Model
|
||||||
from mlx_lm.models.llama import Model as LlamaModel
|
from mlx_lm.models.llama import Model as LlamaModel
|
||||||
@@ -92,7 +92,7 @@ class PipelineLastLayer(CustomMlxLayer):
|
|||||||
|
|
||||||
cache = self.original_layer_signature.bind_partial(x, *args, **kwargs).arguments.get("cache", None)
|
cache = self.original_layer_signature.bind_partial(x, *args, **kwargs).arguments.get("cache", None)
|
||||||
|
|
||||||
assert cache is None or isinstance(cache, KVCache)
|
assert cache is None or isinstance(cache, (KVCache, RotatingKVCache))
|
||||||
|
|
||||||
output: mx.array = self.original_layer(x, *args, **kwargs)
|
output: mx.array = self.original_layer(x, *args, **kwargs)
|
||||||
|
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ import os
|
|||||||
import resource
|
import resource
|
||||||
from typing import Any, Callable, cast
|
from typing import Any, Callable, cast
|
||||||
|
|
||||||
from mlx_lm.models.cache import KVCache
|
from mlx_lm.models.cache import KVCache, RotatingKVCache
|
||||||
from mlx_lm.sample_utils import make_sampler
|
from mlx_lm.sample_utils import make_sampler
|
||||||
from mlx_lm.tokenizer_utils import TokenizerWrapper
|
from mlx_lm.tokenizer_utils import TokenizerWrapper
|
||||||
|
|
||||||
@@ -254,9 +254,14 @@ class NullKVCache(KVCache):
|
|||||||
def make_kv_cache(
|
def make_kv_cache(
|
||||||
model: Model,
|
model: Model,
|
||||||
max_kv_size: int | None = None,
|
max_kv_size: int | None = None,
|
||||||
) -> list[KVCache]:
|
) -> list[KVCache | RotatingKVCache]:
|
||||||
assert hasattr(model, "layers")
|
assert hasattr(model, "layers")
|
||||||
return [KVCache() for _ in model.layers]
|
if max_kv_size is None:
|
||||||
|
logger.info("Using default KV cache")
|
||||||
|
return [KVCache() for _ in model.layers]
|
||||||
|
else:
|
||||||
|
logger.info(f"Using rotating KV cache with {max_kv_size=}")
|
||||||
|
return [RotatingKVCache(max_size=max_kv_size) for _ in model.layers]
|
||||||
|
|
||||||
|
|
||||||
def mlx_force_oom(size: int = 40000) -> None:
|
def mlx_force_oom(size: int = 40000) -> None:
|
||||||
|
|||||||
@@ -215,7 +215,7 @@ class API:
|
|||||||
while not finished:
|
while not finished:
|
||||||
# TODO: how long should this timeout be?
|
# TODO: how long should this timeout be?
|
||||||
chunk = await asyncio.wait_for(
|
chunk = await asyncio.wait_for(
|
||||||
self._chat_completion_queues[command_id].get(), timeout=60
|
self._chat_completion_queues[command_id].get(), timeout=600
|
||||||
)
|
)
|
||||||
assert isinstance(chunk, TokenChunk)
|
assert isinstance(chunk, TokenChunk)
|
||||||
chunk_response: ChatCompletionResponse = chunk_to_response(
|
chunk_response: ChatCompletionResponse = chunk_to_response(
|
||||||
|
|||||||
@@ -93,6 +93,7 @@ MODEL_CARDS: dict[str, ModelCard] = {
|
|||||||
n_layers=61,
|
n_layers=61,
|
||||||
),
|
),
|
||||||
),
|
),
|
||||||
|
# kimi k2
|
||||||
"kimi-k2-instruct-4bit": ModelCard(
|
"kimi-k2-instruct-4bit": ModelCard(
|
||||||
short_id="kimi-k2-instruct-4bit",
|
short_id="kimi-k2-instruct-4bit",
|
||||||
model_id="mlx-community/Kimi-K2-Instruct-4bit",
|
model_id="mlx-community/Kimi-K2-Instruct-4bit",
|
||||||
@@ -106,6 +107,19 @@ MODEL_CARDS: dict[str, ModelCard] = {
|
|||||||
n_layers=61,
|
n_layers=61,
|
||||||
),
|
),
|
||||||
),
|
),
|
||||||
|
"kimi-k2-thinking": ModelCard(
|
||||||
|
short_id="kimi-k2-thinking",
|
||||||
|
model_id="mlx-community/Kimi-K2-Thinking",
|
||||||
|
name="Kimi K2 Thinking",
|
||||||
|
description="""Kimi K2 Thinking is the latest, most capable version of open-source thinking model.""",
|
||||||
|
tags=[],
|
||||||
|
metadata=ModelMetadata(
|
||||||
|
model_id=ModelId("mlx-community/Kimi-K2-Thinking"),
|
||||||
|
pretty_name="Kimi K2 Thinking",
|
||||||
|
storage_size=Memory.from_bytes(577597603840),
|
||||||
|
n_layers=61,
|
||||||
|
),
|
||||||
|
),
|
||||||
# llama-3.1
|
# llama-3.1
|
||||||
"llama-3.1-8b": ModelCard(
|
"llama-3.1-8b": ModelCard(
|
||||||
short_id="llama-3.1-8b",
|
short_id="llama-3.1-8b",
|
||||||
|
|||||||
40
uv.lock
generated
40
uv.lock
generated
@@ -1,5 +1,5 @@
|
|||||||
version = 1
|
version = 1
|
||||||
revision = 3
|
revision = 1
|
||||||
requires-python = ">=3.13"
|
requires-python = ">=3.13"
|
||||||
resolution-markers = [
|
resolution-markers = [
|
||||||
"sys_platform == 'darwin'",
|
"sys_platform == 'darwin'",
|
||||||
@@ -361,6 +361,7 @@ dependencies = [
|
|||||||
{ name = "sqlalchemy", extra = ["asyncio"], marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
{ name = "sqlalchemy", extra = ["asyncio"], marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||||
{ name = "sqlmodel", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
{ name = "sqlmodel", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||||
{ name = "textual", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
{ name = "textual", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||||
|
{ name = "tiktoken", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||||
{ name = "transformers", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
{ name = "transformers", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||||
{ name = "typeguard", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
{ name = "typeguard", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||||
{ name = "types-aiofiles", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
{ name = "types-aiofiles", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||||
@@ -403,6 +404,7 @@ requires-dist = [
|
|||||||
{ name = "sqlalchemy", extras = ["asyncio"], specifier = ">=2.0.43" },
|
{ name = "sqlalchemy", extras = ["asyncio"], specifier = ">=2.0.43" },
|
||||||
{ name = "sqlmodel", specifier = ">=0.0.24" },
|
{ name = "sqlmodel", specifier = ">=0.0.24" },
|
||||||
{ name = "textual", specifier = ">=5.3.0" },
|
{ name = "textual", specifier = ">=5.3.0" },
|
||||||
|
{ name = "tiktoken", specifier = ">=0.12.0" },
|
||||||
{ name = "transformers", specifier = ">=4.55.2" },
|
{ name = "transformers", specifier = ">=4.55.2" },
|
||||||
{ name = "typeguard", specifier = ">=4.4.4" },
|
{ name = "typeguard", specifier = ">=4.4.4" },
|
||||||
{ name = "types-aiofiles", specifier = ">=24.1.0.20250708" },
|
{ name = "types-aiofiles", specifier = ">=24.1.0.20250708" },
|
||||||
@@ -1458,6 +1460,42 @@ wheels = [
|
|||||||
{ url = "https://files.pythonhosted.org/packages/42/37/1deba011782a49ea249c73adcf703a39b0249ac9b0e17d1a2e4074df8d57/textual-6.5.0-py3-none-any.whl", hash = "sha256:c5505be7fe606b8054fb88431279885f88352bddca64832f6acd293ef7d9b54f", size = 711848, upload-time = "2025-10-31T17:21:51.134Z" },
|
{ url = "https://files.pythonhosted.org/packages/42/37/1deba011782a49ea249c73adcf703a39b0249ac9b0e17d1a2e4074df8d57/textual-6.5.0-py3-none-any.whl", hash = "sha256:c5505be7fe606b8054fb88431279885f88352bddca64832f6acd293ef7d9b54f", size = 711848, upload-time = "2025-10-31T17:21:51.134Z" },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "tiktoken"
|
||||||
|
version = "0.12.0"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
dependencies = [
|
||||||
|
{ name = "regex", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||||
|
{ name = "requests", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" },
|
||||||
|
]
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/7d/ab/4d017d0f76ec3171d469d80fc03dfbb4e48a4bcaddaa831b31d526f05edc/tiktoken-0.12.0.tar.gz", hash = "sha256:b18ba7ee2b093863978fcb14f74b3707cdc8d4d4d3836853ce7ec60772139931", size = 37806 }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/00/61/441588ee21e6b5cdf59d6870f86beb9789e532ee9718c251b391b70c68d6/tiktoken-0.12.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:775c2c55de2310cc1bc9a3ad8826761cbdc87770e586fd7b6da7d4589e13dab3", size = 1050802 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/1f/05/dcf94486d5c5c8d34496abe271ac76c5b785507c8eae71b3708f1ad9b45a/tiktoken-0.12.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a01b12f69052fbe4b080a2cfb867c4de12c704b56178edf1d1d7b273561db160", size = 993995 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/a0/70/5163fe5359b943f8db9946b62f19be2305de8c3d78a16f629d4165e2f40e/tiktoken-0.12.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:01d99484dc93b129cd0964f9d34eee953f2737301f18b3c7257bf368d7615baa", size = 1128948 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/0c/da/c028aa0babf77315e1cef357d4d768800c5f8a6de04d0eac0f377cb619fa/tiktoken-0.12.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:4a1a4fcd021f022bfc81904a911d3df0f6543b9e7627b51411da75ff2fe7a1be", size = 1151986 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/a0/5a/886b108b766aa53e295f7216b509be95eb7d60b166049ce2c58416b25f2a/tiktoken-0.12.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:981a81e39812d57031efdc9ec59fa32b2a5a5524d20d4776574c4b4bd2e9014a", size = 1194222 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/f4/f8/4db272048397636ac7a078d22773dd2795b1becee7bc4922fe6207288d57/tiktoken-0.12.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9baf52f84a3f42eef3ff4e754a0db79a13a27921b457ca9832cf944c6be4f8f3", size = 1255097 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/ce/76/994fc868f88e016e6d05b0da5ac24582a14c47893f4474c3e9744283f1d5/tiktoken-0.12.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:d5f89ea5680066b68bcb797ae85219c72916c922ef0fcdd3480c7d2315ffff16", size = 1050309 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/f6/b8/57ef1456504c43a849821920d582a738a461b76a047f352f18c0b26c6516/tiktoken-0.12.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b4e7ed1c6a7a8a60a3230965bdedba8cc58f68926b835e519341413370e0399a", size = 993712 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/72/90/13da56f664286ffbae9dbcfadcc625439142675845baa62715e49b87b68b/tiktoken-0.12.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:fc530a28591a2d74bce821d10b418b26a094bf33839e69042a6e86ddb7a7fb27", size = 1128725 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/05/df/4f80030d44682235bdaecd7346c90f67ae87ec8f3df4a3442cb53834f7e4/tiktoken-0.12.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:06a9f4f49884139013b138920a4c393aa6556b2f8f536345f11819389c703ebb", size = 1151875 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/22/1f/ae535223a8c4ef4c0c1192e3f9b82da660be9eb66b9279e95c99288e9dab/tiktoken-0.12.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:04f0e6a985d95913cabc96a741c5ffec525a2c72e9df086ff17ebe35985c800e", size = 1194451 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/78/a7/f8ead382fce0243cb625c4f266e66c27f65ae65ee9e77f59ea1653b6d730/tiktoken-0.12.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:0ee8f9ae00c41770b5f9b0bb1235474768884ae157de3beb5439ca0fd70f3e25", size = 1253794 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/72/05/3abc1db5d2c9aadc4d2c76fa5640134e475e58d9fbb82b5c535dc0de9b01/tiktoken-0.12.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:a90388128df3b3abeb2bfd1895b0681412a8d7dc644142519e6f0a97c2111646", size = 1050188 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/e3/7b/50c2f060412202d6c95f32b20755c7a6273543b125c0985d6fa9465105af/tiktoken-0.12.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:da900aa0ad52247d8794e307d6446bd3cdea8e192769b56276695d34d2c9aa88", size = 993978 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/14/27/bf795595a2b897e271771cd31cb847d479073497344c637966bdf2853da1/tiktoken-0.12.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:285ba9d73ea0d6171e7f9407039a290ca77efcdb026be7769dccc01d2c8d7fff", size = 1129271 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/f5/de/9341a6d7a8f1b448573bbf3425fa57669ac58258a667eb48a25dfe916d70/tiktoken-0.12.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:d186a5c60c6a0213f04a7a802264083dea1bbde92a2d4c7069e1a56630aef830", size = 1151216 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/75/0d/881866647b8d1be4d67cb24e50d0c26f9f807f994aa1510cb9ba2fe5f612/tiktoken-0.12.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:604831189bd05480f2b885ecd2d1986dc7686f609de48208ebbbddeea071fc0b", size = 1194860 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/b3/1e/b651ec3059474dab649b8d5b69f5c65cd8fcd8918568c1935bd4136c9392/tiktoken-0.12.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:8f317e8530bb3a222547b85a58583238c8f74fd7a7408305f9f63246d1a0958b", size = 1254567 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/ac/a4/72eed53e8976a099539cdd5eb36f241987212c29629d0a52c305173e0a68/tiktoken-0.12.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:c2c714c72bc00a38ca969dae79e8266ddec999c7ceccd603cc4f0d04ccd76365", size = 1050473 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/e6/d7/0110b8f54c008466b19672c615f2168896b83706a6611ba6e47313dbc6e9/tiktoken-0.12.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:cbb9a3ba275165a2cb0f9a83f5d7025afe6b9d0ab01a22b50f0e74fee2ad253e", size = 993855 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/5f/77/4f268c41a3957c418b084dd576ea2fad2e95da0d8e1ab705372892c2ca22/tiktoken-0.12.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:dfdfaa5ffff8993a3af94d1125870b1d27aed7cb97aa7eb8c1cefdbc87dbee63", size = 1129022 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/4e/2b/fc46c90fe5028bd094cd6ee25a7db321cb91d45dc87531e2bdbb26b4867a/tiktoken-0.12.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:584c3ad3d0c74f5269906eb8a659c8bfc6144a52895d9261cdaf90a0ae5f4de0", size = 1150736 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/28/c0/3c7a39ff68022ddfd7d93f3337ad90389a342f761c4d71de99a3ccc57857/tiktoken-0.12.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:54c891b416a0e36b8e2045b12b33dd66fb34a4fe7965565f1b482da50da3e86a", size = 1194908 },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/ab/0d/c1ad6f4016a3968c048545f5d9b8ffebf577774b2ede3e2e352553b685fe/tiktoken-0.12.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5edb8743b88d5be814b1a8a8854494719080c28faaa1ccbef02e87354fe71ef0", size = 1253706 },
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "tokenizers"
|
name = "tokenizers"
|
||||||
version = "0.22.1"
|
version = "0.22.1"
|
||||||
|
|||||||
Reference in New Issue
Block a user