From 2f64e30dd18a14abbfa3ab5eae4cb600281bae5e Mon Sep 17 00:00:00 2001 From: Gelu Vrabie Date: Mon, 21 Jul 2025 14:10:29 +0100 Subject: [PATCH] Add sqlite connector Co-authored-by: Gelu Vrabie --- .clauderules | 63 ++++ shared/constants.py | 3 +- shared/db/__init__.py | 5 + shared/db/sqlite/__init__.py | 15 + shared/db/sqlite/config.py | 31 ++ shared/db/sqlite/connector.py | 242 +++++++++++++++ shared/db/sqlite/event_log_manager.py | 75 +++++ shared/db/sqlite/types.py | 66 +++++ shared/{openai.py => openai_compat.py} | 0 shared/pyproject.toml | 3 + shared/tests/__init__.py | 1 + shared/tests/conftest.py | 21 ++ shared/tests/test_sqlite_connector.py | 396 +++++++++++++++++++++++++ shared/types/events/chunks.py | 2 +- shared/types/worker/commands_runner.py | 2 +- uv.lock | 83 +++++- worker/runner/runner.py | 2 +- worker/tests/test_supervisor.py | 2 +- 18 files changed, 1004 insertions(+), 8 deletions(-) create mode 100644 .clauderules create mode 100644 shared/db/__init__.py create mode 100644 shared/db/sqlite/__init__.py create mode 100644 shared/db/sqlite/config.py create mode 100644 shared/db/sqlite/connector.py create mode 100644 shared/db/sqlite/event_log_manager.py create mode 100644 shared/db/sqlite/types.py rename shared/{openai.py => openai_compat.py} (100%) create mode 100644 shared/tests/__init__.py create mode 100644 shared/tests/conftest.py create mode 100644 shared/tests/test_sqlite_connector.py diff --git a/.clauderules b/.clauderules new file mode 100644 index 00000000..70101d61 --- /dev/null +++ b/.clauderules @@ -0,0 +1,63 @@ +# Claude Code Rules - Follow Every Rule Exactly + +You must prioritize straightforward code semantics, well-named types, clear function signatures, and robust, carefully-chosen abstractions. Think about how your decisions might impact these aspects of code quality before proposing any changes. + +You have access to all modern Python features from Python 3.13, 3.12, 3.11... + +**When you're done making changes, remove any redundant comments; remaining comments should only apply to complex code segments, adding relevant context.** + +## 1. Code Discipline + +* Eliminate superfluous `try`/`catch` and `if` branches through strict typing and static analysis. +* Use pure functions unless you must mutate fixed state—then wrap that state in a class. +* Every function is **referentially transparent**: same inputs ⇒ same outputs, no hidden state, no unintended I/O. +* Put side-effects in injectable "effect handlers"; keep core logic pure. + +## 2. Naming + +* Choose descriptive, non-abbreviated names—no 3-letter acronyms or non-standard contractions. +* Anyone reading a function's type signature alone should grasp its purpose without extra context. + +## 3. Typing + +* Maintain **strict, exhaustive** typing; never bypass the type-checker. +* Default to `Literal[...]` when an enum-like set is needed. +* Prefer built-in types; when two values share structure but differ in meaning, enforce separation: + * Use `typing.NewType` for primitives (zero runtime cost). + * For serializable objects, add a `type: str` field that states the object's identity. + +## 4. Pydantic + +* Read, respect, and rely on Pydantic documentation. +* Centralize a common `ConfigDict` with `frozen=True` and `strict=True` (or stricter) and reuse it everywhere. +* For hierarchies of `BaseModel` variants, declare a discriminated union with `typing.Annotated[Base, Field(discriminator='variant')]`; publish a single `TypeAdapter[Base]` so all variants share one strict validator. + +## 5. 
IDs & UUIDs + +* Subclass Pydantic's `UUID4` for custom ID types. +* Generate fresh IDs with `uuid.uuid4()`. +* Create idempotency keys by hashing *persisted* state plus a **function-specific salt** to avoid collisions after crashes. + +## 6. Error Handling + +* Catch an exception **only** where you can handle or transform it meaningfully. +* State in the docstring **where** each exception is expected to be handled and **why**. + +## 7. Dependencies + +* Introduce new external dependencies only after approval. +* Request only libraries common in production environments. + +## 8. Use of `@final` & Freezing + +* Mark classes, methods, and variables as `@final` or otherwise immutable wherever applicable. + +## 9. Repository Workflow + +If you spot a rule violation within code that you've not been asked to work on directly, inform the user rather than patching it ad-hoc. + +--- + +### One-Sentence Summary + +Write strictly-typed, pure, self-describing Python that uses Pydantic, well-scoped side-effects, immutable state, approved dependencies, and explicit error handling. \ No newline at end of file diff --git a/shared/constants.py b/shared/constants.py index a69b161a..8172da3a 100644 --- a/shared/constants.py +++ b/shared/constants.py @@ -2,7 +2,8 @@ import inspect from pathlib import Path EXO_HOME = Path.home() / ".exo" -EXO_EVENT_DB = EXO_HOME / "event_db.sqlite3" +EXO_GLOBAL_EVENT_DB = EXO_HOME / "global_events.db" +EXO_WORKER_EVENT_DB = EXO_HOME / "worker_events.db" EXO_MASTER_STATE = EXO_HOME / "master_state.json" EXO_WORKER_STATE = EXO_HOME / "worker_state.json" EXO_MASTER_LOG = EXO_HOME / "master.log" diff --git a/shared/db/__init__.py b/shared/db/__init__.py new file mode 100644 index 00000000..f7eb8bbc --- /dev/null +++ b/shared/db/__init__.py @@ -0,0 +1,5 @@ +"""Database implementations for event storage.""" + +from .sqlite import AsyncSQLiteEventStorage, EventStorageProtocol + +__all__ = ["AsyncSQLiteEventStorage", "EventStorageProtocol"] \ No newline at end of file diff --git a/shared/db/sqlite/__init__.py b/shared/db/sqlite/__init__.py new file mode 100644 index 00000000..abf926ff --- /dev/null +++ b/shared/db/sqlite/__init__.py @@ -0,0 +1,15 @@ +"""SQLite event storage implementation.""" + +from .config import EventLogConfig, EventLogType +from .connector import AsyncSQLiteEventStorage +from .event_log_manager import EventLogManager +from .types import EventStorageProtocol, StoredEvent + +__all__ = [ + "AsyncSQLiteEventStorage", + "EventLogConfig", + "EventLogManager", + "EventLogType", + "EventStorageProtocol", + "StoredEvent", +] \ No newline at end of file diff --git a/shared/db/sqlite/config.py b/shared/db/sqlite/config.py new file mode 100644 index 00000000..1294eb6d --- /dev/null +++ b/shared/db/sqlite/config.py @@ -0,0 +1,31 @@ +from enum import Enum +from pathlib import Path + +from pydantic import BaseModel + +from shared.constants import EXO_GLOBAL_EVENT_DB, EXO_WORKER_EVENT_DB + + +class EventLogType(str, Enum): + """Types of event logs in the system""" + WORKER_EVENTS = "worker_events" + GLOBAL_EVENTS = "global_events" + + +class EventLogConfig(BaseModel): + """Configuration for the event log system""" + + # Batch processing settings + batch_size: int = 100 + batch_timeout_ms: int = 100 + debounce_ms: int = 10 + max_age_ms: int = 100 + + def get_db_path(self, log_type: EventLogType) -> Path: + """Get the full path for a specific event log type""" + if log_type == EventLogType.WORKER_EVENTS: + return EXO_WORKER_EVENT_DB + elif log_type == EventLogType.GLOBAL_EVENTS: + 
return EXO_GLOBAL_EVENT_DB + else: + raise ValueError(f"Unknown log type: {log_type}") \ No newline at end of file diff --git a/shared/db/sqlite/connector.py b/shared/db/sqlite/connector.py new file mode 100644 index 00000000..199d2973 --- /dev/null +++ b/shared/db/sqlite/connector.py @@ -0,0 +1,242 @@ +import asyncio +import contextlib +import json +from asyncio import Queue, Task +from collections.abc import Sequence +from logging import Logger, getLogger +from pathlib import Path +from typing import Any, cast +from uuid import UUID + +from sqlalchemy import text +from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine +from sqlmodel import SQLModel + +from shared.types.events.common import ( + BaseEvent, + EventCategories, + EventFromEventLog, + NodeId, +) +from shared.types.events.registry import EventParser + +from .types import StoredEvent + + +class AsyncSQLiteEventStorage: + """High-performance SQLite event storage with async batching. + + Features: + - Non-blocking writes via adaptive async batching with debouncing + - Automatic sequence numbering using SQLite rowid + - Type-safe event serialization/deserialization + - Efficient indexing for common query patterns + + Batching behavior: + - Low load: Minimal latency via short debounce windows + - High load: Efficient batching up to batch_size limit + - Max age constraint prevents indefinite delays + """ + + def __init__( + self, + db_path: str | Path, + batch_size: int, + batch_timeout_ms: int, + debounce_ms: int, + max_age_ms: int, + logger: Logger | None = None + ): + self._db_path = Path(db_path) + self._batch_size = batch_size + self._batch_timeout_s = batch_timeout_ms / 1000.0 + self._debounce_s = debounce_ms / 1000.0 + self._max_age_s = max_age_ms / 1000.0 + self._logger = logger or getLogger(__name__) + + self._write_queue: Queue[tuple[BaseEvent[EventCategories], NodeId]] = Queue() + self._batch_writer_task: Task[None] | None = None + self._engine = None + self._closed = False + + async def start(self) -> None: + """Initialize the storage and start the batch writer.""" + if self._batch_writer_task is not None: + raise RuntimeError("Storage already started") + + # Create database and tables + await self._initialize_database() + + # Start batch writer + self._batch_writer_task = asyncio.create_task(self._batch_writer()) + self._logger.info(f"Started SQLite event storage: {self._db_path}") + + async def append_events( + self, + events: Sequence[BaseEvent[EventCategories]], + origin: NodeId + ) -> None: + """Append events to the log (fire-and-forget). 
The writes are batched and committed + in the background so readers don't have a guarantee of seeing events immediately.""" + if self._closed: + raise RuntimeError("Storage is closed") + + for event in events: + await self._write_queue.put((event, origin)) + + async def get_events_since( + self, + last_idx: int + ) -> Sequence[EventFromEventLog[EventCategories]]: + """Retrieve events after a specific index.""" + if self._closed: + raise RuntimeError("Storage is closed") + + assert self._engine is not None + + async with AsyncSession(self._engine) as session: + # Use raw SQL to get rowid along with the stored event data + result = await session.execute( + text("SELECT rowid, origin, event_data FROM events WHERE rowid > :last_idx ORDER BY rowid"), + {"last_idx": last_idx} + ) + rows = result.fetchall() + + events: list[EventFromEventLog[EventCategories]] = [] + for row in rows: + rowid: int = cast(int, row[0]) + origin: str = cast(str, row[1]) + # Parse JSON string to dict + raw_event_data = row[2] # type: ignore[reportAny] - SQLAlchemy result is Any + if isinstance(raw_event_data, str): + event_data: dict[str, Any] = cast(dict[str, Any], json.loads(raw_event_data)) + else: + event_data = cast(dict[str, Any], raw_event_data) + event = await self._deserialize_event(event_data) + events.append(EventFromEventLog( + event=event, + origin=NodeId(uuid=UUID(origin)), + idx_in_log=rowid # rowid becomes idx_in_log + )) + + return events + + async def close(self) -> None: + """Close the storage connection and cleanup resources.""" + if self._closed: + return + + self._closed = True + + # Stop batch writer + if self._batch_writer_task is not None: + self._batch_writer_task.cancel() + with contextlib.suppress(asyncio.CancelledError): + await self._batch_writer_task + + # Close database + if self._engine is not None: + await self._engine.dispose() + + self._logger.info("Closed SQLite event storage") + + async def _initialize_database(self) -> None: + """Initialize database connection and create tables.""" + self._engine = create_async_engine( + f"sqlite+aiosqlite:///{self._db_path}", + echo=False, + connect_args={ + "check_same_thread": False, + } + ) + + # Create tables using SQLModel + async with self._engine.begin() as conn: + await conn.run_sync(SQLModel.metadata.create_all) + + # Enable WAL mode and other optimizations + await conn.execute(text("PRAGMA journal_mode=WAL")) + await conn.execute(text("PRAGMA synchronous=NORMAL")) + await conn.execute(text("PRAGMA cache_size=10000")) + + async def _batch_writer(self) -> None: + """Background task that drains the queue and commits batches. + + Uses adaptive batching with debouncing: + - Blocks waiting for first item (no CPU waste when idle) + - Opens debounce window to collect more items + - Respects max age to prevent stale batches + - Resets debounce timer with each new item + """ + loop = asyncio.get_event_loop() + + while not self._closed: + batch: list[tuple[BaseEvent[EventCategories], NodeId]] = [] + + try: + # Block waiting for first item + event, origin = await self._write_queue.get() + batch.append((event, origin)) + first_ts = loop.time() # monotonic seconds + + # Open debounce window + while True: + # How much longer can we wait? 
+ age_left = self._max_age_s - (loop.time() - first_ts) + if age_left <= 0: + break # max age reached → flush + + # Shrink the wait to honour both debounce and max-age + try: + event, origin = await asyncio.wait_for( + self._write_queue.get(), + timeout=min(self._debounce_s, age_left) + ) + batch.append((event, origin)) + + if len(batch) >= self._batch_size: + break # size cap reached → flush + # else: loop again, resetting debounce timer + except asyncio.TimeoutError: + break # debounce window closed → flush + + except asyncio.CancelledError: + # Drain any remaining items before exiting + if batch: + await self._commit_batch(batch) + raise + + if batch: + await self._commit_batch(batch) + + async def _commit_batch(self, batch: list[tuple[BaseEvent[EventCategories], NodeId]]) -> None: + """Commit a batch of events to SQLite.""" + assert self._engine is not None + + try: + async with AsyncSession(self._engine) as session: + for event, origin in batch: + stored_event = StoredEvent( + origin=str(origin.uuid), + event_type=event.event_type.value, + event_category=next(iter(event.event_category)).value, + event_id=str(event.event_id), + event_data=event.model_dump() # SQLModel handles JSON serialization automatically + ) + session.add(stored_event) + + await session.commit() + + self._logger.debug(f"Committed batch of {len(batch)} events") + + except Exception as e: + self._logger.error(f"Failed to commit batch: {e}") + raise + + async def _deserialize_event(self, event_data: dict[str, Any]) -> BaseEvent[EventCategories]: + """Deserialize event data back to typed Event.""" + return EventParser.validate_python(event_data) + + async def _deserialize_event_raw(self, event_data: dict[str, Any]) -> dict[str, Any]: + """Return raw event data for testing purposes.""" + return event_data diff --git a/shared/db/sqlite/event_log_manager.py b/shared/db/sqlite/event_log_manager.py new file mode 100644 index 00000000..a20f3eca --- /dev/null +++ b/shared/db/sqlite/event_log_manager.py @@ -0,0 +1,75 @@ +from logging import Logger +from typing import Dict + +from shared.constants import EXO_HOME +from shared.db.sqlite.config import EventLogConfig, EventLogType +from shared.db.sqlite.connector import AsyncSQLiteEventStorage + + +class EventLogManager: + """ + Manages both worker and global event log connectors. 
+ Used by both master and worker processes with different access patterns: + + - Worker: writes to worker_events, tails global_events + - Master (elected): writes to global_events, tails global_events + - Master (replica): writes to worker_events, tails global_events + """ + + def __init__(self, config: EventLogConfig, logger: Logger): + self._config = config + self._logger = logger + self._connectors: Dict[EventLogType, AsyncSQLiteEventStorage] = {} + + # Ensure base directory exists + EXO_HOME.mkdir(parents=True, exist_ok=True) + + async def initialize(self) -> None: + """Initialize both connectors - call this during startup""" + # Both master and worker need both connectors + await self.get_connector(EventLogType.WORKER_EVENTS) + await self.get_connector(EventLogType.GLOBAL_EVENTS) + self._logger.info("Initialized all event log connectors") + + async def get_connector(self, log_type: EventLogType) -> AsyncSQLiteEventStorage: + """Get or create a connector for the specified log type""" + if log_type not in self._connectors: + db_path = self._config.get_db_path(log_type) + + connector = AsyncSQLiteEventStorage( + db_path=db_path, + batch_size=self._config.batch_size, + batch_timeout_ms=self._config.batch_timeout_ms, + debounce_ms=self._config.debounce_ms, + max_age_ms=self._config.max_age_ms, + logger=self._logger + ) + + # Start the connector (creates tables if needed) + await connector.start() + + self._connectors[log_type] = connector + self._logger.info(f"Initialized {log_type.value} connector at {db_path}") + + return self._connectors[log_type] + + @property + def worker_events(self) -> AsyncSQLiteEventStorage: + """Access worker events log (must call initialize() first)""" + if EventLogType.WORKER_EVENTS not in self._connectors: + raise RuntimeError("Event log manager not initialized. Call initialize() first.") + return self._connectors[EventLogType.WORKER_EVENTS] + + @property + def global_events(self) -> AsyncSQLiteEventStorage: + """Access global events log (must call initialize() first)""" + if EventLogType.GLOBAL_EVENTS not in self._connectors: + raise RuntimeError("Event log manager not initialized. Call initialize() first.") + return self._connectors[EventLogType.GLOBAL_EVENTS] + + async def close_all(self) -> None: + """Close all open connectors""" + for log_type, connector in self._connectors.items(): + await connector.close() + self._logger.info(f"Closed {log_type.value} connector") + self._connectors.clear() \ No newline at end of file diff --git a/shared/db/sqlite/types.py b/shared/db/sqlite/types.py new file mode 100644 index 00000000..4b623e0c --- /dev/null +++ b/shared/db/sqlite/types.py @@ -0,0 +1,66 @@ +from datetime import datetime, timezone +from typing import Any, Protocol, Sequence + +from sqlalchemy import DateTime, Index +from sqlmodel import JSON, Column, Field, SQLModel + +from shared.types.events.common import ( + BaseEvent, + EventCategories, + EventFromEventLog, + NodeId, +) + + +class StoredEvent(SQLModel, table=True): + """SQLite representation of an event in the event log. + + The rowid serves as the global sequence number (idx_in_log) for ordering. 
+ """ + __tablename__ = "events" # type: ignore[assignment] + + # SQLite's rowid as primary key - we alias it but don't actually use it in queries + rowid: int | None = Field(default=None, primary_key=True, alias="rowid") + origin: str = Field(index=True) + event_type: str = Field(index=True) + event_category: str = Field(index=True) + event_id: str = Field(index=True) + event_data: dict[str, Any] = Field(sa_column=Column(JSON)) + created_at: datetime = Field( + default_factory=lambda: datetime.now(timezone.utc), + sa_column=Column(DateTime, index=True) + ) + + __table_args__ = ( + Index("idx_events_origin_created", "origin", "created_at"), + Index("idx_events_category_created", "event_category", "created_at"), + ) + +class EventStorageProtocol(Protocol): + """Protocol for event storage implementations.""" + + async def append_events( + self, + events: Sequence[BaseEvent[EventCategories]], + origin: NodeId + ) -> None: + """Append events to the log (fire-and-forget). + + Events are queued for batched writing and assigned idx_in_log + when committed to storage. + """ + ... + + async def get_events_since( + self, + last_idx: int + ) -> Sequence[EventFromEventLog[EventCategories]]: + """Retrieve events after a specific index. + + Returns events in idx_in_log order. + """ + ... + + async def close(self) -> None: + """Close the storage connection and cleanup resources.""" + ... \ No newline at end of file diff --git a/shared/openai.py b/shared/openai_compat.py similarity index 100% rename from shared/openai.py rename to shared/openai_compat.py diff --git a/shared/pyproject.toml b/shared/pyproject.toml index 6602478a..95a78f5c 100644 --- a/shared/pyproject.toml +++ b/shared/pyproject.toml @@ -5,6 +5,7 @@ description = "Shared utilities for the Exo project" readme = "README.md" requires-python = ">=3.13" dependencies = [ + "aiosqlite>=0.20.0", "networkx>=3.5", "openai>=1.93.0", "pathlib>=1.0.1", @@ -12,6 +13,8 @@ dependencies = [ "pydantic>=2.11.7", "rich>=14.0.0", "rustworkx>=0.16.0", + "sqlmodel>=0.0.22", + "sqlalchemy[asyncio]>=2.0.0", ] [build-system] diff --git a/shared/tests/__init__.py b/shared/tests/__init__.py new file mode 100644 index 00000000..e5374d95 --- /dev/null +++ b/shared/tests/__init__.py @@ -0,0 +1 @@ +# Test package for shared utilities \ No newline at end of file diff --git a/shared/tests/conftest.py b/shared/tests/conftest.py new file mode 100644 index 00000000..356e7951 --- /dev/null +++ b/shared/tests/conftest.py @@ -0,0 +1,21 @@ +"""Pytest configuration and shared fixtures for shared package tests.""" + +import asyncio +from typing import Generator + +import pytest + + +@pytest.fixture(scope="session") +def event_loop() -> Generator[asyncio.AbstractEventLoop, None, None]: + """Create an event loop for the test session.""" + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + yield loop + loop.close() + + +@pytest.fixture(autouse=True) +def reset_event_loop(): + """Reset the event loop for each test to ensure clean state.""" + # This ensures each test gets a fresh event loop state diff --git a/shared/tests/test_sqlite_connector.py b/shared/tests/test_sqlite_connector.py new file mode 100644 index 00000000..80e921ac --- /dev/null +++ b/shared/tests/test_sqlite_connector.py @@ -0,0 +1,396 @@ +import asyncio +import json +import tempfile +from pathlib import Path +from typing import Any, Generator, cast +from uuid import uuid4 + +import pytest +from sqlalchemy import text +from sqlalchemy.ext.asyncio import AsyncSession + +from shared.db.sqlite import 
AsyncSQLiteEventStorage, EventLogConfig +from shared.types.common import NodeId + +# Type ignore comment for all protected member access in this test file +# pyright: reportPrivateUsage=false + + +def _load_json_data(raw_data: str) -> dict[str, Any]: + """Helper function to load JSON data with proper typing.""" + return cast(dict[str, Any], json.loads(raw_data)) + + +@pytest.fixture +def temp_db_path() -> Generator[Path, None, None]: + """Create a temporary database file for testing.""" + with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f: + yield Path(f.name) + # Cleanup + Path(f.name).unlink(missing_ok=True) + + +@pytest.fixture +def sample_node_id() -> NodeId: + """Create a sample NodeId for testing.""" + return NodeId(uuid=uuid4()) + + +class TestAsyncSQLiteEventStorage: + """Test suite for AsyncSQLiteEventStorage focused on storage functionality.""" + + @pytest.mark.asyncio + async def test_initialization_creates_tables(self, temp_db_path: Path) -> None: + """Test that database initialization creates the events table.""" + default_config = EventLogConfig() + storage = AsyncSQLiteEventStorage(db_path=temp_db_path, batch_size=default_config.batch_size, batch_timeout_ms=default_config.batch_timeout_ms, debounce_ms=default_config.debounce_ms, max_age_ms=default_config.max_age_ms) + await storage.start() + + # Verify table exists by querying directly + assert storage._engine is not None + async with AsyncSession(storage._engine) as session: + result = await session.execute(text("SELECT name FROM sqlite_master WHERE type='table' AND name='events'")) + tables = result.fetchall() + assert len(tables) == 1 + assert tables[0][0] == "events" + + await storage.close() + + @pytest.mark.asyncio + async def test_start_twice_raises_error(self, temp_db_path: Path) -> None: + """Test that starting storage twice raises an error.""" + default_config = EventLogConfig() + storage = AsyncSQLiteEventStorage(db_path=temp_db_path, batch_size=default_config.batch_size, batch_timeout_ms=default_config.batch_timeout_ms, debounce_ms=default_config.debounce_ms, max_age_ms=default_config.max_age_ms) + await storage.start() + + with pytest.raises(RuntimeError, match="Storage already started"): + await storage.start() + + await storage.close() + + @pytest.mark.asyncio + async def test_direct_database_operations(self, temp_db_path: Path, sample_node_id: NodeId) -> None: + """Test direct database operations without event parsing.""" + default_config = EventLogConfig() + storage = AsyncSQLiteEventStorage(db_path=temp_db_path, batch_size=default_config.batch_size, batch_timeout_ms=default_config.batch_timeout_ms, debounce_ms=default_config.debounce_ms, max_age_ms=default_config.max_age_ms) + await storage.start() + + # Insert test data directly + test_data = { + "event_type": "test_event", + "test_field": "test_value", + "number": 42 + } + + async with AsyncSession(storage._engine) as session: + await session.execute( + text("INSERT INTO events (origin, event_type, event_category, event_id, event_data) VALUES (:origin, :event_type, :event_category, :event_id, :event_data)"), + { + "origin": str(sample_node_id.uuid), + "event_type": "test_event", + "event_category": "test_category", + "event_id": str(uuid4()), + "event_data": json.dumps(test_data) + } + ) + await session.commit() + + # Query data back + assert storage._engine is not None + async with AsyncSession(storage._engine) as session: + result = await session.execute( + text("SELECT rowid, origin, event_data FROM events ORDER BY rowid") + ) + rows = 
result.fetchall() + + assert len(rows) == 1 + assert rows[0][0] == 1 # rowid + assert rows[0][1] == str(sample_node_id.uuid) # origin + raw_json = cast(str, rows[0][2]) + retrieved_data = _load_json_data(raw_json) + assert retrieved_data == test_data + + await storage.close() + + @pytest.mark.asyncio + async def test_rowid_auto_increment(self, temp_db_path: Path, sample_node_id: NodeId) -> None: + """Test that rowid auto-increments correctly.""" + default_config = EventLogConfig() + storage = AsyncSQLiteEventStorage(db_path=temp_db_path, batch_size=default_config.batch_size, batch_timeout_ms=default_config.batch_timeout_ms, debounce_ms=default_config.debounce_ms, max_age_ms=default_config.max_age_ms) + await storage.start() + + # Insert multiple records + test_records = [ + {"event_type": "test_event_1", "data": "first"}, + {"event_type": "test_event_2", "data": "second"}, + {"event_type": "test_event_3", "data": "third"} + ] + + assert storage._engine is not None + async with AsyncSession(storage._engine) as session: + for record in test_records: + await session.execute( + text("INSERT INTO events (origin, event_type, event_category, event_id, event_data) VALUES (:origin, :event_type, :event_category, :event_id, :event_data)"), + { + "origin": str(sample_node_id.uuid), + "event_type": record["event_type"], + "event_category": "test_category", + "event_id": str(uuid4()), + "event_data": json.dumps(record) + } + ) + await session.commit() + + # Query back and verify rowid sequence + assert storage._engine is not None + async with AsyncSession(storage._engine) as session: + result = await session.execute( + text("SELECT rowid, event_data FROM events ORDER BY rowid") + ) + rows = result.fetchall() + + assert len(rows) == 3 + for i, row in enumerate(rows): + assert row[0] == i + 1 # rowid starts at 1 + raw_json = cast(str, row[1]) + retrieved_data = _load_json_data(raw_json) + assert retrieved_data == test_records[i] + + await storage.close() + + @pytest.mark.asyncio + async def test_rowid_with_multiple_origins(self, temp_db_path: Path) -> None: + """Test rowid sequence across multiple origins.""" + default_config = EventLogConfig() + storage = AsyncSQLiteEventStorage(db_path=temp_db_path, batch_size=default_config.batch_size, batch_timeout_ms=default_config.batch_timeout_ms, debounce_ms=default_config.debounce_ms, max_age_ms=default_config.max_age_ms) + await storage.start() + + origin1 = NodeId(uuid=uuid4()) + origin2 = NodeId(uuid=uuid4()) + + # Insert interleaved records from different origins + assert storage._engine is not None + async with AsyncSession(storage._engine) as session: + # Origin 1 - record 1 + await session.execute( + text("INSERT INTO events (origin, event_type, event_category, event_id, event_data) VALUES (:origin, :event_type, :event_category, :event_id, :event_data)"), + {"origin": str(origin1.uuid), "event_type": "event_1", "event_category": "test", "event_id": str(uuid4()), "event_data": json.dumps({"from": "origin1", "seq": 1})} + ) + # Origin 2 - record 2 + await session.execute( + text("INSERT INTO events (origin, event_type, event_category, event_id, event_data) VALUES (:origin, :event_type, :event_category, :event_id, :event_data)"), + {"origin": str(origin2.uuid), "event_type": "event_2", "event_category": "test", "event_id": str(uuid4()), "event_data": json.dumps({"from": "origin2", "seq": 2})} + ) + # Origin 1 - record 3 + await session.execute( + text("INSERT INTO events (origin, event_type, event_category, event_id, event_data) VALUES (:origin, :event_type, 
:event_category, :event_id, :event_data)"), + {"origin": str(origin1.uuid), "event_type": "event_3", "event_category": "test", "event_id": str(uuid4()), "event_data": json.dumps({"from": "origin1", "seq": 3})} + ) + await session.commit() + + # Verify sequential rowid regardless of origin + assert storage._engine is not None + async with AsyncSession(storage._engine) as session: + result = await session.execute( + text("SELECT rowid, origin, event_data FROM events ORDER BY rowid") + ) + rows = result.fetchall() + + assert len(rows) == 3 + assert rows[0][0] == 1 # First rowid + assert rows[1][0] == 2 # Second rowid + assert rows[2][0] == 3 # Third rowid + + # Verify data integrity + raw_json1 = cast(str, rows[0][2]) + raw_json2 = cast(str, rows[1][2]) + raw_json3 = cast(str, rows[2][2]) + data1 = _load_json_data(raw_json1) + data2 = _load_json_data(raw_json2) + data3 = _load_json_data(raw_json3) + + assert data1["from"] == "origin1" and data1["seq"] == 1 + assert data2["from"] == "origin2" and data2["seq"] == 2 + assert data3["from"] == "origin1" and data3["seq"] == 3 + + await storage.close() + + @pytest.mark.asyncio + async def test_query_events_since_index(self, temp_db_path: Path, sample_node_id: NodeId) -> None: + """Test querying events after a specific rowid.""" + default_config = EventLogConfig() + storage = AsyncSQLiteEventStorage(db_path=temp_db_path, batch_size=default_config.batch_size, batch_timeout_ms=default_config.batch_timeout_ms, debounce_ms=default_config.debounce_ms, max_age_ms=default_config.max_age_ms) + await storage.start() + + # Insert 10 test records + assert storage._engine is not None + async with AsyncSession(storage._engine) as session: + for i in range(10): + await session.execute( + text("INSERT INTO events (origin, event_type, event_category, event_id, event_data) VALUES (:origin, :event_type, :event_category, :event_id, :event_data)"), + { + "origin": str(sample_node_id.uuid), + "event_type": f"event_{i}", + "event_category": "test", + "event_id": str(uuid4()), + "event_data": json.dumps({"index": i}) + } + ) + await session.commit() + + # Query events after index 5 + assert storage._engine is not None + async with AsyncSession(storage._engine) as session: + result = await session.execute( + text("SELECT rowid, event_data FROM events WHERE rowid > :last_idx ORDER BY rowid"), + {"last_idx": 5} + ) + rows = result.fetchall() + + assert len(rows) == 5 # Should get records 6-10 + for i, row in enumerate(rows): + assert row[0] == i + 6 # rowid 6, 7, 8, 9, 10 + raw_json = cast(str, row[1]) + data = _load_json_data(raw_json) + assert data["index"] == i + 5 # index 5, 6, 7, 8, 9 + + await storage.close() + + @pytest.mark.asyncio + async def test_empty_query(self, temp_db_path: Path) -> None: + """Test querying when no events exist.""" + default_config = EventLogConfig() + storage = AsyncSQLiteEventStorage(db_path=temp_db_path, batch_size=default_config.batch_size, batch_timeout_ms=default_config.batch_timeout_ms, debounce_ms=default_config.debounce_ms, max_age_ms=default_config.max_age_ms) + await storage.start() + + assert storage._engine is not None + async with AsyncSession(storage._engine) as session: + result = await session.execute( + text("SELECT rowid, origin, event_data FROM events WHERE rowid > :last_idx ORDER BY rowid"), + {"last_idx": 0} + ) + rows = result.fetchall() + + assert len(rows) == 0 + + await storage.close() + + @pytest.mark.asyncio + async def test_operations_after_close_raise_error(self, temp_db_path: Path) -> None: + """Test that 
operations after close work properly.""" + default_config = EventLogConfig() + storage = AsyncSQLiteEventStorage(db_path=temp_db_path, batch_size=default_config.batch_size, batch_timeout_ms=default_config.batch_timeout_ms, debounce_ms=default_config.debounce_ms, max_age_ms=default_config.max_age_ms) + await storage.start() + await storage.close() + + # These should not raise errors since we're not using the public API + assert storage._closed is True + assert storage._engine is not None # Engine should still exist but be disposed + + @pytest.mark.asyncio + async def test_multiple_close_calls_safe(self, temp_db_path: Path) -> None: + """Test that multiple close calls are safe.""" + default_config = EventLogConfig() + storage = AsyncSQLiteEventStorage(db_path=temp_db_path, batch_size=default_config.batch_size, batch_timeout_ms=default_config.batch_timeout_ms, debounce_ms=default_config.debounce_ms, max_age_ms=default_config.max_age_ms) + await storage.start() + await storage.close() + await storage.close() # Should not raise an error + + @pytest.mark.asyncio + async def test_json_data_types(self, temp_db_path: Path, sample_node_id: NodeId) -> None: + """Test that various JSON data types are handled correctly.""" + default_config = EventLogConfig() + storage = AsyncSQLiteEventStorage(db_path=temp_db_path, batch_size=default_config.batch_size, batch_timeout_ms=default_config.batch_timeout_ms, debounce_ms=default_config.debounce_ms, max_age_ms=default_config.max_age_ms) + await storage.start() + + # Test various JSON data types + test_data = { + "string": "test string", + "number": 42, + "float": 3.14, + "boolean": True, + "null": None, + "array": [1, 2, 3, "four"], + "object": {"nested": "value", "deep": {"deeper": "nested"}}, + "unicode": "测试 🚀" + } + + assert storage._engine is not None + async with AsyncSession(storage._engine) as session: + await session.execute( + text("INSERT INTO events (origin, event_type, event_category, event_id, event_data) VALUES (:origin, :event_type, :event_category, :event_id, :event_data)"), + { + "origin": str(sample_node_id.uuid), + "event_type": "complex_event", + "event_category": "test", + "event_id": str(uuid4()), + "event_data": json.dumps(test_data) + } + ) + await session.commit() + + # Query back and verify data integrity + assert storage._engine is not None + async with AsyncSession(storage._engine) as session: + result = await session.execute( + text("SELECT event_data FROM events WHERE event_type = :event_type"), + {"event_type": "complex_event"} + ) + rows = result.fetchall() + + assert len(rows) == 1 + raw_json = cast(str, rows[0][0]) + retrieved_data = _load_json_data(raw_json) + assert retrieved_data == test_data + + await storage.close() + + @pytest.mark.asyncio + async def test_concurrent_inserts(self, temp_db_path: Path) -> None: + """Test concurrent inserts maintain rowid ordering.""" + default_config = EventLogConfig() + storage = AsyncSQLiteEventStorage(db_path=temp_db_path, batch_size=default_config.batch_size, batch_timeout_ms=default_config.batch_timeout_ms, debounce_ms=default_config.debounce_ms, max_age_ms=default_config.max_age_ms) + await storage.start() + + async def insert_batch(origin_id: str, batch_id: int, count: int) -> None: + assert storage._engine is not None + async with AsyncSession(storage._engine) as session: + for i in range(count): + await session.execute( + text("INSERT INTO events (origin, event_type, event_category, event_id, event_data) VALUES (:origin, :event_type, :event_category, :event_id, :event_data)"), + { 
+ "origin": origin_id, + "event_type": f"batch_{batch_id}_event_{i}", + "event_category": "test", + "event_id": str(uuid4()), + "event_data": json.dumps({"batch": batch_id, "item": i}) + } + ) + await session.commit() + + # Run multiple concurrent insert batches + origin1 = str(uuid4()) + origin2 = str(uuid4()) + origin3 = str(uuid4()) + + await asyncio.gather( + insert_batch(origin1, 1, 5), + insert_batch(origin2, 2, 5), + insert_batch(origin3, 3, 5) + ) + + # Verify all records were inserted and rowid is sequential + assert storage._engine is not None + async with AsyncSession(storage._engine) as session: + result = await session.execute( + text("SELECT rowid, origin, event_data FROM events ORDER BY rowid") + ) + rows = result.fetchall() + + assert len(rows) == 15 # 3 batches * 5 records each + + # Verify rowid sequence is maintained + for i, row in enumerate(rows): + assert row[0] == i + 1 # rowid should be sequential + + await storage.close() \ No newline at end of file diff --git a/shared/types/events/chunks.py b/shared/types/events/chunks.py index ed52b008..65bf4dd6 100644 --- a/shared/types/events/chunks.py +++ b/shared/types/events/chunks.py @@ -5,7 +5,7 @@ from typing import Annotated, Literal # from openai.types.chat.chat_completion_chunk import ChatCompletionChunk from pydantic import BaseModel, Field, TypeAdapter -from shared.openai import FinishReason +from shared.openai_compat import FinishReason from shared.types.models.common import ModelId from shared.types.tasks.common import TaskId diff --git a/shared/types/worker/commands_runner.py b/shared/types/worker/commands_runner.py index 83283135..ea3c0715 100644 --- a/shared/types/worker/commands_runner.py +++ b/shared/types/worker/commands_runner.py @@ -3,7 +3,7 @@ from typing import Annotated, Generic, Literal, TypeVar from pydantic import BaseModel, Field, TypeAdapter -from shared.openai import FinishReason +from shared.openai_compat import FinishReason from shared.types.tasks.common import ChatCompletionTaskData from shared.types.worker.mlx import Host from shared.types.worker.shards import ShardMetadata diff --git a/uv.lock b/uv.lock index 015412d4..c10602aa 100644 --- a/uv.lock +++ b/uv.lock @@ -20,6 +20,18 @@ members = [ "exo-worker", ] +[[package]] +name = "aiosqlite" +version = "0.21.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/13/7d/8bca2bf9a247c2c5dfeec1d7a5f40db6518f88d314b8bca9da29670d2671/aiosqlite-0.21.0.tar.gz", hash = "sha256:131bb8056daa3bc875608c631c678cda73922a2d4ba8aec373b19f18c17e7aa3", size = 13454, upload-time = "2025-02-03T07:30:16.235Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f5/10/6c25ed6de94c49f88a91fa5018cb4c0f3625f31d5be9f771ebe5cc7cd506/aiosqlite-0.21.0-py3-none-any.whl", hash = "sha256:2549cf4057f95f53dcba16f2b64e8e2791d7e1adedb13197dd8ed77bb226d7d0", size = 15792, upload-time = "2025-02-03T07:30:13.6Z" }, +] + [[package]] name = "annotated-types" version = "0.7.0" @@ -148,6 +160,7 @@ name = "exo-shared" version = "0.1.0" source = { editable = "shared" } dependencies = [ + { name = "aiosqlite", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "networkx", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "openai", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "pathlib", marker = "sys_platform == 'darwin' or 
sys_platform == 'linux'" }, @@ -155,6 +168,8 @@ dependencies = [ { name = "pydantic", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "rich", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "rustworkx", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "sqlalchemy", extra = ["asyncio"], marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "sqlmodel", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, ] [package.dev-dependencies] @@ -164,6 +179,7 @@ dev = [ [package.metadata] requires-dist = [ + { name = "aiosqlite", specifier = ">=0.20.0" }, { name = "networkx", specifier = ">=3.5" }, { name = "openai", specifier = ">=1.93.0" }, { name = "pathlib", specifier = ">=1.0.1" }, @@ -171,6 +187,8 @@ requires-dist = [ { name = "pydantic", specifier = ">=2.11.7" }, { name = "rich", specifier = ">=14.0.0" }, { name = "rustworkx", specifier = ">=0.16.0" }, + { name = "sqlalchemy", extras = ["asyncio"], specifier = ">=2.0.0" }, + { name = "sqlmodel", specifier = ">=0.0.22" }, ] [package.metadata.requires-dev] @@ -225,6 +243,28 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2f/e0/014d5d9d7a4564cf1c40b5039bc882db69fd881111e03ab3657ac0b218e2/fsspec-2025.7.0-py3-none-any.whl", hash = "sha256:8b012e39f63c7d5f10474de957f3ab793b47b45ae7d39f2fb735f8bbe25c0e21", size = 199597, upload-time = "2025-07-15T16:05:19.529Z" }, ] +[[package]] +name = "greenlet" +version = "3.2.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c9/92/bb85bd6e80148a4d2e0c59f7c0c2891029f8fd510183afc7d8d2feeed9b6/greenlet-3.2.3.tar.gz", hash = "sha256:8b0dd8ae4c0d6f5e54ee55ba935eeb3d735a9b58a8a1e5b5cbab64e01a39f365", size = 185752, upload-time = "2025-06-05T16:16:09.955Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b1/cf/f5c0b23309070ae93de75c90d29300751a5aacefc0a3ed1b1d8edb28f08b/greenlet-3.2.3-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:500b8689aa9dd1ab26872a34084503aeddefcb438e2e7317b89b11eaea1901ad", size = 270732, upload-time = "2025-06-05T16:10:08.26Z" }, + { url = "https://files.pythonhosted.org/packages/48/ae/91a957ba60482d3fecf9be49bc3948f341d706b52ddb9d83a70d42abd498/greenlet-3.2.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:a07d3472c2a93117af3b0136f246b2833fdc0b542d4a9799ae5f41c28323faef", size = 639033, upload-time = "2025-06-05T16:38:53.983Z" }, + { url = "https://files.pythonhosted.org/packages/6f/df/20ffa66dd5a7a7beffa6451bdb7400d66251374ab40b99981478c69a67a8/greenlet-3.2.3-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:8704b3768d2f51150626962f4b9a9e4a17d2e37c8a8d9867bbd9fa4eb938d3b3", size = 652999, upload-time = "2025-06-05T16:41:37.89Z" }, + { url = "https://files.pythonhosted.org/packages/51/b4/ebb2c8cb41e521f1d72bf0465f2f9a2fd803f674a88db228887e6847077e/greenlet-3.2.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:5035d77a27b7c62db6cf41cf786cfe2242644a7a337a0e155c80960598baab95", size = 647368, upload-time = "2025-06-05T16:48:21.467Z" }, + { url = "https://files.pythonhosted.org/packages/8e/6a/1e1b5aa10dced4ae876a322155705257748108b7fd2e4fae3f2a091fe81a/greenlet-3.2.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2d8aa5423cd4a396792f6d4580f88bdc6efcb9205891c9d40d20f6e670992efb", size = 650037, upload-time = "2025-06-05T16:13:06.402Z" }, + { url = 
"https://files.pythonhosted.org/packages/26/f2/ad51331a157c7015c675702e2d5230c243695c788f8f75feba1af32b3617/greenlet-3.2.3-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2c724620a101f8170065d7dded3f962a2aea7a7dae133a009cada42847e04a7b", size = 608402, upload-time = "2025-06-05T16:12:51.91Z" }, + { url = "https://files.pythonhosted.org/packages/26/bc/862bd2083e6b3aff23300900a956f4ea9a4059de337f5c8734346b9b34fc/greenlet-3.2.3-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:873abe55f134c48e1f2a6f53f7d1419192a3d1a4e873bace00499a4e45ea6af0", size = 1119577, upload-time = "2025-06-05T16:36:49.787Z" }, + { url = "https://files.pythonhosted.org/packages/86/94/1fc0cc068cfde885170e01de40a619b00eaa8f2916bf3541744730ffb4c3/greenlet-3.2.3-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:024571bbce5f2c1cfff08bf3fbaa43bbc7444f580ae13b0099e95d0e6e67ed36", size = 1147121, upload-time = "2025-06-05T16:12:42.527Z" }, + { url = "https://files.pythonhosted.org/packages/d8/ca/accd7aa5280eb92b70ed9e8f7fd79dc50a2c21d8c73b9a0856f5b564e222/greenlet-3.2.3-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:3d04332dddb10b4a211b68111dabaee2e1a073663d117dc10247b5b1642bac86", size = 271479, upload-time = "2025-06-05T16:10:47.525Z" }, + { url = "https://files.pythonhosted.org/packages/55/71/01ed9895d9eb49223280ecc98a557585edfa56b3d0e965b9fa9f7f06b6d9/greenlet-3.2.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8186162dffde068a465deab08fc72c767196895c39db26ab1c17c0b77a6d8b97", size = 683952, upload-time = "2025-06-05T16:38:55.125Z" }, + { url = "https://files.pythonhosted.org/packages/ea/61/638c4bdf460c3c678a0a1ef4c200f347dff80719597e53b5edb2fb27ab54/greenlet-3.2.3-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:f4bfbaa6096b1b7a200024784217defedf46a07c2eee1a498e94a1b5f8ec5728", size = 696917, upload-time = "2025-06-05T16:41:38.959Z" }, + { url = "https://files.pythonhosted.org/packages/22/cc/0bd1a7eb759d1f3e3cc2d1bc0f0b487ad3cc9f34d74da4b80f226fde4ec3/greenlet-3.2.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:ed6cfa9200484d234d8394c70f5492f144b20d4533f69262d530a1a082f6ee9a", size = 692443, upload-time = "2025-06-05T16:48:23.113Z" }, + { url = "https://files.pythonhosted.org/packages/67/10/b2a4b63d3f08362662e89c103f7fe28894a51ae0bc890fabf37d1d780e52/greenlet-3.2.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:02b0df6f63cd15012bed5401b47829cfd2e97052dc89da3cfaf2c779124eb892", size = 692995, upload-time = "2025-06-05T16:13:07.972Z" }, + { url = "https://files.pythonhosted.org/packages/5a/c6/ad82f148a4e3ce9564056453a71529732baf5448ad53fc323e37efe34f66/greenlet-3.2.3-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:86c2d68e87107c1792e2e8d5399acec2487a4e993ab76c792408e59394d52141", size = 655320, upload-time = "2025-06-05T16:12:53.453Z" }, +] + [[package]] name = "h11" version = "0.16.0" @@ -482,7 +522,7 @@ wheels = [ [[package]] name = "openai" -version = "1.96.1" +version = "1.97.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, @@ -494,9 +534,9 @@ dependencies = [ { name = "tqdm", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "typing-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/2f/b5/18fd5e1b6b6c7dca52d60307b3637f9e9e3206a8041a9c8028985dbc6260/openai-1.96.1.tar.gz", hash = "sha256:6d505b5cc550e036bfa3fe99d6cff565b11491d12378d4c353f92ef72b0a408a", size = 489065, upload-time = "2025-07-15T21:39:37.215Z" } +sdist = { url = "https://files.pythonhosted.org/packages/e0/c6/b8d66e4f3b95493a8957065b24533333c927dc23817abe397f13fe589c6e/openai-1.97.0.tar.gz", hash = "sha256:0be349569ccaa4fb54f97bb808423fd29ccaeb1246ee1be762e0c81a47bae0aa", size = 493850, upload-time = "2025-07-16T16:37:35.196Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/4f/57/325bbdbdc27b47309be35cb4e0eb8980b0c1bc997194c797c3691d88ae41/openai-1.96.1-py3-none-any.whl", hash = "sha256:0afaab2019bae8e145e7a1baf6953167084f019dd15042c65edd117398c1eb1c", size = 757454, upload-time = "2025-07-15T21:39:34.517Z" }, + { url = "https://files.pythonhosted.org/packages/8a/91/1f1cf577f745e956b276a8b1d3d76fa7a6ee0c2b05db3b001b900f2c71db/openai-1.97.0-py3-none-any.whl", hash = "sha256:a1c24d96f4609f3f7f51c9e1c2606d97cc6e334833438659cfd687e9c972c610", size = 764953, upload-time = "2025-07-16T16:37:33.135Z" }, ] [[package]] @@ -747,6 +787,43 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235, upload-time = "2024-02-25T23:20:01.196Z" }, ] +[[package]] +name = "sqlalchemy" +version = "2.0.41" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "greenlet", marker = "(python_full_version < '3.14' and platform_machine == 'AMD64' and sys_platform == 'darwin') or (python_full_version < '3.14' and platform_machine == 'WIN32' and sys_platform == 'darwin') or (python_full_version < '3.14' and platform_machine == 'aarch64' and sys_platform == 'darwin') or (python_full_version < '3.14' and platform_machine == 'amd64' and sys_platform == 'darwin') or (python_full_version < '3.14' and platform_machine == 'ppc64le' and sys_platform == 'darwin') or (python_full_version < '3.14' and platform_machine == 'win32' and sys_platform == 'darwin') or (python_full_version < '3.14' and platform_machine == 'x86_64' and sys_platform == 'darwin') or (python_full_version < '3.14' and platform_machine == 'AMD64' and sys_platform == 'linux') or (python_full_version < '3.14' and platform_machine == 'WIN32' and sys_platform == 'linux') or (python_full_version < '3.14' and platform_machine == 'aarch64' and sys_platform == 'linux') or (python_full_version < '3.14' and platform_machine == 'amd64' and sys_platform == 'linux') or (python_full_version < '3.14' and platform_machine == 'ppc64le' and sys_platform == 'linux') or (python_full_version < '3.14' and platform_machine == 'win32' and sys_platform == 'linux') or (python_full_version < '3.14' and platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "typing-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/63/66/45b165c595ec89aa7dcc2c1cd222ab269bc753f1fc7a1e68f8481bd957bf/sqlalchemy-2.0.41.tar.gz", hash = "sha256:edba70118c4be3c2b1f90754d308d0b79c6fe2c0fdc52d8ddf603916f83f4db9", size = 9689424, upload-time = "2025-05-14T17:10:32.339Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/d3/ad/2e1c6d4f235a97eeef52d0200d8ddda16f6c4dd70ae5ad88c46963440480/sqlalchemy-2.0.41-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:4eeb195cdedaf17aab6b247894ff2734dcead6c08f748e617bfe05bd5a218443", size = 2115491, upload-time = "2025-05-14T17:55:31.177Z" }, + { url = "https://files.pythonhosted.org/packages/cf/8d/be490e5db8400dacc89056f78a52d44b04fbf75e8439569d5b879623a53b/sqlalchemy-2.0.41-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d4ae769b9c1c7757e4ccce94b0641bc203bbdf43ba7a2413ab2523d8d047d8dc", size = 2102827, upload-time = "2025-05-14T17:55:34.921Z" }, + { url = "https://files.pythonhosted.org/packages/a0/72/c97ad430f0b0e78efaf2791342e13ffeafcbb3c06242f01a3bb8fe44f65d/sqlalchemy-2.0.41-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a62448526dd9ed3e3beedc93df9bb6b55a436ed1474db31a2af13b313a70a7e1", size = 3225224, upload-time = "2025-05-14T17:50:41.418Z" }, + { url = "https://files.pythonhosted.org/packages/5e/51/5ba9ea3246ea068630acf35a6ba0d181e99f1af1afd17e159eac7e8bc2b8/sqlalchemy-2.0.41-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc56c9788617b8964ad02e8fcfeed4001c1f8ba91a9e1f31483c0dffb207002a", size = 3230045, upload-time = "2025-05-14T17:51:54.722Z" }, + { url = "https://files.pythonhosted.org/packages/78/2f/8c14443b2acea700c62f9b4a8bad9e49fc1b65cfb260edead71fd38e9f19/sqlalchemy-2.0.41-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c153265408d18de4cc5ded1941dcd8315894572cddd3c58df5d5b5705b3fa28d", size = 3159357, upload-time = "2025-05-14T17:50:43.483Z" }, + { url = "https://files.pythonhosted.org/packages/fc/b2/43eacbf6ccc5276d76cea18cb7c3d73e294d6fb21f9ff8b4eef9b42bbfd5/sqlalchemy-2.0.41-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4f67766965996e63bb46cfbf2ce5355fc32d9dd3b8ad7e536a920ff9ee422e23", size = 3197511, upload-time = "2025-05-14T17:51:57.308Z" }, + { url = "https://files.pythonhosted.org/packages/1c/fc/9ba22f01b5cdacc8f5ed0d22304718d2c758fce3fd49a5372b886a86f37c/sqlalchemy-2.0.41-py3-none-any.whl", hash = "sha256:57df5dc6fdb5ed1a88a1ed2195fd31927e705cad62dedd86b46972752a80f576", size = 1911224, upload-time = "2025-05-14T17:39:42.154Z" }, +] + +[package.optional-dependencies] +asyncio = [ + { name = "greenlet", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, +] + +[[package]] +name = "sqlmodel" +version = "0.0.24" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pydantic", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "sqlalchemy", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/86/4b/c2ad0496f5bdc6073d9b4cef52be9c04f2b37a5773441cc6600b1857648b/sqlmodel-0.0.24.tar.gz", hash = "sha256:cc5c7613c1a5533c9c7867e1aab2fd489a76c9e8a061984da11b4e613c182423", size = 116780, upload-time = "2025-03-07T05:43:32.887Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/16/91/484cd2d05569892b7fef7f5ceab3bc89fb0f8a8c0cde1030d383dbc5449c/sqlmodel-0.0.24-py3-none-any.whl", hash = "sha256:6778852f09370908985b667d6a3ab92910d0d5ec88adcaf23dbc242715ff7193", size = 28622, upload-time = "2025-03-07T05:43:30.37Z" }, +] + [[package]] name = "starlette" version = "0.47.1" diff --git a/worker/runner/runner.py b/worker/runner/runner.py index 7b5b2e6d..583d6740 100644 --- a/worker/runner/runner.py +++ b/worker/runner/runner.py @@ -10,7 +10,7 @@ from mlx_lm.generate import stream_generate # type: 
ignore from mlx_lm.tokenizer_utils import TokenizerWrapper from engines.mlx.utils_mlx import apply_chat_template, initialize_mlx -from shared.openai import FinishReason +from shared.openai_compat import FinishReason from shared.types.tasks.common import ChatCompletionTaskData, CompletionCreateParams from shared.types.worker.commands_runner import ( ChatTaskMessage, diff --git a/worker/tests/test_supervisor.py b/worker/tests/test_supervisor.py index b63233be..028b5d74 100644 --- a/worker/tests/test_supervisor.py +++ b/worker/tests/test_supervisor.py @@ -3,7 +3,7 @@ from typing import Callable, Literal import pytest -from shared.openai import FinishReason +from shared.openai_compat import FinishReason from shared.types.events.chunks import TokenChunk from shared.types.tasks.common import ( ChatCompletionTaskData,
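
Usage sketch (not part of the diff): one way a worker process might wire up the event log connectors this patch introduces, using only the APIs added above (EventLogConfig, EventLogManager, and the AsyncSQLiteEventStorage methods). The append call is left commented out because it needs a concrete BaseEvent subclass, which the sketch does not define; databases are created under EXO_HOME (~/.exo) as configured in shared/constants.py.

import asyncio
from logging import getLogger
from uuid import uuid4

from shared.db.sqlite import EventLogConfig, EventLogManager
from shared.types.events.common import NodeId


async def main() -> None:
    # Defaults: batch_size=100, batch_timeout_ms=100, debounce_ms=10, max_age_ms=100.
    manager = EventLogManager(config=EventLogConfig(), logger=getLogger("exo.events"))
    await manager.initialize()  # starts both worker_events and global_events connectors

    # Worker access pattern per the EventLogManager docstring:
    # write to worker_events, tail global_events.
    worker_log = manager.worker_events
    global_log = manager.global_events

    origin = NodeId(uuid=uuid4())

    # Fire-and-forget append: events are queued and committed in the background
    # by the batch writer, so they may not be visible to readers immediately.
    # await worker_log.append_events([some_event], origin)  # needs a concrete BaseEvent subclass

    # Tail the global log from the last index this node has processed.
    events = await global_log.get_events_since(last_idx=0)
    for stored in events:
        print(stored.idx_in_log, stored.origin, stored.event)

    await manager.close_all()


if __name__ == "__main__":
    asyncio.run(main())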