Compare commits


13 Commits

Author          SHA1        Message                               Date
Nicolas Mowen   14fcaa9911  Don't require download check          2026-02-19 08:34:47 -07:00
Nicolas Mowen   72c73b153c  Fix sending images                    2026-02-19 08:24:19 -07:00
Nicolas Mowen   4cd581fc43  undo                                  2026-02-19 08:17:06 -07:00
Nicolas Mowen   f67f569104  Basic docs                            2026-02-19 08:14:13 -07:00
Nicolas Mowen   54a8678058  Add support for embedding via genai   2026-02-19 08:14:08 -07:00
Nicolas Mowen   e013a0206a  Add embed API support                 2026-02-19 08:13:50 -07:00
Nicolas Mowen   266e243425  Support GenAI for embeddings          2026-02-19 08:13:39 -07:00
Nicolas Mowen   b6e17b032a  Fix import issues                     2026-02-18 10:52:32 -07:00
Nicolas Mowen   d150b44d36  Cleanup                               2026-02-18 10:52:32 -07:00
Nicolas Mowen   1e4596eb99  Support getting client via manager    2026-02-18 10:52:32 -07:00
Nicolas Mowen   7c066f661a  Convert to roles list                 2026-02-18 10:52:32 -07:00
Nicolas Mowen   feba94b202  Add config migration                  2026-02-18 10:52:32 -07:00
Nicolas Mowen   3093a7a594  GenAI client manager                  2026-02-18 10:52:32 -07:00
19 changed files with 532 additions and 50 deletions

View File

@@ -76,6 +76,40 @@ Switching between V1 and V2 requires reindexing your embeddings. The embeddings
:::
### GenAI Provider (llama.cpp)
Frigate can use a GenAI provider for semantic search embeddings when that provider has the `embeddings` role. Currently, only **llama.cpp** supports multimodal embeddings (both text and images).

To use llama.cpp for semantic search:

1. Configure a GenAI provider in your config with `embeddings` in its `roles`.
2. Set `semantic_search.model` to the GenAI config key (e.g. `default`).
3. Start the llama.cpp server with `--embeddings` (and `--mmproj` for image support).

```yaml
genai:
  default:
    provider: llamacpp
    base_url: http://localhost:8080
    model: your-model-name
    roles:
      - embeddings
      - vision
      - tools

semantic_search:
  enabled: True
  model: default
```

The llama.cpp server must be started with `--embeddings` for the embeddings API, and `--mmproj <mmproj.gguf>` when using image embeddings. See the [llama.cpp server documentation](https://github.com/ggml-org/llama.cpp/blob/master/tools/server/README.md) for details.
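
To sanity-check the server before pointing Frigate at it, you can call the `/embeddings` endpoint directly. A minimal sketch, assuming the server from the config above is listening on `localhost:8080` and a local `person.jpg` exists; the request shape mirrors what Frigate's llama.cpp client sends in this PR:

```python
import base64

import requests

# One batch with text and an image. llama.cpp's STB image decoder does not
# support WebP, so send JPEG/PNG, base64-encoded.
with open("person.jpg", "rb") as f:
    img_b64 = base64.b64encode(f.read()).decode("utf-8")

resp = requests.post(
    "http://localhost:8080/embeddings",
    json={
        "content": [
            {"prompt_string": "a person walking a dog"},
            # <__media__> marks where the image is tokenized
            {"prompt_string": "<__media__>\n", "multimodal_data": [img_b64]},
        ]
    },
    timeout=60,
)
resp.raise_for_status()

result = resp.json()
# Depending on server version the payload is a list or {"data": [...]}.
items = result.get("data", result) if isinstance(result, dict) else result
for item in items:
    emb = item["embedding"]
    # may be nested ([[...]]) depending on pooling; Frigate flattens with numpy
    print(len(emb))
```

If the server was started without `--embeddings`, this request fails rather than returning vectors.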
:::note
Switching between Jina models and a GenAI provider requires reindexing. Embeddings from different backends are incompatible.
:::
### GPU Acceleration
The CLIP models are downloaded in ONNX format, and the `large` model can be accelerated using GPU hardware when available. This depends on the Docker build that is used. You can also target a specific device in a multi-GPU installation.

View File

@@ -38,6 +38,7 @@ from frigate.config.camera.updater import (
    CameraConfigUpdateTopic,
)
from frigate.ffmpeg_presets import FFMPEG_HWACCEL_VAAPI, _gpu_selector
from frigate.genai import GenAIClientManager
from frigate.jobs.media_sync import (
get_current_media_sync_job,
get_media_sync_job_by_id,
@@ -432,6 +433,7 @@ def config_set(request: Request, body: AppConfigSetBody):
    if body.requires_restart == 0 or body.update_topic:
        old_config: FrigateConfig = request.app.frigate_config
        request.app.frigate_config = config
        request.app.genai_manager = GenAIClientManager(config)

        if body.update_topic:
            if body.update_topic.startswith("config/cameras/"):

View File

@@ -1037,4 +1037,4 @@ async def get_allowed_cameras_for_filter(request: Request):
    role = current_user["role"]
    all_camera_names = set(request.app.frigate_config.cameras.keys())
    roles_dict = request.app.frigate_config.auth.roles

    return User.get_allowed_cameras(role, roles_dict, all_camera_names)
    return User.get_allowed_cameras(role, roles_dict, all_camera_names)

View File

@@ -23,7 +23,6 @@ from frigate.api.defs.response.chat_response import (
)
from frigate.api.defs.tags import Tags
from frigate.api.event import events
from frigate.genai import get_genai_client
logger = logging.getLogger(__name__)
@@ -383,7 +382,7 @@ async def chat_completion(
    6. Repeats until final answer
    7. Returns response to user
    """
    genai_client = get_genai_client(request.app.frigate_config)
    genai_client = request.app.genai_manager.tool_client

    if not genai_client:
        return JSONResponse(
            content={

View File

@@ -33,6 +33,7 @@ from frigate.comms.event_metadata_updater import (
from frigate.config import FrigateConfig
from frigate.config.camera.updater import CameraConfigUpdatePublisher
from frigate.embeddings import EmbeddingsContext
from frigate.genai import GenAIClientManager
from frigate.ptz.onvif import OnvifController
from frigate.stats.emitter import StatsEmitter
from frigate.storage import StorageMaintainer
@@ -134,6 +135,7 @@ def create_fastapi_app(
    app.include_router(record.router)

    # App Properties
    app.frigate_config = frigate_config
    app.genai_manager = GenAIClientManager(frigate_config)
    app.embeddings = embeddings
    app.detected_frames_processor = detected_frames_processor
    app.storage_maintainer = storage_maintainer

View File

@@ -33,7 +33,6 @@ from frigate.api.defs.response.review_response import (
ReviewSummaryResponse,
)
from frigate.api.defs.tags import Tags
from frigate.config import FrigateConfig
from frigate.embeddings import EmbeddingsContext
from frigate.models import Recordings, ReviewSegment, UserReviewStatus
from frigate.review.types import SeverityEnum
@@ -747,9 +746,7 @@ async def set_not_reviewed(
description="Use GenAI to summarize review items over a period of time.",
)
def generate_review_summary(request: Request, start_ts: float, end_ts: float):
config: FrigateConfig = request.app.frigate_config
if not config.genai.provider:
if not request.app.genai_manager.vision_client:
return JSONResponse(
content=(
{

View File

@@ -6,7 +6,7 @@ from pydantic import Field
from ..base import FrigateBaseModel
from ..env import EnvString
__all__ = ["GenAIConfig", "GenAIProviderEnum"]
__all__ = ["GenAIConfig", "GenAIProviderEnum", "GenAIRoleEnum"]
class GenAIProviderEnum(str, Enum):
@@ -17,6 +17,12 @@ class GenAIProviderEnum(str, Enum):
llamacpp = "llamacpp"
class GenAIRoleEnum(str, Enum):
tools = "tools"
vision = "vision"
embeddings = "embeddings"
class GenAIConfig(FrigateBaseModel):
"""Primary GenAI Config to define GenAI Provider."""
@@ -24,6 +30,14 @@ class GenAIConfig(FrigateBaseModel):
base_url: Optional[str] = Field(default=None, title="Provider base url.")
model: str = Field(default="gpt-4o", title="GenAI model.")
provider: GenAIProviderEnum | None = Field(default=None, title="GenAI provider.")
roles: list[GenAIRoleEnum] = Field(
default_factory=lambda: [
GenAIRoleEnum.embeddings,
GenAIRoleEnum.vision,
GenAIRoleEnum.tools,
],
title="GenAI roles (tools, vision, embeddings); one provider per role.",
)
provider_options: dict[str, Any] = Field(
default={}, title="GenAI Provider extra options."
)

View File

@@ -1,5 +1,5 @@
from enum import Enum
from typing import Dict, List, Optional
from typing import Dict, List, Optional, Union
from pydantic import ConfigDict, Field
@@ -128,9 +128,10 @@ class SemanticSearchConfig(FrigateBaseModel):
    reindex: Optional[bool] = Field(
        default=False, title="Reindex all tracked objects on startup."
    )
    model: Optional[SemanticSearchModelEnum] = Field(
    model: Optional[Union[SemanticSearchModelEnum, str]] = Field(
        default=SemanticSearchModelEnum.jinav1,
        title="The CLIP model to use for semantic search.",
        title="The CLIP model or GenAI provider name for semantic search.",
        description="Use 'jinav1', 'jinav2' for ONNX models, or a GenAI config key (e.g. 'default') when that provider has the embeddings role.",
    )
    model_size: str = Field(
        default="small", title="The size of the embeddings model used."

View File

@@ -45,7 +45,7 @@ from .camera.audio import AudioConfig
from .camera.birdseye import BirdseyeConfig
from .camera.detect import DetectConfig
from .camera.ffmpeg import FfmpegConfig
from .camera.genai import GenAIConfig
from .camera.genai import GenAIConfig, GenAIRoleEnum
from .camera.motion import MotionConfig
from .camera.notification import NotificationConfig
from .camera.objects import FilterConfig, ObjectConfig
@@ -347,9 +347,9 @@ class FrigateConfig(FrigateBaseModel):
        default_factory=ModelConfig, title="Detection model configuration."
    )

    # GenAI config
    genai: GenAIConfig = Field(
        default_factory=GenAIConfig, title="Generative AI configuration."
    # GenAI config (named provider configs: name -> GenAIConfig)
    genai: Dict[str, GenAIConfig] = Field(
        default_factory=dict, title="Generative AI configuration (named providers)."
    )

    # Camera config
@@ -431,6 +431,34 @@ class FrigateConfig(FrigateBaseModel):
        # set notifications state
        self.notifications.enabled_in_config = self.notifications.enabled

        # validate genai: each role (tools, vision, embeddings) at most once
        role_to_name: dict[GenAIRoleEnum, str] = {}
        for name, genai_cfg in self.genai.items():
            for role in genai_cfg.roles:
                if role in role_to_name:
                    raise ValueError(
                        f"GenAI role '{role.value}' is assigned to both "
                        f"'{role_to_name[role]}' and '{name}'; each role must have "
                        "exactly one provider."
                    )
                role_to_name[role] = name

        # validate semantic_search.model when it is a GenAI provider name
        if self.semantic_search.enabled and isinstance(
            self.semantic_search.model, str
        ):
            if self.semantic_search.model not in self.genai:
                raise ValueError(
                    f"semantic_search.model '{self.semantic_search.model}' is not a "
                    "valid GenAI config key. Must match a key in genai config."
                )
            genai_cfg = self.genai[self.semantic_search.model]
            if GenAIRoleEnum.embeddings not in genai_cfg.roles:
                raise ValueError(
                    f"GenAI provider '{self.semantic_search.model}' must have "
                    "'embeddings' in its roles for semantic search."
                )

        # set default min_score for object attributes
        for attribute in self.model.all_attributes:
            if not self.objects.filters.get(attribute):
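
As a hypothetical illustration of the role-uniqueness check above, a config that assigns `embeddings` to two named providers fails validation:

```python
# Hypothetical genai section: both providers claim the embeddings role,
# so FrigateConfig validation raises:
#   ValueError: GenAI role 'embeddings' is assigned to both 'first' and
#   'second'; each role must have exactly one provider.
genai = {
    "first": {"provider": "llamacpp", "base_url": "http://a:8080", "roles": ["embeddings"]},
    "second": {"provider": "llamacpp", "base_url": "http://b:8080", "roles": ["embeddings", "vision"]},
}
```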

View File

@@ -603,4 +603,4 @@ def get_optimized_runner(
            provider_options=options,
        ),
        model_type=model_type,
    )
    )

View File

@@ -28,6 +28,7 @@ from frigate.types import ModelStatusTypesEnum
from frigate.util.builtin import EventsPerSecond, InferenceSpeed, serialize
from frigate.util.file import get_event_thumbnail_bytes
from .genai_embedding import GenAIEmbedding
from .onnx.jina_v1_embedding import JinaV1ImageEmbedding, JinaV1TextEmbedding
from .onnx.jina_v2_embedding import JinaV2Embedding
@@ -73,11 +74,13 @@ class Embeddings:
        config: FrigateConfig,
        db: SqliteVecQueueDatabase,
        metrics: DataProcessorMetrics,
        genai_manager=None,
    ) -> None:
        self.config = config
        self.db = db
        self.metrics = metrics
        self.requestor = InterProcessRequestor()
        self.genai_manager = genai_manager

        self.image_inference_speed = InferenceSpeed(self.metrics.image_embeddings_speed)
        self.image_eps = EventsPerSecond()
@@ -104,7 +107,27 @@ class Embeddings:
            },
        )

        if self.config.semantic_search.model == SemanticSearchModelEnum.jinav2:
        model_cfg = self.config.semantic_search.model
        is_genai_model = isinstance(model_cfg, str)

        if is_genai_model:
            embeddings_client = (
                genai_manager.embeddings_client if genai_manager else None
            )
            if not embeddings_client:
                raise ValueError(
                    f"semantic_search.model is '{model_cfg}' (GenAI provider) but "
                    "no embeddings client is configured. Ensure the GenAI provider "
                    "has 'embeddings' in its roles."
                )
            self.embedding = GenAIEmbedding(embeddings_client)
            self.text_embedding = lambda input_data: self.embedding(
                input_data, embedding_type="text"
            )
            self.vision_embedding = lambda input_data: self.embedding(
                input_data, embedding_type="vision"
            )
        elif model_cfg == SemanticSearchModelEnum.jinav2:
            # Single JinaV2Embedding instance for both text and vision
            self.embedding = JinaV2Embedding(
                model_size=self.config.semantic_search.model_size,
@@ -118,7 +141,8 @@
            self.vision_embedding = lambda input_data: self.embedding(
                input_data, embedding_type="vision"
            )
        else:  # Default to jinav1
        else:
            # Default to jinav1
            self.text_embedding = JinaV1TextEmbedding(
                model_size=config.semantic_search.model_size,
                requestor=self.requestor,
@@ -136,8 +160,11 @@ class Embeddings:
        self.metrics.text_embeddings_eps.value = self.text_eps.eps()

    def get_model_definitions(self):
        # Version-specific models
        if self.config.semantic_search.model == SemanticSearchModelEnum.jinav2:
        model_cfg = self.config.semantic_search.model
        if isinstance(model_cfg, str):
            # GenAI provider: no ONNX models to download
            models = []
        elif model_cfg == SemanticSearchModelEnum.jinav2:
            models = [
                "jinaai/jina-clip-v2-tokenizer",
                "jinaai/jina-clip-v2-model_fp16.onnx"
@@ -224,6 +251,14 @@ class Embeddings:
        embeddings = self.vision_embedding(valid_thumbs)

        if len(embeddings) != len(valid_ids):
            logger.warning(
                "Batch embed returned %d embeddings for %d thumbnails; skipping batch",
                len(embeddings),
                len(valid_ids),
            )
            return []

        if upsert:
            items = []

            for i in range(len(valid_ids)):
@@ -246,9 +281,15 @@ class Embeddings:
    def embed_description(
        self, event_id: str, description: str, upsert: bool = True
    ) -> np.ndarray:
    ) -> np.ndarray | None:
        start = datetime.datetime.now().timestamp()
        embedding = self.text_embedding([description])[0]
        embeddings = self.text_embedding([description])

        if not embeddings:
            logger.warning(
                "Failed to generate description embedding for event %s", event_id
            )
            return None

        embedding = embeddings[0]

        if upsert:
            self.db.execute_sql(
@@ -271,8 +312,32 @@ class Embeddings:
        # upsert embeddings one by one to avoid token limit
        embeddings = []
        for desc in event_descriptions.values():
            embeddings.append(self.text_embedding([desc])[0])
        ids = []
        for eid, desc in event_descriptions.items():
            result = self.text_embedding([desc])
            if not result:
                logger.warning(
                    "Failed to generate description embedding for event %s", eid
                )
                continue
            # track the id alongside its embedding so failures stay aligned
            ids.append(eid)
            embeddings.append(result[0])

        if not embeddings:
            logger.warning("No description embeddings generated in batch")
            return np.array([])

        if upsert:
@@ -314,7 +379,10 @@ class Embeddings:
        batch_size = (
            4
            if self.config.semantic_search.model == SemanticSearchModelEnum.jinav2
            if (
                isinstance(self.config.semantic_search.model, str)
                or self.config.semantic_search.model == SemanticSearchModelEnum.jinav2
            )
            else 32
        )
        current_page = 1
@@ -601,6 +669,8 @@ class Embeddings:
if trigger.type == "description":
logger.debug(f"Generating embedding for trigger description {trigger_name}")
embedding = self.embed_description(None, trigger.data, upsert=False)
if embedding is None:
return b""
return embedding.astype(np.float32).tobytes()
elif trigger.type == "thumbnail":
@@ -636,6 +706,8 @@ class Embeddings:
embedding = self.embed_thumbnail(
str(trigger.data), thumbnail, upsert=False
)
if embedding is None:
return b""
return embedding.astype(np.float32).tobytes()
else:

View File

@@ -0,0 +1,85 @@
"""GenAI-backed embeddings for semantic search."""
import io
import logging
from typing import TYPE_CHECKING
import numpy as np
from PIL import Image
if TYPE_CHECKING:
from frigate.genai import GenAIClient
logger = logging.getLogger(__name__)
EMBEDDING_DIM = 768
class GenAIEmbedding:
"""Embedding adapter that delegates to a GenAI provider's embed API.
Provides the same interface as JinaV2Embedding for semantic search:
__call__(inputs, embedding_type) -> list[np.ndarray]. Output embeddings are
normalized to 768 dimensions for Frigate's sqlite-vec schema.
"""
def __init__(self, client: "GenAIClient") -> None:
self.client = client
def __call__(
self,
inputs: list[str] | list[bytes] | list[Image.Image],
embedding_type: str = "text",
) -> list[np.ndarray]:
"""Generate embeddings for text or images.
Args:
inputs: List of strings (text) or bytes/PIL images (vision).
embedding_type: "text" or "vision".
Returns:
List of 768-dim numpy float32 arrays.
"""
if not inputs:
return []
if embedding_type == "text":
texts = [str(x) for x in inputs]
embeddings = self.client.embed(texts=texts)
elif embedding_type == "vision":
images: list[bytes] = []
for inp in inputs:
if isinstance(inp, bytes):
images.append(inp)
elif isinstance(inp, Image.Image):
buf = io.BytesIO()
inp.convert("RGB").save(buf, format="JPEG")
images.append(buf.getvalue())
else:
logger.warning(
"GenAIEmbedding: skipping unsupported vision input type %s",
type(inp).__name__,
)
if not images:
return []
embeddings = self.client.embed(images=images)
else:
raise ValueError(
f"Invalid embedding_type '{embedding_type}'. Must be 'text' or 'vision'."
)
result = []
for emb in embeddings:
arr = np.asarray(emb, dtype=np.float32).flatten()
if arr.size != EMBEDDING_DIM:
if arr.size > EMBEDDING_DIM:
arr = arr[:EMBEDDING_DIM]
else:
arr = np.pad(
arr,
(0, EMBEDDING_DIM - arr.size),
mode="constant",
constant_values=0,
)
result.append(arr)
return result
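
A hypothetical usage sketch (`client` and the JPEG file are assumptions): the adapter is called exactly like the Jina embedding classes, which is why `Embeddings` can hide it behind the same `text_embedding`/`vision_embedding` lambdas:

```python
from frigate.embeddings.genai_embedding import GenAIEmbedding

client = ...  # any GenAIClient whose embed() is implemented (e.g. LlamaCppClient)
emb = GenAIEmbedding(client)

with open("person.jpg", "rb") as f:
    jpeg_bytes = f.read()

text_vecs = emb(["red car in the driveway"], embedding_type="text")
img_vecs = emb([jpeg_bytes], embedding_type="vision")

# every vector is padded/truncated to the 768-dim sqlite-vec schema
assert all(v.shape == (768,) for v in text_vecs + img_vecs)
```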

View File

@@ -59,7 +59,7 @@ from frigate.data_processing.real_time.license_plate import (
from frigate.data_processing.types import DataProcessorMetrics, PostProcessDataEnum
from frigate.db.sqlitevecq import SqliteVecQueueDatabase
from frigate.events.types import EventTypeEnum, RegenerateDescriptionEnum
from frigate.genai import get_genai_client
from frigate.genai import GenAIClientManager
from frigate.models import Event, Recordings, ReviewSegment, Trigger
from frigate.util.builtin import serialize
from frigate.util.file import get_event_thumbnail_bytes
@@ -116,8 +116,10 @@ class EmbeddingMaintainer(threading.Thread):
        models = [Event, Recordings, ReviewSegment, Trigger]
        db.bind(models)

        self.genai_manager = GenAIClientManager(config)

        if config.semantic_search.enabled:
            self.embeddings = Embeddings(config, db, metrics)
            self.embeddings = Embeddings(config, db, metrics, self.genai_manager)

            # Check if we need to re-index events
            if config.semantic_search.reindex:
@@ -144,7 +146,6 @@
        self.frame_manager = SharedMemoryFrameManager()
        self.detected_license_plates: dict[str, dict[str, Any]] = {}
        self.genai_client = get_genai_client(config)

        # model runners to share between realtime and post processors
        if self.config.lpr.enabled:
@@ -203,12 +204,15 @@ class EmbeddingMaintainer(threading.Thread):
        # post processors
        self.post_processors: list[PostProcessorApi] = []

        if self.genai_client is not None and any(
        if self.genai_manager.vision_client is not None and any(
            c.review.genai.enabled_in_config for c in self.config.cameras.values()
        ):
            self.post_processors.append(
                ReviewDescriptionProcessor(
                    self.config, self.requestor, self.metrics, self.genai_client
                    self.config,
                    self.requestor,
                    self.metrics,
                    self.genai_manager.vision_client,
                )
            )
@@ -246,7 +250,7 @@
            )
            self.post_processors.append(semantic_trigger_processor)

        if self.genai_client is not None and any(
        if self.genai_manager.vision_client is not None and any(
            c.objects.genai.enabled_in_config for c in self.config.cameras.values()
        ):
            self.post_processors.append(
@@ -255,7 +259,7 @@
                    self.embeddings,
                    self.requestor,
                    self.metrics,
                    self.genai_client,
                    self.genai_manager.vision_client,
                    semantic_trigger_processor,
                )
            )

View File

@@ -7,15 +7,27 @@ import os
import re
from typing import Any, Optional
import numpy as np
from playhouse.shortcuts import model_to_dict
from frigate.config import CameraConfig, FrigateConfig, GenAIConfig, GenAIProviderEnum
from frigate.const import CLIPS_DIR
from frigate.data_processing.post.types import ReviewMetadata
from frigate.genai.manager import GenAIClientManager
from frigate.models import Event
logger = logging.getLogger(__name__)
__all__ = [
    "GenAIClient",
    "GenAIClientManager",
    "GenAIConfig",
    "GenAIProviderEnum",
    "PROVIDERS",
    "load_providers",
    "register_genai_provider",
]
PROVIDERS = {}
@@ -293,6 +305,25 @@ Guidelines:
"""Get the context window size for this provider in tokens."""
return 4096
def embed(
self,
texts: list[str] | None = None,
images: list[bytes] | None = None,
) -> list[np.ndarray]:
"""Generate embeddings for text and/or images.
Returns list of numpy arrays (one per input). Expected dimension is 768
for Frigate semantic search compatibility.
Providers that support embeddings should override this method.
"""
logger.warning(
"%s does not support embeddings. "
"This method should be overridden by the provider implementation.",
self.__class__.__name__,
)
return []
def chat_with_tools(
self,
messages: list[dict[str, Any]],
@@ -352,19 +383,6 @@ Guidelines:
}
def get_genai_client(config: FrigateConfig) -> Optional[GenAIClient]:
    """Get the GenAI client."""
    if not config.genai.provider:
        return None

    load_providers()
    provider = PROVIDERS.get(config.genai.provider)

    if provider:
        return provider(config.genai)

    return None


def load_providers():
    package_dir = os.path.dirname(__file__)

    for filename in os.listdir(package_dir):

View File

@@ -1,11 +1,14 @@
"""llama.cpp Provider for Frigate AI."""
import base64
import io
import json
import logging
from typing import Any, Optional
import numpy as np
import requests
from PIL import Image
from frigate.config import GenAIProviderEnum
from frigate.genai import GenAIClient, register_genai_provider
@@ -13,6 +16,20 @@ from frigate.genai import GenAIClient, register_genai_provider
logger = logging.getLogger(__name__)
def _to_jpeg(img_bytes: bytes) -> bytes | None:
    """Convert image bytes to JPEG. llama.cpp/STB does not support WebP."""
    try:
        img = Image.open(io.BytesIO(img_bytes))
        if img.mode != "RGB":
            img = img.convert("RGB")
        buf = io.BytesIO()
        img.save(buf, format="JPEG", quality=85)
        return buf.getvalue()
    except Exception as e:
        logger.warning("Failed to convert image to JPEG: %s", e)
        return None
@register_genai_provider(GenAIProviderEnum.llamacpp)
class LlamaCppClient(GenAIClient):
"""Generative AI client for Frigate using llama.cpp server."""
@@ -101,6 +118,104 @@ class LlamaCppClient(GenAIClient):
"""Get the context window size for llama.cpp."""
return self.genai_config.provider_options.get("context_size", 4096)
    def embed(
        self,
        texts: list[str] | None = None,
        images: list[bytes] | None = None,
    ) -> list[np.ndarray]:
        """Generate embeddings via llama.cpp /embeddings endpoint.

        Supports batch requests. Uses content format with prompt_string and
        multimodal_data for images (PR #15108). Server must be started with
        --embeddings and --mmproj for multimodal support.
        """
        if self.provider is None:
            logger.warning(
                "llama.cpp provider has not been initialized. Check your llama.cpp configuration."
            )
            return []

        texts = texts or []
        images = images or []

        if not texts and not images:
            return []

        EMBEDDING_DIM = 768
        content = []

        for text in texts:
            content.append({"prompt_string": text})

        for img in images:
            # llama.cpp uses STB which does not support WebP; convert to JPEG
            jpeg_bytes = _to_jpeg(img)
            to_encode = jpeg_bytes if jpeg_bytes is not None else img
            encoded = base64.b64encode(to_encode).decode("utf-8")
            # prompt_string must contain <__media__> placeholder for image tokenization
            content.append({
                "prompt_string": "<__media__>\n",
                "multimodal_data": [encoded],
            })

        try:
            response = requests.post(
                f"{self.provider}/embeddings",
                json={"content": content},
                timeout=self.timeout,
            )
            response.raise_for_status()
            result = response.json()
            items = result.get("data", result) if isinstance(result, dict) else result

            if not isinstance(items, list):
                logger.warning("llama.cpp embeddings returned unexpected format")
                return []

            embeddings = []
            for item in items:
                emb = item.get("embedding") if isinstance(item, dict) else None
                if emb is None:
                    logger.warning("llama.cpp embeddings item missing embedding field")
                    continue

                arr = np.array(emb, dtype=np.float32)
                orig_dim = arr.size

                if orig_dim != EMBEDDING_DIM:
                    if orig_dim > EMBEDDING_DIM:
                        arr = arr[:EMBEDDING_DIM]
                        logger.debug(
                            "Truncated llama.cpp embedding from %d to %d dimensions",
                            orig_dim,
                            EMBEDDING_DIM,
                        )
                    else:
                        arr = np.pad(
                            arr,
                            (0, EMBEDDING_DIM - orig_dim),
                            mode="constant",
                            constant_values=0,
                        )
                        logger.debug(
                            "Padded llama.cpp embedding from %d to %d dimensions",
                            orig_dim,
                            EMBEDDING_DIM,
                        )

                embeddings.append(arr)

            return embeddings
        except requests.exceptions.Timeout:
            logger.warning("llama.cpp embeddings request timed out")
            return []
        except requests.exceptions.RequestException as e:
            error_detail = str(e)
            if hasattr(e, "response") and e.response is not None:
                try:
                    error_detail = f"{str(e)} - Response: {e.response.text[:500]}"
                except Exception:
                    pass
            logger.warning("llama.cpp embeddings error: %s", error_detail)
            return []
        except Exception as e:
            logger.warning("Unexpected error in llama.cpp embeddings: %s", str(e))
            return []

    def chat_with_tools(
        self,
        messages: list[dict[str, Any]],

frigate/genai/manager.py (new file, +89 lines)
View File

@@ -0,0 +1,89 @@
"""GenAI client manager for Frigate.
Manages GenAI provider clients from Frigate config. Configuration is read only
in _update_config(); no other code should read config.genai. Exposes clients
by role: tool_client, vision_client, embeddings_client.
"""
import logging
from typing import TYPE_CHECKING, Optional
from frigate.config import FrigateConfig
from frigate.config.camera.genai import GenAIRoleEnum
if TYPE_CHECKING:
from frigate.genai import GenAIClient
logger = logging.getLogger(__name__)
class GenAIClientManager:
"""Manages GenAI provider clients from Frigate config."""
def __init__(self, config: FrigateConfig) -> None:
self._config = config
self._tool_client: Optional[GenAIClient] = None
self._vision_client: Optional[GenAIClient] = None
self._embeddings_client: Optional[GenAIClient] = None
self._update_config()
def _update_config(self) -> None:
"""Build role clients from current Frigate config.genai.
Called from __init__ and can be called again when config is reloaded.
Each role (tools, vision, embeddings) gets the client for the provider
that has that role in its roles list.
"""
from frigate.genai import PROVIDERS, load_providers
self._tool_client = None
self._vision_client = None
self._embeddings_client = None
if not self._config.genai:
return
load_providers()
for _name, genai_cfg in self._config.genai.items():
if not genai_cfg.provider:
continue
provider_cls = PROVIDERS.get(genai_cfg.provider)
if not provider_cls:
logger.warning(
"Unknown GenAI provider %s in config, skipping.",
genai_cfg.provider,
)
continue
try:
client = provider_cls(genai_cfg)
except Exception as e:
logger.exception(
"Failed to create GenAI client for provider %s: %s",
genai_cfg.provider,
e,
)
continue
for role in genai_cfg.roles:
if role == GenAIRoleEnum.tools:
self._tool_client = client
elif role == GenAIRoleEnum.vision:
self._vision_client = client
elif role == GenAIRoleEnum.embeddings:
self._embeddings_client = client
@property
def tool_client(self) -> "Optional[GenAIClient]":
"""Client configured for the tools role (e.g. chat with function calling)."""
return self._tool_client
@property
def vision_client(self) -> "Optional[GenAIClient]":
"""Client configured for the vision role (e.g. review descriptions, object descriptions)."""
return self._vision_client
@property
def embeddings_client(self) -> "Optional[GenAIClient]":
"""Client configured for the embeddings role."""
return self._embeddings_client
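
Usage mirrors the API wiring earlier in this PR (`app.genai_manager = GenAIClientManager(frigate_config)`); a short hypothetical sketch:

```python
from frigate.genai.manager import GenAIClientManager

manager = GenAIClientManager(config)  # config: an already-loaded FrigateConfig

# Each role resolves to at most one client; any of these may be None.
if manager.embeddings_client is not None:
    vectors = manager.embeddings_client.embed(texts=["a person at the door"])

if manager.vision_client is not None:
    pass  # used for review/object descriptions

if manager.tool_client is not None:
    pass  # used for chat with function calling
```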

View File

@@ -438,6 +438,13 @@ def migrate_018_0(config: dict[str, dict[str, Any]]) -> dict[str, dict[str, Any]
"""Handle migrating frigate config to 0.18-0"""
new_config = config.copy()
# Migrate GenAI to new format
genai = new_config.get("genai")
if genai and genai.get("provider"):
genai["roles"] = ["embeddings", "vision", "tools"]
new_config["genai"] = {"default": genai}
# Remove deprecated sync_recordings from global record config
if new_config.get("record", {}).get("sync_recordings") is not None:
del new_config["record"]["sync_recordings"]

View File

@@ -1,3 +1,6 @@
/** ONNX embedding models that require local model downloads. GenAI providers are not in this list. */
export const JINA_EMBEDDING_MODELS = ["jinav1", "jinav2"] as const;
export const supportedLanguageKeys = [
"en",
"es",

View File

@@ -23,6 +23,7 @@ import { toast } from "sonner";
import useSWR from "swr";
import useSWRInfinite from "swr/infinite";
import { useDocDomain } from "@/hooks/use-doc-domain";
import { JINA_EMBEDDING_MODELS } from "@/lib/const";
const API_LIMIT = 25;
@@ -293,7 +294,12 @@ export default function Explore() {
  const modelVersion = config?.semantic_search.model || "jinav1";
  const modelSize = config?.semantic_search.model_size || "small";

  // Text model state
  // GenAI providers have no local models to download
  const isGenaiEmbeddings =
    typeof modelVersion === "string" &&
    !(JINA_EMBEDDING_MODELS as readonly string[]).includes(modelVersion);

  // Text model state (skipped for GenAI - no local models)
  const { payload: textModelState } = useModelState(
    modelVersion === "jinav1"
      ? "jinaai/jina-clip-v1-text_model_fp16.onnx"
@@ -328,6 +334,10 @@ export default function Explore() {
  );

  const allModelsLoaded = useMemo(() => {
    if (isGenaiEmbeddings) {
      return true;
    }
    return (
      textModelState === "downloaded" &&
      textTokenizerState === "downloaded" &&
@@ -335,6 +345,7 @@
      visionFeatureExtractorState === "downloaded"
    );
  }, [
    isGenaiEmbeddings,
    textModelState,
    textTokenizerState,
    visionModelState,
@@ -358,10 +369,11 @@ export default function Explore() {
    !defaultViewLoaded ||
    (config?.semantic_search.enabled &&
      (!reindexState ||
        !textModelState ||
        !textTokenizerState ||
        !visionModelState ||
        !visionFeatureExtractorState))
        (!isGenaiEmbeddings &&
          (!textModelState ||
            !textTokenizerState ||
            !visionModelState ||
            !visionFeatureExtractorState))))
  ) {
    return (
      <ActivityIndicator className="absolute left-1/2 top-1/2 -translate-x-1/2 -translate-y-1/2" />