Mirror of https://github.com/blakeblackshear/frigate.git (synced 2026-02-20 07:46:46 -05:00)

Compare commits: 0.18-rebas...llama.cpp- (13 commits)

14fcaa9911, 72c73b153c, 4cd581fc43, f67f569104, 54a8678058, e013a0206a, 266e243425, b6e17b032a, d150b44d36, 1e4596eb99, 7c066f661a, feba94b202, 3093a7a594
@@ -76,6 +76,40 @@ Switching between V1 and V2 requires reindexing your embeddings. The embeddings
:::

### GenAI Provider (llama.cpp)

Frigate can use a GenAI provider for semantic search embeddings when that provider has the `embeddings` role. Currently, only **llama.cpp** supports multimodal embeddings (both text and images).

To use llama.cpp for semantic search:

1. Configure a GenAI provider in your config with `embeddings` in its `roles`.
2. Set `semantic_search.model` to the GenAI config key (e.g. `default`).
3. Start the llama.cpp server with `--embeddings` and, for image support, `--mmproj`.

A minimal Frigate configuration for steps 1 and 2:

```yaml
genai:
  default:
    provider: llamacpp
    base_url: http://localhost:8080
    model: your-model-name
    roles:
      - embeddings
      - vision
      - tools

semantic_search:
  enabled: True
  model: default
```
The llama.cpp server must be started with `--embeddings` for the embeddings API, and `--mmproj <mmproj.gguf>` when using image embeddings. See the [llama.cpp server documentation](https://github.com/ggml-org/llama.cpp/blob/master/tools/server/README.md) for details.
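
To sanity-check the server before enabling semantic search, you can send a request to the native `/embeddings` endpoint yourself. The sketch below mirrors the request shape Frigate's llama.cpp provider uses in this change set; the URL and prompt text are placeholders for your own setup.

```python
# Minimal check that the llama.cpp server answers the embeddings API.
# Assumes the server runs locally on port 8080 with --embeddings enabled.
import requests

resp = requests.post(
    "http://localhost:8080/embeddings",
    json={"content": [{"prompt_string": "a person walking a dog"}]},
    timeout=30,
)
resp.raise_for_status()

result = resp.json()
# The response may be a bare list or wrapped in a "data" field.
items = result.get("data", result) if isinstance(result, dict) else result
print(len(items[0]["embedding"]))  # embedding dimensionality reported by the server
```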

:::note

Switching between Jina models and a GenAI provider requires reindexing. Embeddings from different backends are incompatible.

:::

### GPU Acceleration

The CLIP models are downloaded in ONNX format, and the `large` model can be accelerated using GPU hardware, when available. This depends on the Docker build that is used. You can also target a specific device in a multi-GPU installation.
@@ -38,6 +38,7 @@ from frigate.config.camera.updater import (
    CameraConfigUpdateTopic,
)
from frigate.ffmpeg_presets import FFMPEG_HWACCEL_VAAPI, _gpu_selector
from frigate.genai import GenAIClientManager
from frigate.jobs.media_sync import (
    get_current_media_sync_job,
    get_media_sync_job_by_id,
@@ -432,6 +433,7 @@ def config_set(request: Request, body: AppConfigSetBody):
    if body.requires_restart == 0 or body.update_topic:
        old_config: FrigateConfig = request.app.frigate_config
        request.app.frigate_config = config
        request.app.genai_manager = GenAIClientManager(config)

        if body.update_topic:
            if body.update_topic.startswith("config/cameras/"):
@@ -1037,4 +1037,4 @@ async def get_allowed_cameras_for_filter(request: Request):
    role = current_user["role"]
    all_camera_names = set(request.app.frigate_config.cameras.keys())
    roles_dict = request.app.frigate_config.auth.roles
-    return User.get_allowed_cameras(role, roles_dict, all_camera_names)
+    return User.get_allowed_cameras(role, roles_dict, all_camera_names)
@@ -23,7 +23,6 @@ from frigate.api.defs.response.chat_response import (
)
from frigate.api.defs.tags import Tags
from frigate.api.event import events
-from frigate.genai import get_genai_client

logger = logging.getLogger(__name__)
@@ -383,7 +382,7 @@ async def chat_completion(
    6. Repeats until final answer
    7. Returns response to user
    """
-    genai_client = get_genai_client(request.app.frigate_config)
+    genai_client = request.app.genai_manager.tool_client
    if not genai_client:
        return JSONResponse(
            content={
@@ -33,6 +33,7 @@ from frigate.comms.event_metadata_updater import (
from frigate.config import FrigateConfig
from frigate.config.camera.updater import CameraConfigUpdatePublisher
from frigate.embeddings import EmbeddingsContext
from frigate.genai import GenAIClientManager
from frigate.ptz.onvif import OnvifController
from frigate.stats.emitter import StatsEmitter
from frigate.storage import StorageMaintainer
@@ -134,6 +135,7 @@ def create_fastapi_app(
    app.include_router(record.router)
    # App Properties
    app.frigate_config = frigate_config
    app.genai_manager = GenAIClientManager(frigate_config)
    app.embeddings = embeddings
    app.detected_frames_processor = detected_frames_processor
    app.storage_maintainer = storage_maintainer
@@ -33,7 +33,6 @@ from frigate.api.defs.response.review_response import (
    ReviewSummaryResponse,
)
from frigate.api.defs.tags import Tags
from frigate.config import FrigateConfig
from frigate.embeddings import EmbeddingsContext
from frigate.models import Recordings, ReviewSegment, UserReviewStatus
from frigate.review.types import SeverityEnum
@@ -747,9 +746,7 @@ async def set_not_reviewed(
    description="Use GenAI to summarize review items over a period of time.",
)
def generate_review_summary(request: Request, start_ts: float, end_ts: float):
-    config: FrigateConfig = request.app.frigate_config
-
-    if not config.genai.provider:
+    if not request.app.genai_manager.vision_client:
        return JSONResponse(
            content=(
                {
@@ -6,7 +6,7 @@ from pydantic import Field
from ..base import FrigateBaseModel
from ..env import EnvString

-__all__ = ["GenAIConfig", "GenAIProviderEnum"]
+__all__ = ["GenAIConfig", "GenAIProviderEnum", "GenAIRoleEnum"]


class GenAIProviderEnum(str, Enum):
@@ -17,6 +17,12 @@ class GenAIProviderEnum(str, Enum):
    llamacpp = "llamacpp"


class GenAIRoleEnum(str, Enum):
    tools = "tools"
    vision = "vision"
    embeddings = "embeddings"


class GenAIConfig(FrigateBaseModel):
    """Primary GenAI Config to define GenAI Provider."""
@@ -24,6 +30,14 @@ class GenAIConfig(FrigateBaseModel):
    base_url: Optional[str] = Field(default=None, title="Provider base url.")
    model: str = Field(default="gpt-4o", title="GenAI model.")
    provider: GenAIProviderEnum | None = Field(default=None, title="GenAI provider.")
    roles: list[GenAIRoleEnum] = Field(
        default_factory=lambda: [
            GenAIRoleEnum.embeddings,
            GenAIRoleEnum.vision,
            GenAIRoleEnum.tools,
        ],
        title="GenAI roles (tools, vision, embeddings); one provider per role.",
    )
    provider_options: dict[str, Any] = Field(
        default={}, title="GenAI Provider extra options."
    )
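
As a quick illustration of the new field (values below are made up, not part of this change set), a provider declared without an explicit `roles` list receives all three roles by default, which keeps the old single-provider behaviour:

```python
# Sketch only: GenAIConfig is a pydantic model, so the defaults can be inspected directly.
cfg = GenAIConfig(provider=GenAIProviderEnum.llamacpp, base_url="http://localhost:8080")
assert cfg.roles == [
    GenAIRoleEnum.embeddings,
    GenAIRoleEnum.vision,
    GenAIRoleEnum.tools,
]
```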
@@ -1,5 +1,5 @@
from enum import Enum
-from typing import Dict, List, Optional
+from typing import Dict, List, Optional, Union

from pydantic import ConfigDict, Field
@@ -128,9 +128,10 @@ class SemanticSearchConfig(FrigateBaseModel):
    reindex: Optional[bool] = Field(
        default=False, title="Reindex all tracked objects on startup."
    )
-    model: Optional[SemanticSearchModelEnum] = Field(
+    model: Optional[Union[SemanticSearchModelEnum, str]] = Field(
        default=SemanticSearchModelEnum.jinav1,
-        title="The CLIP model to use for semantic search.",
+        title="The CLIP model or GenAI provider name for semantic search.",
+        description="Use 'jinav1', 'jinav2' for ONNX models, or a GenAI config key (e.g. 'default') when that provider has the embeddings role.",
    )
    model_size: str = Field(
        default="small", title="The size of the embeddings model used."
@@ -45,7 +45,7 @@ from .camera.audio import AudioConfig
from .camera.birdseye import BirdseyeConfig
from .camera.detect import DetectConfig
from .camera.ffmpeg import FfmpegConfig
-from .camera.genai import GenAIConfig
+from .camera.genai import GenAIConfig, GenAIRoleEnum
from .camera.motion import MotionConfig
from .camera.notification import NotificationConfig
from .camera.objects import FilterConfig, ObjectConfig
@@ -347,9 +347,9 @@ class FrigateConfig(FrigateBaseModel):
        default_factory=ModelConfig, title="Detection model configuration."
    )

-    # GenAI config
-    genai: GenAIConfig = Field(
-        default_factory=GenAIConfig, title="Generative AI configuration."
+    # GenAI config (named provider configs: name -> GenAIConfig)
+    genai: Dict[str, GenAIConfig] = Field(
+        default_factory=dict, title="Generative AI configuration (named providers)."
    )

    # Camera config
@@ -431,6 +431,34 @@ class FrigateConfig(FrigateBaseModel):
        # set notifications state
        self.notifications.enabled_in_config = self.notifications.enabled

        # validate genai: each role (tools, vision, embeddings) at most once
        role_to_name: dict[GenAIRoleEnum, str] = {}
        for name, genai_cfg in self.genai.items():
            for role in genai_cfg.roles:
                if role in role_to_name:
                    raise ValueError(
                        f"GenAI role '{role.value}' is assigned to both "
                        f"'{role_to_name[role]}' and '{name}'; each role must have "
                        "exactly one provider."
                    )
                role_to_name[role] = name

        # validate semantic_search.model when it is a GenAI provider name
        if self.semantic_search.enabled and isinstance(
            self.semantic_search.model, str
        ):
            if self.semantic_search.model not in self.genai:
                raise ValueError(
                    f"semantic_search.model '{self.semantic_search.model}' is not a "
                    "valid GenAI config key. Must match a key in genai config."
                )
            genai_cfg = self.genai[self.semantic_search.model]
            if GenAIRoleEnum.embeddings not in genai_cfg.roles:
                raise ValueError(
                    f"GenAI provider '{self.semantic_search.model}' must have "
                    "'embeddings' in its roles for semantic search."
                )

        # set default min_score for object attributes
        for attribute in self.model.all_attributes:
            if not self.objects.filters.get(attribute):
@@ -603,4 +603,4 @@ def get_optimized_runner(
            provider_options=options,
        ),
        model_type=model_type,
-    )
+    )
@@ -28,6 +28,7 @@ from frigate.types import ModelStatusTypesEnum
from frigate.util.builtin import EventsPerSecond, InferenceSpeed, serialize
from frigate.util.file import get_event_thumbnail_bytes

from .genai_embedding import GenAIEmbedding
from .onnx.jina_v1_embedding import JinaV1ImageEmbedding, JinaV1TextEmbedding
from .onnx.jina_v2_embedding import JinaV2Embedding
@@ -73,11 +74,13 @@ class Embeddings:
        config: FrigateConfig,
        db: SqliteVecQueueDatabase,
        metrics: DataProcessorMetrics,
        genai_manager=None,
    ) -> None:
        self.config = config
        self.db = db
        self.metrics = metrics
        self.requestor = InterProcessRequestor()
        self.genai_manager = genai_manager

        self.image_inference_speed = InferenceSpeed(self.metrics.image_embeddings_speed)
        self.image_eps = EventsPerSecond()
@@ -104,7 +107,27 @@ class Embeddings:
            },
        )

-        if self.config.semantic_search.model == SemanticSearchModelEnum.jinav2:
+        model_cfg = self.config.semantic_search.model
+        is_genai_model = isinstance(model_cfg, str)
+
+        if is_genai_model:
+            embeddings_client = (
+                genai_manager.embeddings_client if genai_manager else None
+            )
+            if not embeddings_client:
+                raise ValueError(
+                    f"semantic_search.model is '{model_cfg}' (GenAI provider) but "
+                    "no embeddings client is configured. Ensure the GenAI provider "
+                    "has 'embeddings' in its roles."
+                )
+            self.embedding = GenAIEmbedding(embeddings_client)
+            self.text_embedding = lambda input_data: self.embedding(
+                input_data, embedding_type="text"
+            )
+            self.vision_embedding = lambda input_data: self.embedding(
+                input_data, embedding_type="vision"
+            )
+        elif model_cfg == SemanticSearchModelEnum.jinav2:
            # Single JinaV2Embedding instance for both text and vision
            self.embedding = JinaV2Embedding(
                model_size=self.config.semantic_search.model_size,
@@ -118,7 +141,8 @@ class Embeddings:
            self.vision_embedding = lambda input_data: self.embedding(
                input_data, embedding_type="vision"
            )
-        else:  # Default to jinav1
+        else:
+            # Default to jinav1
            self.text_embedding = JinaV1TextEmbedding(
                model_size=config.semantic_search.model_size,
                requestor=self.requestor,
@@ -136,8 +160,11 @@ class Embeddings:
        self.metrics.text_embeddings_eps.value = self.text_eps.eps()

    def get_model_definitions(self):
        # Version-specific models
-        if self.config.semantic_search.model == SemanticSearchModelEnum.jinav2:
+        model_cfg = self.config.semantic_search.model
+        if isinstance(model_cfg, str):
+            # GenAI provider: no ONNX models to download
+            models = []
+        elif model_cfg == SemanticSearchModelEnum.jinav2:
            models = [
                "jinaai/jina-clip-v2-tokenizer",
                "jinaai/jina-clip-v2-model_fp16.onnx"
@@ -224,6 +251,14 @@ class Embeddings:

        embeddings = self.vision_embedding(valid_thumbs)

        if len(embeddings) != len(valid_ids):
            logger.warning(
                "Batch embed returned %d embeddings for %d thumbnails; skipping batch",
                len(embeddings),
                len(valid_ids),
            )
            return []

        if upsert:
            items = []
            for i in range(len(valid_ids)):
@@ -246,9 +281,15 @@ class Embeddings:

    def embed_description(
        self, event_id: str, description: str, upsert: bool = True
-    ) -> np.ndarray:
+    ) -> np.ndarray | None:
        start = datetime.datetime.now().timestamp()
-        embedding = self.text_embedding([description])[0]
+        embeddings = self.text_embedding([description])
+        if not embeddings:
+            logger.warning(
+                "Failed to generate description embedding for event %s", event_id
+            )
+            return None
+        embedding = embeddings[0]

        if upsert:
            self.db.execute_sql(
@@ -271,8 +312,32 @@ class Embeddings:
        # upsert embeddings one by one to avoid token limit
        embeddings = []
        # track which event ids actually produced an embedding so ids and
        # embeddings stay aligned when some requests fail
        successful_ids = []

        for eid, desc in event_descriptions.items():
            result = self.text_embedding([desc])
            if not result:
                logger.warning(
                    "Failed to generate description embedding for event %s", eid
                )
                continue
            successful_ids.append(eid)
            embeddings.append(result[0])

        if not embeddings:
            logger.warning("No description embeddings generated in batch")
            return np.array([])

        # only keep ids whose embedding succeeded
        ids = successful_ids

        if upsert:
            ids = list(event_descriptions.keys())
@@ -314,7 +379,10 @@ class Embeddings:

        batch_size = (
            4
-            if self.config.semantic_search.model == SemanticSearchModelEnum.jinav2
+            if (
+                isinstance(self.config.semantic_search.model, str)
+                or self.config.semantic_search.model == SemanticSearchModelEnum.jinav2
+            )
            else 32
        )
        current_page = 1
@@ -601,6 +669,8 @@ class Embeddings:
        if trigger.type == "description":
            logger.debug(f"Generating embedding for trigger description {trigger_name}")
            embedding = self.embed_description(None, trigger.data, upsert=False)
            if embedding is None:
                return b""
            return embedding.astype(np.float32).tobytes()

        elif trigger.type == "thumbnail":
@@ -636,6 +706,8 @@ class Embeddings:
            embedding = self.embed_thumbnail(
                str(trigger.data), thumbnail, upsert=False
            )
            if embedding is None:
                return b""
            return embedding.astype(np.float32).tobytes()

        else:
frigate/embeddings/genai_embedding.py (new file)
@@ -0,0 +1,85 @@
"""GenAI-backed embeddings for semantic search."""

import io
import logging
from typing import TYPE_CHECKING

import numpy as np
from PIL import Image

if TYPE_CHECKING:
    from frigate.genai import GenAIClient

logger = logging.getLogger(__name__)

EMBEDDING_DIM = 768


class GenAIEmbedding:
    """Embedding adapter that delegates to a GenAI provider's embed API.

    Provides the same interface as JinaV2Embedding for semantic search:
    __call__(inputs, embedding_type) -> list[np.ndarray]. Output embeddings are
    normalized to 768 dimensions for Frigate's sqlite-vec schema.
    """

    def __init__(self, client: "GenAIClient") -> None:
        self.client = client

    def __call__(
        self,
        inputs: list[str] | list[bytes] | list[Image.Image],
        embedding_type: str = "text",
    ) -> list[np.ndarray]:
        """Generate embeddings for text or images.

        Args:
            inputs: List of strings (text) or bytes/PIL images (vision).
            embedding_type: "text" or "vision".

        Returns:
            List of 768-dim numpy float32 arrays.
        """
        if not inputs:
            return []

        if embedding_type == "text":
            texts = [str(x) for x in inputs]
            embeddings = self.client.embed(texts=texts)
        elif embedding_type == "vision":
            images: list[bytes] = []
            for inp in inputs:
                if isinstance(inp, bytes):
                    images.append(inp)
                elif isinstance(inp, Image.Image):
                    buf = io.BytesIO()
                    inp.convert("RGB").save(buf, format="JPEG")
                    images.append(buf.getvalue())
                else:
                    logger.warning(
                        "GenAIEmbedding: skipping unsupported vision input type %s",
                        type(inp).__name__,
                    )
            if not images:
                return []
            embeddings = self.client.embed(images=images)
        else:
            raise ValueError(
                f"Invalid embedding_type '{embedding_type}'. Must be 'text' or 'vision'."
            )

        result = []
        for emb in embeddings:
            arr = np.asarray(emb, dtype=np.float32).flatten()
            if arr.size != EMBEDDING_DIM:
                if arr.size > EMBEDDING_DIM:
                    arr = arr[:EMBEDDING_DIM]
                else:
                    arr = np.pad(
                        arr,
                        (0, EMBEDDING_DIM - arr.size),
                        mode="constant",
                        constant_values=0,
                    )
            result.append(arr)
        return result
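
A rough usage sketch of the adapter (this mirrors how the `Embeddings` class wires it up earlier in this change set; `client` is assumed to be a `GenAIClient` whose provider holds the embeddings role, and `jpeg_bytes` is a raw JPEG thumbnail):

```python
embedder = GenAIEmbedding(client)

# Text and image inputs go through the same callable, selected by embedding_type.
text_vectors = embedder(["red car in the driveway"], embedding_type="text")
image_vectors = embedder([jpeg_bytes], embedding_type="vision")

# Every returned vector is padded/truncated to the 768-dim sqlite-vec schema.
assert all(vec.shape == (768,) for vec in text_vectors + image_vectors)
```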
@@ -59,7 +59,7 @@ from frigate.data_processing.real_time.license_plate import (
from frigate.data_processing.types import DataProcessorMetrics, PostProcessDataEnum
from frigate.db.sqlitevecq import SqliteVecQueueDatabase
from frigate.events.types import EventTypeEnum, RegenerateDescriptionEnum
-from frigate.genai import get_genai_client
+from frigate.genai import GenAIClientManager
from frigate.models import Event, Recordings, ReviewSegment, Trigger
from frigate.util.builtin import serialize
from frigate.util.file import get_event_thumbnail_bytes
@@ -116,8 +116,10 @@ class EmbeddingMaintainer(threading.Thread):
        models = [Event, Recordings, ReviewSegment, Trigger]
        db.bind(models)

        self.genai_manager = GenAIClientManager(config)

        if config.semantic_search.enabled:
-            self.embeddings = Embeddings(config, db, metrics)
+            self.embeddings = Embeddings(config, db, metrics, self.genai_manager)

            # Check if we need to re-index events
            if config.semantic_search.reindex:
@@ -144,7 +146,6 @@ class EmbeddingMaintainer(threading.Thread):
        self.frame_manager = SharedMemoryFrameManager()

        self.detected_license_plates: dict[str, dict[str, Any]] = {}
-        self.genai_client = get_genai_client(config)

        # model runners to share between realtime and post processors
        if self.config.lpr.enabled:
@@ -203,12 +204,15 @@ class EmbeddingMaintainer(threading.Thread):
        # post processors
        self.post_processors: list[PostProcessorApi] = []

-        if self.genai_client is not None and any(
+        if self.genai_manager.vision_client is not None and any(
            c.review.genai.enabled_in_config for c in self.config.cameras.values()
        ):
            self.post_processors.append(
                ReviewDescriptionProcessor(
-                    self.config, self.requestor, self.metrics, self.genai_client
+                    self.config,
+                    self.requestor,
+                    self.metrics,
+                    self.genai_manager.vision_client,
                )
            )
@@ -246,7 +250,7 @@ class EmbeddingMaintainer(threading.Thread):
            )
            self.post_processors.append(semantic_trigger_processor)

-        if self.genai_client is not None and any(
+        if self.genai_manager.vision_client is not None and any(
            c.objects.genai.enabled_in_config for c in self.config.cameras.values()
        ):
            self.post_processors.append(
@@ -255,7 +259,7 @@ class EmbeddingMaintainer(threading.Thread):
                    self.embeddings,
                    self.requestor,
                    self.metrics,
-                    self.genai_client,
+                    self.genai_manager.vision_client,
                    semantic_trigger_processor,
                )
            )
@@ -7,15 +7,27 @@ import os
import re
from typing import Any, Optional

import numpy as np
from playhouse.shortcuts import model_to_dict

from frigate.config import CameraConfig, FrigateConfig, GenAIConfig, GenAIProviderEnum
from frigate.const import CLIPS_DIR
from frigate.data_processing.post.types import ReviewMetadata
from frigate.genai.manager import GenAIClientManager
from frigate.models import Event

logger = logging.getLogger(__name__)

__all__ = [
    "GenAIClient",
    "GenAIClientManager",
    "GenAIConfig",
    "GenAIProviderEnum",
    "PROVIDERS",
    "load_providers",
    "register_genai_provider",
]

PROVIDERS = {}
@@ -293,6 +305,25 @@ Guidelines:
        """Get the context window size for this provider in tokens."""
        return 4096

    def embed(
        self,
        texts: list[str] | None = None,
        images: list[bytes] | None = None,
    ) -> list[np.ndarray]:
        """Generate embeddings for text and/or images.

        Returns list of numpy arrays (one per input). Expected dimension is 768
        for Frigate semantic search compatibility.

        Providers that support embeddings should override this method.
        """
        logger.warning(
            "%s does not support embeddings. "
            "This method should be overridden by the provider implementation.",
            self.__class__.__name__,
        )
        return []

    def chat_with_tools(
        self,
        messages: list[dict[str, Any]],
@@ -352,19 +383,6 @@ Guidelines:
    }


-def get_genai_client(config: FrigateConfig) -> Optional[GenAIClient]:
-    """Get the GenAI client."""
-    if not config.genai.provider:
-        return None
-
-    load_providers()
-    provider = PROVIDERS.get(config.genai.provider)
-    if provider:
-        return provider(config.genai)
-
-    return None


def load_providers():
    package_dir = os.path.dirname(__file__)
    for filename in os.listdir(package_dir):
@@ -1,11 +1,14 @@
"""llama.cpp Provider for Frigate AI."""

import base64
import io
import json
import logging
from typing import Any, Optional

import numpy as np
import requests
from PIL import Image

from frigate.config import GenAIProviderEnum
from frigate.genai import GenAIClient, register_genai_provider
@@ -13,6 +16,20 @@ from frigate.genai import GenAIClient, register_genai_provider
logger = logging.getLogger(__name__)


def _to_jpeg(img_bytes: bytes) -> bytes | None:
    """Convert image bytes to JPEG. llama.cpp/STB does not support WebP."""
    try:
        img = Image.open(io.BytesIO(img_bytes))
        if img.mode != "RGB":
            img = img.convert("RGB")
        buf = io.BytesIO()
        img.save(buf, format="JPEG", quality=85)
        return buf.getvalue()
    except Exception as e:
        logger.warning("Failed to convert image to JPEG: %s", e)
        return None


@register_genai_provider(GenAIProviderEnum.llamacpp)
class LlamaCppClient(GenAIClient):
    """Generative AI client for Frigate using llama.cpp server."""
@@ -101,6 +118,104 @@ class LlamaCppClient(GenAIClient):
        """Get the context window size for llama.cpp."""
        return self.genai_config.provider_options.get("context_size", 4096)

    def embed(
        self,
        texts: list[str] | None = None,
        images: list[bytes] | None = None,
    ) -> list[np.ndarray]:
        """Generate embeddings via llama.cpp /embeddings endpoint.

        Supports batch requests. Uses content format with prompt_string and
        multimodal_data for images (PR #15108). Server must be started with
        --embeddings and --mmproj for multimodal support.
        """
        if self.provider is None:
            logger.warning(
                "llama.cpp provider has not been initialized. Check your llama.cpp configuration."
            )
            return []

        texts = texts or []
        images = images or []
        if not texts and not images:
            return []

        EMBEDDING_DIM = 768

        content = []
        for text in texts:
            content.append({"prompt_string": text})
        for img in images:
            # llama.cpp uses STB which does not support WebP; convert to JPEG
            jpeg_bytes = _to_jpeg(img)
            to_encode = jpeg_bytes if jpeg_bytes is not None else img
            encoded = base64.b64encode(to_encode).decode("utf-8")
            # prompt_string must contain <__media__> placeholder for image tokenization
            content.append({
                "prompt_string": "<__media__>\n",
                "multimodal_data": [encoded],
            })

        try:
            response = requests.post(
                f"{self.provider}/embeddings",
                json={"content": content},
                timeout=self.timeout,
            )
            response.raise_for_status()
            result = response.json()

            items = result.get("data", result) if isinstance(result, dict) else result
            if not isinstance(items, list):
                logger.warning("llama.cpp embeddings returned unexpected format")
                return []

            embeddings = []
            for item in items:
                emb = item.get("embedding") if isinstance(item, dict) else None
                if emb is None:
                    logger.warning("llama.cpp embeddings item missing embedding field")
                    continue
                arr = np.array(emb, dtype=np.float32)
                orig_dim = arr.size
                if orig_dim != EMBEDDING_DIM:
                    if orig_dim > EMBEDDING_DIM:
                        arr = arr[:EMBEDDING_DIM]
                        logger.debug(
                            "Truncated llama.cpp embedding from %d to %d dimensions",
                            orig_dim,
                            EMBEDDING_DIM,
                        )
                    else:
                        arr = np.pad(
                            arr,
                            (0, EMBEDDING_DIM - orig_dim),
                            mode="constant",
                            constant_values=0,
                        )
                        logger.debug(
                            "Padded llama.cpp embedding from %d to %d dimensions",
                            orig_dim,
                            EMBEDDING_DIM,
                        )
                embeddings.append(arr)
            return embeddings
        except requests.exceptions.Timeout:
            logger.warning("llama.cpp embeddings request timed out")
            return []
        except requests.exceptions.RequestException as e:
            error_detail = str(e)
            if hasattr(e, "response") and e.response is not None:
                try:
                    error_detail = f"{str(e)} - Response: {e.response.text[:500]}"
                except Exception:
                    pass
            logger.warning("llama.cpp embeddings error: %s", error_detail)
            return []
        except Exception as e:
            logger.warning("Unexpected error in llama.cpp embeddings: %s", str(e))
            return []

    def chat_with_tools(
        self,
        messages: list[dict[str, Any]],
frigate/genai/manager.py (new file)
@@ -0,0 +1,89 @@
"""GenAI client manager for Frigate.

Manages GenAI provider clients from Frigate config. Configuration is read only
in _update_config(); no other code should read config.genai. Exposes clients
by role: tool_client, vision_client, embeddings_client.
"""

import logging
from typing import TYPE_CHECKING, Optional

from frigate.config import FrigateConfig
from frigate.config.camera.genai import GenAIRoleEnum

if TYPE_CHECKING:
    from frigate.genai import GenAIClient

logger = logging.getLogger(__name__)


class GenAIClientManager:
    """Manages GenAI provider clients from Frigate config."""

    def __init__(self, config: FrigateConfig) -> None:
        self._config = config
        self._tool_client: Optional[GenAIClient] = None
        self._vision_client: Optional[GenAIClient] = None
        self._embeddings_client: Optional[GenAIClient] = None
        self._update_config()

    def _update_config(self) -> None:
        """Build role clients from current Frigate config.genai.

        Called from __init__ and can be called again when config is reloaded.
        Each role (tools, vision, embeddings) gets the client for the provider
        that has that role in its roles list.
        """
        from frigate.genai import PROVIDERS, load_providers

        self._tool_client = None
        self._vision_client = None
        self._embeddings_client = None

        if not self._config.genai:
            return

        load_providers()

        for _name, genai_cfg in self._config.genai.items():
            if not genai_cfg.provider:
                continue
            provider_cls = PROVIDERS.get(genai_cfg.provider)
            if not provider_cls:
                logger.warning(
                    "Unknown GenAI provider %s in config, skipping.",
                    genai_cfg.provider,
                )
                continue
            try:
                client = provider_cls(genai_cfg)
            except Exception as e:
                logger.exception(
                    "Failed to create GenAI client for provider %s: %s",
                    genai_cfg.provider,
                    e,
                )
                continue

            for role in genai_cfg.roles:
                if role == GenAIRoleEnum.tools:
                    self._tool_client = client
                elif role == GenAIRoleEnum.vision:
                    self._vision_client = client
                elif role == GenAIRoleEnum.embeddings:
                    self._embeddings_client = client

    @property
    def tool_client(self) -> "Optional[GenAIClient]":
        """Client configured for the tools role (e.g. chat with function calling)."""
        return self._tool_client

    @property
    def vision_client(self) -> "Optional[GenAIClient]":
        """Client configured for the vision role (e.g. review descriptions, object descriptions)."""
        return self._vision_client

    @property
    def embeddings_client(self) -> "Optional[GenAIClient]":
        """Client configured for the embeddings role."""
        return self._embeddings_client
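
A short usage sketch (not part of this change set) of how callers are expected to consume the manager, asking for the client that owns a role and skipping work when no provider is configured for it:

```python
manager = GenAIClientManager(config)

# Semantic search only runs against a provider that holds the embeddings role.
if manager.embeddings_client is not None:
    vectors = manager.embeddings_client.embed(texts=["person walking a dog"])
```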
@@ -438,6 +438,13 @@ def migrate_018_0(config: dict[str, dict[str, Any]]) -> dict[str, dict[str, Any]
    """Handle migrating frigate config to 0.18-0"""
    new_config = config.copy()

    # Migrate GenAI to new format
    genai = new_config.get("genai")

    if genai and genai.get("provider"):
        genai["roles"] = ["embeddings", "vision", "tools"]
        new_config["genai"] = {"default": genai}

    # Remove deprecated sync_recordings from global record config
    if new_config.get("record", {}).get("sync_recordings") is not None:
        del new_config["record"]["sync_recordings"]
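
For illustration (provider and URL below are examples, not taken from this change set), the GenAI portion of the migration turns a pre-0.18 flat `genai` section into a named provider that is granted all three roles:

```python
# Before: 0.17-style flat config (example values)
old_genai = {
    "provider": "ollama",
    "base_url": "http://ollama:11434",
    "model": "llava",
}

# After migrate_018_0: wrapped under the "default" key with every role assigned
migrated_genai = {
    "default": {
        **old_genai,
        "roles": ["embeddings", "vision", "tools"],
    }
}
```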
@@ -1,3 +1,6 @@
/** ONNX embedding models that require local model downloads. GenAI providers are not in this list. */
export const JINA_EMBEDDING_MODELS = ["jinav1", "jinav2"] as const;

export const supportedLanguageKeys = [
  "en",
  "es",
@@ -23,6 +23,7 @@ import { toast } from "sonner";
import useSWR from "swr";
import useSWRInfinite from "swr/infinite";
import { useDocDomain } from "@/hooks/use-doc-domain";
import { JINA_EMBEDDING_MODELS } from "@/lib/const";

const API_LIMIT = 25;
@@ -293,7 +294,12 @@ export default function Explore() {
  const modelVersion = config?.semantic_search.model || "jinav1";
  const modelSize = config?.semantic_search.model_size || "small";

-  // Text model state
+  // GenAI providers have no local models to download
+  const isGenaiEmbeddings =
+    typeof modelVersion === "string" &&
+    !(JINA_EMBEDDING_MODELS as readonly string[]).includes(modelVersion);
+
+  // Text model state (skipped for GenAI - no local models)
  const { payload: textModelState } = useModelState(
    modelVersion === "jinav1"
      ? "jinaai/jina-clip-v1-text_model_fp16.onnx"
@@ -328,6 +334,10 @@ export default function Explore() {
  );

  const allModelsLoaded = useMemo(() => {
    if (isGenaiEmbeddings) {
      return true;
    }

    return (
      textModelState === "downloaded" &&
      textTokenizerState === "downloaded" &&
@@ -335,6 +345,7 @@ export default function Explore() {
      visionFeatureExtractorState === "downloaded"
    );
  }, [
    isGenaiEmbeddings,
    textModelState,
    textTokenizerState,
    visionModelState,
@@ -358,10 +369,11 @@ export default function Explore() {
    !defaultViewLoaded ||
    (config?.semantic_search.enabled &&
      (!reindexState ||
-        !textModelState ||
-        !textTokenizerState ||
-        !visionModelState ||
-        !visionFeatureExtractorState))
+        (!isGenaiEmbeddings &&
+          (!textModelState ||
+            !textTokenizerState ||
+            !visionModelState ||
+            !visionFeatureExtractorState))))
  ) {
    return (
      <ActivityIndicator className="absolute left-1/2 top-1/2 -translate-x-1/2 -translate-y-1/2" />