Compare commits

...

7 Commits

| Author | SHA1 | Message | Date |
|---|---|---|---|
| Nicolas Mowen | 14fcaa9911 | Don't require download check | 2026-02-19 08:34:47 -07:00 |
| Nicolas Mowen | 72c73b153c | Fix sending images | 2026-02-19 08:24:19 -07:00 |
| Nicolas Mowen | 4cd581fc43 | undo | 2026-02-19 08:17:06 -07:00 |
| Nicolas Mowen | f67f569104 | Basic docs | 2026-02-19 08:14:13 -07:00 |
| Nicolas Mowen | 54a8678058 | Add support for embedding via genai | 2026-02-19 08:14:08 -07:00 |
| Nicolas Mowen | e013a0206a | Add embed API support | 2026-02-19 08:13:50 -07:00 |
| Nicolas Mowen | 266e243425 | Support GenAI for embeddings | 2026-02-19 08:13:39 -07:00 |
10 changed files with 378 additions and 19 deletions

View File

@@ -76,6 +76,40 @@ Switching between V1 and V2 requires reindexing your embeddings. The embeddings
:::
### GenAI Provider (llama.cpp)
Frigate can use a GenAI provider for semantic search embeddings when that provider has the `embeddings` role. Currently, only **llama.cpp** supports multimodal embeddings (both text and images).
To use llama.cpp for semantic search:
1. Configure a GenAI provider in your config with `embeddings` in its `roles`.
2. Set `semantic_search.model` to the GenAI config key (e.g. `default`).
3. Start the llama.cpp server with `--embeddings` and `--mmproj` for image support.

The corresponding Frigate configuration looks like this:
```yaml
genai:
  default:
    provider: llamacpp
    base_url: http://localhost:8080
    model: your-model-name
    roles:
      - embeddings
      - vision
      - tools

semantic_search:
  enabled: True
  model: default
```
The llama.cpp server must be started with `--embeddings` for the embeddings API, and `--mmproj <mmproj.gguf>` when using image embeddings. See the [llama.cpp server documentation](https://github.com/ggml-org/llama.cpp/blob/master/tools/server/README.md) for details.
:::note
Switching between Jina models and a GenAI provider requires reindexing. Embeddings from different backends are incompatible.
:::
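As a quick way to confirm the server side is wired up, here is a minimal sketch (not part of this change) that posts one text prompt to the `/embeddings` endpoint using the same request shape Frigate sends; the URL and prompt are placeholders for your own setup.

```python
import numpy as np
import requests

resp = requests.post(
    "http://localhost:8080/embeddings",  # placeholder: your llama.cpp server
    json={"content": [{"prompt_string": "a person walking a dog"}]},
    timeout=30,
)
resp.raise_for_status()
result = resp.json()

# llama.cpp may answer with a bare list or an OpenAI-style {"data": [...]}
items = result.get("data", result) if isinstance(result, dict) else result
vec = np.asarray(items[0]["embedding"], dtype=np.float32).flatten()
print(f"got a {vec.size}-dimensional embedding")
```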
### GPU Acceleration
The CLIP models are downloaded in ONNX format, and the `large` model can be accelerated using GPU hardware, when available. This depends on the Docker build that is used. You can also target a specific device in a multi-GPU installation.

View File

@@ -1,5 +1,5 @@
from enum import Enum
from typing import Dict, List, Optional
from typing import Dict, List, Optional, Union
from pydantic import ConfigDict, Field
@@ -128,9 +128,10 @@ class SemanticSearchConfig(FrigateBaseModel):
reindex: Optional[bool] = Field(
default=False, title="Reindex all tracked objects on startup."
)
model: Optional[SemanticSearchModelEnum] = Field(
model: Optional[Union[SemanticSearchModelEnum, str]] = Field(
default=SemanticSearchModelEnum.jinav1,
title="The CLIP model to use for semantic search.",
title="The CLIP model or GenAI provider name for semantic search.",
description="Use 'jinav1', 'jinav2' for ONNX models, or a GenAI config key (e.g. 'default') when that provider has the embeddings role.",
)
model_size: str = Field(
default="small", title="The size of the embeddings model used."

View File

@@ -443,6 +443,22 @@ class FrigateConfig(FrigateBaseModel):
)
role_to_name[role] = name
# validate semantic_search.model when it is a GenAI provider name
if self.semantic_search.enabled and isinstance(
self.semantic_search.model, str
):
if self.semantic_search.model not in self.genai:
raise ValueError(
f"semantic_search.model '{self.semantic_search.model}' is not a "
"valid GenAI config key. Must match a key in genai config."
)
genai_cfg = self.genai[self.semantic_search.model]
if GenAIRoleEnum.embeddings not in genai_cfg.roles:
raise ValueError(
f"GenAI provider '{self.semantic_search.model}' must have "
"'embeddings' in its roles for semantic search."
)
# set default min_score for object attributes
for attribute in self.model.all_attributes:
if not self.objects.filters.get(attribute):
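For illustration, here is a minimal standalone sketch of the same cross-field rule using simplified stand-in models and a pydantic `model_validator`; the real check lives inside Frigate's existing config validation, so all names below are hypothetical.

```python
from pydantic import BaseModel, model_validator


class GenAIProviderStub(BaseModel):
    roles: list[str] = []


class SemanticSearchStub(BaseModel):
    enabled: bool = False
    model: str = "jinav1"


class AppConfigStub(BaseModel):
    genai: dict[str, GenAIProviderStub] = {}
    semantic_search: SemanticSearchStub = SemanticSearchStub()

    @model_validator(mode="after")
    def check_semantic_search_model(self):
        model = self.semantic_search.model
        if self.semantic_search.enabled and model not in ("jinav1", "jinav2"):
            if model not in self.genai:
                raise ValueError(f"'{model}' is not a GenAI config key")
            if "embeddings" not in self.genai[model].roles:
                raise ValueError(f"GenAI provider '{model}' lacks the embeddings role")
        return self


# passes: provider exists and has the embeddings role
AppConfigStub(
    genai={"default": {"roles": ["embeddings"]}},
    semantic_search={"enabled": True, "model": "default"},
)

# fails: provider exists but has no embeddings role
try:
    AppConfigStub(
        genai={"default": {"roles": ["vision"]}},
        semantic_search={"enabled": True, "model": "default"},
    )
except Exception as e:
    print(e)
```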

View File

@@ -28,6 +28,7 @@ from frigate.types import ModelStatusTypesEnum
from frigate.util.builtin import EventsPerSecond, InferenceSpeed, serialize
from frigate.util.file import get_event_thumbnail_bytes
from .genai_embedding import GenAIEmbedding
from .onnx.jina_v1_embedding import JinaV1ImageEmbedding, JinaV1TextEmbedding
from .onnx.jina_v2_embedding import JinaV2Embedding
@@ -73,11 +74,13 @@ class Embeddings:
config: FrigateConfig,
db: SqliteVecQueueDatabase,
metrics: DataProcessorMetrics,
genai_manager=None,
) -> None:
self.config = config
self.db = db
self.metrics = metrics
self.requestor = InterProcessRequestor()
self.genai_manager = genai_manager
self.image_inference_speed = InferenceSpeed(self.metrics.image_embeddings_speed)
self.image_eps = EventsPerSecond()
@@ -104,7 +107,27 @@ class Embeddings:
},
)
if self.config.semantic_search.model == SemanticSearchModelEnum.jinav2:
model_cfg = self.config.semantic_search.model
is_genai_model = isinstance(model_cfg, str)
if is_genai_model:
embeddings_client = (
genai_manager.embeddings_client if genai_manager else None
)
if not embeddings_client:
raise ValueError(
f"semantic_search.model is '{model_cfg}' (GenAI provider) but "
"no embeddings client is configured. Ensure the GenAI provider "
"has 'embeddings' in its roles."
)
self.embedding = GenAIEmbedding(embeddings_client)
self.text_embedding = lambda input_data: self.embedding(
input_data, embedding_type="text"
)
self.vision_embedding = lambda input_data: self.embedding(
input_data, embedding_type="vision"
)
elif model_cfg == SemanticSearchModelEnum.jinav2:
# Single JinaV2Embedding instance for both text and vision
self.embedding = JinaV2Embedding(
model_size=self.config.semantic_search.model_size,
@@ -118,7 +141,8 @@ class Embeddings:
self.vision_embedding = lambda input_data: self.embedding(
input_data, embedding_type="vision"
)
else: # Default to jinav1
else:
# Default to jinav1
self.text_embedding = JinaV1TextEmbedding(
model_size=config.semantic_search.model_size,
requestor=self.requestor,
@@ -136,8 +160,11 @@ class Embeddings:
self.metrics.text_embeddings_eps.value = self.text_eps.eps()
def get_model_definitions(self):
# Version-specific models
if self.config.semantic_search.model == SemanticSearchModelEnum.jinav2:
model_cfg = self.config.semantic_search.model
if isinstance(model_cfg, str):
# GenAI provider: no ONNX models to download
models = []
elif model_cfg == SemanticSearchModelEnum.jinav2:
models = [
"jinaai/jina-clip-v2-tokenizer",
"jinaai/jina-clip-v2-model_fp16.onnx"
@@ -224,6 +251,14 @@ class Embeddings:
embeddings = self.vision_embedding(valid_thumbs)
if len(embeddings) != len(valid_ids):
logger.warning(
"Batch embed returned %d embeddings for %d thumbnails; skipping batch",
len(embeddings),
len(valid_ids),
)
return []
if upsert:
items = []
for i in range(len(valid_ids)):
@@ -246,9 +281,15 @@ class Embeddings:
def embed_description(
self, event_id: str, description: str, upsert: bool = True
) -> np.ndarray:
) -> np.ndarray | None:
start = datetime.datetime.now().timestamp()
embedding = self.text_embedding([description])[0]
embeddings = self.text_embedding([description])
if not embeddings:
logger.warning(
"Failed to generate description embedding for event %s", event_id
)
return None
embedding = embeddings[0]
if upsert:
self.db.execute_sql(
@@ -271,8 +312,32 @@ class Embeddings:
# upsert embeddings one by one to avoid token limit
embeddings = []
for desc in event_descriptions.values():
embeddings.append(self.text_embedding([desc])[0])
ids = []
for eid, desc in event_descriptions.items():
    result = self.text_embedding([desc])
    if not result:
        logger.warning(
            "Failed to generate description embedding for event %s", eid
        )
        continue
    ids.append(eid)
    embeddings.append(result[0])
if not embeddings:
    logger.warning("No description embeddings generated in batch")
    return np.array([])
if upsert:
@@ -314,7 +379,10 @@ class Embeddings:
batch_size = (
4
if self.config.semantic_search.model == SemanticSearchModelEnum.jinav2
if (
isinstance(self.config.semantic_search.model, str)
or self.config.semantic_search.model == SemanticSearchModelEnum.jinav2
)
else 32
)
current_page = 1
@@ -601,6 +669,8 @@ class Embeddings:
if trigger.type == "description":
logger.debug(f"Generating embedding for trigger description {trigger_name}")
embedding = self.embed_description(None, trigger.data, upsert=False)
if embedding is None:
return b""
return embedding.astype(np.float32).tobytes()
elif trigger.type == "thumbnail":
@@ -636,6 +706,8 @@ class Embeddings:
embedding = self.embed_thumbnail(
str(trigger.data), thumbnail, upsert=False
)
if embedding is None:
return b""
return embedding.astype(np.float32).tobytes()
else:
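The key pattern in this file's diff is that both backends hide behind the same `text_embedding` / `vision_embedding` callables. A hedged sketch of that dispatch with stub backends (not Frigate code):

```python
import numpy as np


class StubGenAIEmbedding:
    """Stands in for GenAIEmbedding: one 768-dim vector per input."""

    def __call__(self, inputs, embedding_type="text"):
        return [np.zeros(768, dtype=np.float32) for _ in inputs]


class StubJinaTextEmbedding:
    """Stands in for JinaV1TextEmbedding."""

    def __call__(self, inputs):
        return [np.zeros(768, dtype=np.float32) for _ in inputs]


use_genai = True  # would be: isinstance(config.semantic_search.model, str)

if use_genai:
    backend = StubGenAIEmbedding()
    text_embedding = lambda data: backend(data, embedding_type="text")
else:
    text_embedding = StubJinaTextEmbedding()

# the rest of the pipeline only sees "a callable mapping strings to vectors"
print(len(text_embedding(["a red car", "a person at the door"])))  # 2
```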

View File

@@ -0,0 +1,85 @@
"""GenAI-backed embeddings for semantic search."""
import io
import logging
from typing import TYPE_CHECKING
import numpy as np
from PIL import Image
if TYPE_CHECKING:
from frigate.genai import GenAIClient
logger = logging.getLogger(__name__)
EMBEDDING_DIM = 768
class GenAIEmbedding:
"""Embedding adapter that delegates to a GenAI provider's embed API.
Provides the same interface as JinaV2Embedding for semantic search:
__call__(inputs, embedding_type) -> list[np.ndarray]. Output embeddings are
normalized to 768 dimensions for Frigate's sqlite-vec schema.
"""
def __init__(self, client: "GenAIClient") -> None:
self.client = client
def __call__(
self,
inputs: list[str] | list[bytes] | list[Image.Image],
embedding_type: str = "text",
) -> list[np.ndarray]:
"""Generate embeddings for text or images.
Args:
inputs: List of strings (text) or bytes/PIL images (vision).
embedding_type: "text" or "vision".
Returns:
List of 768-dim numpy float32 arrays.
"""
if not inputs:
return []
if embedding_type == "text":
texts = [str(x) for x in inputs]
embeddings = self.client.embed(texts=texts)
elif embedding_type == "vision":
images: list[bytes] = []
for inp in inputs:
if isinstance(inp, bytes):
images.append(inp)
elif isinstance(inp, Image.Image):
buf = io.BytesIO()
inp.convert("RGB").save(buf, format="JPEG")
images.append(buf.getvalue())
else:
logger.warning(
"GenAIEmbedding: skipping unsupported vision input type %s",
type(inp).__name__,
)
if not images:
return []
embeddings = self.client.embed(images=images)
else:
raise ValueError(
f"Invalid embedding_type '{embedding_type}'. Must be 'text' or 'vision'."
)
result = []
for emb in embeddings:
arr = np.asarray(emb, dtype=np.float32).flatten()
if arr.size != EMBEDDING_DIM:
if arr.size > EMBEDDING_DIM:
arr = arr[:EMBEDDING_DIM]
else:
arr = np.pad(
arr,
(0, EMBEDDING_DIM - arr.size),
mode="constant",
constant_values=0,
)
result.append(arr)
return result
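A hedged usage sketch for the adapter above: `StubClient` stands in for a real `GenAIClient` and deliberately returns 1024-value vectors so the truncate/pad normalization to 768 dimensions is visible; the import path is assumed from the relative import in `embeddings.py`.

```python
import numpy as np

from frigate.embeddings.genai_embedding import GenAIEmbedding  # assumed path


class StubClient:
    """Duck-typed stand-in for a GenAIClient with an embed() method."""

    def embed(self, texts=None, images=None):
        n = len(texts or images or [])
        # deliberately too long, to exercise the truncation path
        return [np.ones(1024, dtype=np.float32) for _ in range(n)]


embedder = GenAIEmbedding(StubClient())
vectors = embedder(["a person near the front door"], embedding_type="text")
print(vectors[0].shape)  # (768,) after truncation
```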

View File

@@ -116,8 +116,10 @@ class EmbeddingMaintainer(threading.Thread):
models = [Event, Recordings, ReviewSegment, Trigger]
db.bind(models)
self.genai_manager = GenAIClientManager(config)
if config.semantic_search.enabled:
self.embeddings = Embeddings(config, db, metrics)
self.embeddings = Embeddings(config, db, metrics, self.genai_manager)
# Check if we need to re-index events
if config.semantic_search.reindex:
@@ -144,7 +146,6 @@ class EmbeddingMaintainer(threading.Thread):
self.frame_manager = SharedMemoryFrameManager()
self.detected_license_plates: dict[str, dict[str, Any]] = {}
self.genai_manager = GenAIClientManager(config)
# model runners to share between realtime and post processors
if self.config.lpr.enabled:

View File

@@ -7,6 +7,7 @@ import os
import re
from typing import Any, Optional
import numpy as np
from playhouse.shortcuts import model_to_dict
from frigate.config import CameraConfig, FrigateConfig, GenAIConfig, GenAIProviderEnum
@@ -304,6 +305,25 @@ Guidelines:
"""Get the context window size for this provider in tokens."""
return 4096
def embed(
self,
texts: list[str] | None = None,
images: list[bytes] | None = None,
) -> list[np.ndarray]:
"""Generate embeddings for text and/or images.
Returns list of numpy arrays (one per input). Expected dimension is 768
for Frigate semantic search compatibility.
Providers that support embeddings should override this method.
"""
logger.warning(
"%s does not support embeddings. "
"This method should be overridden by the provider implementation.",
self.__class__.__name__,
)
return []
def chat_with_tools(
self,
messages: list[dict[str, Any]],

View File

@@ -1,11 +1,14 @@
"""llama.cpp Provider for Frigate AI."""
import base64
import io
import json
import logging
from typing import Any, Optional
import numpy as np
import requests
from PIL import Image
from frigate.config import GenAIProviderEnum
from frigate.genai import GenAIClient, register_genai_provider
@@ -13,6 +16,20 @@ from frigate.genai import GenAIClient, register_genai_provider
logger = logging.getLogger(__name__)
def _to_jpeg(img_bytes: bytes) -> bytes | None:
"""Convert image bytes to JPEG. llama.cpp/STB does not support WebP."""
try:
img = Image.open(io.BytesIO(img_bytes))
if img.mode != "RGB":
img = img.convert("RGB")
buf = io.BytesIO()
img.save(buf, format="JPEG", quality=85)
return buf.getvalue()
except Exception as e:
logger.warning("Failed to convert image to JPEG: %s", e)
return None
@register_genai_provider(GenAIProviderEnum.llamacpp)
class LlamaCppClient(GenAIClient):
"""Generative AI client for Frigate using llama.cpp server."""
@@ -101,6 +118,104 @@ class LlamaCppClient(GenAIClient):
"""Get the context window size for llama.cpp."""
return self.genai_config.provider_options.get("context_size", 4096)
def embed(
self,
texts: list[str] | None = None,
images: list[bytes] | None = None,
) -> list[np.ndarray]:
"""Generate embeddings via llama.cpp /embeddings endpoint.
Supports batch requests. Uses content format with prompt_string and
multimodal_data for images (PR #15108). Server must be started with
--embeddings and --mmproj for multimodal support.
"""
if self.provider is None:
logger.warning(
"llama.cpp provider has not been initialized. Check your llama.cpp configuration."
)
return []
texts = texts or []
images = images or []
if not texts and not images:
return []
EMBEDDING_DIM = 768
content = []
for text in texts:
content.append({"prompt_string": text})
for img in images:
# llama.cpp uses STB which does not support WebP; convert to JPEG
jpeg_bytes = _to_jpeg(img)
to_encode = jpeg_bytes if jpeg_bytes is not None else img
encoded = base64.b64encode(to_encode).decode("utf-8")
# prompt_string must contain <__media__> placeholder for image tokenization
content.append({
"prompt_string": "<__media__>\n",
"multimodal_data": [encoded],
})
try:
response = requests.post(
f"{self.provider}/embeddings",
json={"content": content},
timeout=self.timeout,
)
response.raise_for_status()
result = response.json()
items = result.get("data", result) if isinstance(result, dict) else result
if not isinstance(items, list):
logger.warning("llama.cpp embeddings returned unexpected format")
return []
embeddings = []
for item in items:
emb = item.get("embedding") if isinstance(item, dict) else None
if emb is None:
logger.warning("llama.cpp embeddings item missing embedding field")
continue
arr = np.array(emb, dtype=np.float32)
orig_dim = arr.size
if orig_dim != EMBEDDING_DIM:
if orig_dim > EMBEDDING_DIM:
arr = arr[:EMBEDDING_DIM]
logger.debug(
"Truncated llama.cpp embedding from %d to %d dimensions",
orig_dim,
EMBEDDING_DIM,
)
else:
arr = np.pad(
arr,
(0, EMBEDDING_DIM - orig_dim),
mode="constant",
constant_values=0,
)
logger.debug(
"Padded llama.cpp embedding from %d to %d dimensions",
orig_dim,
EMBEDDING_DIM,
)
embeddings.append(arr)
return embeddings
except requests.exceptions.Timeout:
logger.warning("llama.cpp embeddings request timed out")
return []
except requests.exceptions.RequestException as e:
error_detail = str(e)
if hasattr(e, "response") and e.response is not None:
try:
error_detail = f"{str(e)} - Response: {e.response.text[:500]}"
except Exception:
pass
logger.warning("llama.cpp embeddings error: %s", error_detail)
return []
except Exception as e:
logger.warning("Unexpected error in llama.cpp embeddings: %s", str(e))
return []
def chat_with_tools(
self,
messages: list[dict[str, Any]],

View File

@@ -1,3 +1,6 @@
/** ONNX embedding models that require local model downloads. GenAI providers are not in this list. */
export const JINA_EMBEDDING_MODELS = ["jinav1", "jinav2"] as const;
export const supportedLanguageKeys = [
"en",
"es",

View File

@@ -23,6 +23,7 @@ import { toast } from "sonner";
import useSWR from "swr";
import useSWRInfinite from "swr/infinite";
import { useDocDomain } from "@/hooks/use-doc-domain";
import { JINA_EMBEDDING_MODELS } from "@/lib/const";
const API_LIMIT = 25;
@@ -293,7 +294,12 @@ export default function Explore() {
const modelVersion = config?.semantic_search.model || "jinav1";
const modelSize = config?.semantic_search.model_size || "small";
// Text model state
// GenAI providers have no local models to download
const isGenaiEmbeddings =
typeof modelVersion === "string" &&
!(JINA_EMBEDDING_MODELS as readonly string[]).includes(modelVersion);
// Text model state (skipped for GenAI - no local models)
const { payload: textModelState } = useModelState(
modelVersion === "jinav1"
? "jinaai/jina-clip-v1-text_model_fp16.onnx"
@@ -328,6 +334,10 @@ export default function Explore() {
);
const allModelsLoaded = useMemo(() => {
if (isGenaiEmbeddings) {
return true;
}
return (
textModelState === "downloaded" &&
textTokenizerState === "downloaded" &&
@@ -335,6 +345,7 @@ export default function Explore() {
visionFeatureExtractorState === "downloaded"
);
}, [
isGenaiEmbeddings,
textModelState,
textTokenizerState,
visionModelState,
@@ -358,10 +369,11 @@ export default function Explore() {
!defaultViewLoaded ||
(config?.semantic_search.enabled &&
(!reindexState ||
!textModelState ||
!textTokenizerState ||
!visionModelState ||
!visionFeatureExtractorState))
(!isGenaiEmbeddings &&
(!textModelState ||
!textTokenizerState ||
!visionModelState ||
!visionFeatureExtractorState))))
) {
return (
<ActivityIndicator className="absolute left-1/2 top-1/2 -translate-x-1/2 -translate-y-1/2" />