|
|
|
@@ -1,10 +1,12 @@
|
|
|
|
|
"""Chat and LLM tool calling APIs."""
|
|
|
|
|
|
|
|
|
|
import base64
|
|
|
|
|
import json
|
|
|
|
|
import logging
|
|
|
|
|
from datetime import datetime, timezone
|
|
|
|
|
from typing import Any, Dict, List
|
|
|
|
|
from typing import Any, Dict, List, Optional
|
|
|
|
|
|
|
|
|
|
import cv2
|
|
|
|
|
from fastapi import APIRouter, Body, Depends, Request
|
|
|
|
|
from fastapi.responses import JSONResponse
|
|
|
|
|
from pydantic import BaseModel
|
|
|
|
@@ -87,6 +89,28 @@ def get_tool_definitions() -> List[Dict[str, Any]]:
|
|
|
|
|
"required": [],
|
|
|
|
|
},
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"type": "function",
|
|
|
|
|
"function": {
|
|
|
|
|
"name": "get_live_context",
|
|
|
|
|
"description": (
|
|
|
|
|
"Get the current detection information for a camera: objects being tracked, "
|
|
|
|
|
"zones, timestamps. Use this to understand what is visible in the live view. "
|
|
|
|
|
"Call this when the user has included a live image (via include_live_image) or "
|
|
|
|
|
"when answering questions about what is happening right now on a specific camera."
|
|
|
|
|
),
|
|
|
|
|
"parameters": {
|
|
|
|
|
"type": "object",
|
|
|
|
|
"properties": {
|
|
|
|
|
"camera": {
|
|
|
|
|
"type": "string",
|
|
|
|
|
"description": "Camera name to get live context for.",
|
|
|
|
|
},
|
|
|
|
|
},
|
|
|
|
|
"required": ["camera"],
|
|
|
|
|
},
|
|
|
|
|
},
|
|
|
|
|
},
|
|
|
|
|
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@@ -207,6 +231,98 @@ async def execute_tool(
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def _execute_get_live_context(
    request: Request,
    camera: str,
    allowed_cameras: List[str],
) -> Dict[str, Any]:
    """
    Execute the get_live_context tool for a single camera.

    Reports the objects that the camera state is tracking on its current frame.

    Args:
        request: Incoming request; ``request.app`` is read for
            ``frigate_config`` and ``detected_frames_processor``.
        camera: Camera name supplied in the tool call.
        allowed_cameras: Camera names the caller is permitted to query.

    Returns:
        On success, a dict with ``camera``, ``timestamp`` (the camera state's
        current frame time) and ``detections``: one dict per tracked object
        whose ``frame_time`` equals that timestamp, holding ``label``,
        ``zones``, ``sub_label`` and ``stationary``.
        On failure, a dict with a single ``error`` message. Exceptions raised
        while reading the camera state are caught, logged and reported this
        way rather than propagated.
    """
    if camera not in allowed_cameras:
        return {
            "error": f"Camera '{camera}' not found or access denied",
        }

    if camera not in request.app.frigate_config.cameras:
        return {
            "error": f"Camera '{camera}' not found",
        }

    try:
        frame_processor = request.app.detected_frames_processor
        camera_state = frame_processor.camera_states.get(camera)

        if camera_state is None:
            return {
                "error": f"Camera '{camera}' state not available",
            }

        # Snapshot the object map and the frame time together under the lock;
        # the per-object serialization below runs after the lock is released.
        with camera_state.current_frame_lock:
            tracked_objects = camera_state.tracked_objects.copy()
            frame_time = camera_state.current_frame_time

        detections = []
        for tracked_obj in tracked_objects.values():
            obj_dict = tracked_obj.to_dict()
            # Skip objects whose last recorded frame is not the current one.
            if obj_dict.get("frame_time") != frame_time:
                continue
            detections.append(
                {
                    "label": obj_dict.get("label"),
                    "zones": obj_dict.get("current_zones", []),
                    "sub_label": obj_dict.get("sub_label"),
                    "stationary": obj_dict.get("stationary", False),
                }
            )

        return {
            "camera": camera,
            "timestamp": frame_time,
            "detections": detections,
        }

    except Exception as e:
        # Tool failures are handed back as an error payload instead of being
        # raised; the traceback is kept in the log.
        logger.exception("Error executing get_live_context: %s", e)
        return {
            "error": f"Error getting live context: {str(e)}",
        }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def _get_live_frame_image_url(
    request: Request,
    camera: str,
    allowed_cameras: List[str],
) -> Optional[str]:
    """
    Fetch the current live frame for a camera as a base64 data URL.

    Returns None if the frame cannot be retrieved. Used when include_live_image
    is set to attach the image to the first user message.

    The frame is downscaled so that its longer side is at most 1024 pixels
    (aspect ratio preserved up to integer truncation) and JPEG-encoded at
    quality 85 before being base64-encoded.

    Args:
        request: Incoming request; ``request.app`` is read for
            ``frigate_config`` and ``detected_frames_processor``.
        camera: Camera name to fetch the frame for.
        allowed_cameras: Camera names the caller is permitted to query.

    Returns:
        A ``data:image/jpeg;base64,...`` string, or None when the camera is
        not allowed or not configured, has no camera state, has no current
        frame, or when resizing/encoding fails.
    """
    if (
        camera not in allowed_cameras
        or camera not in request.app.frigate_config.cameras
    ):
        return None

    try:
        frame_processor = request.app.detected_frames_processor
        if camera not in frame_processor.camera_states:
            return None

        frame = frame_processor.get_current_frame(camera, {})
        if frame is None:
            return None

        height, width = frame.shape[:2]
        max_dimension = 1024
        longest_side = max(height, width)
        if longest_side > max_dimension:
            scale = max_dimension / longest_side
            # int() truncates, so a very thin frame could produce a 0-pixel
            # side, which cv2.resize rejects; clamp each side to at least 1.
            target_size = (
                max(1, int(width * scale)),
                max(1, int(height * scale)),
            )
            frame = cv2.resize(
                frame,
                target_size,
                interpolation=cv2.INTER_AREA,
            )

        # imencode reports success through its first return value; do not
        # build a data URL from the buffer unless encoding succeeded.
        encoded_ok, img_encoded = cv2.imencode(
            ".jpg", frame, [cv2.IMWRITE_JPEG_QUALITY, 85]
        )
        if not encoded_ok:
            logger.debug("Failed to get live frame for %s: %s", camera, "JPEG encode failed")
            return None

        b64 = base64.b64encode(img_encoded.tobytes()).decode("utf-8")
        return f"data:image/jpeg;base64,{b64}"
    except Exception as e:
        # Best-effort: the caller treats None as "no image to attach".
        logger.debug("Failed to get live frame for %s: %s", camera, e)
        return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def _execute_tool_internal(
|
|
|
|
|
tool_name: str,
|
|
|
|
|
arguments: Dict[str, Any],
|
|
|
|
@@ -231,6 +347,11 @@ async def _execute_tool_internal(
|
|
|
|
|
except (json.JSONDecodeError, AttributeError) as e:
|
|
|
|
|
logger.warning(f"Failed to extract tool result: {e}")
|
|
|
|
|
return {"error": "Failed to parse tool result"}
|
|
|
|
|
elif tool_name == "get_live_context":
|
|
|
|
|
camera = arguments.get("camera")
|
|
|
|
|
if not camera:
|
|
|
|
|
return {"error": "Camera parameter is required"}
|
|
|
|
|
return await _execute_get_live_context(request, camera, allowed_cameras)
|
|
|
|
|
else:
|
|
|
|
|
return {"error": f"Unknown tool: {tool_name}"}
|
|
|
|
|
|
|
|
|
@@ -277,13 +398,43 @@ async def chat_completion(
|
|
|
|
|
current_datetime = datetime.now(timezone.utc)
|
|
|
|
|
current_date_str = current_datetime.strftime("%Y-%m-%d")
|
|
|
|
|
current_time_str = current_datetime.strftime("%H:%M:%S %Z")
|
|
|
|
|
|
|
|
|
|
cameras_info = []
|
|
|
|
|
config = request.app.frigate_config
|
|
|
|
|
for camera_id in allowed_cameras:
|
|
|
|
|
if camera_id not in config.cameras:
|
|
|
|
|
continue
|
|
|
|
|
camera_config = config.cameras[camera_id]
|
|
|
|
|
friendly_name = (
|
|
|
|
|
camera_config.friendly_name
|
|
|
|
|
if camera_config.friendly_name
|
|
|
|
|
else camera_id.replace("_", " ").title()
|
|
|
|
|
)
|
|
|
|
|
cameras_info.append(f" - {friendly_name} (ID: {camera_id})")
|
|
|
|
|
|
|
|
|
|
cameras_section = ""
|
|
|
|
|
if cameras_info:
|
|
|
|
|
cameras_section = (
|
|
|
|
|
"\n\nAvailable cameras:\n"
|
|
|
|
|
+ "\n".join(cameras_info)
|
|
|
|
|
+ "\n\nWhen users refer to cameras by their friendly name (e.g., 'Back Deck Camera'), use the corresponding camera ID (e.g., 'back_deck_cam') in tool calls."
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
live_image_note = ""
|
|
|
|
|
if body.include_live_image:
|
|
|
|
|
live_image_note = (
|
|
|
|
|
f"\n\nThe first user message includes a live image from camera "
|
|
|
|
|
f"'{body.include_live_image}'. Use get_live_context for that camera to get "
|
|
|
|
|
"current detection details (objects, zones) to aid in understanding the image."
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
system_prompt = f"""You are a helpful assistant for Frigate, a security camera NVR system. You help users answer questions about their cameras, detected objects, and events.
|
|
|
|
|
|
|
|
|
|
Current date and time: {current_date_str} at {current_time_str} (UTC)
|
|
|
|
|
|
|
|
|
|
When users ask questions about "today", "yesterday", "this week", etc., use the current date above as reference.
|
|
|
|
|
When searching for objects or events, use ISO 8601 format for dates (e.g., {current_date_str}T00:00:00Z for the start of today).
|
|
|
|
|
Always be accurate with time calculations based on the current date provided."""
|
|
|
|
|
Always be accurate with time calculations based on the current date provided.{cameras_section}{live_image_note}"""
|
|
|
|
|
|
|
|
|
|
conversation.append(
|
|
|
|
|
{
|
|
|
|
@@ -292,6 +443,7 @@ Always be accurate with time calculations based on the current date provided."""
|
|
|
|
|
}
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
first_user_message_seen = False
|
|
|
|
|
for msg in body.messages:
|
|
|
|
|
msg_dict = {
|
|
|
|
|
"role": msg.role,
|
|
|
|
@@ -301,6 +453,22 @@ Always be accurate with time calculations based on the current date provided."""
|
|
|
|
|
msg_dict["tool_call_id"] = msg.tool_call_id
|
|
|
|
|
if msg.name:
|
|
|
|
|
msg_dict["name"] = msg.name
|
|
|
|
|
|
|
|
|
|
if (
|
|
|
|
|
msg.role == "user"
|
|
|
|
|
and not first_user_message_seen
|
|
|
|
|
and body.include_live_image
|
|
|
|
|
):
|
|
|
|
|
first_user_message_seen = True
|
|
|
|
|
image_url = await _get_live_frame_image_url(
|
|
|
|
|
request, body.include_live_image, allowed_cameras
|
|
|
|
|
)
|
|
|
|
|
if image_url:
|
|
|
|
|
msg_dict["content"] = [
|
|
|
|
|
{"type": "text", "text": msg.content},
|
|
|
|
|
{"type": "image_url", "image_url": {"url": image_url}},
|
|
|
|
|
]
|
|
|
|
|
|
|
|
|
|
conversation.append(msg_dict)
|
|
|
|
|
|
|
|
|
|
tool_iterations = 0
|
|
|
|
|