# Files
# LocalAI/swagger/swagger.yaml
# LocalAI [bot] c39213443b feat(swagger): update swagger (#9310)
# Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
# Co-authored-by: mudler <2420543+mudler@users.noreply.github.com>
# 2026-04-11 08:38:55 +02:00
#
# 3043 lines
# 75 KiB
# YAML

basePath: /
definitions:
config.Gallery:
properties:
name:
type: string
url:
type: string
type: object
functions.Function:
properties:
description:
type: string
name:
type: string
parameters:
additionalProperties: {}
type: object
strict:
type: boolean
type: object
functions.Item:
properties:
properties:
additionalProperties: {}
type: object
type:
type: string
type: object
functions.JSONFunctionStructure:
properties:
$defs:
additionalProperties: {}
type: object
anyOf:
items:
$ref: '#/definitions/functions.Item'
type: array
oneOf:
items:
$ref: '#/definitions/functions.Item'
type: array
type: object
functions.Tool:
properties:
function:
$ref: '#/definitions/functions.Function'
type:
type: string
type: object
gallery.File:
properties:
filename:
type: string
sha256:
type: string
uri:
type: string
type: object
gallery.GalleryBackend:
properties:
alias:
type: string
backend:
description: |-
Backend is the resolved backend engine for this model (e.g. "llama-cpp").
Populated at load time from overrides, inline config, or the URL-referenced config file.
type: string
capabilities:
additionalProperties:
type: string
type: object
description:
type: string
files:
description: AdditionalFiles are used to add additional files to the model
items:
$ref: '#/definitions/gallery.File'
type: array
gallery:
allOf:
- $ref: '#/definitions/config.Gallery'
description: Gallery is a reference to the gallery which contains the model
icon:
type: string
installed:
description: Installed is used to indicate if the model is installed or not
type: boolean
license:
type: string
mirrors:
items:
type: string
type: array
name:
type: string
size:
description: |-
Size is an optional hardcoded model size string (e.g. "500MB", "14.5GB").
Used when the size cannot be estimated automatically.
type: string
tags:
items:
type: string
type: array
uri:
type: string
url:
type: string
urls:
items:
type: string
type: array
type: object
gallery.Metadata:
properties:
backend:
description: |-
Backend is the resolved backend engine for this model (e.g. "llama-cpp").
Populated at load time from overrides, inline config, or the URL-referenced config file.
type: string
description:
type: string
files:
description: AdditionalFiles are used to add additional files to the model
items:
$ref: '#/definitions/gallery.File'
type: array
gallery:
allOf:
- $ref: '#/definitions/config.Gallery'
description: Gallery is a reference to the gallery which contains the model
icon:
type: string
installed:
description: Installed is used to indicate if the model is installed or not
type: boolean
license:
type: string
name:
type: string
size:
description: |-
Size is an optional hardcoded model size string (e.g. "500MB", "14.5GB").
Used when the size cannot be estimated automatically.
type: string
tags:
items:
type: string
type: array
url:
type: string
urls:
items:
type: string
type: array
type: object
galleryop.OpStatus:
properties:
cancellable:
description: Cancellable is true if the operation can be cancelled
type: boolean
cancelled:
description: Cancelled is true if the operation was cancelled
type: boolean
deletion:
description: Deletion is true if the operation is a deletion
type: boolean
downloaded_size:
type: string
error: {}
file_name:
type: string
file_size:
type: string
gallery_element_name:
type: string
message:
type: string
processed:
type: boolean
progress:
type: number
type: object
localai.APIInstructionResponse:
properties:
description:
type: string
name:
type: string
swagger_fragment:
additionalProperties: {}
type: object
tags:
items:
type: string
type: array
type: object
localai.GalleryBackend:
properties:
id:
type: string
type: object
localai.GalleryModel:
properties:
backend:
description: |-
Backend is the resolved backend engine for this model (e.g. "llama-cpp").
Populated at load time from overrides, inline config, or the URL-referenced config file.
type: string
config_file:
additionalProperties: {}
description: config_file is read in the situation where URL is blank - and
therefore this is a base config.
type: object
description:
type: string
files:
description: AdditionalFiles are used to add additional files to the model
items:
$ref: '#/definitions/gallery.File'
type: array
gallery:
allOf:
- $ref: '#/definitions/config.Gallery'
description: Gallery is a reference to the gallery which contains the model
icon:
type: string
id:
type: string
installed:
description: Installed is used to indicate if the model is installed or not
type: boolean
license:
type: string
name:
type: string
overrides:
additionalProperties: {}
description: Overrides are used to override the configuration of the model
located at URL
type: object
size:
description: |-
Size is an optional hardcoded model size string (e.g. "500MB", "14.5GB").
Used when the size cannot be estimated automatically.
type: string
tags:
items:
type: string
type: array
url:
type: string
urls:
items:
type: string
type: array
type: object
localai.ModelResponse:
properties:
config: {}
details:
items:
type: string
type: array
error:
type: string
filename:
type: string
message:
type: string
success:
type: boolean
type: object
localai.vramEstimateRequest:
properties:
context_size:
description: context length to estimate for (default 8192)
type: integer
gpu_layers:
description: number of layers to offload to GPU (0 = all)
type: integer
kv_quant_bits:
description: KV cache quantization bits (0 = fp16)
type: integer
model:
description: model name (must be installed)
type: string
type: object
localai.vramEstimateResponse:
properties:
context_note:
description: note when context_size was defaulted
type: string
model_max_context:
description: model's trained maximum context length
type: integer
sizeBytes:
description: total model weight size in bytes
type: integer
sizeDisplay:
description: human-readable size (e.g. "4.2 GB")
type: string
vramBytes:
description: estimated VRAM usage in bytes
type: integer
vramDisplay:
description: human-readable VRAM (e.g. "6.1 GB")
type: string
type: object
model.BackendLogLine:
properties:
stream:
description: '"stdout" or "stderr"'
type: string
text:
type: string
timestamp:
type: string
type: object
proto.MemoryUsageData:
properties:
breakdown:
additionalProperties:
format: int64
type: integer
type: object
total:
type: integer
type: object
proto.StatusResponse:
properties:
memory:
$ref: '#/definitions/proto.MemoryUsageData'
state:
$ref: '#/definitions/proto.StatusResponse_State'
type: object
proto.StatusResponse_State:
enum:
- 0
- 1
- 2
- -1
format: int32
type: integer
x-enum-varnames:
- StatusResponse_UNINITIALIZED
- StatusResponse_BUSY
- StatusResponse_READY
- StatusResponse_ERROR
proto.VADResponse:
properties:
segments:
items:
$ref: '#/definitions/proto.VADSegment'
type: array
type: object
proto.VADSegment:
properties:
end:
type: number
start:
type: number
type: object
schema.AnthropicContentBlock:
properties:
content: {}
id:
type: string
input:
additionalProperties: {}
type: object
is_error:
type: boolean
name:
type: string
source:
$ref: '#/definitions/schema.AnthropicImageSource'
text:
type: string
tool_use_id:
type: string
type:
type: string
type: object
schema.AnthropicImageSource:
properties:
data:
type: string
media_type:
type: string
type:
type: string
type: object
schema.AnthropicMessage:
properties:
content: {}
role:
type: string
type: object
schema.AnthropicRequest:
properties:
max_tokens:
type: integer
messages:
items:
$ref: '#/definitions/schema.AnthropicMessage'
type: array
metadata:
additionalProperties:
type: string
type: object
model:
type: string
stop_sequences:
items:
type: string
type: array
stream:
type: boolean
system:
type: string
temperature:
type: number
tool_choice: {}
tools:
items:
$ref: '#/definitions/schema.AnthropicTool'
type: array
top_k:
type: integer
top_p:
type: number
type: object
schema.AnthropicResponse:
properties:
content:
items:
$ref: '#/definitions/schema.AnthropicContentBlock'
type: array
id:
type: string
model:
type: string
role:
type: string
stop_reason:
type: string
stop_sequence:
type: string
type:
type: string
usage:
$ref: '#/definitions/schema.AnthropicUsage'
type: object
schema.AnthropicTool:
properties:
description:
type: string
input_schema:
additionalProperties: {}
type: object
name:
type: string
type: object
schema.AnthropicUsage:
properties:
input_tokens:
type: integer
output_tokens:
type: integer
type: object
schema.BackendMonitorRequest:
properties:
model:
type: string
type: object
schema.BackendResponse:
properties:
id:
type: string
status_url:
type: string
type: object
schema.Choice:
properties:
delta:
$ref: '#/definitions/schema.Message'
finish_reason:
type: string
index:
type: integer
logprobs:
$ref: '#/definitions/schema.Logprobs'
message:
$ref: '#/definitions/schema.Message'
text:
type: string
type: object
schema.Detection:
properties:
class_name:
type: string
confidence:
type: number
height:
type: number
mask:
description: base64-encoded PNG segmentation mask
type: string
width:
type: number
x:
type: number
"y":
type: number
type: object
schema.DetectionRequest:
properties:
boxes:
description: Box coordinates as [x1,y1,x2,y2,...] quads
items:
type: number
type: array
image:
description: URL or base64-encoded image to analyze
type: string
model:
type: string
points:
description: 'Point coordinates as [x,y,label,...] triples (label: 1=pos,
0=neg)'
items:
type: number
type: array
prompt:
description: Text prompt (for SAM 3 PCS mode)
type: string
threshold:
description: Detection confidence threshold
type: number
type: object
schema.DetectionResponse:
properties:
detections:
items:
$ref: '#/definitions/schema.Detection'
type: array
type: object
schema.ElevenLabsSoundGenerationRequest:
properties:
bpm:
type: integer
caption:
type: string
do_sample:
type: boolean
duration_seconds:
type: number
instrumental:
description: 'Simple mode: use text as description; optional instrumental
/ vocal_language'
type: boolean
keyscale:
type: string
language:
type: string
lyrics:
type: string
model_id:
type: string
prompt_influence:
type: number
text:
type: string
think:
description: Advanced mode
type: boolean
timesignature:
type: string
vocal_language:
type: string
type: object
schema.FunctionCall:
properties:
arguments:
type: string
name:
type: string
type: object
schema.GalleryResponse:
properties:
estimated_size_bytes:
type: integer
estimated_size_display:
type: string
estimated_vram_bytes:
type: integer
estimated_vram_display:
type: string
status:
type: string
uuid:
type: string
type: object
schema.InputTokensDetails:
properties:
image_tokens:
type: integer
text_tokens:
type: integer
type: object
schema.Item:
properties:
b64_json:
type: string
index:
type: integer
object:
type: string
url:
description: Images
type: string
type: object
schema.JINADocumentResult:
properties:
document:
$ref: '#/definitions/schema.JINAText'
index:
type: integer
relevance_score:
type: number
type: object
schema.JINARerankRequest:
properties:
backend:
type: string
documents:
items:
type: string
type: array
model:
type: string
query:
type: string
top_n:
type: integer
type: object
schema.JINARerankResponse:
properties:
model:
type: string
results:
items:
$ref: '#/definitions/schema.JINADocumentResult'
type: array
usage:
$ref: '#/definitions/schema.JINAUsageInfo'
type: object
schema.JINAText:
properties:
text:
type: string
type: object
schema.JINAUsageInfo:
properties:
prompt_tokens:
type: integer
total_tokens:
type: integer
type: object
schema.Job:
properties:
audios:
description: List of audio URLs or base64 strings
items:
type: string
type: array
completed_at:
type: string
created_at:
type: string
error:
description: Error message if failed
type: string
files:
description: List of file URLs or base64 strings
items:
type: string
type: array
id:
description: UUID
type: string
images:
description: |-
Multimedia content (for manual execution)
Can contain URLs or base64-encoded data URIs
items:
type: string
type: array
parameters:
additionalProperties:
type: string
description: Template parameters
type: object
result:
description: Agent response
type: string
started_at:
type: string
status:
allOf:
- $ref: '#/definitions/schema.JobStatus'
description: pending, running, completed, failed, cancelled
task_id:
description: Reference to Task
type: string
traces:
description: Execution traces (reasoning, tool calls, tool results)
items:
$ref: '#/definitions/schema.JobTrace'
type: array
triggered_by:
description: '"manual", "cron", "api"'
type: string
videos:
description: List of video URLs or base64 strings
items:
type: string
type: array
webhook_error:
description: Error if webhook failed
type: string
webhook_sent:
description: Webhook delivery tracking
type: boolean
webhook_sent_at:
type: string
type: object
schema.JobExecutionRequest:
properties:
audios:
description: List of audio URLs or base64 strings
items:
type: string
type: array
files:
description: List of file URLs or base64 strings
items:
type: string
type: array
images:
description: |-
Multimedia content (optional, for manual execution)
Can contain URLs or base64-encoded data URIs
items:
type: string
type: array
parameters:
additionalProperties:
type: string
description: Optional, for templating
type: object
task_id:
description: Required
type: string
videos:
description: List of video URLs or base64 strings
items:
type: string
type: array
type: object
schema.JobExecutionResponse:
properties:
job_id:
description: unique job identifier
type: string
status:
description: initial status (pending)
type: string
url:
description: URL to poll for job status
type: string
type: object
schema.JobStatus:
enum:
- pending
- running
- completed
- failed
- cancelled
type: string
x-enum-varnames:
- JobStatusPending
- JobStatusRunning
- JobStatusCompleted
- JobStatusFailed
- JobStatusCancelled
schema.JobTrace:
properties:
arguments:
additionalProperties: {}
description: Tool arguments or result data
type: object
content:
description: The actual trace content
type: string
timestamp:
description: When this trace occurred
type: string
tool_name:
description: Tool name (for tool_call/tool_result)
type: string
type:
description: '"reasoning", "tool_call", "tool_result", "status"'
type: string
type: object
schema.LogprobContent:
properties:
bytes:
items:
type: integer
type: array
id:
type: integer
logprob:
type: number
token:
type: string
top_logprobs:
items:
$ref: '#/definitions/schema.LogprobContent'
type: array
type: object
schema.Logprobs:
properties:
content:
items:
$ref: '#/definitions/schema.LogprobContent'
type: array
type: object
schema.LogprobsValue:
properties:
enabled:
description: true if logprobs should be returned
type: boolean
type: object
schema.Message:
properties:
content:
description: The message content
function_call:
description: A result of a function call
name:
description: The message name (used for tools calls)
type: string
reasoning:
description: Reasoning content extracted from <thinking>...</thinking> tags
type: string
role:
description: The message role
type: string
string_audios:
items:
type: string
type: array
string_content:
type: string
string_images:
items:
type: string
type: array
string_videos:
items:
type: string
type: array
tool_call_id:
type: string
tool_calls:
items:
$ref: '#/definitions/schema.ToolCall'
type: array
type: object
schema.ModelsDataResponse:
properties:
data:
items:
$ref: '#/definitions/schema.OpenAIModel'
type: array
object:
type: string
type: object
schema.MultimediaSourceConfig:
properties:
headers:
additionalProperties:
type: string
description: Custom headers for HTTP request (e.g., Authorization)
type: object
type:
description: '"image", "video", "audio", "file"'
type: string
url:
description: URL to fetch from
type: string
type: object
schema.NodeData:
properties:
id:
type: string
lastSeen:
type: string
name:
type: string
serviceID:
type: string
tunnelAddress:
type: string
type: object
schema.ORAnnotation:
properties:
end_index:
type: integer
start_index:
type: integer
title:
type: string
type:
description: url_citation
type: string
url:
type: string
type: object
schema.ORContentPart:
properties:
annotations:
description: REQUIRED for output_text - must always be present (use [])
items:
$ref: '#/definitions/schema.ORAnnotation'
type: array
detail:
description: low|high|auto for images
type: string
file_data:
type: string
file_url:
type: string
filename:
type: string
image_url:
type: string
logprobs:
description: REQUIRED for output_text - must always be present (use [])
items:
$ref: '#/definitions/schema.ORLogProb'
type: array
refusal:
type: string
text:
description: REQUIRED for output_text - must always be present (even if empty)
type: string
type:
description: input_text|input_image|input_file|output_text|refusal
type: string
type: object
schema.ORError:
properties:
code:
type: string
message:
type: string
param:
type: string
type:
description: invalid_request|not_found|server_error|model_error|too_many_requests
type: string
type: object
schema.ORFunctionTool:
properties:
description:
type: string
name:
type: string
parameters:
additionalProperties: {}
type: object
strict:
description: Always include in response
type: boolean
type:
description: always "function"
type: string
type: object
schema.ORIncompleteDetails:
properties:
reason:
type: string
type: object
schema.ORInputTokensDetails:
properties:
cached_tokens:
description: Always include, even if 0
type: integer
type: object
schema.ORItemField:
properties:
arguments:
type: string
call_id:
description: Function call fields
type: string
content:
description: string or []ORContentPart for messages
encrypted_content:
description: Provider-specific encrypted content
type: string
id:
description: Present for all output items
type: string
name:
type: string
output:
description: Function call output fields
role:
description: Message fields
type: string
status:
description: in_progress|completed|incomplete
type: string
summary:
description: Reasoning fields (for type == "reasoning")
items:
$ref: '#/definitions/schema.ORContentPart'
type: array
type:
description: message|function_call|function_call_output|reasoning|item_reference
type: string
type: object
schema.ORLogProb:
properties:
bytes:
items:
type: integer
type: array
logprob:
type: number
token:
type: string
top_logprobs:
items:
$ref: '#/definitions/schema.ORTopLogProb'
type: array
type: object
schema.OROutputTokensDetails:
properties:
reasoning_tokens:
description: Always include, even if 0
type: integer
type: object
schema.ORReasoning:
properties:
effort:
type: string
summary:
type: string
type: object
schema.ORReasoningParam:
properties:
effort:
description: '"none"|"low"|"medium"|"high"|"xhigh"'
type: string
summary:
description: '"auto"|"concise"|"detailed"'
type: string
type: object
schema.ORResponseResource:
properties:
background:
type: boolean
completed_at:
description: 'Required: present as number or null'
type: integer
created_at:
type: integer
error:
allOf:
- $ref: '#/definitions/schema.ORError'
description: Always present, null if no error
frequency_penalty:
type: number
id:
type: string
incomplete_details:
allOf:
- $ref: '#/definitions/schema.ORIncompleteDetails'
description: Always present, null if complete
instructions:
type: string
max_output_tokens:
type: integer
max_tool_calls:
description: nullable
type: integer
metadata:
additionalProperties:
type: string
description: Metadata and operational flags
type: object
model:
type: string
object:
description: always "response"
type: string
output:
items:
$ref: '#/definitions/schema.ORItemField'
type: array
parallel_tool_calls:
type: boolean
presence_penalty:
type: number
previous_response_id:
type: string
prompt_cache_key:
description: nullable
type: string
reasoning:
allOf:
- $ref: '#/definitions/schema.ORReasoning'
description: nullable
safety_identifier:
description: Safety and caching
type: string
service_tier:
type: string
status:
description: in_progress|completed|failed|incomplete
type: string
store:
type: boolean
temperature:
description: Sampling parameters (always required)
type: number
text:
allOf:
- $ref: '#/definitions/schema.ORTextConfig'
description: Text format configuration
tool_choice: {}
tools:
description: Tool-related fields
items:
$ref: '#/definitions/schema.ORFunctionTool'
type: array
top_logprobs:
description: Default to 0
type: integer
top_p:
type: number
truncation:
description: Truncation and reasoning
type: string
usage:
allOf:
- $ref: '#/definitions/schema.ORUsage'
description: Usage statistics
type: object
schema.ORTextConfig:
properties:
format:
$ref: '#/definitions/schema.ORTextFormat'
type: object
schema.ORTextFormat:
properties:
type:
description: '"text" or "json_schema"'
type: string
type: object
schema.ORTopLogProb:
properties:
bytes:
items:
type: integer
type: array
logprob:
type: number
token:
type: string
type: object
schema.ORUsage:
properties:
input_tokens:
type: integer
input_tokens_details:
allOf:
- $ref: '#/definitions/schema.ORInputTokensDetails'
description: Always present
output_tokens:
type: integer
output_tokens_details:
allOf:
- $ref: '#/definitions/schema.OROutputTokensDetails'
description: Always present
total_tokens:
type: integer
type: object
schema.OpenAIModel:
properties:
id:
type: string
object:
type: string
type: object
schema.OpenAIRequest:
properties:
backend:
type: string
batch:
description: Custom parameters - not present in the OpenAI API
type: integer
clip_skip:
description: Diffusers
type: integer
echo:
type: boolean
encoding_format:
description: 'Embedding encoding format: "float" (default) or "base64" (OpenAI
Node.js SDK default)'
type: string
file:
description: whisper
type: string
files:
description: Multiple input images for img2img or inpainting
items:
type: string
type: array
frequency_penalty:
type: number
function_call:
description: might be a string or an object
functions:
description: A list of available functions to call
items:
$ref: '#/definitions/functions.Function'
type: array
grammar:
description: A grammar to constrain the LLM output
type: string
grammar_json_functions:
$ref: '#/definitions/functions.JSONFunctionStructure'
ignore_eos:
type: boolean
input: {}
instruction:
description: Edit endpoint
type: string
language:
description: Also part of the OpenAI official spec
type: string
logit_bias:
additionalProperties:
format: float64
type: number
description: Map of token IDs to bias values (-100 to 100)
type: object
logprobs:
allOf:
- $ref: '#/definitions/schema.LogprobsValue'
description: |-
OpenAI API logprobs parameters
logprobs: boolean - if true, returns log probabilities of each output token
top_logprobs: integer 0-20 - number of most likely tokens to return at each token position
max_tokens:
type: integer
messages:
description: Messages is read only by chat/completion API calls
items:
$ref: '#/definitions/schema.Message'
type: array
metadata:
additionalProperties:
type: string
type: object
min_p:
type: number
model:
type: string
model_base_name:
type: string
"n":
description: Also part of the OpenAI official spec. use it for returning multiple
results
type: integer
n_keep:
type: integer
negative_prompt:
type: string
negative_prompt_scale:
type: number
presence_penalty:
type: number
prompt:
description: Prompt is read only by completion/image API calls
quality:
description: Image (not supported by OpenAI)
type: string
reasoning_effort:
type: string
ref_images:
description: Reference images for models that support them (e.g., Flux Kontext)
items:
type: string
type: array
repeat_last_n:
type: integer
repeat_penalty:
type: number
response_format:
description: whisper/image
rope_freq_base:
type: number
rope_freq_scale:
type: number
seed:
type: integer
size:
description: image
type: string
step:
type: integer
stop: {}
stream:
type: boolean
temperature:
type: number
tfz:
type: number
tokenizer:
description: RWKV (?)
type: string
tool_choice: {}
tools:
items:
$ref: '#/definitions/functions.Tool'
type: array
top_k:
type: integer
top_logprobs:
description: Number of top logprobs per token (0-20)
type: integer
top_p:
description: Common options between all the API calls, part of the OpenAI
spec
type: number
translate:
description: Only for audio transcription
type: boolean
typical_p:
type: number
required:
- file
type: object
schema.OpenAIResponse:
properties:
choices:
items:
$ref: '#/definitions/schema.Choice'
type: array
created:
type: integer
data:
items:
$ref: '#/definitions/schema.Item'
type: array
id:
type: string
model:
type: string
object:
type: string
usage:
$ref: '#/definitions/schema.OpenAIUsage'
type: object
schema.OpenAIUsage:
properties:
completion_tokens:
type: integer
input_tokens:
description: Fields for image generation API compatibility
type: integer
input_tokens_details:
$ref: '#/definitions/schema.InputTokensDetails'
output_tokens:
type: integer
prompt_tokens:
type: integer
timing_prompt_processing:
      description: Extra timing data, disabled by default as it's not a part
        of the OpenAI specification
type: number
timing_token_generation:
type: number
total_tokens:
type: integer
type: object
schema.OpenResponsesRequest:
properties:
allowed_tools:
description: Restrict which tools can be invoked
items:
type: string
type: array
background:
description: Run request in background
type: boolean
frequency_penalty:
description: Frequency penalty (-2.0 to 2.0)
type: number
include:
description: What to include in response
items:
type: string
type: array
input:
description: string or []ORItemParam
instructions:
type: string
logit_bias:
additionalProperties:
format: float64
type: number
description: OpenAI-compatible extensions (not in Open Responses spec)
type: object
max_output_tokens:
type: integer
max_tool_calls:
description: Maximum number of tool calls
type: integer
metadata:
additionalProperties:
type: string
type: object
model:
type: string
parallel_tool_calls:
description: Allow parallel tool calls
type: boolean
presence_penalty:
description: Presence penalty (-2.0 to 2.0)
type: number
previous_response_id:
type: string
reasoning:
$ref: '#/definitions/schema.ORReasoningParam'
service_tier:
description: '"auto"|"default"|priority hint'
type: string
store:
description: Whether to store the response
type: boolean
stream:
type: boolean
temperature:
type: number
text_format:
description: Additional parameters from spec
tool_choice:
description: '"auto"|"required"|"none"|{type:"function",name:"..."}'
tools:
items:
$ref: '#/definitions/schema.ORFunctionTool'
type: array
top_logprobs:
description: Number of top logprobs to return
type: integer
top_p:
type: number
truncation:
description: '"auto"|"disabled"'
type: string
type: object
schema.P2PNodesResponse:
properties:
federated_nodes:
items:
$ref: '#/definitions/schema.NodeData'
type: array
llama_cpp_nodes:
items:
$ref: '#/definitions/schema.NodeData'
type: array
mlx_nodes:
items:
$ref: '#/definitions/schema.NodeData'
type: array
type: object
schema.SysInfoModel:
properties:
id:
type: string
type: object
schema.SystemInformationResponse:
properties:
backends:
description: available backend engines
items:
type: string
type: array
loaded_models:
description: currently loaded models
items:
$ref: '#/definitions/schema.SysInfoModel'
type: array
type: object
schema.TTSRequest:
description: TTS request body
properties:
backend:
description: backend engine override
type: string
input:
description: text input
type: string
language:
description: (optional) language to use with TTS model
type: string
model:
type: string
response_format:
description: (optional) output format
type: string
sample_rate:
description: (optional) desired output sample rate
type: integer
stream:
description: (optional) enable streaming TTS
type: boolean
voice:
description: voice audio file or speaker id
type: string
type: object
schema.Task:
properties:
created_at:
type: string
cron:
description: Optional cron expression
type: string
cron_parameters:
additionalProperties:
type: string
description: Parameters to use when executing cron jobs
type: object
description:
description: Optional description
type: string
enabled:
description: Can be disabled without deletion
type: boolean
id:
description: UUID
type: string
model:
description: Model name (must have MCP config)
type: string
multimedia_sources:
description: |-
Multimedia sources (for cron jobs)
URLs to fetch multimedia content from when cron job executes
Each source can have custom headers for authentication/authorization
items:
$ref: '#/definitions/schema.MultimediaSourceConfig'
type: array
name:
description: User-friendly name
type: string
prompt:
description: Template prompt (supports Go template .param syntax)
type: string
updated_at:
type: string
webhooks:
description: |-
Webhook configuration (for notifications).
Supports multiple webhook endpoints.
Webhooks can handle both success and failure cases using template variables:
.Job (Job object), .Task (Task object), .Result (if successful),
.Error (if failed), .Status (job status string).
items:
$ref: '#/definitions/schema.WebhookConfig'
type: array
type: object
schema.TokenizeRequest:
properties:
content:
description: text to tokenize
type: string
model:
type: string
type: object
schema.TokenizeResponse:
properties:
tokens:
description: token IDs
items:
type: integer
type: array
type: object
schema.ToolCall:
properties:
function:
$ref: '#/definitions/schema.FunctionCall'
id:
type: string
index:
type: integer
type:
type: string
type: object
schema.VADRequest:
description: VAD request body
properties:
audio:
description: raw audio samples as float32 PCM
items:
type: number
type: array
model:
type: string
type: object
schema.VideoRequest:
properties:
cfg_scale:
description: classifier-free guidance scale
type: number
end_image:
description: URL or base64 of the last frame
type: string
fps:
description: frames per second
type: integer
height:
description: output height in pixels
type: integer
input_reference:
description: reference image or video URL
type: string
model:
type: string
negative_prompt:
description: things to avoid in the output
type: string
num_frames:
description: total number of frames to generate
type: integer
prompt:
description: text description of the video to generate
type: string
response_format:
description: output format (url or b64_json)
type: string
seconds:
description: duration in seconds (alternative to num_frames)
type: string
seed:
description: random seed for reproducibility
type: integer
size:
description: WxH shorthand (e.g. "512x512")
type: string
start_image:
description: URL or base64 of the first frame
type: string
step:
description: number of diffusion steps
type: integer
width:
description: output width in pixels
type: integer
type: object
schema.WebhookConfig:
properties:
headers:
additionalProperties:
type: string
description: Custom headers (e.g., Authorization)
type: object
method:
description: 'HTTP method (POST, PUT, PATCH) - default: POST'
type: string
payload_template:
description: Optional template for payload
type: string
url:
description: Webhook endpoint URL
type: string
type: object
info:
contact:
name: LocalAI
url: https://localai.io
description: The LocalAI Rest API.
license:
name: MIT
url: https://raw.githubusercontent.com/mudler/LocalAI/master/LICENSE
title: LocalAI API
version: 2.0.0
paths:
/api/agent/jobs:
get:
parameters:
- description: Filter by task ID
in: query
name: task_id
type: string
- description: Filter by status (pending, running, completed, failed, cancelled)
in: query
name: status
type: string
- description: Max number of jobs to return
in: query
name: limit
type: integer
- description: Set to 'true' for admin cross-user listing
in: query
name: all_users
type: string
produces:
- application/json
responses:
"200":
description: jobs
schema:
items:
$ref: '#/definitions/schema.Job'
type: array
summary: List agent jobs
tags:
- agent-jobs
/api/agent/jobs/{id}:
delete:
parameters:
- description: Job ID
in: path
name: id
required: true
type: string
produces:
- application/json
responses:
"200":
description: message
schema:
additionalProperties:
type: string
type: object
"404":
description: error
schema:
additionalProperties:
type: string
type: object
summary: Delete an agent job
tags:
- agent-jobs
get:
parameters:
- description: Job ID
in: path
name: id
required: true
type: string
produces:
- application/json
responses:
"200":
description: job
schema:
$ref: '#/definitions/schema.Job'
"404":
description: error
schema:
additionalProperties:
type: string
type: object
summary: Get an agent job
tags:
- agent-jobs
/api/agent/jobs/{id}/cancel:
post:
parameters:
- description: Job ID
in: path
name: id
required: true
type: string
produces:
- application/json
responses:
"200":
description: message
schema:
additionalProperties:
type: string
type: object
"400":
description: error
schema:
additionalProperties:
type: string
type: object
"404":
description: error
schema:
additionalProperties:
type: string
type: object
summary: Cancel an agent job
tags:
- agent-jobs
/api/agent/jobs/execute:
post:
consumes:
- application/json
parameters:
- description: Job execution request
in: body
name: request
required: true
schema:
$ref: '#/definitions/schema.JobExecutionRequest'
produces:
- application/json
responses:
"201":
description: job created
schema:
$ref: '#/definitions/schema.JobExecutionResponse'
"400":
description: error
schema:
additionalProperties:
type: string
type: object
summary: Execute an agent job
tags:
- agent-jobs
/api/agent/tasks:
get:
parameters:
- description: Set to 'true' for admin cross-user listing
in: query
name: all_users
type: string
produces:
- application/json
responses:
"200":
description: tasks
schema:
items:
$ref: '#/definitions/schema.Task'
type: array
summary: List agent tasks
tags:
- agent-jobs
post:
consumes:
- application/json
parameters:
- description: Task definition
in: body
name: request
required: true
schema:
$ref: '#/definitions/schema.Task'
produces:
- application/json
responses:
"201":
description: id
schema:
additionalProperties:
type: string
type: object
"400":
description: error
schema:
additionalProperties:
type: string
type: object
summary: Create a new agent task
tags:
- agent-jobs
/api/agent/tasks/{id}:
delete:
parameters:
- description: Task ID
in: path
name: id
required: true
type: string
produces:
- application/json
responses:
"200":
description: message
schema:
additionalProperties:
type: string
type: object
"404":
description: error
schema:
additionalProperties:
type: string
type: object
summary: Delete an agent task
tags:
- agent-jobs
get:
parameters:
- description: Task ID
in: path
name: id
required: true
type: string
produces:
- application/json
responses:
"200":
description: task
schema:
$ref: '#/definitions/schema.Task'
"404":
description: error
schema:
additionalProperties:
type: string
type: object
summary: Get an agent task
tags:
- agent-jobs
put:
consumes:
- application/json
parameters:
- description: Task ID
in: path
name: id
required: true
type: string
- description: Updated task definition
in: body
name: request
required: true
schema:
$ref: '#/definitions/schema.Task'
produces:
- application/json
responses:
"200":
description: message
schema:
additionalProperties:
type: string
type: object
"400":
description: error
schema:
additionalProperties:
type: string
type: object
"404":
description: error
schema:
additionalProperties:
type: string
type: object
summary: Update an agent task
tags:
- agent-jobs
/api/agent/tasks/{name}/execute:
post:
consumes:
- application/json
parameters:
- description: Task name
in: path
name: name
required: true
type: string
- description: Optional template parameters
in: body
name: parameters
schema:
type: object
produces:
- application/json
responses:
"201":
description: job created
schema:
$ref: '#/definitions/schema.JobExecutionResponse'
"400":
description: error
schema:
additionalProperties:
type: string
type: object
"404":
description: error
schema:
additionalProperties:
type: string
type: object
summary: Execute an agent task by name
tags:
- agent-jobs
/api/backend-logs:
get:
description: Returns a sorted list of model IDs that have captured backend process
output
produces:
- application/json
responses:
"200":
description: Model IDs with logs
schema:
items:
type: string
type: array
summary: List models with backend logs
tags:
- monitoring
/api/backend-logs/{modelId}:
get:
description: Returns all captured log lines (stdout/stderr) for the specified
model's backend process
parameters:
- description: Model ID
in: path
name: modelId
required: true
type: string
produces:
- application/json
responses:
"200":
description: Log lines
schema:
items:
$ref: '#/definitions/model.BackendLogLine'
type: array
summary: Get backend logs for a model
tags:
- monitoring
/api/backend-logs/{modelId}/clear:
post:
description: Removes all captured log lines for the specified model's backend
process
parameters:
- description: Model ID
in: path
name: modelId
required: true
type: string
responses:
"204":
description: Logs cleared
summary: Clear backend logs for a model
tags:
- monitoring
/api/backend-traces:
get:
description: Returns captured backend traces (LLM calls, embeddings, TTS, etc.)
in reverse chronological order
produces:
- application/json
responses:
"200":
description: Backend operation traces
schema:
additionalProperties: true
type: object
summary: List backend operation traces
tags:
- monitoring
/api/backend-traces/clear:
post:
description: Removes all captured backend operation traces from the buffer
responses:
"204":
description: Traces cleared
summary: Clear backend traces
tags:
- monitoring
/api/instructions:
get:
description: Returns a compact list of instruction areas with descriptions and
URLs for detailed guides
produces:
- application/json
responses:
"200":
description: instructions list with hint
schema:
additionalProperties: true
type: object
summary: List available API instruction areas
tags:
- instructions
/api/instructions/{name}:
get:
description: Returns a markdown guide (default) or filtered OpenAPI fragment
(format=json) for a named instruction
parameters:
- description: Instruction name (e.g. chat-inference, config-management)
in: path
name: name
required: true
type: string
- description: 'Response format: json for OpenAPI fragment, omit for markdown'
in: query
name: format
type: string
produces:
- application/json
- text/markdown
responses:
"200":
description: instruction documentation
schema:
$ref: '#/definitions/localai.APIInstructionResponse'
"404":
description: instruction not found
schema:
additionalProperties:
type: string
type: object
summary: Get an instruction's API guide or OpenAPI fragment
tags:
- instructions
/api/models/{name}/{action}:
put:
description: Enable or disable a model from being loaded on demand. Disabled
models remain installed but cannot be loaded.
parameters:
- description: Model name
in: path
name: name
required: true
type: string
- description: 'Action: ''enable'' or ''disable'''
in: path
name: action
required: true
type: string
responses:
"200":
description: OK
schema:
$ref: '#/definitions/localai.ModelResponse'
"400":
description: Bad Request
schema:
$ref: '#/definitions/localai.ModelResponse'
"404":
description: Not Found
schema:
$ref: '#/definitions/localai.ModelResponse'
"500":
description: Internal Server Error
schema:
$ref: '#/definitions/localai.ModelResponse'
summary: Toggle model enabled/disabled status
tags:
- config
/api/models/config-json/{name}:
patch:
consumes:
- application/json
description: Deep-merges the JSON patch body into the existing model config
parameters:
- description: Model name
in: path
name: name
required: true
type: string
produces:
- application/json
responses:
"200":
description: success message
schema:
additionalProperties: true
type: object
summary: Partially update a model configuration
tags:
- config
/api/models/config-metadata:
get:
description: Returns config field metadata. Use ?section=<id> to filter by section,
or omit for a section index.
parameters:
- description: Section ID to filter (e.g. 'general', 'llm', 'parameters') or
'all' for everything
in: query
name: section
type: string
produces:
- application/json
responses:
"200":
description: Section index or filtered field metadata
schema:
additionalProperties: true
type: object
summary: List model configuration field metadata
tags:
- config
/api/models/config-metadata/autocomplete/{provider}:
get:
description: Returns runtime-resolved values for dynamic providers (backends,
models)
parameters:
- description: Provider name (backends, models, models:chat, models:tts, models:transcript,
models:vad)
in: path
name: provider
required: true
type: string
produces:
- application/json
responses:
"200":
description: values array
schema:
additionalProperties: true
type: object
summary: Get dynamic autocomplete values for a config field
tags:
- config
/api/models/vram-estimate:
post:
consumes:
- application/json
description: Estimates VRAM based on model weight files, context size, and GPU
layers
parameters:
- description: VRAM estimation parameters
in: body
name: request
required: true
schema:
$ref: '#/definitions/localai.vramEstimateRequest'
produces:
- application/json
responses:
"200":
description: VRAM estimate
schema:
$ref: '#/definitions/localai.vramEstimateResponse'
summary: Estimate VRAM usage for a model
tags:
- config
/api/p2p:
get:
responses:
"200":
description: Response
schema:
items:
$ref: '#/definitions/schema.P2PNodesResponse'
type: array
summary: Returns available P2P nodes
tags:
- p2p
/api/p2p/token:
get:
responses:
"200":
description: Response
schema:
type: string
summary: Show the P2P token
tags:
- p2p
/api/traces:
get:
description: Returns captured API exchange traces (request/response pairs) in
reverse chronological order
produces:
- application/json
responses:
"200":
description: Traced API exchanges
schema:
additionalProperties: true
type: object
summary: List API request/response traces
tags:
- monitoring
/api/traces/clear:
post:
description: Removes all captured API request/response traces from the buffer
responses:
"204":
description: Traces cleared
summary: Clear API traces
tags:
- monitoring
/backend/monitor:
get:
parameters:
- description: Backend statistics request
in: body
name: request
required: true
schema:
$ref: '#/definitions/schema.BackendMonitorRequest'
responses:
"200":
description: Response
schema:
$ref: '#/definitions/proto.StatusResponse'
summary: Backend monitor endpoint
tags:
- monitoring
/backend/shutdown:
post:
parameters:
- description: Backend statistics request
in: body
name: request
required: true
schema:
$ref: '#/definitions/schema.BackendMonitorRequest'
responses: {}
summary: Backend shutdown endpoint
tags:
- monitoring
/backends:
get:
responses:
"200":
description: Response
schema:
items:
$ref: '#/definitions/gallery.GalleryBackend'
type: array
summary: List all Backends
tags:
- backends
/backends/apply:
post:
parameters:
- description: query params
in: body
name: request
required: true
schema:
$ref: '#/definitions/localai.GalleryBackend'
responses:
"200":
description: Response
schema:
$ref: '#/definitions/schema.BackendResponse'
summary: Install backends to LocalAI.
tags:
- backends
/backends/available:
get:
responses:
"200":
description: Response
schema:
items:
$ref: '#/definitions/gallery.GalleryBackend'
type: array
summary: List all available Backends
tags:
- backends
/backends/delete/{name}:
post:
parameters:
- description: Backend name
in: path
name: name
required: true
type: string
responses:
"200":
description: Response
schema:
$ref: '#/definitions/schema.BackendResponse'
      summary: Delete backends from LocalAI.
tags:
- backends
/backends/galleries:
get:
responses:
"200":
description: Response
schema:
items:
$ref: '#/definitions/config.Gallery'
type: array
summary: List all Galleries
tags:
- backends
/backends/jobs:
get:
responses:
"200":
description: Response
schema:
additionalProperties:
$ref: '#/definitions/galleryop.OpStatus'
type: object
summary: Returns all the jobs status progress
tags:
- backends
/backends/jobs/{uuid}:
get:
responses:
"200":
description: Response
schema:
$ref: '#/definitions/galleryop.OpStatus'
summary: Returns the job status
tags:
- backends
/metrics:
get:
produces:
- text/plain
responses:
"200":
description: Prometheus metrics
schema:
type: string
summary: Prometheus metrics endpoint
tags:
- monitoring
/models/apply:
post:
parameters:
- description: query params
in: body
name: request
required: true
schema:
$ref: '#/definitions/localai.GalleryModel'
responses:
"200":
description: Response
schema:
$ref: '#/definitions/schema.GalleryResponse'
summary: Install models to LocalAI.
tags:
- models
/models/available:
get:
responses:
"200":
description: Response
schema:
items:
$ref: '#/definitions/gallery.Metadata'
type: array
summary: List installable models.
tags:
- models
/models/delete/{name}:
post:
parameters:
- description: Model name
in: path
name: name
required: true
type: string
responses:
"200":
description: Response
schema:
$ref: '#/definitions/schema.GalleryResponse'
      summary: Delete models from LocalAI.
tags:
- models
/models/galleries:
get:
responses:
"200":
description: Response
schema:
items:
$ref: '#/definitions/config.Gallery'
type: array
summary: List all Galleries
tags:
- models
/models/jobs:
get:
responses:
"200":
description: Response
schema:
additionalProperties:
$ref: '#/definitions/galleryop.OpStatus'
type: object
summary: Returns all the jobs status progress
tags:
- models
/models/jobs/{uuid}:
get:
responses:
"200":
description: Response
schema:
$ref: '#/definitions/galleryop.OpStatus'
summary: Returns the job status
tags:
- models
/system:
get:
responses:
"200":
description: Response
schema:
$ref: '#/definitions/schema.SystemInformationResponse'
summary: Show the LocalAI instance information
tags:
- monitoring
/tokenMetrics:
get:
consumes:
- application/json
produces:
- audio/x-wav
responses:
"200":
description: generated audio/wav file
schema:
type: string
summary: Get TokenMetrics for Active Slot.
tags:
- tokenize
/tts:
post:
consumes:
- application/json
parameters:
- description: query params
in: body
name: request
required: true
schema:
$ref: '#/definitions/schema.TTSRequest'
produces:
- audio/x-wav
responses:
"200":
description: generated audio/wav file
schema:
type: string
summary: Generates audio from the input text.
tags:
- audio
/v1/audio/speech:
post:
consumes:
- application/json
parameters:
- description: query params
in: body
name: request
required: true
schema:
$ref: '#/definitions/schema.TTSRequest'
produces:
- audio/x-wav
responses:
"200":
description: generated audio/wav file
schema:
type: string
summary: Generates audio from the input text.
tags:
- audio
/v1/audio/transcriptions:
post:
consumes:
- multipart/form-data
parameters:
- description: model
in: formData
name: model
required: true
type: string
- description: file
in: formData
name: file
required: true
type: file
responses:
"200":
description: Response
schema:
additionalProperties:
type: string
type: object
summary: Transcribes audio into the input language.
tags:
- audio
/v1/chat/completions:
post:
parameters:
- description: query params
in: body
name: request
required: true
schema:
$ref: '#/definitions/schema.OpenAIRequest'
responses:
"200":
description: Response
schema:
$ref: '#/definitions/schema.OpenAIResponse'
summary: Generate a chat completions for a given prompt and model.
tags:
- inference
/v1/completions:
post:
parameters:
- description: query params
in: body
name: request
required: true
schema:
$ref: '#/definitions/schema.OpenAIRequest'
responses:
"200":
description: Response
schema:
$ref: '#/definitions/schema.OpenAIResponse'
summary: Generate completions for a given prompt and model.
tags:
- inference
/v1/detection:
post:
parameters:
- description: query params
in: body
name: request
required: true
schema:
$ref: '#/definitions/schema.DetectionRequest'
responses:
"200":
description: Response
schema:
$ref: '#/definitions/schema.DetectionResponse'
summary: Detects objects in the input image.
tags:
- detection
/v1/edits:
post:
parameters:
- description: query params
in: body
name: request
required: true
schema:
$ref: '#/definitions/schema.OpenAIRequest'
responses:
"200":
description: Response
schema:
$ref: '#/definitions/schema.OpenAIResponse'
summary: OpenAI edit endpoint
tags:
- inference
/v1/embeddings:
post:
parameters:
- description: query params
in: body
name: request
required: true
schema:
$ref: '#/definitions/schema.OpenAIRequest'
responses:
"200":
description: Response
schema:
$ref: '#/definitions/schema.OpenAIResponse'
summary: Get a vector representation of a given input that can be easily consumed
by machine learning models and algorithms.
tags:
- embeddings
/v1/images/generations:
post:
parameters:
- description: query params
in: body
name: request
required: true
schema:
$ref: '#/definitions/schema.OpenAIRequest'
responses:
"200":
description: Response
schema:
$ref: '#/definitions/schema.OpenAIResponse'
summary: Creates an image given a prompt.
tags:
- images
/v1/images/inpainting:
post:
consumes:
- multipart/form-data
description: Perform image inpainting. Accepts multipart/form-data with `image`
and `mask` files.
parameters:
- description: Model identifier
in: formData
name: model
required: true
type: string
- description: Text prompt guiding the generation
in: formData
name: prompt
required: true
type: string
- description: Number of inference steps (default 25)
in: formData
name: steps
type: integer
- description: Original image file
in: formData
name: image
required: true
type: file
- description: Mask image file (white = area to inpaint)
in: formData
name: mask
required: true
type: file
produces:
- application/json
responses:
"200":
description: OK
schema:
$ref: '#/definitions/schema.OpenAIResponse'
"400":
description: Bad Request
schema:
additionalProperties:
type: string
type: object
"500":
description: Internal Server Error
schema:
additionalProperties:
type: string
type: object
summary: Image inpainting
tags:
- images
/v1/mcp/chat/completions:
post:
parameters:
- description: query params
in: body
name: request
required: true
schema:
$ref: '#/definitions/schema.OpenAIRequest'
responses:
"200":
description: Response
schema:
$ref: '#/definitions/schema.OpenAIResponse'
summary: MCP chat completions with automatic tool execution
tags:
- mcp
/v1/messages:
post:
parameters:
- description: query params
in: body
name: request
required: true
schema:
$ref: '#/definitions/schema.AnthropicRequest'
responses:
"200":
description: Response
schema:
$ref: '#/definitions/schema.AnthropicResponse'
summary: Generate a message response for the given messages and model.
tags:
- inference
/v1/models:
get:
responses:
"200":
description: Response
schema:
$ref: '#/definitions/schema.ModelsDataResponse'
summary: List and describe the various models available in the API.
tags:
- models
/v1/rerank:
post:
parameters:
- description: query params
in: body
name: request
required: true
schema:
$ref: '#/definitions/schema.JINARerankRequest'
responses:
"200":
description: Response
schema:
$ref: '#/definitions/schema.JINARerankResponse'
summary: Reranks a list of phrases by relevance to a given text query.
tags:
- rerank
/v1/responses:
post:
parameters:
- description: Request body
in: body
name: request
required: true
schema:
$ref: '#/definitions/schema.OpenResponsesRequest'
responses:
"200":
description: Response
schema:
$ref: '#/definitions/schema.ORResponseResource'
summary: Create a response using the Open Responses API
tags:
- inference
/v1/responses/{id}:
get:
description: Retrieve a response by ID. Can be used for polling background responses
or resuming streaming responses.
parameters:
- description: Response ID
in: path
name: id
required: true
type: string
- description: Set to 'true' to resume streaming
in: query
name: stream
type: string
- description: Sequence number to resume from (for streaming)
in: query
name: starting_after
type: integer
responses:
"200":
description: Response
schema:
$ref: '#/definitions/schema.ORResponseResource'
"400":
description: Bad Request
schema:
additionalProperties: true
type: object
"404":
description: Not Found
schema:
additionalProperties: true
type: object
summary: Get a response by ID
tags:
- inference
/v1/responses/{id}/cancel:
post:
description: Cancel a background response if it's still in progress
parameters:
- description: Response ID
in: path
name: id
required: true
type: string
responses:
"200":
description: Response
schema:
$ref: '#/definitions/schema.ORResponseResource'
"400":
description: Bad Request
schema:
additionalProperties: true
type: object
"404":
description: Not Found
schema:
additionalProperties: true
type: object
summary: Cancel a response
tags:
- inference
/v1/sound-generation:
post:
parameters:
- description: query params
in: body
name: request
required: true
schema:
$ref: '#/definitions/schema.ElevenLabsSoundGenerationRequest'
responses:
"200":
description: Response
schema:
type: string
summary: Generates audio from the input text.
tags:
- audio
/v1/text-to-speech/{voice-id}:
post:
parameters:
      - description: Voice ID
in: path
name: voice-id
required: true
type: string
- description: query params
in: body
name: request
required: true
schema:
$ref: '#/definitions/schema.TTSRequest'
responses:
"200":
description: Response
schema:
type: string
summary: Generates audio from the input text.
tags:
- audio
/v1/tokenMetrics:
get:
consumes:
- application/json
produces:
- audio/x-wav
responses:
"200":
description: generated audio/wav file
schema:
type: string
summary: Get TokenMetrics for Active Slot.
tags:
- tokenize
/v1/tokenize:
post:
parameters:
- description: Request
in: body
name: request
required: true
schema:
$ref: '#/definitions/schema.TokenizeRequest'
responses:
"200":
description: Response
schema:
$ref: '#/definitions/schema.TokenizeResponse'
summary: Tokenize the input.
tags:
- tokenize
/vad:
post:
consumes:
- application/json
parameters:
- description: query params
in: body
name: request
required: true
schema:
$ref: '#/definitions/schema.VADRequest'
responses:
"200":
description: Response
schema:
$ref: '#/definitions/proto.VADResponse'
summary: Detect voice fragments in an audio stream
tags:
- audio
/video:
post:
parameters:
- description: query params
in: body
name: request
required: true
schema:
$ref: '#/definitions/schema.VideoRequest'
responses:
"200":
description: Response
schema:
$ref: '#/definitions/schema.OpenAIResponse'
summary: Creates a video given a prompt.
tags:
- video
/ws/backend-logs/{modelId}:
get:
description: Opens a WebSocket connection for real-time backend log streaming.
Sends an initial batch of existing lines (type "initial"), then streams new
lines as they appear (type "line"). Supports ping/pong keepalive.
parameters:
- description: Model ID
in: path
name: modelId
required: true
type: string
responses: {}
summary: Stream backend logs via WebSocket
tags:
- monitoring
schemes:
- http
- https
securityDefinitions:
BearerAuth:
in: header
name: Authorization
type: apiKey
swagger: "2.0"