{
  "$schema": "http://json-schema.org/draft-07/schema#",
  "$comment": "Conventions: every 'if' lists the property it tests under 'required' so that a missing property does not vacuously select a branch; '$ref' is wrapped in 'allOf' wherever it has sibling keywords, because draft-07 ignores the siblings of '$ref'.",
  "title": "Inference Configuration",
  "description": "Configuration schema for model inference entrypoint",
  "type": "object",
  "required": ["schema_version", "inference_type", "load_time_parameters"],
  "definitions": {
    "ggufFile": {
      "oneOf": [
        {
          "type": "string",
          "pattern": "^(?:[^/\\n]+/)*[^/\\n]+\\.gguf$",
          "description": "Relative path to .gguf file (no leading slash, subdirectories allowed)"
        },
        {
          "type": "string",
          "pattern": "^/(?:[^/\\n]+/)*[^/\\n]+\\.gguf$",
          "description": "Absolute path to .gguf file (leading slash required)"
        },
        {
          "type": "string",
          "pattern": "^https?://[^\\s]+\\.gguf(\\?[^\\s]*)?$",
          "description": "HTTPS/HTTP URL to .gguf file (query parameters allowed)"
        }
      ]
    },
    "safetensorsFile": {
      "oneOf": [
        {
          "type": "string",
          "pattern": "^(?:[^/\\n]+/)*[^/\\n]+\\.safetensors$",
          "description": "Relative path to .safetensors file (no leading slash, subdirectories allowed)"
        },
        {
          "type": "string",
          "pattern": "^/(?:[^/\\n]+/)*[^/\\n]+\\.safetensors$",
          "description": "Absolute path to .safetensors file (leading slash required)"
        },
        {
          "type": "string",
          "pattern": "^https?://[^\\s]+\\.safetensors(\\?[^\\s]*)?$",
          "description": "HTTPS/HTTP URL to .safetensors file (query parameters allowed)"
        }
      ]
    },
    "samplingParameters": {
      "type": "object",
      "description": "Sampling configuration for text generation",
      "properties": {
        "temperature": {
          "type": "number",
          "minimum": 0.0,
          "maximum": 2.0,
          "description": "Sampling temperature (0.0 = deterministic, higher = more random)"
        },
        "top_p": {
          "type": "number",
          "minimum": 0.0,
          "maximum": 1.0,
          "description": "Nucleus sampling probability (cumulative probability threshold)"
        },
        "min_p": {
          "type": "number",
          "minimum": 0.0,
          "maximum": 1.0,
          "description": "Minimum probability threshold for token consideration"
        },
        "repetition_penalty": {
          "type": "number",
          "minimum": 0.0,
          "maximum": 2.0,
          "description": "Penalty for token repetition (1.0 = no penalty, higher = more penalty)"
        }
      },
      "additionalProperties": false
    }
  },
  "properties": {
    "schema_version": {
      "type": "string",
      "enum": ["1.0.0"],
      "description": "Schema version for compatibility checking and migration"
    },
    "inference_type": {
      "type": "string",
      "description": "Combined inference backend and type in format: backend/type"
    },
    "load_time_parameters": {
      "type": "object",
      "description": "Parameters required at model load time"
    },
    "generation_time_parameters": {
      "type": "object",
      "description": "Optional parameters used during generation"
    }
  },
  "allOf": [
    {
      "if": {
        "properties": {
          "schema_version": {"const": "1.0.0"}
        },
        "required": ["schema_version"]
      },
      "then": {
        "properties": {
          "inference_type": {
            "enum": [
              "llama.cpp/text-to-text",
              "llama.cpp/image-to-text",
              "llama.cpp/lfm2-audio-v1"
            ]
          }
        },
        "allOf": [
          {
            "if": {
              "properties": {
                "inference_type": {"const": "llama.cpp/text-to-text"}
              },
              "required": ["inference_type"]
            },
            "then": {
              "properties": {
                "load_time_parameters": {
                  "required": ["model"],
                  "properties": {
                    "chat_template": {
                      "type": "string",
                      "description": "Optional chat template override"
                    },
                    "model": {
                      "description": "Text model file (local path or URL)",
                      "allOf": [{"$ref": "#/definitions/ggufFile"}]
                    }
                  },
                  "additionalProperties": false
                },
                "generation_time_parameters": {
                  "properties": {
                    "sampling_parameters": {"$ref": "#/definitions/samplingParameters"}
                  },
                  "additionalProperties": false
                }
              }
            }
          },
          {
            "if": {
              "properties": {
                "inference_type": {"const": "llama.cpp/image-to-text"}
              },
              "required": ["inference_type"]
            },
            "then": {
              "properties": {
                "load_time_parameters": {
                  "required": ["model", "multimodal_projector"],
                  "properties": {
                    "chat_template": {
                      "type": "string",
                      "description": "Optional chat template override"
                    },
                    "model": {
                      "description": "Backbone model file (local path or URL)",
                      "allOf": [{"$ref": "#/definitions/ggufFile"}]
                    },
                    "multimodal_projector": {
                      "description": "Multimodal projector file (local path or URL)",
                      "allOf": [{"$ref": "#/definitions/ggufFile"}]
                    }
                  },
                  "additionalProperties": false
                },
                "generation_time_parameters": {
                  "properties": {
                    "sampling_parameters": {"$ref": "#/definitions/samplingParameters"}
                  },
                  "additionalProperties": false
                }
              }
            }
          },
          {
            "if": {
              "properties": {
                "inference_type": {"const": "llama.cpp/lfm2-audio-v1"}
              },
              "required": ["inference_type"]
            },
            "then": {
              "properties": {
                "load_time_parameters": {
                  "required": ["model", "multimodal_projector", "audio_decoder", "audio_tokenizer"],
                  "properties": {
                    "chat_template": {
                      "type": "string",
                      "description": "Optional chat template override"
                    },
                    "model": {
                      "description": "Backbone model file (local path or URL)",
                      "allOf": [{"$ref": "#/definitions/ggufFile"}]
                    },
                    "multimodal_projector": {
                      "description": "Audio encoder file (local path or URL)",
                      "allOf": [{"$ref": "#/definitions/ggufFile"}]
                    },
                    "audio_decoder": {
                      "description": "Audio decoder file (local path or URL)",
                      "allOf": [{"$ref": "#/definitions/ggufFile"}]
                    },
                    "audio_tokenizer": {
                      "description": "Audio tokenizer file (local path or URL)",
                      "allOf": [{"$ref": "#/definitions/safetensorsFile"}]
                    }
                  },
                  "additionalProperties": false
                },
                "generation_time_parameters": {
                  "properties": {
                    "sampling_parameters": {"$ref": "#/definitions/samplingParameters"},
                    "number_of_decoding_threads": {
                      "type": "integer",
                      "description": "Number of threads for audio decoding",
                      "minimum": 1
                    }
                  },
                  "additionalProperties": false
                }
              }
            }
          }
        ]
      }
    }
  ]
}