Add config option to select fp16 or quantized jina vision model (#14270)

* Add config option to select fp16 or quantized jina vision model

* requires_fp16 for text and large models only

* fix model type check

* fix cpu

* pass model size
Josh Hawkins 2024-10-10 17:46:21 -05:00 committed by GitHub
parent dd6276e706
commit 54eb03d2a1
7 changed files with 44 additions and 10 deletions

View File

@@ -520,6 +520,8 @@ semantic_search:
reindex: False
# Optional: Set device used to run embeddings, options are AUTO, CPU, GPU. (default: shown below)
device: "AUTO"
# Optional: Set the model size used for embeddings. (default: shown below)
model_size: "small"
# Optional: Configuration for AI generated tracked object descriptions
# NOTE: Semantic Search must be enabled for this to do anything.

View File

@@ -39,6 +39,16 @@ The vision model is able to embed both images and text into the same vector space
The text model is used to embed tracked object descriptions and perform searches against them. Descriptions can be created, viewed, and modified on the Search page when clicking on the gray tracked object chip at the top left of each review item. See [the Generative AI docs](/configuration/genai.md) for more information on how to automatically generate tracked object descriptions.
Differently weighted CLIP models are available and can be selected by setting the `model_size` config option:
```yaml
semantic_search:
enabled: True
model_size: small
```
Using `large` as the model size employs the full Jina model, which is appropriate for high-performance systems running a GPU. The `small` size uses a quantized version of the model that uses much less RAM and runs faster on CPU, with a negligible difference in embedding quality. Most users will not need to change this setting from the default of `small`.
## Usage
1. Semantic search is used in conjunction with the other filters available on the Search page. Use a combination of traditional filtering and semantic search for the best results.

View File

@@ -13,3 +13,6 @@ class SemanticSearchConfig(FrigateBaseModel):
default=False, title="Reindex all detections on startup."
)
device: str = Field(default="AUTO", title="Device Type")
model_size: str = Field(
default="small", title="The size of the embeddings model used."
)
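
For context, here is a minimal standalone sketch of the resulting config model, using plain pydantic in place of `FrigateBaseModel` and assuming the `enabled` field that appears in the YAML example and the frontend `FrigateConfig` type:

```python
from pydantic import BaseModel, Field


class SemanticSearchConfig(BaseModel):
    # `enabled` is assumed from the YAML example and frontend type; it is not shown in this hunk.
    enabled: bool = Field(default=False, title="Enable semantic search.")
    reindex: bool = Field(default=False, title="Reindex all detections on startup.")
    device: str = Field(default="AUTO", title="Device Type")
    model_size: str = Field(
        default="small", title="The size of the embeddings model used."
    )


# The documented YAML maps directly onto the model fields.
cfg = SemanticSearchConfig(enabled=True, model_size="small")
assert cfg.model_size in ("small", "large")  # the two sizes referenced by this change
```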

View File

@@ -68,7 +68,9 @@ class Embeddings:
models = [
"jinaai/jina-clip-v1-text_model_fp16.onnx",
"jinaai/jina-clip-v1-tokenizer",
"jinaai/jina-clip-v1-vision_model_fp16.onnx",
"jinaai/jina-clip-v1-vision_model_fp16.onnx"
if config.model_size == "large"
else "jinaai/jina-clip-v1-vision_model_quantized.onnx",
"jinaai/jina-clip-v1-preprocessor_config.json",
]
@@ -95,19 +97,29 @@
"text_model_fp16.onnx": "https://huggingface.co/jinaai/jina-clip-v1/resolve/main/onnx/text_model_fp16.onnx",
},
embedding_function=jina_text_embedding_function,
model_size=config.model_size,
model_type="text",
requestor=self.requestor,
device="CPU",
)
model_file = (
"vision_model_fp16.onnx"
if self.config.model_size == "large"
else "vision_model_quantized.onnx"
)
download_urls = {
model_file: f"https://huggingface.co/jinaai/jina-clip-v1/resolve/main/onnx/{model_file}",
"preprocessor_config.json": "https://huggingface.co/jinaai/jina-clip-v1/resolve/main/preprocessor_config.json",
}
self.vision_embedding = GenericONNXEmbedding(
model_name="jinaai/jina-clip-v1",
model_file="vision_model_fp16.onnx",
download_urls={
"vision_model_fp16.onnx": "https://huggingface.co/jinaai/jina-clip-v1/resolve/main/onnx/vision_model_fp16.onnx",
"preprocessor_config.json": "https://huggingface.co/jinaai/jina-clip-v1/resolve/main/preprocessor_config.json",
},
model_file=model_file,
download_urls=download_urls,
embedding_function=jina_vision_embedding_function,
model_size=config.model_size,
model_type="vision",
requestor=self.requestor,
device=self.config.device,
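
The vision-model selection above boils down to a small pure mapping from `model_size` to the ONNX file and its download URLs. A sketch under that assumption (the helper name `vision_model_assets` is illustrative, not part of Frigate):

```python
def vision_model_assets(model_size: str) -> dict[str, str]:
    """Pick the Jina CLIP vision model file for the configured size and build its download URLs."""
    model_file = (
        "vision_model_fp16.onnx"
        if model_size == "large"
        else "vision_model_quantized.onnx"
    )
    base = "https://huggingface.co/jinaai/jina-clip-v1/resolve/main"
    return {
        model_file: f"{base}/onnx/{model_file}",
        "preprocessor_config.json": f"{base}/preprocessor_config.json",
    }


# The default "small" size pulls the quantized vision model; "large" pulls the fp16 model.
assert "vision_model_quantized.onnx" in vision_model_assets("small")
assert "vision_model_fp16.onnx" in vision_model_assets("large")
```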

View File

@@ -41,6 +41,7 @@ class GenericONNXEmbedding:
model_file: str,
download_urls: Dict[str, str],
embedding_function: Callable[[List[np.ndarray]], np.ndarray],
model_size: str,
model_type: str,
requestor: InterProcessRequestor,
tokenizer_file: Optional[str] = None,
@@ -54,7 +55,9 @@ class GenericONNXEmbedding:
self.embedding_function = embedding_function
self.model_type = model_type # 'text' or 'vision'
self.providers, self.provider_options = get_ort_providers(
force_cpu=device == "CPU", requires_fp16=True, openvino_device=device
force_cpu=device == "CPU",
requires_fp16=model_size == "large" or self.model_type == "text",
openvino_device=device,
)
self.download_path = os.path.join(MODEL_CACHE_DIR, self.model_name)
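
The new `requires_fp16` expression means only the text model (which this change still downloads exclusively as `text_model_fp16.onnx`) and the large vision model request fp16 support from the ONNX Runtime provider setup; the quantized small vision model does not. A sketch of that decision in isolation (`needs_fp16` is an illustrative name, not a Frigate function):

```python
def needs_fp16(model_type: str, model_size: str) -> bool:
    # Text embeddings only ship as an fp16 ONNX file here, so they always need fp16;
    # the vision model needs it only when the full "large" fp16 variant is selected.
    return model_type == "text" or model_size == "large"


assert needs_fp16("text", "small") is True
assert needs_fp16("vision", "large") is True
assert needs_fp16("vision", "small") is False  # quantized model runs without fp16 support
```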

View File

@@ -207,9 +207,12 @@ export default function Explore() {
const { payload: textTokenizerState } = useModelState(
"jinaai/jina-clip-v1-tokenizer",
);
const { payload: visionModelState } = useModelState(
"jinaai/jina-clip-v1-vision_model_fp16.onnx",
);
const modelFile =
config?.semantic_search.model_size === "large"
? "jinaai/jina-clip-v1-vision_model_fp16.onnx"
: "jinaai/jina-clip-v1-vision_model_quantized.onnx";
const { payload: visionModelState } = useModelState(modelFile);
const { payload: visionFeatureExtractorState } = useModelState(
"jinaai/jina-clip-v1-preprocessor_config.json",
);

View File

@@ -417,6 +417,7 @@ export interface FrigateConfig {
semantic_search: {
enabled: boolean;
model_size: string;
};
snapshots: {