From c05e260ae9b5ee9c9a0a166c7297604ef831c89c Mon Sep 17 00:00:00 2001 From: Nicolas Mowen Date: Wed, 17 Sep 2025 18:26:32 -0600 Subject: [PATCH] Update ROCm to not hang when running on complex RNN models (#20118) * Update ROCm to not hang when running on complex RNN models * Formatting --- docker/rocm/Dockerfile | 3 +++ frigate/detectors/plugins/onnx.py | 9 +++++++++ 2 files changed, 12 insertions(+) diff --git a/docker/rocm/Dockerfile b/docker/rocm/Dockerfile index cfd8a92e1..86bc1c229 100644 --- a/docker/rocm/Dockerfile +++ b/docker/rocm/Dockerfile @@ -67,6 +67,9 @@ FROM deps-prelim AS rocm-prelim-hsa-override0 ENV HSA_ENABLE_SDMA=0 ENV TF_ROCM_USE_IMMEDIATE_MODE=1 +# avoid kernel crashes +ENV HIP_FORCE_DEV_KERNARG=1 + COPY --from=rocm-dist / / RUN ldconfig diff --git a/frigate/detectors/plugins/onnx.py b/frigate/detectors/plugins/onnx.py index 955a58524..7c7670733 100644 --- a/frigate/detectors/plugins/onnx.py +++ b/frigate/detectors/plugins/onnx.py @@ -51,6 +51,15 @@ class ONNXDetector(DetectionApi): "enable_cuda_graph": True, } + sess_options = None + + if providers[0] == "ROCMExecutionProvider": + # avoid AMD GPU kernel crashes + sess_options = ort.SessionOptions() + sess_options.graph_optimization_level = ( + ort.GraphOptimizationLevel.ORT_DISABLE_ALL + ) + self.model = ort.InferenceSession( path, providers=providers, provider_options=options )