Reduce ONNX memory usage (#14285)

This commit is contained in:
Nicolas Mowen 2024-10-11 12:03:47 -06:00 committed by GitHub
parent 6df541e1fd
commit d4b9b5a7dd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -16,7 +16,14 @@ def get_ort_providers(
force_cpu: bool = False, openvino_device: str = "AUTO", requires_fp16: bool = False force_cpu: bool = False, openvino_device: str = "AUTO", requires_fp16: bool = False
) -> tuple[list[str], list[dict[str, any]]]: ) -> tuple[list[str], list[dict[str, any]]]:
if force_cpu: if force_cpu:
return (["CPUExecutionProvider"], [{}]) return (
["CPUExecutionProvider"],
[
{
"arena_extend_strategy": "kSameAsRequested",
}
],
)
providers = ort.get_available_providers() providers = ort.get_available_providers()
options = [] options = []
@ -28,6 +35,7 @@ def get_ort_providers(
if not requires_fp16 or os.environ.get("USE_FP_16", "True") != "False": if not requires_fp16 or os.environ.get("USE_FP_16", "True") != "False":
options.append( options.append(
{ {
"arena_extend_strategy": "kSameAsRequested",
"trt_fp16_enable": requires_fp16, "trt_fp16_enable": requires_fp16,
"trt_timing_cache_enable": True, "trt_timing_cache_enable": True,
"trt_engine_cache_enable": True, "trt_engine_cache_enable": True,
@ -41,10 +49,17 @@ def get_ort_providers(
os.makedirs("/config/model_cache/openvino/ort", exist_ok=True) os.makedirs("/config/model_cache/openvino/ort", exist_ok=True)
options.append( options.append(
{ {
"arena_extend_strategy": "kSameAsRequested",
"cache_dir": "/config/model_cache/openvino/ort", "cache_dir": "/config/model_cache/openvino/ort",
"device_type": openvino_device, "device_type": openvino_device,
} }
) )
elif provider == "CPUExecutionProvider":
options.append(
{
"arena_extend_strategy": "kSameAsRequested",
}
)
else: else:
options.append({}) options.append({})