diff --git a/detect_objects.py b/detect_objects.py
index 91455e950..cb59bd71d 100644
--- a/detect_objects.py
+++ b/detect_objects.py
@@ -374,6 +374,8 @@ def main():
         if camera_name in CONFIG['cameras']:
             best_object = object_processor.get_best(camera_name, label)
             best_frame = best_object.get('frame', np.zeros((720,1280,3), np.uint8))
+
+            best_frame = cv2.cvtColor(best_frame, cv2.COLOR_YUV2BGR_I420)
             
             crop = bool(request.args.get('crop', 0, type=int))
             if crop:
@@ -384,7 +386,6 @@ def main():
             width = int(height*best_frame.shape[1]/best_frame.shape[0])
 
             best_frame = cv2.resize(best_frame, dsize=(width, height), interpolation=cv2.INTER_AREA)
-            best_frame = cv2.cvtColor(best_frame, cv2.COLOR_RGB2BGR)
             ret, jpg = cv2.imencode('.jpg', best_frame)
             response = make_response(jpg.tobytes())
             response.headers['Content-Type'] = 'image/jpg'
@@ -410,12 +411,13 @@ def main():
             frame = object_processor.get_current_frame(camera_name)
             if frame is None:
                 frame = np.zeros((720,1280,3), np.uint8)
+            
+            frame = cv2.cvtColor(frame, cv2.COLOR_YUV2BGR_I420)
 
             height = int(request.args.get('h', str(frame.shape[0])))
             width = int(height*frame.shape[1]/frame.shape[0])
 
             frame = cv2.resize(frame, dsize=(width, height), interpolation=cv2.INTER_AREA)
-            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
 
             ret, jpg = cv2.imencode('.jpg', frame)
             response = make_response(jpg.tobytes())
@@ -432,10 +434,10 @@ def main():
             if frame is None:
                 frame = np.zeros((height,int(height*16/9),3), np.uint8)
 
-            width = int(height*frame.shape[1]/frame.shape[0])
+            frame = cv2.cvtColor(frame, cv2.COLOR_YUV2BGR_I420)
 
+            width = int(height*frame.shape[1]/frame.shape[0])
             frame = cv2.resize(frame, dsize=(width, height), interpolation=cv2.INTER_LINEAR)
-            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
 
             ret, jpg = cv2.imencode('.jpg', frame)
             yield (b'--frame\r\n'
diff --git a/frigate/object_processing.py b/frigate/object_processing.py
index 6ea58925b..40b26f473 100644
--- a/frigate/object_processing.py
+++ b/frigate/object_processing.py
@@ -260,7 +260,7 @@ class TrackedObjectProcessor(threading.Thread):
         def snapshot(camera, obj):
             if not 'frame' in obj:
                 return
-            best_frame = cv2.cvtColor(obj['frame'], cv2.COLOR_RGB2BGR)
+            best_frame = cv2.cvtColor(obj['frame'], cv2.COLOR_YUV2BGR_I420)
             mqtt_config = self.camera_config[camera].get('mqtt', {'crop_to_region': False})
             if mqtt_config.get('crop_to_region'):
                 region = obj['region']
diff --git a/frigate/util.py b/frigate/util.py
index fc111b422..9040577ae 100755
--- a/frigate/util.py
+++ b/frigate/util.py
@@ -70,6 +70,37 @@ def calculate_region(frame_shape, xmin, ymin, xmax, ymax, multiplier=2):
 
     return (x_offset, y_offset, x_offset+size, y_offset+size)
 
+def yuv_region_2_rgb(frame, region):
+    height = frame.shape[0]//3*2
+    width = frame.shape[1]
+    # make sure the size is a multiple of 4
+    size = (region[3] - region[1])//4*4
+
+    x1 = region[0] 
+    y1 = region[1]
+
+    uv_x1 = x1//2
+    uv_y1 = y1//4
+
+    uv_width = size//2
+    uv_height = size//4
+
+    u_y_start = height
+    v_y_start = height + height//4
+    two_x_offset = width//2
+
+    yuv_cropped_frame = np.zeros((size+size//2, size), np.uint8)
+    # y channel
+    yuv_cropped_frame[0:size, 0:size] = frame[y1:y1+size, x1:x1+size]
+    # u channel
+    yuv_cropped_frame[size:size+uv_height, 0:uv_width] = frame[uv_y1+u_y_start:uv_y1+u_y_start+uv_height, uv_x1:uv_x1+uv_width]
+    yuv_cropped_frame[size:size+uv_height, uv_width:size] = frame[uv_y1+u_y_start:uv_y1+u_y_start+uv_height, uv_x1+two_x_offset:uv_x1+two_x_offset+uv_width]
+    # v channel
+    yuv_cropped_frame[size+uv_height:size+uv_height*2, 0:uv_width] = frame[uv_y1+v_y_start:uv_y1+v_y_start+uv_height, uv_x1:uv_x1+uv_width]
+    yuv_cropped_frame[size+uv_height:size+uv_height*2, uv_width:size] = frame[uv_y1+v_y_start:uv_y1+v_y_start+uv_height, uv_x1+two_x_offset:uv_x1+two_x_offset+uv_width]
+
+    return cv2.cvtColor(yuv_cropped_frame, cv2.COLOR_YUV2RGB_I420)
+
 def intersection(box_a, box_b):
     return (
         max(box_a[0], box_b[0]),
diff --git a/frigate/video.py b/frigate/video.py
index 3dd7d0f63..fe87502a9 100755
--- a/frigate/video.py
+++ b/frigate/video.py
@@ -14,7 +14,7 @@ import json
 import base64
 from typing import Dict, List
 from collections import defaultdict
-from frigate.util import draw_box_with_label, area, calculate_region, clipped, intersection_over_union, intersection, EventsPerSecond, listen, FrameManager, SharedMemoryFrameManager
+from frigate.util import draw_box_with_label, yuv_region_2_rgb, area, calculate_region, clipped, intersection_over_union, intersection, EventsPerSecond, listen, FrameManager, SharedMemoryFrameManager
 from frigate.objects import ObjectTracker
 from frigate.edgetpu import RemoteObjectDetector
 from frigate.motion import MotionDetector
@@ -88,7 +88,7 @@ def filtered(obj, objects_to_track, object_filters, mask=None):
     return False
 
 def create_tensor_input(frame, region):
-    cropped_frame = frame[region[1]:region[3], region[0]:region[2]]
+    cropped_frame = yuv_region_2_rgb(frame, region)
 
     # Resize to 300x300 if needed
     if cropped_frame.shape != (300, 300, 3):
@@ -303,14 +303,11 @@ def process_frames(camera_name: str, frame_queue: mp.Queue, frame_shape,
         # re-compute regions
         regions = [calculate_region(frame_shape, a[0], a[1], a[2], a[3], 1.0)
             for a in combined_regions]
-        
-        if len(regions) > 0:
-            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_YUV2RGB_I420)
 
         # resize regions and detect
         detections = []
         for region in regions:
-            detections.extend(detect(object_detector, rgb_frame, region, objects_to_track, object_filters, mask))
+            detections.extend(detect(object_detector, frame, region, objects_to_track, object_filters, mask))
         
         #########
         # merge objects, check for clipped objects and look again up to 4 times
@@ -343,7 +340,7 @@ def process_frames(camera_name: str, frame_queue: mp.Queue, frame_shape,
                             box[0], box[1],
                             box[2], box[3])
                         
-                        selected_objects.extend(detect(object_detector, rgb_frame, region, objects_to_track, object_filters, mask))
+                        selected_objects.extend(detect(object_detector, frame, region, objects_to_track, object_filters, mask))
 
                         refining = True
                     else: