Optimization of Sync Records: Implementing Pagination and Temporary Table (#6585)

* Update pull_request.yml

* Add a temporary table for deletions and use pagination to process recordings in chunks when deleting recordings whose files are missing

* move RecordingsToDelete class to models.py

* recording cleanup: bugfixes

* Update cleanup.py

* improve log message in cleanup.py

Co-authored-by: Nicolas Mowen <nickmowen213@gmail.com>

---------

Co-authored-by: Nicolas Mowen <nickmowen213@gmail.com>
This commit is contained in:
Sergey Krashevich 2023-06-11 16:01:50 +03:00 committed by GitHub
parent 7459a1cdde
commit 5c27cb7e9b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 58 additions and 22 deletions

View File

@ -65,3 +65,11 @@ class Recordings(Model): # type: ignore[misc]
motion = IntegerField(null=True) motion = IntegerField(null=True)
objects = IntegerField(null=True) objects = IntegerField(null=True)
segment_size = FloatField(default=0) # this should be stored as MB segment_size = FloatField(default=0) # this should be stored as MB
# Used for temporary table in record/cleanup.py
class RecordingsToDelete(Model):  # type: ignore[misc]
    """Scratch model holding the ids of recordings selected for deletion.

    record/cleanup.py creates this table, bulk-inserts recording ids into
    it, and then deletes every ``Recordings`` row whose id appears here.
    """

    # Recording id to delete: a required column, explicitly not a primary key.
    id = CharField(max_length=30, null=False, primary_key=False)

    class Meta:
        # Declare the backing table as temporary.
        temporary = True

View File

@ -8,11 +8,11 @@ import threading
from multiprocessing.synchronize import Event as MpEvent from multiprocessing.synchronize import Event as MpEvent
from pathlib import Path from pathlib import Path
from peewee import DoesNotExist from peewee import DatabaseError, DoesNotExist, chunked
from frigate.config import FrigateConfig, RetainModeEnum from frigate.config import FrigateConfig, RetainModeEnum
from frigate.const import RECORD_DIR, SECONDS_IN_DAY from frigate.const import RECORD_DIR, SECONDS_IN_DAY
from frigate.models import Event, Recordings, Timeline from frigate.models import Event, Recordings, RecordingsToDelete, Timeline
from frigate.record.util import remove_empty_directories from frigate.record.util import remove_empty_directories
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -217,34 +217,62 @@ class RecordingCleanup(threading.Thread):
logger.debug("Start sync recordings.") logger.debug("Start sync recordings.")
# get all recordings in the db # get all recordings in the db
recordings: Recordings = Recordings.select() recordings = Recordings.select(Recordings.id, Recordings.path)
# get all recordings files on disk # get all recordings files on disk and put them in a set
files_on_disk = [] files_on_disk = {
for root, _, files in os.walk(RECORD_DIR): os.path.join(root, file)
for file in files: for root, _, files in os.walk(RECORD_DIR)
files_on_disk.append(os.path.join(root, file)) for file in files
}
recordings_to_delete = [] # Use pagination to process records in chunks
for recording in recordings.objects().iterator(): page_size = 1000
num_pages = (recordings.count() + page_size - 1) // page_size
recordings_to_delete = set()
for page in range(num_pages):
for recording in recordings.paginate(page, page_size):
if recording.path not in files_on_disk: if recording.path not in files_on_disk:
recordings_to_delete.append(recording.id) recordings_to_delete.add(recording.id)
# convert back to list of dictionaries for insertion
recordings_to_delete = [
{"id": recording_id} for recording_id in recordings_to_delete
]
if len(recordings_to_delete) / recordings.count() > 0.5:
logger.debug(
f"Deleting {(len(recordings_to_delete) / recordings.count()):2f}% of recordings could be due to configuration error. Aborting..."
)
return
logger.debug( logger.debug(
f"Deleting {len(recordings_to_delete)} recordings with missing files" f"Deleting {len(recordings_to_delete)} recordings with missing files"
) )
# delete up to 100,000 at a time
max_deletes = 100000 # create a temporary table for deletion
for i in range(0, len(recordings_to_delete), max_deletes): RecordingsToDelete.create_table(temporary=True)
Recordings.delete().where(
Recordings.id << recordings_to_delete[i : i + max_deletes] # insert ids to the temporary table
).execute() max_inserts = 1000
for batch in chunked(recordings_to_delete, max_inserts):
RecordingsToDelete.insert_many(batch).execute()
try:
# delete records in the main table that exist in the temporary table
query = Recordings.delete().where(
Recordings.id.in_(RecordingsToDelete.select(RecordingsToDelete.id))
)
query.execute()
except DatabaseError as e:
logger.error(f"Database error during delete: {e}")
logger.debug("End sync recordings.") logger.debug("End sync recordings.")
def run(self) -> None: def run(self) -> None:
# on startup sync recordings with disk (disabled due to too much CPU usage) # on startup sync recordings with disk
# self.sync_recordings() self.sync_recordings()
# Expire tmp clips every minute, recordings and clean directories every hour. # Expire tmp clips every minute, recordings and clean directories every hour.
for counter in itertools.cycle(range(self.config.record.expire_interval)): for counter in itertools.cycle(range(self.config.record.expire_interval)):

View File

@ -11,7 +11,7 @@ from playhouse.sqliteq import SqliteQueueDatabase
from setproctitle import setproctitle from setproctitle import setproctitle
from frigate.config import FrigateConfig from frigate.config import FrigateConfig
from frigate.models import Event, Recordings, Timeline from frigate.models import Event, Recordings, RecordingsToDelete, Timeline
from frigate.record.cleanup import RecordingCleanup from frigate.record.cleanup import RecordingCleanup
from frigate.record.maintainer import RecordingMaintainer from frigate.record.maintainer import RecordingMaintainer
from frigate.types import RecordMetricsTypes from frigate.types import RecordMetricsTypes
@ -46,7 +46,7 @@ def manage_recordings(
}, },
timeout=60, timeout=60,
) )
models = [Event, Recordings, Timeline] models = [Event, Recordings, Timeline, RecordingsToDelete]
db.bind(models) db.bind(models)
maintainer = RecordingMaintainer( maintainer = RecordingMaintainer(