Refactor face library page (#17424)

* Section faces by event id

* Make score keeping more robust

* layout improvements

* Cleanup dialog

* Fix clicking behavior

* Add view in explore option

* math.round

* Don't require events

* Cleanup

* Remove selection

* Don't require

* Change dialog size with snapshot

* Use filename as key

* fix key

* Rework layout for mobile

* Handle mobile landscape

* Fix train issue

* Match logic

* Move deletion logic

* Fix reprocessing

* Support creating a new face

* Translations

* Do sorting in frontend

* Adjust unknown

* Cleanup

* Set max limit to faces to recognize

* Fix sorting

* Fix
Nicolas Mowen authored 2025-03-28 12:52:12 -06:00, committed by GitHub
parent 37e0b9b904
commit b14abffea3
6 changed files with 325 additions and 148 deletions

View File

@@ -59,7 +59,7 @@ Fine-tune face recognition with these optional parameters:
### Recognition
- `model_size`: Which model size to use, options are `small` or `large`
- `unknown_score`: Min score to mark a person as a potential match; matches below this score will be marked as unknown.
- `unknown_score`: Min score to mark a person as a potential match; matches at or below this score will be marked as unknown.
- Default: `0.8`.
- `recognition_threshold`: Recognition confidence score required to add the face to the object as a sub label.
- Default: `0.9`.
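For reference, these two settings partition a recognition score into three outcomes: unknown, potential match, and confirmed match. A minimal sketch, assuming scores in [0, 1] and using the documented defaults (the helper is illustrative, not Frigate's implementation):

```python
def classify(score: float, unknown_score: float = 0.8,
             recognition_threshold: float = 0.9) -> str:
    """Illustrative only: how the two thresholds partition a score."""
    if score <= unknown_score:
        return "unknown"      # at or below unknown_score
    if score >= recognition_threshold:
        return "match"        # confident enough to set the sub label
    return "potential"        # shown as a potential match, not applied

classify(0.85)  # -> "potential"
```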

View File

@@ -41,13 +41,9 @@ def get_faces():
face_dict[name] = []
for file in sorted(
filter(
lambda f: (f.lower().endswith((".webp", ".png", ".jpg", ".jpeg"))),
os.listdir(face_dir),
),
key=lambda f: os.path.getctime(os.path.join(face_dir, f)),
reverse=True,
for file in filter(
lambda f: (f.lower().endswith((".webp", ".png", ".jpg", ".jpeg"))),
os.listdir(face_dir),
):
face_dict[name].append(file)
@@ -125,10 +121,13 @@ def train_face(request: Request, name: str, body: dict = None):
sanitized_name = sanitize_filename(name)
rand_id = "".join(random.choices(string.ascii_lowercase + string.digits, k=6))
new_name = f"{sanitized_name}-{rand_id}.webp"
new_file = os.path.join(FACE_DIR, f"{sanitized_name}/{new_name}")
new_file_folder = os.path.join(FACE_DIR, f"{sanitized_name}")
if not os.path.exists(new_file_folder):
os.mkdir(new_file_folder)
if training_file_name:
shutil.move(training_file, new_file)
shutil.move(training_file, os.path.join(new_file_folder, new_name))
else:
try:
event: Event = Event.get(Event.id == event_id)
@@ -155,7 +154,7 @@
x2 = x1 + int(face_box[2] * detect_config.width) - 4
y2 = y1 + int(face_box[3] * detect_config.height) - 4
face = snapshot[y1:y2, x1:x2]
cv2.imwrite(new_file, face)
cv2.imwrite(os.path.join(new_file_folder, new_name), face)
context: EmbeddingsContext = request.app.embeddings
context.clear_face_classifier()
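A note on the crop arithmetic in this hunk: `face_box` evidently holds normalized `(x, y, w, h)` values relative to the detect resolution, scaled into pixels with a 4 px border trimmed. A minimal sketch of the same math, taking `x1`/`y1` as given since their computation falls outside the shown hunk:

```python
import numpy as np

def crop_face(snapshot: np.ndarray, x1: int, y1: int,
              face_box, detect_w: int, detect_h: int) -> np.ndarray:
    # Mirror of the x2/y2 lines above: scale the normalized width/height
    # into pixels, trimming a 4 px border.
    x2 = x1 + int(face_box[2] * detect_w) - 4
    y2 = y1 + int(face_box[3] * detect_h) - 4
    return snapshot[y1:y2, x1:x2]  # numpy indexing is rows (y) first
```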

View File

@@ -33,7 +33,8 @@ logger = logging.getLogger(__name__)
MAX_DETECTION_HEIGHT = 1080
MIN_MATCHING_FACES = 2
MAX_FACES_ATTEMPTS_AFTER_REC = 6
MAX_FACE_ATTEMPTS = 12
class FaceRealTimeProcessor(RealTimeProcessorApi):
@@ -170,6 +171,23 @@ class FaceRealTimeProcessor(RealTimeProcessorApi):
)
return
# check if we have hit limits
if (
id in self.person_face_history
and len(self.person_face_history[id]) >= MAX_FACES_ATTEMPTS_AFTER_REC
):
# if we are at max attempts after rec and we have a rec
if obj_data.get("sub_label"):
logger.debug(
"Not processing due to hitting max attempts after true recognition."
)
return
# if we don't have a rec and are at max attempts
if len(self.person_face_history[id]) >= MAX_FACE_ATTEMPTS:
logger.debug("Not processing due to hitting max rec attempts.")
return
face: Optional[dict[str, any]] = None
if self.requires_face_detection:
@@ -241,7 +259,7 @@ class FaceRealTimeProcessor(RealTimeProcessorApi):
sub_label, score = res
if score < self.face_config.unknown_score:
if score <= self.face_config.unknown_score:
sub_label = "unknown"
logger.debug(
@@ -255,13 +273,23 @@ class FaceRealTimeProcessor(RealTimeProcessorApi):
os.makedirs(folder, exist_ok=True)
cv2.imwrite(file, face_frame)
files = sorted(
filter(lambda f: (f.endswith(".webp")), os.listdir(folder)),
key=lambda f: os.path.getctime(os.path.join(folder, f)),
reverse=True,
)
# delete oldest face image if maximum is reached
if len(files) > self.config.face_recognition.save_attempts:
os.unlink(os.path.join(folder, files[-1]))
if id not in self.person_face_history:
self.person_face_history[id] = []
self.person_face_history[id].append(
(sub_label, score, face_frame.shape[0] * face_frame.shape[1])
)
(weighted_sub_label, weighted_score) = self.weighted_average_by_area(
(weighted_sub_label, weighted_score) = self.weighted_average(
self.person_face_history[id]
)
@@ -297,6 +325,9 @@ class FaceRealTimeProcessor(RealTimeProcessorApi):
sub_label, score = res
if score <= self.face_config.unknown_score:
sub_label = "unknown"
return {"success": True, "score": score, "face_name": sub_label}
elif topic == EmbeddingsRequestEnum.register_face.value:
rand_id = "".join(
@@ -366,6 +397,9 @@ class FaceRealTimeProcessor(RealTimeProcessorApi):
sub_label, score = res
if score <= self.face_config.unknown_score:
sub_label = "unknown"
if self.config.face_recognition.save_attempts:
# write face to library
folder = os.path.join(FACE_DIR, "train")
@@ -375,38 +409,49 @@
)
shutil.move(current_file, new_file)
files = sorted(
filter(lambda f: (f.endswith(".webp")), os.listdir(folder)),
key=lambda f: os.path.getctime(os.path.join(folder, f)),
reverse=True,
)
# delete oldest face image if maximum is reached
if len(files) > self.config.face_recognition.save_attempts:
os.unlink(os.path.join(folder, files[-1]))
def expire_object(self, object_id: str):
if object_id in self.person_face_history:
self.person_face_history.pop(object_id)
def weighted_average_by_area(self, results_list: list[tuple[str, float, int]]):
score_count = {}
def weighted_average(
self, results_list: list[tuple[str, float, int]], max_weight: int = 4000
):
"""
Calculates a robust weighted average, capping the area weight and giving more weight to higher scores.
Args:
results_list: A list of tuples, where each tuple contains (name, score, face_area).
max_weight: The maximum weight to apply based on face area.
Returns:
A tuple containing the prominent name and its weighted average score, or (None, 0.0) if the list is empty.
"""
if not results_list:
return None, 0.0
weighted_scores = {}
total_face_areas = {}
total_weights = {}
for name, score, face_area in results_list:
if name == "unknown":
continue
if name not in weighted_scores:
score_count[name] = 1
weighted_scores[name] = 0.0
total_face_areas[name] = 0.0
else:
score_count[name] += 1
total_weights[name] = 0.0
weighted_scores[name] += score * face_area
total_face_areas[name] += face_area
# Capped weight based on face area
weight = min(face_area, max_weight)
prominent_name = max(score_count)
# Score-based weighting (higher scores get more weight)
weight *= (score - self.face_config.unknown_score) * 10
weighted_scores[name] += score * weight
total_weights[name] += weight
return prominent_name, weighted_scores[prominent_name] / total_face_areas[
prominent_name
]
if not weighted_scores:
return None, 0.0
best_name = max(weighted_scores, key=weighted_scores.get)
weighted_average = weighted_scores[best_name] / total_weights[best_name]
return best_name, weighted_average
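To make the reworked weighting concrete, here is a standalone sketch of the same math, with `unknown_score` passed explicitly instead of read from `self.face_config` (the sample data in the usage line is made up):

```python
def weighted_average(results_list, unknown_score=0.8, max_weight=4000):
    """Mirror of the method above: cap the area weight, then scale it
    by how far the score sits above unknown_score."""
    weighted_scores, total_weights = {}, {}
    for name, score, face_area in results_list:
        if name == "unknown":
            continue
        weighted_scores.setdefault(name, 0.0)
        total_weights.setdefault(name, 0.0)
        weight = min(face_area, max_weight)      # cap area influence
        weight *= (score - unknown_score) * 10   # favor higher scores
        weighted_scores[name] += score * weight
        total_weights[name] += weight
    if not weighted_scores:
        return None, 0.0
    best = max(weighted_scores, key=weighted_scores.get)
    return best, weighted_scores[best] / total_weights[best]

# Made-up data: two sightings of "jane" outweigh one of "john".
print(weighted_average([("jane", 0.92, 5000), ("jane", 0.85, 1500),
                        ("john", 0.83, 2000)]))  # -> ('jane', ~0.91)
```

Unlike the removed `weighted_average_by_area`, the winner is picked by the largest accumulated weighted score rather than by `max()` over dict keys, and "unknown" results no longer dilute the average.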

View File

@@ -17,6 +17,7 @@
"createFaceLibrary": {
"title": "Create Face Library",
"desc": "Create a new face library",
"new": "Create New Face",
"nextSteps": "It is recommended to use the Train tab to select and train images for each person as they are detected. When building a strong foundation it is strongly recommended to only train on images that are straight-on. Ignore images from cameras that recognize faces from an angle."
},
"train": {

View File

@@ -3,6 +3,7 @@ import TimeAgo from "@/components/dynamic/TimeAgo";
import AddFaceIcon from "@/components/icons/AddFaceIcon";
import ActivityIndicator from "@/components/indicators/activity-indicator";
import CreateFaceWizardDialog from "@/components/overlay/detail/FaceCreateWizardDialog";
import TextEntryDialog from "@/components/overlay/dialog/TextEntryDialog";
import UploadImageDialog from "@/components/overlay/dialog/UploadImageDialog";
import { Button } from "@/components/ui/button";
import {
@@ -32,13 +33,23 @@ import { useFormattedTimestamp } from "@/hooks/use-date-utils";
import useKeyboardListener from "@/hooks/use-keyboard-listener";
import useOptimisticState from "@/hooks/use-optimistic-state";
import { cn } from "@/lib/utils";
import { Event } from "@/types/event";
import { FaceLibraryData, RecognizedFaceData } from "@/types/face";
import { FaceRecognitionConfig, FrigateConfig } from "@/types/frigateConfig";
import { TooltipPortal } from "@radix-ui/react-tooltip";
import axios from "axios";
import { useCallback, useEffect, useMemo, useRef, useState } from "react";
import { isDesktop, isMobile } from "react-device-detect";
import { useTranslation } from "react-i18next";
import { LuImagePlus, LuRefreshCw, LuScanFace, LuTrash2 } from "react-icons/lu";
import {
LuImagePlus,
LuPlus,
LuRefreshCw,
LuScanFace,
LuSearch,
LuTrash2,
} from "react-icons/lu";
import { useNavigate } from "react-router-dom";
import { toast } from "sonner";
import useSWR from "swr";
@@ -391,14 +402,53 @@
onClickFace,
onRefresh,
}: TrainingGridProps) {
const { t } = useTranslation(["views/faceLibrary"]);
const { t } = useTranslation(["views/faceLibrary", "views/explore"]);
const navigate = useNavigate();
// face data
const [selectedEvent, setSelectedEvent] = useState<RecognizedFaceData>();
const faceGroups = useMemo(() => {
const groups: { [eventId: string]: RecognizedFaceData[] } = {};
Array.from(new Set(attemptImages))
.sort()
.reverse()
.forEach((image) => {
const parts = image.split("-");
const data = {
filename: image,
timestamp: Number.parseFloat(parts[0]),
eventId: `${parts[0]}-${parts[1]}`,
name: parts[2],
score: Number.parseFloat(parts[3]),
};
if (groups[data.eventId]) {
groups[data.eventId].push(data);
} else {
groups[data.eventId] = [data];
}
});
return groups;
}, [attemptImages]);
const eventIdsQuery = useMemo(
() => Object.keys(faceGroups).join(","),
[faceGroups],
);
const { data: events } = useSWR<Event[]>([
"event_ids",
{ ids: eventIdsQuery },
]);
// selection
const [selectedEvent, setSelectedEvent] = useState<Event>();
const formattedDate = useFormattedTimestamp(
selectedEvent?.timestamp ?? 0,
selectedEvent?.start_time ?? 0,
config?.ui.time_format == "24hour"
? t("time.formattedTimestampWithYear.24hour", { ns: "common" })
: t("time.formattedTimestampWithYear.12hour", { ns: "common" }),
@@ -415,23 +465,32 @@
}
}}
>
<DialogContent>
<DialogContent
className={cn(
"",
selectedEvent?.has_snapshot && isDesktop && "max-w-7xl",
)}
>
<DialogHeader>
<DialogTitle>{t("details.face")}</DialogTitle>
<DialogDescription>{t("details.faceDesc")}</DialogDescription>
</DialogHeader>
<div className="flex flex-col gap-1.5">
<div className="text-sm text-primary/40">{t("details.person")}</div>
<div className="text-sm capitalize">{selectedEvent?.name}</div>
</div>
<div className="flex flex-col gap-1.5">
<div className="text-sm text-primary/40">
{t("details.confidence")}
</div>
<div className="text-sm capitalize">
{(selectedEvent?.score || 0) * 100}%
{selectedEvent?.sub_label ?? "Unknown"}
</div>
</div>
{selectedEvent?.data.sub_label_score && (
<div className="flex flex-col gap-1.5">
<div className="text-sm text-primary/40">
{t("details.confidence")}
</div>
<div className="text-sm capitalize">
{Math.round((selectedEvent?.data?.sub_label_score || 0) * 100)}%
</div>
</div>
)}
<div className="flex flex-col gap-1.5">
<div className="text-sm text-primary/40">
{t("details.timestamp")}
@@ -440,36 +499,89 @@
</div>
<img
className="w-full"
src={`${baseUrl}api/events/${selectedEvent?.eventId}/thumbnail.jpg`}
src={`${baseUrl}api/events/${selectedEvent?.id}/${selectedEvent?.has_snapshot ? "snapshot.jpg" : "thumbnail.jpg"}`}
/>
</DialogContent>
</Dialog>
<div className="scrollbar-container flex flex-wrap gap-2 overflow-y-scroll p-1">
{attemptImages.map((image: string) => (
<FaceAttempt
key={image}
image={image}
faceNames={faceNames}
recognitionConfig={config.face_recognition}
selected={selectedFaces.includes(image)}
onClick={(data, meta) => {
if (meta) {
onClickFace(image, meta);
} else {
setSelectedEvent(data);
}
}}
onRefresh={onRefresh}
/>
))}
{Object.entries(faceGroups).map(([key, group]) => {
const event = events?.find((ev) => ev.id == key);
return (
<div
key={key}
className={cn(
"flex flex-col gap-2 rounded-lg bg-card p-2",
isMobile && "w-full",
)}
>
<div className="flex flex-row justify-between">
<div className="capitalize">
Person
{event?.sub_label
? `: ${event.sub_label} (${Math.round((event.data.sub_label_score || 0) * 100)}%)`
: ": Unknown"}
</div>
{event && (
<Tooltip>
<TooltipTrigger>
<div
className="cursor-pointer"
onClick={() => {
navigate(`/explore?event_id=${event.id}`);
}}
>
<LuSearch className="size-4 text-muted-foreground" />
</div>
</TooltipTrigger>
<TooltipPortal>
<TooltipContent>
{t("details.item.button.viewInExplore", {
ns: "views/explore",
})}
</TooltipContent>
</TooltipPortal>
</Tooltip>
)}
</div>
<div
className={cn(
"gap-2",
isDesktop
? "flex flex-row flex-wrap"
: "grid grid-cols-2 sm:grid-cols-5 lg:grid-cols-6",
)}
>
{group.map((data: RecognizedFaceData) => (
<FaceAttempt
key={data.filename}
data={data}
faceNames={faceNames}
recognitionConfig={config.face_recognition}
selected={selectedFaces.includes(data.filename)}
onClick={(data, meta) => {
if (meta || selectedFaces.length > 0) {
onClickFace(data.filename, true);
} else if (event) {
setSelectedEvent(event);
}
}}
onRefresh={onRefresh}
/>
))}
</div>
</div>
);
})}
</div>
</>
);
}
type FaceAttemptProps = {
image: string;
data: RecognizedFaceData;
faceNames: string[];
recognitionConfig: FaceRecognitionConfig;
selected: boolean;
@@ -477,7 +589,7 @@ type FaceAttemptProps = {
onRefresh: () => void;
};
function FaceAttempt({
image,
data,
faceNames,
recognitionConfig,
selected,
@@ -485,16 +597,6 @@ function FaceAttempt({
onRefresh,
}: FaceAttemptProps) {
const { t } = useTranslation(["views/faceLibrary"]);
const data = useMemo<RecognizedFaceData>(() => {
const parts = image.split("-");
return {
timestamp: Number.parseFloat(parts[0]),
eventId: `${parts[0]}-${parts[1]}`,
name: parts[2],
score: Number.parseFloat(parts[3]),
};
}, [image]);
const scoreStatus = useMemo(() => {
if (data.score >= recognitionConfig.recognition_threshold) {
@@ -508,6 +610,8 @@ function FaceAttempt({
// interaction
const [newFace, setNewFace] = useState(false);
const imgRef = useRef<HTMLImageElement | null>(null);
useContextMenu(imgRef, () => {
@@ -519,7 +623,9 @@
const onTrainAttempt = useCallback(
(trainName: string) => {
axios
.post(`/faces/train/${trainName}/classify`, { training_file: image })
.post(`/faces/train/${trainName}/classify`, {
training_file: data.filename,
})
.then((resp) => {
if (resp.status == 200) {
toast.success(t("toast.success.trainedFace"), {
@@ -538,12 +644,12 @@
});
});
},
[image, onRefresh, t],
[data, onRefresh, t],
);
const onReprocess = useCallback(() => {
axios
.post(`/faces/reprocess`, { training_file: image })
.post(`/faces/reprocess`, { training_file: data.filename })
.then((resp) => {
if (resp.status == 200) {
toast.success(t("toast.success.updatedFaceScore"), {
@@ -561,79 +667,102 @@
position: "top-center",
});
});
}, [image, onRefresh, t]);
}, [data, onRefresh, t]);
return (
<div
className={cn(
"relative flex cursor-pointer flex-col rounded-lg outline outline-[3px]",
selected
? "shadow-selected outline-selected"
: "outline-transparent duration-500",
)}
>
<div className="relative w-full overflow-hidden rounded-t-lg border border-t-0 *:text-card-foreground">
<img
ref={imgRef}
className="size-44"
src={`${baseUrl}clips/faces/train/${image}`}
onClick={(e) => onClick(data, e.metaKey || e.ctrlKey)}
<>
{newFace && (
<TextEntryDialog
open={true}
setOpen={setNewFace}
title={t("createFaceLibrary.new")}
onSave={(newName) => onTrainAttempt(newName)}
/>
<div className="absolute bottom-1 right-1 z-10 rounded-lg bg-black/50 px-2 py-1 text-xs text-white">
<TimeAgo className="text-white" time={data.timestamp * 1000} dense />
)}
<div
className={cn(
"relative flex cursor-pointer flex-col rounded-lg outline outline-[3px]",
selected
? "shadow-selected outline-selected"
: "outline-transparent duration-500",
)}
>
<div className="relative w-full overflow-hidden rounded-lg *:text-card-foreground">
<img
ref={imgRef}
className={cn("size-44", isMobile && "w-full")}
src={`${baseUrl}clips/faces/train/${data.filename}`}
onClick={(e) => onClick(data, e.metaKey || e.ctrlKey)}
/>
<div className="absolute bottom-1 right-1 z-10 rounded-lg bg-black/50 px-2 py-1 text-xs text-white">
<TimeAgo
className="text-white"
time={data.timestamp * 1000}
dense
/>
</div>
</div>
</div>
<div className="rounded-b-lg bg-card p-2">
<div className="flex w-full flex-row items-center justify-between gap-2">
<div className="flex flex-col items-start text-xs text-primary-variant">
<div className="capitalize">{data.name}</div>
<div
className={cn(
"",
scoreStatus == "match" && "text-success",
scoreStatus == "potential" && "text-orange-400",
scoreStatus == "unknown" && "text-danger",
)}
>
{Math.round(data.score * 100)}%
<div className="p-2">
<div className="flex w-full flex-row items-center justify-between gap-2">
<div className="flex flex-col items-start text-xs text-primary-variant">
<div className="capitalize">{data.name}</div>
<div
className={cn(
"",
scoreStatus == "match" && "text-success",
scoreStatus == "potential" && "text-orange-400",
scoreStatus == "unknown" && "text-danger",
)}
>
{Math.round(data.score * 100)}%
</div>
</div>
<div className="flex flex-row items-start justify-end gap-5 md:gap-4">
<Tooltip>
<DropdownMenu>
<DropdownMenuTrigger asChild>
<TooltipTrigger>
<AddFaceIcon className="size-5 cursor-pointer text-primary-variant hover:text-primary" />
</TooltipTrigger>
</DropdownMenuTrigger>
<DropdownMenuContent>
<DropdownMenuLabel>{t("trainFaceAs")}</DropdownMenuLabel>
<DropdownMenuItem
className="flex cursor-pointer gap-2 capitalize"
onClick={() => setNewFace(true)}
>
<LuPlus />
{t("createFaceLibrary.new")}
</DropdownMenuItem>
{faceNames.map((faceName) => (
<DropdownMenuItem
key={faceName}
className="flex cursor-pointer gap-2 capitalize"
onClick={() => onTrainAttempt(faceName)}
>
<LuScanFace />
{faceName}
</DropdownMenuItem>
))}
</DropdownMenuContent>
</DropdownMenu>
<TooltipContent>{t("trainFace")}</TooltipContent>
</Tooltip>
<Tooltip>
<TooltipTrigger>
<LuRefreshCw
className="size-5 cursor-pointer text-primary-variant hover:text-primary"
onClick={() => onReprocess()}
/>
</TooltipTrigger>
<TooltipContent>{t("button.reprocessFace")}</TooltipContent>
</Tooltip>
</div>
</div>
<div className="flex flex-row items-start justify-end gap-5 md:gap-4">
<Tooltip>
<DropdownMenu>
<DropdownMenuTrigger asChild>
<TooltipTrigger>
<AddFaceIcon className="size-5 cursor-pointer text-primary-variant hover:text-primary" />
</TooltipTrigger>
</DropdownMenuTrigger>
<DropdownMenuContent>
<DropdownMenuLabel>{t("trainFaceAs")}</DropdownMenuLabel>
{faceNames.map((faceName) => (
<DropdownMenuItem
key={faceName}
className="cursor-pointer capitalize"
onClick={() => onTrainAttempt(faceName)}
>
{faceName}
</DropdownMenuItem>
))}
</DropdownMenuContent>
</DropdownMenu>
<TooltipContent>{t("trainFace")}</TooltipContent>
</Tooltip>
<Tooltip>
<TooltipTrigger>
<LuRefreshCw
className="size-5 cursor-pointer text-primary-variant hover:text-primary"
onClick={() => onReprocess()}
/>
</TooltipTrigger>
<TooltipContent>{t("button.reprocessFace")}</TooltipContent>
</Tooltip>
</div>
</div>
</div>
</div>
</>
);
}
@@ -643,6 +772,8 @@ type FaceGridProps = {
onDelete: (name: string, ids: string[]) => void;
};
function FaceGrid({ faceImages, pageToggle, onDelete }: FaceGridProps) {
const sortedFaces = useMemo(() => [...faceImages].sort().reverse(), [faceImages]);
return (
<div
className={cn(
@@ -650,7 +781,7 @@ function FaceGrid({ faceImages, pageToggle, onDelete }: FaceGridProps) {
isDesktop ? "flex flex-wrap" : "grid grid-cols-2",
)}
>
{faceImages.map((image: string) => (
{sortedFaces.map((image: string) => (
<FaceImage
key={image}
name={pageToggle}

View File

@@ -3,6 +3,7 @@ export type FaceLibraryData = {
};
export type RecognizedFaceData = {
filename: string;
timestamp: number;
eventId: string;
name: string;
score: number;
};
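For reference, the training grid derives all of this metadata from the image filename itself. A hedged sketch of the convention, with the field layout inferred from the frontend's `split("-")` rather than from a documented spec:

```python
# Inferred layout: "{frame_time}-{rand_id}-{name}-{score}.webp",
# where the event id is "{frame_time}-{rand_id}".
def parse_attempt_filename(filename: str) -> dict:
    parts = filename.removesuffix(".webp").split("-")
    return {
        "filename": filename,
        "timestamp": float(parts[0]),
        "eventId": f"{parts[0]}-{parts[1]}",
        "name": parts[2],
        "score": float(parts[3]),
    }

parse_attempt_filename("1743187932.123456-ab12cd-jane-0.87.webp")
# -> {'timestamp': 1743187932.123456,
#     'eventId': '1743187932.123456-ab12cd', 'name': 'jane', 'score': 0.87, ...}
```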