Skip to content

Commit 3b8922f

Browse files
matteius and claude committed
Loop retention cleanup until backlog is cleared, fix orphan detection perf (fixes #369)
Retention cleanup previously deleted at most 100 recordings per stream per 15-minute cycle. On large setups (44+ cameras, hundreds of thousands of files) the backlog grew faster than cleanup could process, causing silent storage leaks.

- Loop time-based, quota, and tiered retention until all expired recordings are deleted (batches of 100), with a 300-second time budget per cycle
- Rewrite get_orphaned_db_entries() to release db_mutex before access() calls, preventing O(N) filesystem I/O from blocking all DB writers
- Increase MAX_ORPHANED_BATCH from 100 to 500
- Warn when stream count hits MAX_STREAMS_BATCH (64)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 2a9d4b0 commit 3b8922f

2 files changed

Lines changed: 229 additions & 131 deletions

File tree

src/database/db_recordings.c

Lines changed: 95 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -1657,19 +1657,23 @@ int get_recordings_for_quota_enforcement(const char *stream_name,
16571657
* Get orphaned recording entries (DB entries without files on disk)
16581658
* Protected recordings are excluded (never considered orphaned).
16591659
*
1660+
* Uses a two-phase approach to avoid holding db_mutex during filesystem I/O:
1661+
* Phase 1: Under lock — get total count + fetch a limited batch of candidates
1662+
* Phase 2: Without lock — check access() on each candidate path
1663+
*
16601664
* @param recordings Array to fill with recording metadata
16611665
* @param max_count Maximum number of recordings to return
1662-
* @param total_checked If non-NULL, receives the total number of recordings checked.
1663-
* The caller can use this together with the return value to
1664-
* compute an orphan ratio for safety thresholding.
1666+
* @param total_checked If non-NULL, receives the total number of unprotected
1667+
* complete recordings in the database. The caller can
1668+
* use this together with the return value to compute an
1669+
* orphan ratio for safety thresholding.
16651670
* @return Number of orphaned recordings found, or -1 on error
16661671
*/
16671672
int get_orphaned_db_entries(recording_metadata_t *recordings, int max_count,
16681673
int *total_checked) {
16691674
int rc;
16701675
sqlite3_stmt *stmt;
16711676
int count = 0;
1672-
int checked = 0;
16731677

16741678
sqlite3 *db = get_db_handle();
16751679
pthread_mutex_t *db_mutex = get_db_mutex();
@@ -1684,87 +1688,124 @@ int get_orphaned_db_entries(recording_metadata_t *recordings, int max_count,
16841688
return -1;
16851689
}
16861690

1691+
// Phase 1a: Get total count of eligible recordings (fast, index-only)
1692+
int total_count = 0;
16871693
pthread_mutex_lock(db_mutex);
16881694

1689-
// Get all unprotected complete recordings and check if files exist.
1690-
// Protected recordings are never considered orphaned — they must be
1691-
// explicitly unprotected before any automatic cleanup can touch them.
1695+
const char *count_sql =
1696+
"SELECT COUNT(*) FROM recordings "
1697+
"WHERE is_complete = 1 AND protected = 0;";
1698+
1699+
rc = sqlite3_prepare_v2(db, count_sql, -1, &stmt, NULL);
1700+
if (rc != SQLITE_OK) {
1701+
log_error("Failed to prepare orphan count query: %s", sqlite3_errmsg(db));
1702+
pthread_mutex_unlock(db_mutex);
1703+
return -1;
1704+
}
1705+
if (sqlite3_step(stmt) == SQLITE_ROW) {
1706+
total_count = sqlite3_column_int(stmt, 0);
1707+
}
1708+
sqlite3_finalize(stmt);
1709+
1710+
if (total_checked) {
1711+
*total_checked = total_count;
1712+
}
1713+
1714+
if (total_count == 0) {
1715+
pthread_mutex_unlock(db_mutex);
1716+
return 0;
1717+
}
1718+
1719+
// Phase 1b: Fetch a limited batch of candidates (oldest first)
1720+
// We fetch up to max_count candidates and will check them for orphans.
16921721
const char *sql =
16931722
"SELECT id, stream_name, file_path, start_time, end_time, "
16941723
"size_bytes, width, height, fps, codec, is_complete, trigger_type "
16951724
"FROM recordings "
16961725
"WHERE is_complete = 1 "
16971726
"AND protected = 0 "
1698-
"ORDER BY start_time ASC;";
1727+
"ORDER BY start_time ASC "
1728+
"LIMIT ?;";
16991729

17001730
rc = sqlite3_prepare_v2(db, sql, -1, &stmt, NULL);
17011731
if (rc != SQLITE_OK) {
1702-
log_error("Failed to prepare statement: %s", sqlite3_errmsg(db));
1732+
log_error("Failed to prepare orphan candidate query: %s", sqlite3_errmsg(db));
17031733
pthread_mutex_unlock(db_mutex);
17041734
return -1;
17051735
}
17061736

1707-
// Keep iterating all rows even after max_count orphans are found so that
1708-
// 'checked' reflects the true total — the caller needs this for ratio checks.
1709-
while (sqlite3_step(stmt) == SQLITE_ROW) {
1710-
checked++;
1711-
const char *path = (const char *)sqlite3_column_text(stmt, 2);
1712-
1713-
// Check if file exists
1714-
if (path && count < max_count && access(path, F_OK) != 0) {
1715-
// File doesn't exist - this is an orphaned entry
1716-
recordings[count].id = (uint64_t)sqlite3_column_int64(stmt, 0);
1737+
sqlite3_bind_int(stmt, 1, max_count);
17171738

1718-
const char *stream = (const char *)sqlite3_column_text(stmt, 1);
1719-
if (stream) {
1720-
safe_strcpy(recordings[count].stream_name, stream, sizeof(recordings[count].stream_name), 0);
1721-
} else {
1722-
recordings[count].stream_name[0] = '\0';
1723-
}
1739+
// Read all candidates into the output buffer (reuse it as scratch space)
1740+
int candidates = 0;
1741+
while (sqlite3_step(stmt) == SQLITE_ROW && candidates < max_count) {
1742+
recordings[candidates].id = (uint64_t)sqlite3_column_int64(stmt, 0);
17241743

1725-
safe_strcpy(recordings[count].file_path, path, sizeof(recordings[count].file_path), 0);
1744+
const char *stream = (const char *)sqlite3_column_text(stmt, 1);
1745+
if (stream) {
1746+
safe_strcpy(recordings[candidates].stream_name, stream, sizeof(recordings[candidates].stream_name), 0);
1747+
} else {
1748+
recordings[candidates].stream_name[0] = '\0';
1749+
}
17261750

1727-
recordings[count].start_time = (time_t)sqlite3_column_int64(stmt, 3);
1751+
const char *path = (const char *)sqlite3_column_text(stmt, 2);
1752+
if (path) {
1753+
safe_strcpy(recordings[candidates].file_path, path, sizeof(recordings[candidates].file_path), 0);
1754+
} else {
1755+
recordings[candidates].file_path[0] = '\0';
1756+
}
17281757

1729-
if (sqlite3_column_type(stmt, 4) != SQLITE_NULL) {
1730-
recordings[count].end_time = (time_t)sqlite3_column_int64(stmt, 4);
1731-
} else {
1732-
recordings[count].end_time = 0;
1733-
}
1758+
recordings[candidates].start_time = (time_t)sqlite3_column_int64(stmt, 3);
17341759

1735-
recordings[count].size_bytes = (uint64_t)sqlite3_column_int64(stmt, 5);
1736-
recordings[count].width = sqlite3_column_int(stmt, 6);
1737-
recordings[count].height = sqlite3_column_int(stmt, 7);
1738-
recordings[count].fps = sqlite3_column_int(stmt, 8);
1760+
if (sqlite3_column_type(stmt, 4) != SQLITE_NULL) {
1761+
recordings[candidates].end_time = (time_t)sqlite3_column_int64(stmt, 4);
1762+
} else {
1763+
recordings[candidates].end_time = 0;
1764+
}
17391765

1740-
const char *codec = (const char *)sqlite3_column_text(stmt, 9);
1741-
if (codec) {
1742-
safe_strcpy(recordings[count].codec, codec, sizeof(recordings[count].codec), 0);
1743-
} else {
1744-
recordings[count].codec[0] = '\0';
1745-
}
1766+
recordings[candidates].size_bytes = (uint64_t)sqlite3_column_int64(stmt, 5);
1767+
recordings[candidates].width = sqlite3_column_int(stmt, 6);
1768+
recordings[candidates].height = sqlite3_column_int(stmt, 7);
1769+
recordings[candidates].fps = sqlite3_column_int(stmt, 8);
17461770

1747-
recordings[count].is_complete = sqlite3_column_int(stmt, 10) != 0;
1771+
const char *codec = (const char *)sqlite3_column_text(stmt, 9);
1772+
if (codec) {
1773+
safe_strcpy(recordings[candidates].codec, codec, sizeof(recordings[candidates].codec), 0);
1774+
} else {
1775+
recordings[candidates].codec[0] = '\0';
1776+
}
17481777

1749-
const char *trigger_type = (const char *)sqlite3_column_text(stmt, 11);
1750-
if (trigger_type) {
1751-
safe_strcpy(recordings[count].trigger_type, trigger_type, sizeof(recordings[count].trigger_type), 0);
1752-
} else {
1753-
safe_strcpy(recordings[count].trigger_type, "scheduled", sizeof(recordings[count].trigger_type), 0);
1754-
}
1778+
recordings[candidates].is_complete = sqlite3_column_int(stmt, 10) != 0;
17551779

1756-
count++;
1780+
const char *trigger_type = (const char *)sqlite3_column_text(stmt, 11);
1781+
if (trigger_type) {
1782+
safe_strcpy(recordings[candidates].trigger_type, trigger_type, sizeof(recordings[candidates].trigger_type), 0);
1783+
} else {
1784+
safe_strcpy(recordings[candidates].trigger_type, "scheduled", sizeof(recordings[candidates].trigger_type), 0);
17571785
}
1786+
1787+
candidates++;
17581788
}
17591789

17601790
sqlite3_finalize(stmt);
17611791
pthread_mutex_unlock(db_mutex);
1762-
1763-
if (total_checked) {
1764-
*total_checked = checked;
1792+
// --- db_mutex released: filesystem I/O below does not block DB writers ---
1793+
1794+
// Phase 2: Check which candidates are orphaned (file missing on disk)
1795+
// Compact orphaned entries to the front of the recordings array.
1796+
for (int i = 0; i < candidates; i++) {
1797+
if (recordings[i].file_path[0] != '\0' &&
1798+
access(recordings[i].file_path, F_OK) != 0) {
1799+
// File doesn't exist — orphaned entry
1800+
if (count != i) {
1801+
recordings[count] = recordings[i];
1802+
}
1803+
count++;
1804+
}
17651805
}
17661806

1767-
log_info("Checked %d recordings, found %d orphaned DB entries", checked, count);
1807+
log_info("Orphan check: %d candidates checked, %d orphaned (total recordings: %d)",
1808+
candidates, count, total_count);
17681809
return count;
17691810
}
17701811

0 commit comments

Comments
 (0)