@@ -22,6 +22,7 @@ |
22 | 22 | import os |
23 | 23 | import pickle |
24 | 24 | import sqlite3 |
| 25 | +import tempfile |
25 | 26 | import time |
26 | 27 | from pathlib import Path |
27 | 28 | from typing import Hashable, Iterable, Optional, Sequence |
@@ -35,6 +36,7 @@ |
35 | 36 | ) |
36 | 37 | |
37 | 38 | __all__ = [ |
| 39 | + "FileStreamProgramCache", |
38 | 40 | "ProgramCacheResource", |
39 | 41 | "SQLiteProgramCache", |
40 | 42 | "make_program_cache_key", |
@@ -431,3 +433,187 @@ def _enforce_size_cap(self) -> None: |
431 | 433 | return |
432 | 434 | conn.execute("DELETE FROM entries WHERE key = ?", (k,)) |
433 | 435 | total -= sz |
| 436 | + |
| 437 | + |
| 438 | +# --------------------------------------------------------------------------- |
| 439 | +# FileStream backend |
| 440 | +# --------------------------------------------------------------------------- |
| 441 | + |
| 442 | + |
| 443 | +_FILESTREAM_SCHEMA_VERSION = 1 |
| 444 | +_ENTRIES_SUBDIR = "entries" |
| 445 | +_TMP_SUBDIR = "tmp" |
| 446 | +_SCHEMA_FILE = "SCHEMA_VERSION" |
| 447 | + |
| 448 | + |
| 449 | +class FileStreamProgramCache(ProgramCacheResource): |
| 450 | + """Persistent program cache backed by a directory of atomically written files. |
| 451 | + |
| 452 | + Designed for multi-process use: writes stage a temporary file and then |
| 453 | + :func:`os.replace` it into place, so concurrent readers never observe a |
| 454 | + partially-written entry. There is no cross-process LRU tracking; size |
| 455 | + enforcement is best-effort by file mtime. |
| 456 | + |
| 457 | + Parameters |
| 458 | + ---------- |
| 459 | + path: |
| 460 | + Directory that owns the cache. Created if missing. |
| 461 | + max_size_bytes: |
| 462 | + Optional soft cap on total on-disk size. Enforced opportunistically |
| 463 | + on writes; concurrent writers may briefly exceed it. |
| 464 | + """ |
| 465 | + |
| 466 | + def __init__( |
| 467 | + self, |
| 468 | + path: str | os.PathLike, |
| 469 | + *, |
| 470 | + max_size_bytes: Optional[int] = None, |
| 471 | + ) -> None: |
| 472 | + if max_size_bytes is not None and max_size_bytes < 0: |
| 473 | + raise ValueError("max_size_bytes must be non-negative or None") |
| 474 | + self._root = Path(path) |
| 475 | + self._entries = self._root / _ENTRIES_SUBDIR |
| 476 | + self._tmp = self._root / _TMP_SUBDIR |
| 477 | + self._schema_path = self._root / _SCHEMA_FILE |
| 478 | + self._max_size_bytes = max_size_bytes |
| 479 | + self._root.mkdir(parents=True, exist_ok=True) |
| 480 | + self._entries.mkdir(exist_ok=True) |
| 481 | + self._tmp.mkdir(exist_ok=True) |
| 482 | + if not self._schema_path.exists(): |
| 483 | + self._schema_path.write_text(str(_FILESTREAM_SCHEMA_VERSION)) |
| 484 | + |
| 485 | + # -- key-to-path helpers ------------------------------------------------- |
| 486 | + |
| 487 | + def _path_for_key(self, key: object) -> Path: |
| 488 | + k = _as_key_bytes(key) |
| 489 | + hex_ = k.hex() if k else "empty" |
| 490 | + if len(hex_) < 3: |
| 491 | + hex_ = hex_.rjust(3, "0") |
| 492 | + return self._entries / hex_[:2] / hex_[2:] |
| 493 | + |
| 494 | + # -- mapping API --------------------------------------------------------- |
| 495 | + |
| 496 | + def __contains__(self, key: object) -> bool: |
| 497 | + return self._path_for_key(key).exists() |
| 498 | + |
| 499 | + def __getitem__(self, key: object) -> ObjectCode: |
| 500 | + path = self._path_for_key(key) |
| 501 | + try: |
| 502 | + data = path.read_bytes() |
| 503 | + except FileNotFoundError: |
| 504 | + raise KeyError(key) |
| 505 | + k = _as_key_bytes(key) |
| 506 | + try: |
| 507 | + record = pickle.loads(data) |
| 508 | + schema, stored_key, payload, _created_at = record |
| 509 | + if schema != _FILESTREAM_SCHEMA_VERSION: |
| 510 | + raise ValueError(f"unknown schema {schema}") |
| 511 | + if stored_key != k: |
| 512 | + raise ValueError("key mismatch") |
| 513 | + value = pickle.loads(payload) |
| 514 | + except Exception: |
| 515 | + # Corrupt entry -- delete and treat as a miss. |
| 516 | + try: |
| 517 | + path.unlink() |
| 518 | + except FileNotFoundError: |
| 519 | + pass |
| 520 | + raise KeyError(key) |
| 521 | + if not isinstance(value, ObjectCode): |
| 522 | + try: |
| 523 | + path.unlink() |
| 524 | + except FileNotFoundError: |
| 525 | + pass |
| 526 | + raise KeyError(key) |
| 527 | + return value |
| 528 | + |
| 529 | + def __setitem__(self, key: object, value: object) -> None: |
| 530 | + obj = _require_object_code(value) |
| 531 | + k = _as_key_bytes(key) |
| 532 | + payload = pickle.dumps(obj, protocol=_PICKLE_PROTOCOL) |
| 533 | + record = pickle.dumps( |
| 534 | + (_FILESTREAM_SCHEMA_VERSION, k, payload, time.time()), |
| 535 | + protocol=_PICKLE_PROTOCOL, |
| 536 | + ) |
| 537 | + |
| 538 | + target = self._path_for_key(key) |
| 539 | + target.parent.mkdir(parents=True, exist_ok=True) |
| 540 | + |
| 541 | + fd, tmp_name = tempfile.mkstemp(prefix="entry-", dir=self._tmp) |
| 542 | + tmp_path = Path(tmp_name) |
| 543 | + try: |
| 544 | + with os.fdopen(fd, "wb") as fh: |
| 545 | + fh.write(record) |
| 546 | + fh.flush() |
| 547 | + os.fsync(fh.fileno()) |
| 548 | + os.replace(tmp_path, target) |
| 549 | + except BaseException: |
| 550 | + try: |
| 551 | + tmp_path.unlink() |
| 552 | + except FileNotFoundError: |
| 553 | + pass |
| 554 | + raise |
| 555 | + self._enforce_size_cap() |
| 556 | + |
| 557 | + def __delitem__(self, key: object) -> None: |
| 558 | + path = self._path_for_key(key) |
| 559 | + try: |
| 560 | + path.unlink() |
| 561 | + except FileNotFoundError: |
| 562 | + raise KeyError(key) |
| 563 | + |
| 564 | + def __len__(self) -> int: |
| 565 | + count = 0 |
| 566 | + for _ in self._iter_entry_paths(): |
| 567 | + count += 1 |
| 568 | + return count |
| 569 | + |
| 570 | + def clear(self) -> None: |
| 571 | + for path in list(self._iter_entry_paths()): |
| 572 | + try: |
| 573 | + path.unlink() |
| 574 | + except FileNotFoundError: |
| 575 | + pass |
| 576 | + # Remove empty subdirs (best-effort; concurrent writers may re-create). |
| 577 | + if self._entries.exists(): |
| 578 | + for sub in sorted(self._entries.iterdir(), reverse=True): |
| 579 | + if sub.is_dir(): |
| 580 | + try: |
| 581 | + sub.rmdir() |
| 582 | + except OSError: |
| 583 | + pass |
| 584 | + |
| 585 | + # -- internals ----------------------------------------------------------- |
| 586 | + |
| 587 | + def _iter_entry_paths(self) -> Iterable[Path]: |
| 588 | + if not self._entries.exists(): |
| 589 | + return |
| 590 | + for sub in self._entries.iterdir(): |
| 591 | + if not sub.is_dir(): |
| 592 | + continue |
| 593 | + for entry in sub.iterdir(): |
| 594 | + if entry.is_file(): |
| 595 | + yield entry |
| 596 | + |
| 597 | + def _enforce_size_cap(self) -> None: |
| 598 | + if self._max_size_bytes is None: |
| 599 | + return |
| 600 | + entries = [] |
| 601 | + total = 0 |
| 602 | + for path in self._iter_entry_paths(): |
| 603 | + try: |
| 604 | + st = path.stat() |
| 605 | + except FileNotFoundError: |
| 606 | + continue |
| 607 | + entries.append((st.st_mtime, st.st_size, path)) |
| 608 | + total += st.st_size |
| 609 | + if total <= self._max_size_bytes: |
| 610 | + return |
| 611 | + entries.sort() # oldest mtime first |
| 612 | + for _mtime, size, path in entries: |
| 613 | + if total <= self._max_size_bytes: |
| 614 | + return |
| 615 | + try: |
| 616 | + path.unlink() |
| 617 | + total -= size |
| 618 | + except FileNotFoundError: |
| 619 | + pass |
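For reference, the staged-write pattern that `__setitem__` above relies on can be exercised with nothing but the standard library. The sketch below is a simplified, standalone illustration of the same mkstemp / fsync / os.replace sequence; the `atomic_write` helper and the demo paths are hypothetical and not part of this module.

import os
import tempfile
from pathlib import Path

def atomic_write(target: Path, data: bytes) -> None:
    """Publish `data` at `target` so readers only ever see a complete file."""
    target.parent.mkdir(parents=True, exist_ok=True)
    # Stage the bytes in a sibling temp file so the final rename stays on one filesystem.
    fd, tmp_name = tempfile.mkstemp(prefix="entry-", dir=target.parent)
    tmp_path = Path(tmp_name)
    try:
        with os.fdopen(fd, "wb") as fh:
            fh.write(data)
            fh.flush()
            os.fsync(fh.fileno())  # push the bytes to disk before publishing
        os.replace(tmp_path, target)  # atomic swap on POSIX and Windows
    except BaseException:
        tmp_path.unlink(missing_ok=True)  # drop the orphaned temp file
        raise

atomic_write(Path("demo-cache") / "ab" / "cdef", b"payload")

Because os.replace swaps the directory entry in a single step, a concurrent reader finds either the previous version of the entry, no entry at all, or the complete new one, never a half-written file.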
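The "best-effort by file mtime" size cap can be illustrated the same way. The helper below is a hypothetical standalone sketch of the policy that the new `_enforce_size_cap` implements, deleting the oldest files first until the directory fits under the cap; it is not the module's own method.

from pathlib import Path

def trim_directory(root: Path, max_size_bytes: int) -> None:
    """Delete the oldest files under `root` until the total size fits the cap."""
    files = []
    total = 0
    for path in root.rglob("*"):
        if not path.is_file():
            continue
        try:
            st = path.stat()
        except FileNotFoundError:
            continue  # entry vanished between listing and stat
        files.append((st.st_mtime, st.st_size, path))
        total += st.st_size
    files.sort()  # oldest modification time first
    for _mtime, size, path in files:
        if total <= max_size_bytes:
            break
        path.unlink(missing_ok=True)  # another process may already have removed it
        total -= size

trim_directory(Path("demo-cache"), max_size_bytes=64 * 1024 * 1024)

Because the check runs only on writes and every writer scans independently, several processes racing to insert can momentarily push the directory past the cap, which is what the docstring's soft-cap wording refers to.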