Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 48 additions & 1 deletion simplyblock_core/controllers/snapshot_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,18 @@ def add(lvol_id, snapshot_name, backup=False, lock=True):
logger.error(e)
return False, str(e)

# Reject snapshot creation on an lvol that is being deleted. SPDK's
# blobstore reuses the lvol's metadata for the snapshot's parent
# pointer; if the lvol is mid-delete (async or sync), creating a
# snapshot from it can leave the resulting snapshot's parent_id
# dangling and produce the open_ref/clone-entries inconsistency
# that makes the snapshot undeletable until node restart.
if lvol.status == LVol.STATUS_IN_DELETION:
msg = (f"Cannot create snapshot from lvol {lvol_id}: "
f"lvol is in deletion")
logger.error(msg)
return False, msg

# Block during restart Phase 5
try:
snode = db_controller.get_storage_node_by_id(lvol.node_id)
Expand Down Expand Up @@ -409,9 +421,28 @@ def delete(snapshot_uuid, force_delete=False):
return True
return False

# A clone counts as "still blocking the snapshot" when either it's
# alive (status != IN_DELETION) OR its SPDK-side delete hasn't
# completed yet (deletion_status not set). The previous code
# excluded every IN_DELETION clone unconditionally — that allowed the
# snapshot's hard-delete to fire while SPDK still held the clone's
# bdev open, returning EBUSY (-16) "Cannot remove snapshot because
# it is open" and ultimately producing the open_ref / no-clone-
# entries metadata inconsistency that requires a node restart.
# Now we soft-delete the snapshot in that case; the clone's own
# delete-completion path will re-trigger snapshot_controller.delete
# once SPDK has actually removed the bdev (deletion_status set).
clones = []
for lvol in db_controller.get_lvols(snode.cluster_id):
if lvol.cloned_from_snap and lvol.cloned_from_snap == snapshot_uuid and lvol.status != LVol.STATUS_IN_DELETION:
if not lvol.cloned_from_snap or lvol.cloned_from_snap != snapshot_uuid:
continue
if lvol.status != LVol.STATUS_IN_DELETION:
clones.append(lvol)
continue
# IN_DELETION: only treat as gone if SPDK delete already
# completed for this clone (data-plane removed, just awaiting
# DB cleanup). Otherwise it's still in flight and blocks us.
if not getattr(lvol, "deletion_status", None):
clones.append(lvol)

if len(clones) >= 1:
Expand Down Expand Up @@ -501,6 +532,22 @@ def clone(snapshot_id, clone_name, new_size=0, pvc_name=None, pvc_namespace=None
logger.error(e)
return False, str(e)

# Reject cloning a snapshot that is in pending deletion. If a prior
# clone-create failed (e.g. an SPDK duplicate-name collision on the
# CLN_xxxx bdev) the mgmt layer issues an async snapshot delete; if
# we let a fresh clone slip through that window, SPDK ends up with
# the snapshot's parent metadata partially overwritten by the new
# clone's lineage. The later sync delete then leaves the original
# snapshot with non-zero open_ref but no clone entries, producing
# the "Cannot remove snapshot because it is open" / EBUSY (-16)
# state that requires a node restart to clear.
if snap.deleted or snap.status == SnapShot.STATUS_IN_DELETION:
msg = (f"Cannot clone snapshot {snapshot_id}: "
f"snapshot is in deletion (deleted={snap.deleted}, "
f"status={snap.status})")
logger.error(msg)
return False, msg

try:
pool = db_controller.get_pool_by_id(snap.lvol.pool_uuid)
except KeyError:
Expand Down
62 changes: 56 additions & 6 deletions simplyblock_core/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,40 @@ def sum_records(records):
return total


# Trailing run of digits in a bdev name, preceded by the start of the string,
# a "/" or a "_" (e.g. ``LVS_x/CLN_6900`` -> ``6900``). Trailing whitespace
# after the digits is tolerated.
_BDEV_NAME_NUMERIC_SUFFIX = re.compile(r'(?:^|[/_])(\d+)\s*$')


def _used_bdev_name_numbers(db_controller):
    """Collect the numeric suffixes already used in lvol/snapshot bdev names.

    Scans every lvol's ``lvol_bdev`` and ``top_bdev`` and every snapshot's
    ``snap_bdev`` as returned by ``db_controller`` (e.g.
    ``LVS_x/CLN_6900`` -> 6900, ``LVS_x/SNAP_77047`` -> 77047).

    The clone- and snapshot-create paths build their bdev name as
    ``CLN_<vuid>``/``SNAP_<vuid>`` where ``<vuid>`` comes from the random
    vuid helpers; if a fresh random number lands on an already-used suffix
    SPDK rejects the create with ``lvol with name ... already exists``. The
    mgmt fallout from that failure is what produced the stuck-snapshot
    metadata-inconsistency incident (parent's open_ref non-zero, clone
    entries empty), so callers dedupe against this set up-front.

    Args:
        db_controller: object exposing ``get_lvols()`` and
            ``get_snapshots()``.

    Returns:
        set[int]: every numeric suffix found; names that are missing, empty
        or do not end in a number contribute nothing.
    """
    # NOTE(review): a reviewer suggested simply collecting ``lvol.vuid`` and
    # ``snap.vuid`` instead of parsing bdev names. That is only equivalent if
    # every bdev-name suffix is its record's vuid — TODO confirm before
    # simplifying.

    def _bdev_names():
        # Yield every candidate name (possibly None/empty); lvols first,
        # then snapshots.
        for lvol in db_controller.get_lvols():
            yield getattr(lvol, "lvol_bdev", None)
            yield getattr(lvol, "top_bdev", None)
        for snap in db_controller.get_snapshots():
            yield getattr(snap, "snap_bdev", None)

    used = set()
    for name in _bdev_names():
        if not name:
            continue
        m = _BDEV_NAME_NUMERIC_SUFFIX.search(name)
        if m:
            used.add(int(m.group(1)))
    return used


def get_random_vuid():
from simplyblock_core.db_controller import DBController
db_controller = DBController()
Expand All @@ -377,9 +411,17 @@ def get_random_vuid():
for lvol in db_controller.get_lvols():
used_vuids.append(lvol.vuid)

r = 1 + int(random.random() * 10000)
while r in used_vuids:
r = 1 + int(random.random() * 10000)
used = set(used_vuids) | _used_bdev_name_numbers(db_controller)

# 1M range + dedupe against existing bdev-name numeric suffixes
# (CLN_xxxx / LVOL_xxxx / SNAP_xxxx). In the 10k-only legacy range a
# ~50% birthday-collision probability is already reached at ~120
# names; with ~10k lvols+snaps a fresh draw from the 1M range collides
# only ~1% of the time. Combined with the dedupe set we avoid the
# SPDK ``lvol with name already exists`` rejection that triggered
# the snapshot-delete-in-flight metadata corruption.
r = 1 + int(random.random() * 1000000)
while r in used:
r = 1 + int(random.random() * 1000000)
return r


Expand Down Expand Up @@ -1272,12 +1314,20 @@ def addNvmeDevices(rpc_client, snode, devs):
def get_random_snapshot_vuid():
    """Return a random snapshot vuid in ``1..1000000`` that is not in use.

    A candidate is rejected when it equals the ``vuid`` of any snapshot
    returned by ``DBController.get_snapshots()`` or any number reported by
    ``_used_bdev_name_numbers``.

    Returns:
        int: an unused number between 1 and 1000000 inclusive.
    """
    from simplyblock_core.db_controller import DBController
    db_controller = DBController()
    used_vuids = {snap.vuid for snap in db_controller.get_snapshots()}

    # Same dedupe rationale as ``get_random_vuid``: avoid colliding with
    # any existing CLN_/LVOL_/SNAP_ bdev-name numeric suffix so the
    # SPDK-side create cannot reject with "lvol with name already
    # exists". That rejection in the clone path is what triggered the
    # mgmt-side async snapshot delete + reuse-during-deletion sequence
    # producing stuck snapshots (incident: aws_dual_soak 2026-04-30).
    used = used_vuids | _used_bdev_name_numbers(db_controller)

    # random.random() is in [0, 1), so the draw covers 1..1000000.
    r = 1 + int(random.random() * 1000000)
    while r in used:
        r = 1 + int(random.random() * 1000000)
    return r

Expand Down
Loading
Loading