feat: improve code quality and fix key section error (#1476)

Zeyi-Lin · claude · web-flow · commit cf1301d244ab · 2026-03-08T13:51:30.000+08:00
* refactor: optimize wandb sync code quality

- Replace fragile class name string matching with isinstance checks
- Simplify redundant if-else branches
- Move numpy import to function top to avoid repeated imports
- Replace print() with swanlab logging system

Co-Authored-By: Claude Opus 4.6 &lt;noreply@anthropic.com&gt;

* refactor: optimize tensorboard sync code quality

- Add proper None checks for global_step parameter
- Fix numpy import to avoid repeated imports
- Fix NCHW format handling to correctly extract first image
- Add error logging for image conversion failures
- Improve error handling with try-except blocks

Co-Authored-By: Claude Opus 4.6 &lt;noreply@anthropic.com&gt;

* remove wandb summary record handling from the local converter

* fix section split: use the part of the key before the last slash as the section name

* add unit test for section split

---------

Co-authored-by: Claude Opus 4.6 &lt;noreply@anthropic.com&gt;
diff --git a/swanlab/converter/wb/wb_local_converter.py b/swanlab/converter/wb/wb_local_converter.py
@@ -351,8 +351,6 @@ def _pre_upload_cb(n):
         progress = None
         task_id = None
         record_count = 0
-        last_summary = {}
-        last_summary_step = 0
 
         if Progress is not None:
             progress = Progress(
@@ -474,20 +472,7 @@ def _pre_upload_cb(n):
                         _log_scalars_direct(scalar_dict, step)
                     if media_dict:
                         swanlab_run.log(media_dict, step=step)
-                    last_summary_step = step
                 del scalar_dict, media_dict
-            elif record_type == "summary":
-                # Accumulate into last_summary; only log once after the loop ends.
-                # wandb writes a summary record after every step, so calling log() here
-                # would result in N redundant log calls (N = number of steps).
-                for item in record_pb.summary.update:
-                    key = item.key or '/'.join(item.nested_key)
-                    if not key or key.startswith('_') or '/' in key:
-                        continue
-                    try:
-                        last_summary[key] = float(item.value_json)
-                    except (ValueError, TypeError):
-                        pass
 
             # GC every GC_INTERVAL records to reduce overhead
             record_count += 1
diff --git a/swanlab/data/run/key.py b/swanlab/data/run/key.py
@@ -165,10 +165,9 @@ def create_column(
         # PUBLIC 可选是否传递名称，如果 key 包含斜杠，则使用斜杠前的部分作为section的名称
         # CUSTOM 时如果 key 包含斜杠，则使用斜杠前的部分作为section的名称，并且将 section_type 设置为 PUBLIC
         if section_type in ["PUBLIC", "CUSTOM"]:
-            split_key = key.split("/")
-            if len(split_key) > 1 and split_key[0]:
-                # 如果key包含斜杠，则使用斜杠前的部分作为section的名称
-                result.section = split_key[0]
+            if "/" in key:
+                # 如果key包含斜杠，则使用最后一个斜杠前的部分作为section的名称
+                result.section = key.rsplit("/", 1)[0]
                 section_type: SectionType = "PUBLIC"
         else:
             result.section = None
diff --git a/swanlab/sync/tensorboard.py b/swanlab/sync/tensorboard.py
@@ -1,5 +1,6 @@
 import functools
 import swanlab
+from swanlab.log import swanlog as swl
 
 
 def _extract_args(args, kwargs, param_names):
@@ -69,7 +70,7 @@ def patched_add_scalar(self, *args, **kwargs):
         )
 
         data = {tag: scalar_value}
-        swanlab.log(data=data, step=int(global_step))
+        swanlab.log(data=data, step=int(global_step) if global_step is not None else None)
 
         return original_add_scalar(self, *args, **kwargs)
 
@@ -83,42 +84,53 @@ def patched_add_scalars(self, *args, **kwargs):
         )
         for dict_tag, value in scalar_value_dict.items():
             data = {f"{tag}/{dict_tag}": value}
-            swanlab.log(data=data, step=int(global_step))
+            swanlab.log(data=data, step=int(global_step) if global_step is not None else None)
         return original_add_scalars(self, *args, **kwargs)
 
     @functools.wraps(original_add_image)
     def patched_add_image(self, *args, **kwargs):
         if types_set is not None and 'image' not in types_set:
             return original_add_image(self, *args, **kwargs)
-        import numpy as np
+
+        try:
+            import numpy as np
+        except ImportError:
+            np = None
+
+        if np is None:
+            swl.warning("numpy not available, skipping image conversion")
+            return original_add_image(self, *args, **kwargs)
 
         tag, img_tensor, global_step, dataformats = _extract_args(
             args, kwargs, ['tag', 'img_tensor', 'global_step', 'dataformats']
         )
-        dataformats = dataformats or 'CHW'  # 设置默认值
-
-        # Convert to numpy array if it's a tensor
-        if hasattr(img_tensor, 'cpu'):
-            img_tensor = img_tensor.cpu()
-        if hasattr(img_tensor, 'numpy'):
-            img_tensor = img_tensor.numpy()
-
-        # Handle different input formats
-        if dataformats == 'CHW':
-            # Convert CHW to HWC for swanlab
-            img_tensor = np.transpose(img_tensor, (1, 2, 0))
-        elif dataformats == 'NCHW':
-            # Take first image if batch dimension exists and convert to HWC
-            img_tensor = np.transpose(img_tensor, (1, 2, 0))
-        elif dataformats == 'HW':
-            # Add channel dimension for grayscale
-            img_tensor = np.expand_dims(img_tensor, axis=-1)
-        elif dataformats == 'HWC':
-            # Already in correct format
-            pass
-
-        data = {tag: swanlab.Image(img_tensor)}
-        swanlab.log(data=data, step=int(global_step))
+        dataformats = dataformats or 'CHW'
+
+        try:
+            # Convert to numpy array if it's a tensor
+            if hasattr(img_tensor, 'cpu'):
+                img_tensor = img_tensor.cpu()
+            if hasattr(img_tensor, 'numpy'):
+                img_tensor = img_tensor.numpy()
+
+            # Handle different input formats
+            if dataformats == 'CHW':
+                # Convert CHW to HWC for swanlab
+                img_tensor = np.transpose(img_tensor, (1, 2, 0))
+            elif dataformats == 'NCHW':
+                # Take first image if batch dimension exists and convert to HWC
+                img_tensor = np.transpose(img_tensor[0], (1, 2, 0))
+            elif dataformats == 'HW':
+                # Add channel dimension for grayscale
+                img_tensor = np.expand_dims(img_tensor, axis=-1)
+            elif dataformats == 'HWC':
+                # Already in correct format
+                pass
+
+            data = {tag: swanlab.Image(img_tensor)}
+            swanlab.log(data=data, step=int(global_step) if global_step is not None else None)
+        except Exception as e:
+            swl.warning(f"Failed to convert image for tag '{tag}': {e}")
 
         return original_add_image(self, *args, **kwargs)
 
@@ -130,7 +142,7 @@ def patched_add_text(self, *args, **kwargs):
             args, kwargs, ['tag', 'text_string', 'global_step']
         )
         data = {tag: swanlab.Text(text_string)}
-        swanlab.log(data=data, step=int(global_step))
+        swanlab.log(data=data, step=int(global_step) if global_step is not None else None)
         return original_add_text(self, *args, **kwargs)
 
     def patched_close(self):
diff --git a/swanlab/sync/wandb.py b/swanlab/sync/wandb.py
@@ -1,4 +1,5 @@
 import swanlab
+from swanlab.log import swanlog as swl
 
 def _extract_args(args, kwargs, param_names):
     """
@@ -62,6 +63,8 @@ def sync_wandb(
     try:
         import wandb
         from wandb import sdk as wandb_sdk
+        from wandb import Image as WandbImage
+        WANDB_IMAGE_AVAILABLE = True
     except ImportError:
         raise ImportError("please install wandb first, command: `pip install wandb`")
     
@@ -94,9 +97,7 @@ def patched_init(*args, **kwargs):
         
         if wandb_run is False:
             kwargs["mode"] = "offline"
-            return original_init(*args, **kwargs)
-        else:
-            return original_init(*args, **kwargs)
+        return original_init(*args, **kwargs)
 
     def patched_config_update(self, *args, **kwargs):
         d, _ = _extract_args(args, kwargs, ['d', 'allow_val_change'])
@@ -107,45 +108,51 @@ def patched_config_update(self, *args, **kwargs):
 
     def patched_log(self, *args, **kwargs):
         data, step, commit, sync = _extract_args(args, kwargs, ['data', 'step', 'commit', 'sync'])
-        
+
         if data is None:
             return original_log(self, *args, **kwargs)
-        
+
+        # Import numpy once
+        try:
+            import numpy as np
+        except ImportError:
+            np = None
+
         # 处理数据，支持 wandb.Image
         processed_data = {}
         for key, value in data.items():
             if isinstance(value, (int, float, bool, str)):
                 # 标量类型直接保留
                 processed_data[key] = value
-            elif hasattr(value, '__class__') and value.__class__.__name__ == 'Image' and hasattr(value, 'image'):
+            elif WANDB_IMAGE_AVAILABLE and isinstance(value, WandbImage):
                 # 检测是否为 wandb.Image
                 try:
+                    if np is None:
+                        swl.warning(f"numpy not available, skipping wandb.Image conversion for key '{key}'")
+                        continue
                     # 获取 wandb.Image 的图像数据
                     if value.image is not None:
-                        # 将 PIL Image 转换为 numpy 数组
-                        import numpy as np
                         img_array = np.array(value.image)
-                        
-                        # 创建 swanlab.Image
                         caption = getattr(value, '_caption', None)
                         swanlab_image = swanlab.Image(img_array, caption=caption)
                         processed_data[key] = swanlab_image
                     else:
                         # 如果 image 为 None，尝试使用 _image
                         if hasattr(value, '_image') and value._image is not None:
-                            import numpy as np
                             img_array = np.array(value._image)
                             caption = getattr(value, '_caption', None)
                             swanlab_image = swanlab.Image(img_array, caption=caption)
                             processed_data[key] = swanlab_image
                 except Exception as e:
                     # 如果转换失败，记录错误但继续处理其他数据
-                    print(f"Warning: Failed to convert wandb.Image for key '{key}': {e}")
+                    swl.warning(f"Failed to convert wandb.Image for key '{key}': {e}")
                     continue
-            elif isinstance(value, list) and value and hasattr(value[0], '__class__') and value[0].__class__.__name__ == 'Image':
+            elif isinstance(value, list) and value and WANDB_IMAGE_AVAILABLE and isinstance(value[0], WandbImage):
                 # 检测是否为 wandb.Image 列表
                 try:
-                    import numpy as np
+                    if np is None:
+                        swl.warning(f"numpy not available, skipping wandb.Image list conversion for key '{key}'")
+                        continue
                     swanlab_images = []
                     for v in value:
                         if hasattr(v, 'image') and v.image is not None:
@@ -160,7 +167,7 @@ def patched_log(self, *args, **kwargs):
                         processed_data[key] = swanlab_images
                 except Exception as e:
                     # 如果转换失败，记录错误但继续处理其他数据
-                    print(f"Warning: Failed to convert wandb.Image list for key '{key}': {e}")
+                    swl.warning(f"Failed to convert wandb.Image list for key '{key}': {e}")
                     continue
         
         if processed_data:
diff --git a/test/unit/data/run/test_key.py b/test/unit/data/run/test_key.py
@@ -8,6 +8,7 @@
 import pytest
 
 from swanlab.data.run.key import SwanLabKey
+from swanlab.data.modules import Line, DataWrapper
 from swanlab.toolkit import ChartType
 from tutils.setup import UseMockRunState
 
@@ -196,3 +197,38 @@ def test_step_not_none(self):
             assert len(key_obj.steps) == 101, "Steps should contain one entry when step is provided"
             assert 1 in key_obj.steps, "Step 1 should be present in the steps"
             assert 101 not in key_obj.steps, "Step 10 should be present in the steps"
+
+    @pytest.mark.parametrize(
+        "key, expected_section",
+        [
+            ("loss/metrics/label1", "loss/metrics"),  # 多个斜杠，取最后一个斜杠前的部分
+            ("train/loss", "train"),                   # 单个斜杠
+            ("accuracy", "default"),                   # 无斜杠，保持默认
+            ("a/b/c/d", "a/b/c"),                     # 更多斜杠
+        ],
+    )
+    def test_section_split(self, key, expected_section):
+        """
+        测试 section 按最后一个斜杠分割的逻辑
+        """
+        with UseMockRunState() as run_state:
+            # 创建 DataWrapper 并解析
+            data = DataWrapper(key=key, data=[Line(0.5)])
+            data.parse(step=0, key=key)
+
+            # 创建 SwanLabKey 对象
+            swanlab_key = SwanLabKey(key=key, media_dir=run_state.store.media_dir, log_dir=run_state.store.log_dir)
+
+            # 创建列信息
+            column_info = swanlab_key.create_column(
+                key=key,
+                name=None,
+                column_class="CUSTOM",
+                column_config=None,
+                section_type="PUBLIC",
+                data=data,
+                num=0
+            )
+
+            # 验证 section_name
+            assert column_info.section_name == expected_section