Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 7 additions & 6 deletions embodichain/lab/sim/sensors/contact_sensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,9 +164,9 @@ def _precompute_filter_ids(self, config: ContactSensorCfg):
)
continue
self.item_user_ids = torch.cat(
(self.item_user_ids, rigid_object.get_user_ids())
(self.item_user_ids, rigid_object.get_user_ids().to(self.device))
)
env_ids = torch.tensor(
env_ids = torch.as_tensor(
rigid_object._all_indices, dtype=torch.int32, device=self.device
)
self.item_env_ids = torch.cat((self.item_env_ids, env_ids))
Expand All @@ -192,21 +192,22 @@ def _precompute_filter_ids(self, config: ContactSensorCfg):
f"Link {link_name} not found in articulation {articulation_cfg.uid}."
)
continue
link_user_ids = articulation.get_user_ids(link_name).reshape(-1)
link_user_ids = articulation.get_user_ids(link_name).reshape(-1).to(self.device)
self.item_user_ids = torch.cat((self.item_user_ids, link_user_ids))
env_ids = torch.tensor(
env_ids = torch.as_tensor(
articulation._all_indices, dtype=torch.int32, device=self.device
)
self.item_env_ids = torch.cat((self.item_env_ids, env_ids))
# build user_id to env_id map
max_user_id = int(self.item_user_ids.max().item())
max_user_id = int(self.item_user_ids.max().item()) if len(self.item_user_ids) > 0 else -1
self.item_user_env_ids_map = torch.full(
size=(max_user_id + 1,),
fill_value=-1,
dtype=self.item_user_ids.dtype,
device=self.device,
)
self.item_user_env_ids_map[self.item_user_ids] = self.item_env_ids
if len(self.item_user_ids) > 0:
self.item_user_env_ids_map[self.item_user_ids] = self.item_env_ids

def _build_sensor_from_config(self, config: ContactSensorCfg, device: torch.device):
self._precompute_filter_ids(config)
Expand Down
124 changes: 119 additions & 5 deletions embodichain/lab/sim/sim_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@
from __future__ import annotations

import os
import gc
import sys
import queue
import dexsim
import torch
import numpy as np
Expand Down Expand Up @@ -160,6 +162,8 @@ class SimulationManager:
"""

_instances = {}

_cleanup_queue: queue.Queue = queue.Queue()

SUPPORTED_SENSOR_TYPES = {
"Camera": Camera,
Expand All @@ -184,6 +188,11 @@ def __init__(
# Mark as initialized
self.instance_id = instance_id

# if not sim_config.render_cfg.is_legacy and instance_id > 0:
# logger.log_error(
# f"Ray Tracing rendering backend is only supported for single instance (instance_id=0). "
# )

# Cache paths
self._sim_cache_dir = SIM_CACHE_DIR
self._material_cache_dir = MATERIAL_CACHE_DIR
Expand Down Expand Up @@ -329,11 +338,10 @@ def num_envs(self) -> int:
"""
return len(self._arenas) if len(self._arenas) > 0 else 1

@property
def is_use_gpu_physics(self) -> bool:
    """Check if the physics simulation is using GPU.

    Returns:
        True only when this manager's device type is ``"cuda"`` and the
        dexsim world configuration reports ``enable_gpu_sim``.
    """
    # A plain property (not a cached one) is used so the answer is re-read
    # from the world configuration on every access and can never go stale.
    # NOTE(review): confirm whether the `enable_gpu_sim` check should stay;
    # the surrounding change set also contained a device-only variant.
    world_config = dexsim.get_world_config()
    return self.device.type == "cuda" and world_config.enable_gpu_sim

@cached_property
def is_rt_enabled(self) -> bool:
Expand Down Expand Up @@ -1783,15 +1791,121 @@ def export_usd(self, fpath: str) -> bool:
logger.log_error(f"Failed to export simulation scene to USD: {e}")
return False

@staticmethod
def wait_scene_destruction(timeout_ms: int = 10000) -> None:
    """Wait for the underlying C++ scenes (dexsim.World) to destruct completely.

    Runs a garbage collection pass, then polls ``dexsim.get_world_num()``
    until it reports zero or the timeout elapses. A progress message is
    logged roughly every half second while scenes remain, and a warning is
    logged if scenes are still alive when the timeout expires.

    Args:
        timeout_ms: Maximum time to wait, in milliseconds. A value of zero
            or less performs no waiting at all.
    """
    import gc
    import time

    import dexsim
    from embodichain.utils import logger

    poll_interval_s = 0.01
    log_interval_s = 0.5

    # Force garbage collection to break cycle references
    gc.collect()

    # Use a wall-clock deadline instead of counting iterations, so the time
    # spent inside get_world_num() cannot stretch the effective timeout.
    start = time.monotonic()
    deadline = start + timeout_ms / 1000.0
    next_log = start + log_interval_s

    scene_count = dexsim.get_world_num()
    while scene_count > 0:
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        time.sleep(min(poll_interval_s, remaining))
        scene_count = dexsim.get_world_num()
        # Only report progress while there is still something to wait for.
        if scene_count > 0 and time.monotonic() >= next_log:
            logger.log_info(f"Waiting for dexsim.World scenes to destruct. Remaining scenes: {scene_count}")
            next_log += log_interval_s

    if scene_count > 0:
        logger.log_warning(f"Scene destruction wait timeout, {scene_count} C++ scene(s) still alive!")

def destroy(self) -> None:
    """Schedule this manager for deferred destruction.

    The teardown is not performed in place, because local variables left
    over in deep call frames can keep the C++ objects from being
    destructed. Instead the real teardown (``_deferred_destroy``) is
    packaged as a task and put on the class-level cleanup queue, to be
    consumed later from the top level.

    Calling this more than once is harmless: only the first call enqueues
    a task.
    """
    # Guard against double-enqueueing the same teardown.
    if getattr(self, "_is_pending_kill", False):
        return
    self._is_pending_kill = True

    # Hand the actual destruction logic over to the cleanup queue.
    SimulationManager._cleanup_queue.put(self._deferred_destroy)

def _deferred_destroy(self) -> None:
    """Destroy all simulated assets and release resources.

    Steps, in order: disable every gizmo, clean materials, clean the
    environment and quit the world (each only if still present), remove
    this instance from the instance pool, detach the engine handles held
    by the objects in each registry, drop this manager's own engine
    handles, clear the remaining caches, and run a garbage collection.
    """
    import gc

    # Clean up all gizmos before destroying the simulation
    for uid in list(self._gizmos.keys()):
        self.disable_gizmo(uid)

    self.clean_materials()

    # Each handle is cleaned/quit exactly once, and only if it is still set,
    # so the method does not crash when the handles were already dropped.
    if self._env is not None:
        self._env.clean()
    if self._world is not None:
        self._world.quit()

    # Remove this instance from the pool.
    SimulationManager.reset(getattr(self, "instance_id", 0))

    def _sever_wrapper_refs(registry_name: str) -> None:
        """Detach engine handles from every object in a dict registry, then empty it."""
        registry = getattr(self, registry_name, None)
        if not isinstance(registry, dict):
            return
        for obj in registry.values():
            for handle_name in ("_world", "_ps", "_env"):
                if hasattr(obj, handle_name):
                    setattr(obj, handle_name, None)
            if hasattr(obj, "_entities"):
                obj._entities = []
        registry.clear()

    for registry_name in (
        "_gizmos",
        "_markers",
        "_rigid_objects",
        "_rigid_object_groups",
        "_soft_objects",
        "_cloth_objects",
        "_articulations",
        "_robots",
        "_sensors",
        "_lights",
    ):
        _sever_wrapper_refs(registry_name)

    # Explicitly clear Python references to trigger C++ object destructors
    self._ps = None
    self._env = None
    self._world = None
    self._default_plane = None

    self._visual_materials.clear()
    self._texture_cache.clear()
    self._arenas.clear()
    # Kept in addition to the registry pass above, which only handles
    # registries that are dicts.
    self._markers.clear()
    self._gizmos.clear()

    # Collect once, after every reference held by this manager is gone.
    gc.collect()

@staticmethod
def flush_cleanup_queue() -> None:
    """Run all queued destruction tasks and wait for the C++ scenes to go away.

    Meant to be called from a top-level main loop or a pytest fixture. It
    drains the class-level cleanup queue, logs (rather than propagates) any
    exception a task raises, runs a garbage collection, and finally calls
    ``wait_scene_destruction``.
    """
    import gc
    import queue

    pending = SimulationManager._cleanup_queue
    while True:
        # Ask for the next task directly instead of checking empty() first:
        # the queue can change between the check and the get.
        try:
            task = pending.get_nowait()
        except queue.Empty:
            break
        try:
            task()
        except Exception as e:
            # One failing teardown must not prevent the remaining ones.
            from embodichain.utils import logger
            logger.log_error(f"Error during delayed destruction: {e}")

    # With the queue drained, do one full top-level GC to reclaim objects
    # that are dead but have not yet released their native references.
    gc.collect()

    # Only now wait for the C++ scene count to reach zero; the original
    # author's note is that waiting from the top-level stack avoids hanging.
    SimulationManager.wait_scene_destruction()

2 changes: 1 addition & 1 deletion embodichain/lab/sim/utility/sim_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
ArticulationFlag,
LoadOption,
RigidBodyShape,
# SDFConfig,
SDFConfig,
PhysicalAttr,
)
from dexsim.engine import Articulation
Expand Down
19 changes: 18 additions & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def pytest_addoption(parser):
parser.addoption(
"--renderer",
action="store",
default=None,
default="hybrid",
help="Specify the renderer backend: legacy, hybrid, or fast-rt",
)

Expand All @@ -39,3 +39,20 @@ def pytest_configure(config):
from embodichain.lab.sim import cfg

cfg.DEFAULT_RENDERER = renderer

@pytest.fixture(autouse=True, scope="function")
def wait_scene_destruction_after_test():
    """Ensure C++ engine scenes are fully destructed globally after each test exits."""
    yield

    import sys

    # Deferred destruction: dequeue and clean up at the top level.
    # Pytest keeps the traceback of a failed test; dropping these references
    # lets the local variables of temporary objects on that stack be
    # garbage collected.
    sys.last_traceback = None
    sys.last_value = None
    sys.last_type = None
    # Python 3.12+ additionally stores the exception itself here; left
    # untouched when the attribute does not exist.
    if hasattr(sys, "last_exc"):
        sys.last_exc = None

    # Consume every SimulationManager (and related objects) waiting in the
    # cleanup queue in one place.
    from embodichain.lab.sim.sim_manager import SimulationManager

    SimulationManager.flush_cleanup_queue()
4 changes: 4 additions & 0 deletions tests/sim/objects/test_articulation.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,10 @@ def test_get_joint_drive_with_joint_ids(self):
def teardown_method(self):
    """Release the simulation used by the test method that just finished.

    Requests destruction of ``self.sim``, flushes the manager's cleanup
    queue, empties this test instance's attributes and forces a GC pass.
    """
    import gc

    from embodichain.lab.sim import SimulationManager

    self.sim.destroy()
    SimulationManager.flush_cleanup_queue()
    # Drop every attribute held by the test instance (sim included).
    vars(self).clear()
    gc.collect()


class TestArticulationCPU(BaseArticulationTest):
Expand Down
4 changes: 4 additions & 0 deletions tests/sim/objects/test_cloth_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,10 @@ def test_get_current_vertex_positions(self):
def teardown_method(self):
    """Release the simulation used by the test method that just finished.

    Requests destruction of ``self.sim``, flushes the manager's cleanup
    queue, empties this test instance's attributes and forces a GC pass.
    """
    import gc

    from embodichain.lab.sim import SimulationManager

    self.sim.destroy()
    SimulationManager.flush_cleanup_queue()
    # Drop every attribute held by the test instance (sim included).
    vars(self).clear()
    gc.collect()


class TestSoftObjectCUDA(BaseSoftObjectTest):
Expand Down
4 changes: 4 additions & 0 deletions tests/sim/objects/test_light.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,3 +152,7 @@ def test_set_and_get_local_pose_matrix_and_vector(self):
def teardown_method(self):
    """Release the simulation used by the test method that just finished.

    Requests destruction of ``self.sim``, flushes the manager's cleanup
    queue, empties this test instance's attributes and forces a GC pass.
    """
    import gc

    from embodichain.lab.sim import SimulationManager

    self.sim.destroy()
    SimulationManager.flush_cleanup_queue()
    # Drop every attribute held by the test instance (sim included).
    vars(self).clear()
    gc.collect()
6 changes: 5 additions & 1 deletion tests/sim/objects/test_rigid_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def setup_simulation(self, sim_device):
headless=True, sim_device=sim_device, num_envs=NUM_ARENAS
)
self.sim = SimulationManager(config)

self.sim.enable_physics(False)
duck_path = get_data_path(DUCK_PATH)
assert os.path.isfile(duck_path)
table_path = get_data_path(TABLE_PATH)
Expand Down Expand Up @@ -581,6 +581,10 @@ def test_misc_properties(self):
def teardown_method(self):
    """Release the simulation used by the test method that just finished.

    Requests destruction of ``self.sim``, flushes the manager's cleanup
    queue, empties this test instance's attributes and forces a GC pass.
    """
    import gc

    from embodichain.lab.sim import SimulationManager

    self.sim.destroy()
    SimulationManager.flush_cleanup_queue()
    # Drop every attribute held by the test instance (sim included).
    vars(self).clear()
    gc.collect()


class TestRigidObjectCPU(BaseRigidObjectTest):
Expand Down
4 changes: 4 additions & 0 deletions tests/sim/objects/test_rigid_object_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,10 @@ def test_set_visible(self):
def teardown_method(self):
    """Release the simulation used by the test method that just finished.

    Requests destruction of ``self.sim``, flushes the manager's cleanup
    queue, empties this test instance's attributes and forces a GC pass.
    """
    import gc

    from embodichain.lab.sim import SimulationManager

    self.sim.destroy()
    SimulationManager.flush_cleanup_queue()
    # Drop every attribute held by the test instance (sim included).
    vars(self).clear()
    gc.collect()


class TestRigidObjectGroupCPU(BaseRigidObjectGroupTest):
Expand Down
38 changes: 29 additions & 9 deletions tests/sim/objects/test_robot.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,13 @@

# Base test class for CPU and CUDA
class BaseRobotTest:
def setup_simulation(self, sim_device):
@classmethod
def setup_simulation(cls, sim_device):
if hasattr(cls, "sim"):
return
# Set up simulation with specified device (CPU or CUDA)
config = SimulationManagerCfg(headless=True, sim_device=sim_device, num_envs=10)
self.sim = SimulationManager(config)
cls.sim = SimulationManager(config)

cfg = DexforceW1Cfg.from_dict(
{
Expand All @@ -62,11 +65,11 @@ def setup_simulation(self, sim_device):
}
)

self.robot: Robot = self.sim.add_robot(cfg=cfg)
cls.robot: Robot = cls.sim.add_robot(cfg=cfg)

# Initialize GPU physics if needed
if sim_device == "cuda" and getattr(self.sim, "is_use_gpu_physics", False):
self.sim.init_gpu_physics()
if sim_device == "cuda" and getattr(cls.sim, "is_use_gpu_physics", False):
cls.sim.init_gpu_physics()

def test_get_joint_ids(self):
left_joint_ids = self.robot.get_joint_ids("left_arm")
Expand Down Expand Up @@ -286,8 +289,17 @@ def test_robot_cfg_merge(self):
), "Solver config merge failed."

def teardown_method(self):
    """Do nothing after an individual test method.

    ``setup_simulation`` stores the simulation on the class and returns
    early when one already exists, so the same simulation serves every test
    in the class. Destroying it here would leave later tests with a dead
    simulation; it is destroyed once in ``teardown_class`` instead.
    """

@classmethod
def teardown_class(cls):
    """Clean up the class-scoped simulation after all tests of the class ran.

    Does nothing when no simulation was created. Otherwise it requests
    destruction of ``cls.sim``, removes the ``robot`` and ``sim`` class
    attributes, flushes the manager's cleanup queue and forces a GC pass.
    """
    if not hasattr(cls, "sim"):
        return

    import gc

    import embodichain.lab.sim as om

    cls.sim.destroy()

    # Drop the class-level references before flushing: the queued task keeps
    # the manager alive until it runs, and nothing on the class should still
    # point at the simulation or its robot while the flush waits for the
    # scenes to be destructed.
    for attr_name in ("robot", "sim"):
        # Only remove what this class itself owns; an inherited attribute
        # cannot be deleted through the subclass.
        if attr_name in vars(cls):
            delattr(cls, attr_name)

    om.SimulationManager.flush_cleanup_queue()
    gc.collect()

def test_set_physical_visible(self):
self.robot.set_physical_visible(
Expand All @@ -307,13 +319,21 @@ def test_set_physical_visible(self):

class TestRobotCPU(BaseRobotTest):
    """Runs the tests inherited from ``BaseRobotTest`` on the CPU device."""

    @classmethod
    def setup_class(cls):
        """Create the simulation shared by all tests of this class, on CPU."""
        cls.setup_simulation("cpu")

    def setup_method(self):
        """No per-test setup: the simulation is created once in ``setup_class``."""


@pytest.mark.skip(reason="Skipping CUDA tests temporarily")
class TestRobotCUDA(BaseRobotTest):
    """Runs the tests inherited from ``BaseRobotTest`` on the CUDA device."""

    @classmethod
    def setup_class(cls):
        """Create the simulation shared by all tests of this class, on CUDA."""
        cls.setup_simulation("cuda")

    def setup_method(self):
        """No per-test setup: the simulation is created once in ``setup_class``."""


if __name__ == "__main__":
Expand Down
4 changes: 4 additions & 0 deletions tests/sim/objects/test_soft_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,10 @@ def test_remove(self):
def teardown_method(self):
    """Release the simulation used by the test method that just finished.

    Requests destruction of ``self.sim``, flushes the manager's cleanup
    queue, empties this test instance's attributes and forces a GC pass.
    """
    import gc

    from embodichain.lab.sim import SimulationManager

    self.sim.destroy()
    SimulationManager.flush_cleanup_queue()
    # Drop every attribute held by the test instance (sim included).
    vars(self).clear()
    gc.collect()


class TestSoftObjectCUDA(BaseSoftObjectTest):
Expand Down
Loading