From 3512b969407fd3fadc3fd615b99985802df45c3a Mon Sep 17 00:00:00 2001 From: Christian Berendt Date: Thu, 2 Jul 2026 14:28:15 +0200 Subject: [PATCH] Create per-device ZTP firmware symlink from sonic_parameters.version The ZTP firmware install now uses a dynamic-url built from (osism/ansible-collection-services#2131), so each switch fetches sonic-broadcom-enterprise-base_.bin during ZTP. Have sync_sonic create that per-device name as a relative symlink to the version-specific image sonic-broadcom-enterprise-base_.bin, driven by the device's sonic_parameters.version custom field. - Add SONIC_FIRMWARE_DIR/PREFIX/SUFFIX/IDENTIFIER settings mirroring the ansible httpd role defaults; SONIC_FIRMWARE_DIR defaults to the same httpd-served directory as the config exports. - Add export_firmware_link(), reconciling the symlink on every run and skipping devices without a configured version. The target image is provided out of band, so a dangling link until it arrives is expected. - Read sonic_parameters.version in sync_sonic and pass it to the exporter per device; config_db.json and the CONFIG DB VERSION are untouched. Assisted-by: Claude:claude-opus-4-8 Signed-off-by: Christian Berendt --- osism/settings.py | 18 ++ osism/tasks/conductor/sonic/exporter.py | 96 ++++++++ osism/tasks/conductor/sonic/sync.py | 61 +++-- .../tasks/conductor/sonic/test_exporter.py | 216 +++++++++++++++++- tests/unit/tasks/conductor/sonic/test_sync.py | 75 +++++- 5 files changed, 437 insertions(+), 29 deletions(-) diff --git a/osism/settings.py b/osism/settings.py index 51b746ff..f6e81cff 100644 --- a/osism/settings.py +++ b/osism/settings.py @@ -70,6 +70,24 @@ def read_secret(secret_name): SONIC_EXPORT_SUFFIX = os.getenv("SONIC_EXPORT_SUFFIX", "_config_db.json") SONIC_EXPORT_IDENTIFIER = os.getenv("SONIC_EXPORT_IDENTIFIER", "serial-number") +# SONiC ZTP firmware configuration +# +# The ZTP firmware install uses a dynamic-url built from +# (see osism/ansible-collection-services#2131), +# so every switch fetches its firmware image via a per-device name during ZTP. +# sync_sonic creates that per-device name as a symlink to the version-specific +# image , driven by the device's +# sonic_parameters.version custom field. The defaults mirror the ansible +# httpd role and place the links in the same httpd-served directory as the +# config exports (SONIC_EXPORT_DIR), which stays the single source of truth +# for the base export path unless a separate firmware directory is set. +SONIC_FIRMWARE_DIR = os.getenv("SONIC_FIRMWARE_DIR", SONIC_EXPORT_DIR) +SONIC_FIRMWARE_PREFIX = os.getenv( + "SONIC_FIRMWARE_PREFIX", "sonic-broadcom-enterprise-base_" +) +SONIC_FIRMWARE_SUFFIX = os.getenv("SONIC_FIRMWARE_SUFFIX", ".bin") +SONIC_FIRMWARE_IDENTIFIER = os.getenv("SONIC_FIRMWARE_IDENTIFIER", "serial-number") + NETBOX_SECONDARIES = ( os.getenv("NETBOX_SECONDARIES", read_secret("NETBOX_SECONDARIES")) or "[]" diff --git a/osism/tasks/conductor/sonic/exporter.py b/osism/tasks/conductor/sonic/exporter.py index f8995880..0ca5ff90 100644 --- a/osism/tasks/conductor/sonic/exporter.py +++ b/osism/tasks/conductor/sonic/exporter.py @@ -285,3 +285,99 @@ def export_config_to_file(device, config): except Exception as e: logger.error(f"Failed to export config for device {device.name}: {e}") raise + + +def export_firmware_link(device, version): + """Create the per-device SONiC ZTP firmware symlink for a device. + + PR osism/ansible-collection-services#2131 switched the ZTP firmware + install to a dynamic-url built from ```` + (identifier ``serial-number`` by default), so during ZTP each switch + fetches ````. This function creates that + per-device name as a *relative* symlink to the version-specific image + ```` in the same firmware directory, using the + version from the device's ``sonic_parameters.version`` custom field. + + The symlink is reconciled on every call: a missing, stale, or wrongly + pointed link is repaired even when nothing else changed. The target image + is provided out of band (e.g. downloaded by metalbox), so the link may be + dangling until that image is present -- creating a symlink does not require + the target to exist. + + Args: + device: NetBox device object + version: SONiC firmware version string (e.g. "4.4.2"), or a falsy + value when the device has no ``sonic_parameters.version`` set. + + Returns: + bool: True if the symlink was created or repointed, False if it + already pointed at the correct target or firmware management was + skipped (no version configured). + + Raises: + Exception: If reconciling the symlink fails, so a failed link is + distinguishable from "no changes". + """ + if not version: + logger.debug( + f"No SONiC firmware version configured for device {device.name}, " + "skipping firmware symlink" + ) + return False + + try: + firmware_dir = settings.SONIC_FIRMWARE_DIR + prefix = settings.SONIC_FIRMWARE_PREFIX + suffix = settings.SONIC_FIRMWARE_SUFFIX + identifier_type = settings.SONIC_FIRMWARE_IDENTIFIER + + os.makedirs(firmware_dir, exist_ok=True) + + # Determine the per-device identifier the switch requests during ZTP + if identifier_type == "serial-number": + identifier = ( + device.serial if hasattr(device, "serial") and device.serial else None + ) + if not identifier: + logger.warning( + f"Serial number not found for device {device.name}, " + "falling back to hostname for firmware symlink" + ) + identifier = get_device_hostname(device) + else: + identifier = get_device_hostname(device) + + link_name = f"{prefix}{identifier}{suffix}" + link_path = os.path.join(firmware_dir, link_name) + # Relative target within firmware_dir, matching the served httpd layout + target_name = f"{prefix}{version}{suffix}" + + if link_name == target_name: + # The device identifier equals the version: a symlink here would + # point to itself and shadow the actual image + logger.debug( + f"Skipping firmware symlink for device {device.name}: " + "link name equals version image name" + ) + return False + + if os.path.islink(link_path) and os.readlink(link_path) == target_name: + logger.debug( + f"Firmware symlink {link_path} already points to {target_name}" + ) + return False + + if os.path.exists(link_path) or os.path.islink(link_path): + logger.debug(f"Removing existing firmware file/symlink: {link_path}") + os.remove(link_path) + + os.symlink(target_name, link_path) + logger.info( + f"Created firmware symlink {link_path} -> {target_name} " + f"for device {device.name}" + ) + return True + + except Exception as e: + logger.error(f"Failed to create firmware symlink for device {device.name}: {e}") + raise diff --git a/osism/tasks/conductor/sonic/sync.py b/osism/tasks/conductor/sonic/sync.py index 8f41c1b7..4d471478 100644 --- a/osism/tasks/conductor/sonic/sync.py +++ b/osism/tasks/conductor/sonic/sync.py @@ -18,10 +18,27 @@ _load_metalbox_devices_cache, ) from .constants import DEFAULT_SONIC_ROLES, SUPPORTED_HWSKUS -from .exporter import save_config_to_netbox, export_config_to_file +from .exporter import ( + save_config_to_netbox, + export_config_to_file, + export_firmware_link, +) from .cache import clear_interface_cache, get_interface_cache_stats +def _get_sonic_parameter(device, key): + """Return ``sonic_parameters[key]`` from a device's custom fields, or None. + + Centralises the nested ``custom_fields -> sonic_parameters -> `` + lookup so the per-field reads in ``sync_sonic`` (hwsku, config_version, + version) stay short and consistent. Missing custom fields, a missing or + empty ``sonic_parameters``, or a missing key all yield None. + """ + custom_fields = getattr(device, "custom_fields", None) or {} + sonic_parameters = custom_fields.get("sonic_parameters") or {} + return sonic_parameters.get(key) + + def sync_sonic(device_name=None, task_id=None, show_diff=True): """Sync SONiC configurations for eligible devices. @@ -156,31 +173,25 @@ def sync_sonic(device_name=None, task_id=None, show_diff=True): # Generate SONIC configuration for each device for device in devices: - # Get HWSKU from sonic_parameters custom field, default to None - hwsku = None - if ( - hasattr(device, "custom_fields") - and "sonic_parameters" in device.custom_fields - and device.custom_fields["sonic_parameters"] - and "hwsku" in device.custom_fields["sonic_parameters"] - ): - hwsku = device.custom_fields["sonic_parameters"]["hwsku"] - - # Get config_version from sonic_parameters custom field, default to None - config_version = None - if ( - hasattr(device, "custom_fields") - and "sonic_parameters" in device.custom_fields - and device.custom_fields["sonic_parameters"] - and "config_version" in device.custom_fields["sonic_parameters"] - ): - config_version = device.custom_fields["sonic_parameters"][ - "config_version" - ] + # Read the per-device SONiC settings from the sonic_parameters + # custom field (all default to None when unset). + hwsku = _get_sonic_parameter(device, "hwsku") + + # config_version overrides the generated CONFIG DB VERSION + config_version = _get_sonic_parameter(device, "config_version") + if config_version: logger.debug( f"Device {device.name} has custom config_version: {config_version}" ) + # version drives the per-device ZTP firmware symlink (see + # export_firmware_link), independent of config_version above. + version = _get_sonic_parameter(device, "version") + if version: + logger.debug( + f"Device {device.name} has SONiC firmware version: {version}" + ) + # Skip devices without HWSKU if not hwsku: logger.debug(f"Skipping device {device.name}: no HWSKU configured") @@ -237,7 +248,11 @@ def sync_sonic(device_name=None, task_id=None, show_diff=True): # Export the generated configuration to local file (only if changed) file_changed = export_config_to_file(device, sonic_config) - if netbox_changed or file_changed: + # Reconcile the per-device ZTP firmware symlink (only when a + # sonic_parameters.version is configured) + firmware_changed = export_firmware_link(device, version) + + if netbox_changed or file_changed or firmware_changed: logger.info(f"Configuration updated for device {device.name}") else: logger.info(f"No configuration changes for device {device.name}") diff --git a/tests/unit/tasks/conductor/sonic/test_exporter.py b/tests/unit/tasks/conductor/sonic/test_exporter.py index 5185a213..0eb495ac 100644 --- a/tests/unit/tasks/conductor/sonic/test_exporter.py +++ b/tests/unit/tasks/conductor/sonic/test_exporter.py @@ -3,14 +3,16 @@ """Unit tests for ``osism.tasks.conductor.sonic.exporter``. Covers ``save_config_to_netbox`` (NetBox local-context persistence with diff -checking + journal logging) and ``export_config_to_file`` (on-disk export with -diff checking, filename selection, and the serial-number→hostname symlink). +checking + journal logging), ``export_config_to_file`` (on-disk export with +diff checking, filename selection, and the serial-number→hostname symlink), and +``export_firmware_link`` (the per-device ZTP firmware symlink pointing at the +version-specific image). ``save_config_to_netbox`` reuses the shared ``mock_nb`` fixture (it patches ``osism.utils.nb``, which ``exporter.utils.nb`` resolves to). The file-export -tests drive a real filesystem under ``tmp_path`` where that reads more clearly -than asserting on mocked ``os`` calls, and fall back to patching ``os.*`` only -to inject the two error paths (symlink / makedirs failures). +and firmware-link tests drive a real filesystem under ``tmp_path`` where that +reads more clearly than asserting on mocked ``os`` calls, and fall back to +patching ``os.*`` only to inject the error paths (symlink / makedirs failures). """ import json @@ -22,6 +24,7 @@ from osism.tasks.conductor.sonic.exporter import ( export_config_to_file, + export_firmware_link, save_config_to_netbox, ) @@ -560,3 +563,206 @@ def test_export_makedirs_failure_raises( export_config_to_file(device, {"PORT": {}}) assert _has_log(loguru_logs, "ERROR", "Failed to export config") + + +# --------------------------------------------------------------------------- +# export_firmware_link +# --------------------------------------------------------------------------- + +_FW_PREFIX = "sonic-broadcom-enterprise-base_" +_FW_SUFFIX = ".bin" + + +@pytest.fixture +def firmware_settings(mocker): + """Patch the four ``SONIC_FIRMWARE_*`` settings the firmware link reads.""" + + def _set( + firmware_dir, + identifier="serial-number", + prefix=_FW_PREFIX, + suffix=_FW_SUFFIX, + ): + mocker.patch( + "osism.tasks.conductor.sonic.exporter.settings.SONIC_FIRMWARE_DIR", + str(firmware_dir), + ) + mocker.patch( + "osism.tasks.conductor.sonic.exporter.settings.SONIC_FIRMWARE_PREFIX", + prefix, + ) + mocker.patch( + "osism.tasks.conductor.sonic.exporter.settings.SONIC_FIRMWARE_SUFFIX", + suffix, + ) + mocker.patch( + "osism.tasks.conductor.sonic.exporter.settings.SONIC_FIRMWARE_IDENTIFIER", + identifier, + ) + + return _set + + +# --- version gating --------------------------------------------------------- + + +@pytest.mark.parametrize("version", ["", None]) +def test_firmware_no_version_skips( + tmp_path, firmware_settings, patch_hostname, loguru_logs, version +): + """No ``sonic_parameters.version`` → firmware management is skipped and no + link is created.""" + firmware_settings(tmp_path) + device = SimpleNamespace(name="sw-1", serial="ABC123") + + assert export_firmware_link(device, version) is False + + assert list(tmp_path.iterdir()) == [] + assert _has_log(loguru_logs, "DEBUG", "No SONiC firmware version configured") + + +# --- identifier / target naming -------------------------------------------- + + +def test_firmware_serial_identifier_links_to_version_image( + tmp_path, firmware_settings, patch_hostname +): + firmware_settings(tmp_path, identifier="serial-number") + device = SimpleNamespace(name="sw-1", serial="ABC123") + + assert export_firmware_link(device, "4.4.2") is True + + link = tmp_path / f"{_FW_PREFIX}ABC123{_FW_SUFFIX}" + assert link.is_symlink() + assert os.readlink(link) == f"{_FW_PREFIX}4.4.2{_FW_SUFFIX}" + + +def test_firmware_hostname_identifier_uses_hostname( + tmp_path, firmware_settings, patch_hostname +): + firmware_settings(tmp_path, identifier="hostname") + device = SimpleNamespace(name="sw-1", serial="ABC123") + + assert export_firmware_link(device, "4.4.2") is True + + link = tmp_path / f"{_FW_PREFIX}sw-1{_FW_SUFFIX}" + assert link.is_symlink() + assert os.readlink(link) == f"{_FW_PREFIX}4.4.2{_FW_SUFFIX}" + + +@pytest.mark.parametrize("serial", ["", None]) +def test_firmware_serial_missing_falls_back_to_hostname( + tmp_path, firmware_settings, patch_hostname, loguru_logs, serial +): + """An empty or absent serial in serial-number mode warns and names the link + by hostname instead.""" + firmware_settings(tmp_path, identifier="serial-number") + device = ( + SimpleNamespace(name="sw-1", serial=serial) + if serial is not None + else SimpleNamespace(name="sw-1") + ) + + assert export_firmware_link(device, "4.4.2") is True + + link = tmp_path / f"{_FW_PREFIX}sw-1{_FW_SUFFIX}" + assert link.is_symlink() + assert os.readlink(link) == f"{_FW_PREFIX}4.4.2{_FW_SUFFIX}" + assert _has_log(loguru_logs, "WARNING", "Serial number not found") + + +def test_firmware_link_name_equals_version_image_skips( + tmp_path, firmware_settings, patch_hostname +): + """When the identifier equals the version the link name equals the image + name — no self-referential symlink may shadow the actual image.""" + firmware_settings(tmp_path, identifier="serial-number") + device = SimpleNamespace(name="sw-1", serial="4.4.2") + + assert export_firmware_link(device, "4.4.2") is False + + assert list(tmp_path.iterdir()) == [] + + +# --- reconciliation --------------------------------------------------------- + + +def test_firmware_existing_correct_symlink_left_untouched( + tmp_path, firmware_settings, patch_hostname, mocker +): + firmware_settings(tmp_path, identifier="serial-number") + device = SimpleNamespace(name="sw-1", serial="ABC123") + link = tmp_path / f"{_FW_PREFIX}ABC123{_FW_SUFFIX}" + link.symlink_to(f"{_FW_PREFIX}4.4.2{_FW_SUFFIX}") + remove_spy = mocker.spy(os, "remove") + symlink_spy = mocker.spy(os, "symlink") + + assert export_firmware_link(device, "4.4.2") is False + + remove_spy.assert_not_called() + symlink_spy.assert_not_called() + assert os.readlink(link) == f"{_FW_PREFIX}4.4.2{_FW_SUFFIX}" + + +def test_firmware_repoints_stale_symlink(tmp_path, firmware_settings, patch_hostname): + """A link pointing at the wrong version image is repointed (e.g. after the + device's version changed in NetBox).""" + firmware_settings(tmp_path, identifier="serial-number") + device = SimpleNamespace(name="sw-1", serial="ABC123") + link = tmp_path / f"{_FW_PREFIX}ABC123{_FW_SUFFIX}" + link.symlink_to(f"{_FW_PREFIX}4.4.1{_FW_SUFFIX}") + + assert export_firmware_link(device, "4.4.2") is True + + assert os.readlink(link) == f"{_FW_PREFIX}4.4.2{_FW_SUFFIX}" + + +def test_firmware_replaces_existing_regular_file_with_symlink( + tmp_path, firmware_settings, patch_hostname +): + """A regular file at the link path is removed before the symlink is + created.""" + firmware_settings(tmp_path, identifier="serial-number") + device = SimpleNamespace(name="sw-1", serial="ABC123") + link = tmp_path / f"{_FW_PREFIX}ABC123{_FW_SUFFIX}" + link.write_bytes(b"stale image") + + assert export_firmware_link(device, "4.4.2") is True + + assert link.is_symlink() + assert os.readlink(link) == f"{_FW_PREFIX}4.4.2{_FW_SUFFIX}" + + +# --- error handling --------------------------------------------------------- + + +def test_firmware_symlink_failure_raises( + tmp_path, firmware_settings, patch_hostname, mocker, loguru_logs +): + firmware_settings(tmp_path, identifier="serial-number") + mocker.patch( + "osism.tasks.conductor.sonic.exporter.os.symlink", + side_effect=OSError("permission denied"), + ) + device = SimpleNamespace(name="sw-1", serial="ABC123") + + with pytest.raises(OSError, match="permission denied"): + export_firmware_link(device, "4.4.2") + + assert _has_log(loguru_logs, "ERROR", "Failed to create firmware symlink") + + +def test_firmware_makedirs_failure_raises( + tmp_path, firmware_settings, patch_hostname, mocker, loguru_logs +): + firmware_settings(tmp_path, identifier="serial-number") + mocker.patch( + "osism.tasks.conductor.sonic.exporter.os.makedirs", + side_effect=OSError("read-only filesystem"), + ) + device = SimpleNamespace(name="sw-1", serial="ABC123") + + with pytest.raises(OSError, match="read-only filesystem"): + export_firmware_link(device, "4.4.2") + + assert _has_log(loguru_logs, "ERROR", "Failed to create firmware symlink") diff --git a/tests/unit/tasks/conductor/sonic/test_sync.py b/tests/unit/tasks/conductor/sonic/test_sync.py index 1b13f626..e32f41de 100644 --- a/tests/unit/tasks/conductor/sonic/test_sync.py +++ b/tests/unit/tasks/conductor/sonic/test_sync.py @@ -19,7 +19,7 @@ import pytest -from osism.tasks.conductor.sonic.sync import sync_sonic +from osism.tasks.conductor.sonic.sync import _get_sonic_parameter, sync_sonic def _has_log(records, level, substring): @@ -32,6 +32,7 @@ def make_device( role_slug="leaf", hwsku="Accton-AS7326-56X", config_version=None, + version=None, ): """Build a NetBox-shaped device the orchestrator can consume. @@ -43,6 +44,8 @@ def make_device( params["hwsku"] = hwsku if config_version is not None: params["config_version"] = config_version + if version is not None: + params["version"] = version role = SimpleNamespace(slug=role_slug) if role_slug is not None else None return SimpleNamespace( id=device_id, @@ -83,6 +86,7 @@ def patch(name, **kw): ), save_config_to_netbox=patch("save_config_to_netbox", return_value=(True, None)), export_config_to_file=patch("export_config_to_file", return_value=False), + export_firmware_link=patch("export_firmware_link", return_value=False), clear_interface_cache=patch("clear_interface_cache"), clear_all_caches=patch("clear_all_caches"), clear_vip_addresses_cache=patch("clear_vip_addresses_cache"), @@ -94,6 +98,35 @@ def patch(name, **kw): ) +# --------------------------------------------------------------------------- +# _get_sonic_parameter +# --------------------------------------------------------------------------- + + +def test_get_sonic_parameter_returns_value_when_present(): + device = SimpleNamespace(custom_fields={"sonic_parameters": {"hwsku": "X"}}) + assert _get_sonic_parameter(device, "hwsku") == "X" + + +@pytest.mark.parametrize( + "custom_fields", + [ + {}, # no sonic_parameters key + {"sonic_parameters": None}, # present but null + {"sonic_parameters": {}}, # present but empty + {"sonic_parameters": {"hwsku": "X"}}, # other keys only + None, # no custom_fields at all + ], +) +def test_get_sonic_parameter_missing_yields_none(custom_fields): + device = SimpleNamespace(custom_fields=custom_fields) + assert _get_sonic_parameter(device, "version") is None + + +def test_get_sonic_parameter_without_custom_fields_attribute(): + assert _get_sonic_parameter(SimpleNamespace(), "version") is None + + # --------------------------------------------------------------------------- # Cache lifecycle # --------------------------------------------------------------------------- @@ -301,6 +334,46 @@ def test_config_version_read_from_custom_fields(mock_nb, patch_sync_deps): assert deps.generate_sonic_config.call_args.args[3] == "4_2_0" +def test_firmware_version_drives_firmware_link(mock_nb, patch_sync_deps): + """``sonic_parameters.version`` is passed to ``export_firmware_link`` so the + per-device ZTP firmware symlink is reconciled, independent of the config.""" + deps = patch_sync_deps + device = make_device(name="sw-1", role_slug="leaf", version="4.4.2") + mock_nb.dcim.devices.get.return_value = device + + sync_sonic(device_name="sw-1") + + deps.export_firmware_link.assert_called_once_with(device, "4.4.2") + + +def test_firmware_link_called_with_none_when_version_absent(mock_nb, patch_sync_deps): + """A device without ``sonic_parameters.version`` still calls the firmware + link with ``None`` so the function can skip management itself.""" + deps = patch_sync_deps + device = make_device(name="sw-1", role_slug="leaf") + mock_nb.dcim.devices.get.return_value = device + + sync_sonic(device_name="sw-1") + + deps.export_firmware_link.assert_called_once_with(device, None) + + +def test_firmware_link_failure_reports_nonzero_rc( + mock_nb, patch_sync_deps, loguru_logs +): + """A raising ``export_firmware_link`` surfaces as rc=1, like the other + per-device export steps.""" + deps = patch_sync_deps + device = make_device(name="sw-1", role_slug="leaf", version="4.4.2") + mock_nb.dcim.devices.get.return_value = device + deps.export_firmware_link.side_effect = OSError("permission denied") + + sync_sonic(device_name="sw-1", task_id="t-1") + + deps.finish_task_output.assert_called_once_with("t-1", rc=1) + assert _has_log(loguru_logs, "ERROR", "Failed to sync SONiC configuration") + + # --------------------------------------------------------------------------- # AS mapping # ---------------------------------------------------------------------------