From e8ef6a07babba4045fec2c0564b284633436bd21 Mon Sep 17 00:00:00 2001 From: Alexander Egorenkov Date: Thu, 5 Mar 2026 08:19:10 +0100 Subject: [PATCH] fibre_channel: fix crash when attempting to dereference invalid counters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The statistics counters of a disabled FC host cannot be read from sysfs, and any read attempt fails with errno ENOENT. Therefore, all statistics counters returned by procfs are nil in such a case. This results in a crash on s390x Linux when a zFCP host is disabled with `chzdev -d <device>`. Crash stack trace in GDB ------------------------ $ gdb /root/node_exporter/node_exporter ... Thread 4 "node_exporter" received signal SIGSEGV, Segmentation fault. [Switching to LWP 35579] 0x00000000007be86c in github.com/prometheus/node_exporter/collector.(*fibrechannelCollector).Update (c=0xc00012fef0, ch=0xc0002fc8c0, ~r0=...) at /root/node_exporter/collector/fibrechannel_linux.go:133 133 c.pushCounter(ch, "dumped_frames_total", *host.Counters.DumpedFrames, *host.Name) (gdb) bt at /root/node_exporter/collector/fibrechannel_linux.go:133 (gdb) p host $1 = {Name = 0xc00038faf0, Speed = 0xc00038fb00, PortState = 0xc00038fb10, PortType = 0xc00038fb30, SymbolicName = 0xc00038fba0, NodeName = 0xc00038fb40, PortID = 0xc00038fb50, PortName = 0xc00038fb60, FabricName = 0xc00038fb80, DevLossTMO = 0xc00038fb90, SupportedClasses = 0xc00038fbb0, SupportedSpeeds = 0xc00038fbd0, Counters = 0xc0001cb2d0} (gdb) p host.Name $2 = (string *) 0xc00038faf0 (gdb) p *host.Name $3 = "host0" (gdb) p *host.Counters.DumpedFrames ❌️ Cannot access memory at address 0x0 (gdb) p *host.Counters $4 = {DumpedFrames = 0x0, ErrorFrames = 0x0, InvalidCRCCount = 0x0, RXFrames = 0x0, RXWords = 0x0, TXFrames = 0x0, TXWords = 0x0, SecondsSinceLastReset = 0x0, InvalidTXWordCount = 0x0, LinkFailureCount = 0x0, LossOfSyncCount = 0x0, LossOfSignalCount = 0x0, NosCount = 0x0, FCPPacketAborts = 0x0} (gdb) bt at 
/root/node_exporter/collector/fibrechannel_linux.go:133 name="fibrechannel", c=..., ch=0xc0002fc8c0, logger=0xc0002104a0) at /root/node_exporter/collector/collector.go:160 (gdb) p *host.Counters $5 = {DumpedFrames = 0x0, ErrorFrames = 0x0, InvalidCRCCount = 0x0, RXFrames = 0x0, RXWords = 0x0, TXFrames = 0x0, TXWords = 0x0, SecondsSinceLastReset = 0x0, InvalidTXWordCount = 0x0, LinkFailureCount = 0x0, LossOfSyncCount = 0x0, LossOfSignalCount = 0x0, NosCount = 0x0, FCPPacketAborts = 0x0} (gdb) Signed-off-by: Alexander Egorenkov --- collector/fibrechannel_linux.go | 19 ++++++++++++++++++- collector/fixtures/e2e-output.txt | 2 +- collector/fixtures/sys.ttar | 5 +++++ 3 files changed, 24 insertions(+), 2 deletions(-) diff --git a/collector/fibrechannel_linux.go b/collector/fibrechannel_linux.go index a0528d168a..79548f165b 100644 --- a/collector/fibrechannel_linux.go +++ b/collector/fibrechannel_linux.go @@ -129,7 +129,24 @@ func (c *fibrechannelCollector) Update(ch chan<- prometheus.Metric) error { )...) // Then the counters - // Note: `procfs` guarantees these a safe dereference for these counters. + // Note: `procfs` does not guarantee a safe dereference for these counters. + // A disabled host returns no statistics counters. 
+ if host.PortState == nil || *host.PortState == "Unknown" { + host.Counters.DumpedFrames = new(uint64) + host.Counters.ErrorFrames = new(uint64) + host.Counters.InvalidCRCCount = new(uint64) + host.Counters.RXFrames = new(uint64) + host.Counters.RXWords = new(uint64) + host.Counters.TXFrames = new(uint64) + host.Counters.TXWords = new(uint64) + host.Counters.SecondsSinceLastReset = new(uint64) + host.Counters.InvalidTXWordCount = new(uint64) + host.Counters.LinkFailureCount = new(uint64) + host.Counters.LossOfSyncCount = new(uint64) + host.Counters.LossOfSignalCount = new(uint64) + host.Counters.NosCount = new(uint64) + host.Counters.FCPPacketAborts = new(uint64) + } c.pushCounter(ch, "dumped_frames_total", *host.Counters.DumpedFrames, *host.Name) c.pushCounter(ch, "error_frames_total", *host.Counters.ErrorFrames, *host.Name) c.pushCounter(ch, "invalid_crc_total", *host.Counters.InvalidCRCCount, *host.Name) diff --git a/collector/fixtures/e2e-output.txt b/collector/fixtures/e2e-output.txt index 380e812c98..6ac6ace066 100644 --- a/collector/fixtures/e2e-output.txt +++ b/collector/fixtures/e2e-output.txt @@ -1397,7 +1397,7 @@ node_fibrechannel_error_frames_total{fc_host="host1"} 19 node_fibrechannel_fcp_packet_aborts_total{fc_host="host0"} 19 # HELP node_fibrechannel_info Non-numeric data from /sys/class/fc_host/, value is always 1. 
# TYPE node_fibrechannel_info gauge -node_fibrechannel_info{dev_loss_tmo="",fabric_name="",fc_host="host1",port_id="",port_name="",port_state="",port_type="",speed="8 Gbit",supported_classes="",supported_speeds="",symbolic_name=""} 1 +node_fibrechannel_info{dev_loss_tmo="",fabric_name="",fc_host="host1",port_id="",port_name="",port_state="Online",port_type="",speed="8 Gbit",supported_classes="",supported_speeds="",symbolic_name=""} 1 node_fibrechannel_info{dev_loss_tmo="30",fabric_name="0",fc_host="host0",port_id="000002",port_name="1000e0071bce95f2",port_state="Online",port_type="Point-To-Point (direct nport connection)",speed="16 Gbit",supported_classes="Class 3",supported_speeds="4 Gbit, 8 Gbit, 16 Gbit",symbolic_name="Emulex SN1100E2P FV12.4.270.3 DV12.4.0.0. HN:gotest. OS:Linux"} 1 # HELP node_fibrechannel_invalid_crc_total Invalid Cyclic Redundancy Check count # TYPE node_fibrechannel_invalid_crc_total counter diff --git a/collector/fixtures/sys.ttar b/collector/fixtures/sys.ttar index bc8744cbe7..c64234c7cc 100644 --- a/collector/fixtures/sys.ttar +++ b/collector/fixtures/sys.ttar @@ -1130,6 +1130,11 @@ Mode: 644 Directory: sys/class/fc_host/host1 Mode: 755 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/fc_host/host1/port_state +Lines: 1 +Online +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Path: sys/class/fc_host/host1/speed Lines: 1 8 Gbit