From 0a8d90c01abecd1aaacc690d5c85d65e8cdb73ed Mon Sep 17 00:00:00 2001 From: James Rizzo Date: Thu, 2 Apr 2026 13:16:35 +0530 Subject: [PATCH 1/3] scsi: use NUMA-local allocation for sdev and starget Allocate scsi_device and scsi_target on the same NUMA node as the host adapter's DMA device to improve memory locality and reduce cross-node traffic. Signed-off-by: James Rizzo Signed-off-by: Sumit Saxena --- drivers/scsi/scsi_scan.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 7b11bc7de0e3..466112ff5273 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -286,9 +287,10 @@ static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget, int display_failure_msg = 1, ret; struct Scsi_Host *shost = dev_to_shost(starget->dev.parent); struct queue_limits lim; + int node = dev_to_node(shost->dma_dev); - sdev = kzalloc(sizeof(*sdev) + shost->transportt->device_size, - GFP_KERNEL); + sdev = kzalloc_node(sizeof(*sdev) + shost->transportt->device_size, + GFP_KERNEL, node); if (!sdev) goto out; @@ -501,8 +503,9 @@ static struct scsi_target *scsi_alloc_target(struct device *parent, struct scsi_target *starget; struct scsi_target *found_target; int error, ref_got; + int node = dev_to_node(shost->dma_dev); - starget = kzalloc(size, GFP_KERNEL); + starget = kzalloc_node(size, GFP_KERNEL, node); if (!starget) { printk(KERN_ERR "%s: allocation failure\n", __func__); return NULL; From 940244795951edeaa5af9ba69980ec36c6c48db2 Mon Sep 17 00:00:00 2001 From: James Rizzo Date: Thu, 2 Apr 2026 13:16:36 +0530 Subject: [PATCH 2/3] block: align nr_active_requests_shared_tags to avoid cache line contention Place nr_active_requests_shared_tags on its own cache line so it does not share a cache line with nr_requests and other hot fields, avoiding significant performance hits from false sharing on some CPU 
architectures. Signed-off-by: James Rizzo Signed-off-by: Sumit Saxena --- include/linux/blkdev.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d463b9b5a0a5..7ed566c81c1b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -561,7 +561,9 @@ struct request_queue { struct timer_list timeout; struct work_struct timeout_work; - atomic_t nr_active_requests_shared_tags; + /* Ensure nr_active_requests_shared_tags and nr_requests are on different cache lines + to avoid significant performance hits from cache line contention on some CPU architectures */ + atomic_t nr_active_requests_shared_tags ____cacheline_aligned_in_smp; struct blk_mq_tags *sched_shared_tags; From 937e9025acaf9d4ec512ac6ffbb185ee699d12d6 Mon Sep 17 00:00:00 2001 From: James Rizzo Date: Thu, 2 Apr 2026 13:16:37 +0530 Subject: [PATCH 3/3] scsi: align scsi_device iodone_cnt to avoid cache line contention Place iodone_cnt on its own cache line so it does not share a cache line with iorequest_cnt, avoiding significant performance hits from false sharing when request and completion paths update these counters on some CPU architectures. Signed-off-by: James Rizzo Signed-off-by: Sumit Saxena --- include/scsi/scsi_device.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index d32f5841f4f8..43fefaab4c98 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -272,7 +272,9 @@ struct scsi_device { #define SCSI_DEFAULT_DEVICE_BLOCKED 3 atomic_t iorequest_cnt; - atomic_t iodone_cnt; + /* Ensure iorequest_cnt and iodone_cnt are on different cache lines to avoid significant + performance hits from cache line contention on some CPU architectures */ + atomic_t iodone_cnt ____cacheline_aligned_in_smp; atomic_t ioerr_cnt; atomic_t iotmo_cnt;