diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 7b11bc7de0e39..466112ff52731 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -286,9 +287,10 @@ static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget, int display_failure_msg = 1, ret; struct Scsi_Host *shost = dev_to_shost(starget->dev.parent); struct queue_limits lim; + int node = dev_to_node(shost->dma_dev); - sdev = kzalloc(sizeof(*sdev) + shost->transportt->device_size, - GFP_KERNEL); + sdev = kzalloc_node(sizeof(*sdev) + shost->transportt->device_size, + GFP_KERNEL, node); if (!sdev) goto out; @@ -501,8 +503,9 @@ static struct scsi_target *scsi_alloc_target(struct device *parent, struct scsi_target *starget; struct scsi_target *found_target; int error, ref_got; + int node = dev_to_node(shost->dma_dev); - starget = kzalloc(size, GFP_KERNEL); + starget = kzalloc_node(size, GFP_KERNEL, node); if (!starget) { printk(KERN_ERR "%s: allocation failure\n", __func__); return NULL; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d463b9b5a0a59..7ed566c81c1bd 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -561,7 +561,9 @@ struct request_queue { struct timer_list timeout; struct work_struct timeout_work; - atomic_t nr_active_requests_shared_tags; + /* ensure nr_active_requests_shared_tags and nr_requests are on different cache lines to avoid significant performance hits from cache-line contention on some CPU architectures */ + atomic_t nr_active_requests_shared_tags ____cacheline_aligned_in_smp; struct blk_mq_tags *sched_shared_tags; diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index d32f5841f4f85..43fefaab4c980 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -272,7 +272,9 @@ struct scsi_device { #define SCSI_DEFAULT_DEVICE_BLOCKED 3 atomic_t iorequest_cnt; - atomic_t iodone_cnt; + /* ensure 
iorequest_cnt and iodone_cnt are on different cache lines to avoid significant + performance hits from cache-line contention on some CPU architectures */ + atomic_t iodone_cnt ____cacheline_aligned_in_smp; atomic_t ioerr_cnt; atomic_t iotmo_cnt