diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 7b11bc7de0e39..466112ff52731 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -286,9 +287,10 @@ static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget, int display_failure_msg = 1, ret; struct Scsi_Host *shost = dev_to_shost(starget->dev.parent); struct queue_limits lim; + int node = dev_to_node(shost->dma_dev); - sdev = kzalloc(sizeof(*sdev) + shost->transportt->device_size, - GFP_KERNEL); + sdev = kzalloc_node(sizeof(*sdev) + shost->transportt->device_size, + GFP_KERNEL, node); if (!sdev) goto out; @@ -501,8 +503,9 @@ static struct scsi_target *scsi_alloc_target(struct device *parent, struct scsi_target *starget; struct scsi_target *found_target; int error, ref_got; + int node = dev_to_node(shost->dma_dev); - starget = kzalloc(size, GFP_KERNEL); + starget = kzalloc_node(size, GFP_KERNEL, node); if (!starget) { printk(KERN_ERR "%s: allocation failure\n", __func__); return NULL; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d463b9b5a0a59..7ed566c81c1bd 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -561,7 +561,9 @@ struct request_queue { struct timer_list timeout; struct work_struct timeout_work; - atomic_t nr_active_requests_shared_tags; + /* ensure nr_active_requests_shared_tags and nr_requests are on different cache lines to avoid significant performance hits from cache-line contention on some CPU architectures */ + atomic_t nr_active_requests_shared_tags ____cacheline_aligned_in_smp; struct blk_mq_tags *sched_shared_tags; diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index d32f5841f4f85..43fefaab4c980 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -272,7 +272,9 @@ struct scsi_device { #define SCSI_DEFAULT_DEVICE_BLOCKED 3 atomic_t iorequest_cnt; - atomic_t iodone_cnt; + /* ensure 
iorequest_cnt and iodone_cnt are on different cache lines to avoid significant + performance hits from cache-line contention on some CPU architectures */ + atomic_t iodone_cnt ____cacheline_aligned_in_smp; atomic_t ioerr_cnt; atomic_t iotmo_cnt