cciss: make rebuild_lun_table behave better

This patch makes the rebuild_lun_table smart enough to not rip a logical
volume out from under the OS. Without this fix if a customer is running
hpacucli to monitor their storage the driver will blindly remove and re-add
the disks whenever the utility calls the CCISS_REGNEWD ioctl. Unfortunately,
both hpacucli and ACUXE call the ioctl repeatedly. Customers have reported
IO coming to a standstill. Calling the ioctl is the problem, this patch is
the fix.

Signed-off-by: Stephen M. Cameron <scameron@beardog.cca.cpqcorp.net>
Signed-off-by: Mike Miller <mike.miller@hp.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index aeaf465..f9f10a1 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -1330,13 +1330,46 @@
 	spin_unlock_irqrestore(&h->lock, flags);
 }
 
+/* This function gets the serial number of a logical drive via
+ * inquiry page 0x83.  Serial no. is 16 bytes.  If the serial
+ * number cannot be had, for whatever reason, 16 bytes of 0xff
+ * are returned instead.
+ */
+static void cciss_get_serial_no(int ctlr, int logvol, int withirq,
+				unsigned char *serial_no, int buflen)
+{
+#define PAGE_83_INQ_BYTES 64
+	int rc;
+	unsigned char *buf;
+
+	if (buflen > 16)
+		buflen = 16;
+	memset(serial_no, 0xff, buflen);
+	buf = kzalloc(PAGE_83_INQ_BYTES, GFP_KERNEL);
+	if (!buf)
+		return;
+	memset(serial_no, 0, buflen);
+	if (withirq)
+		rc = sendcmd_withirq(CISS_INQUIRY, ctlr, buf,
+			PAGE_83_INQ_BYTES, 1, logvol, 0x83, TYPE_CMD);
+	else
+		rc = sendcmd(CISS_INQUIRY, ctlr, buf,
+			PAGE_83_INQ_BYTES, 1, logvol, 0x83, NULL, TYPE_CMD);
+	if (rc == IO_OK)
+		memcpy(serial_no, &buf[8], buflen);
+	kfree(buf);
+	return;
+}
+
 /* This function will check the usage_count of the drive to be updated/added.
- * If the usage_count is zero then the drive information will be updated and
- * the disk will be re-registered with the kernel.  If not then it will be
- * left alone for the next reboot.  The exception to this is disk 0 which
- * will always be left registered with the kernel since it is also the
- * controller node.  Any changes to disk 0 will show up on the next
- * reboot.
+ * If the usage_count is zero and it is a heretofore unknown drive, or,
+ * the drive's capacity, geometry, or serial number has changed,
+ * then the drive information will be updated and the disk will be
+ * re-registered with the kernel.  If these conditions don't hold,
+ * then it will be left alone for the next reboot.  The exception to this
+ * is disk 0 which will always be left registered with the kernel since it
+ * is also the controller node.  Any changes to disk 0 will show up on
+ * the next reboot.
  */
 static void cciss_update_drive_info(int ctlr, int drv_index)
 {
@@ -1347,9 +1380,65 @@
 	sector_t total_size;
 	unsigned long flags = 0;
 	int ret = 0;
+	drive_info_struct *drvinfo;
+
+	/* Get information about the disk and modify the driver structure */
+	inq_buff = kmalloc(sizeof(InquiryData_struct), GFP_KERNEL);
+	drvinfo = kmalloc(sizeof(*drvinfo), GFP_KERNEL);
+	if (inq_buff == NULL || drvinfo == NULL)
+		goto mem_msg;
+
+	/* testing to see if 16-byte CDBs are already being used */
+	if (h->cciss_read == CCISS_READ_16) {
+		cciss_read_capacity_16(h->ctlr, drv_index, 1,
+			&total_size, &block_size);
+
+	} else {
+		cciss_read_capacity(ctlr, drv_index, 1,
+				    &total_size, &block_size);
+
+		/* if read_capacity returns all F's this volume is >2TB */
+		/* in size so we switch to 16-byte CDB's for all */
+		/* read/write ops */
+		if (total_size == 0xFFFFFFFFULL) {
+			cciss_read_capacity_16(ctlr, drv_index, 1,
+			&total_size, &block_size);
+			h->cciss_read = CCISS_READ_16;
+			h->cciss_write = CCISS_WRITE_16;
+		} else {
+			h->cciss_read = CCISS_READ_10;
+			h->cciss_write = CCISS_WRITE_10;
+		}
+	}
+
+	cciss_geometry_inquiry(ctlr, drv_index, 1, total_size, block_size,
+			       inq_buff, drvinfo);
+	drvinfo->block_size = block_size;
+	drvinfo->nr_blocks = total_size + 1;
+
+	cciss_get_serial_no(ctlr, drv_index, 1, drvinfo->serial_no,
+			sizeof(drvinfo->serial_no));
+
+	/* Is it the same disk we already know, and nothing's changed? */
+	if (h->drv[drv_index].raid_level != -1 &&
+		((memcmp(drvinfo->serial_no,
+				h->drv[drv_index].serial_no, 16) == 0) &&
+		drvinfo->block_size == h->drv[drv_index].block_size &&
+		drvinfo->nr_blocks == h->drv[drv_index].nr_blocks &&
+		drvinfo->heads == h->drv[drv_index].heads &&
+		drvinfo->sectors == h->drv[drv_index].sectors &&
+		drvinfo->cylinders == h->drv[drv_index].cylinders)) {
+			/* The disk is unchanged, nothing to update */
+			goto freeret;
+	}
+
+	/* Not the same disk, or something's changed, so we need to */
+	/* deregister it, and re-register it, if it's not in use. */
 
 	/* if the disk already exists then deregister it before proceeding */
-	if (h->drv[drv_index].raid_level != -1) {
+	/* (unless it's the first disk (for the controller node). */
+	if (h->drv[drv_index].raid_level != -1 && drv_index != 0) {
+		printk(KERN_WARNING "disk %d has changed.\n", drv_index);
 		spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
 		h->drv[drv_index].busy_configuring = 1;
 		spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
@@ -1364,43 +1453,23 @@
 
 	/* If the disk is in use return */
 	if (ret)
-		return;
+		goto freeret;
 
-	/* Get information about the disk and modify the driver structure */
-	inq_buff = kmalloc(sizeof(InquiryData_struct), GFP_KERNEL);
-	if (inq_buff == NULL)
-		goto mem_msg;
-
- 	/* testing to see if 16-byte CDBs are already being used */
- 	if (h->cciss_read == CCISS_READ_16) {
- 		cciss_read_capacity_16(h->ctlr, drv_index, 1,
- 			&total_size, &block_size);
- 		goto geo_inq;
- 	}
-
-	cciss_read_capacity(ctlr, drv_index, 1,
-			    &total_size, &block_size);
-
-  	/* if read_capacity returns all F's this volume is >2TB in size */
-  	/* so we switch to 16-byte CDB's for all read/write ops */
-  	if (total_size == 0xFFFFFFFFULL) {
-		cciss_read_capacity_16(ctlr, drv_index, 1,
-		&total_size, &block_size);
-		h->cciss_read = CCISS_READ_16;
-		h->cciss_write = CCISS_WRITE_16;
-	} else {
-		h->cciss_read = CCISS_READ_10;
-		h->cciss_write = CCISS_WRITE_10;
-	}
-geo_inq:
-	cciss_geometry_inquiry(ctlr, drv_index, 1, total_size, block_size,
-			       inq_buff, &h->drv[drv_index]);
+	/* Save the new information from cciss_geometry_inquiry */
+	/* and serial number inquiry. */
+	h->drv[drv_index].block_size = drvinfo->block_size;
+	h->drv[drv_index].nr_blocks = drvinfo->nr_blocks;
+	h->drv[drv_index].heads = drvinfo->heads;
+	h->drv[drv_index].sectors = drvinfo->sectors;
+	h->drv[drv_index].cylinders = drvinfo->cylinders;
+	h->drv[drv_index].raid_level = drvinfo->raid_level;
+	memcpy(h->drv[drv_index].serial_no, drvinfo->serial_no, 16);
 
 	++h->num_luns;
 	disk = h->gendisk[drv_index];
 	set_capacity(disk, h->drv[drv_index].nr_blocks);
 
-	/* if it's the controller it's already added */
+	/* if it's the controller (if drv_index == 0) it's already added */
 	if (drv_index) {
 		disk->queue = blk_init_queue(do_cciss_request, &h->lock);
 		sprintf(disk->disk_name, "cciss/c%dd%d", ctlr, drv_index);
@@ -1437,6 +1506,7 @@
 
       freeret:
 	kfree(inq_buff);
+	kfree(drvinfo);
 	return;
       mem_msg:
 	printk(KERN_ERR "cciss: out of memory\n");
@@ -1478,7 +1548,6 @@
 	int ctlr = h->ctlr;
 	int num_luns;
 	ReportLunData_struct *ld_buff = NULL;
-	drive_info_struct *drv = NULL;
 	int return_code;
 	int listlength = 0;
 	int i;
@@ -1494,98 +1563,117 @@
 		return -EBUSY;
 	}
 	h->busy_configuring = 1;
+	spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
 
-	/* if del_disk is NULL then we are being called to add a new disk
-	 * and update the logical drive table.  If it is not NULL then
-	 * we will check if the disk is in use or not.
-	 */
-	if (del_disk != NULL) {
-		drv = get_drv(del_disk);
-		drv->busy_configuring = 1;
-		spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
-		return_code = deregister_disk(del_disk, drv, 1);
-		drv->busy_configuring = 0;
-		h->busy_configuring = 0;
-		return return_code;
-	} else {
-		spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
-		if (!capable(CAP_SYS_RAWIO))
-			return -EPERM;
+	if (!capable(CAP_SYS_RAWIO))
+		return -EPERM;
 
-		ld_buff = kzalloc(sizeof(ReportLunData_struct), GFP_KERNEL);
-		if (ld_buff == NULL)
-			goto mem_msg;
+	ld_buff = kzalloc(sizeof(ReportLunData_struct), GFP_KERNEL);
+	if (ld_buff == NULL)
+		goto mem_msg;
 
-		return_code = sendcmd_withirq(CISS_REPORT_LOG, ctlr, ld_buff,
-					      sizeof(ReportLunData_struct), 0,
-					      0, 0, TYPE_CMD);
+	return_code = sendcmd_withirq(CISS_REPORT_LOG, ctlr, ld_buff,
+				      sizeof(ReportLunData_struct), 0,
+				      0, 0, TYPE_CMD);
 
-		if (return_code == IO_OK) {
-			listlength =
-				be32_to_cpu(*(__be32 *) ld_buff->LUNListLength);
-		} else {	/* reading number of logical volumes failed */
-			printk(KERN_WARNING "cciss: report logical volume"
-			       " command failed\n");
-			listlength = 0;
-			goto freeret;
-		}
+	if (return_code == IO_OK)
+		listlength = be32_to_cpu(*(__be32 *) ld_buff->LUNListLength);
+	else {	/* reading number of logical volumes failed */
+		printk(KERN_WARNING "cciss: report logical volume"
+		       " command failed\n");
+		listlength = 0;
+		goto freeret;
+	}
 
-		num_luns = listlength / 8;	/* 8 bytes per entry */
-		if (num_luns > CISS_MAX_LUN) {
-			num_luns = CISS_MAX_LUN;
-			printk(KERN_WARNING "cciss: more luns configured"
-			       " on controller than can be handled by"
-			       " this driver.\n");
-		}
+	num_luns = listlength / 8;	/* 8 bytes per entry */
+	if (num_luns > CISS_MAX_LUN) {
+		num_luns = CISS_MAX_LUN;
+		printk(KERN_WARNING "cciss: more luns configured"
+		       " on controller than can be handled by"
+		       " this driver.\n");
+	}
 
-		/* Compare controller drive array to drivers drive array.
-		 * Check for updates in the drive information and any new drives
-		 * on the controller.
-		 */
-		for (i = 0; i < num_luns; i++) {
-			int j;
-
-			drv_found = 0;
-
-			lunid = (0xff &
-				 (unsigned int)(ld_buff->LUN[i][3])) << 24;
-			lunid |= (0xff &
-				  (unsigned int)(ld_buff->LUN[i][2])) << 16;
-			lunid |= (0xff &
-				  (unsigned int)(ld_buff->LUN[i][1])) << 8;
-			lunid |= 0xff & (unsigned int)(ld_buff->LUN[i][0]);
-
-			/* Find if the LUN is already in the drive array
-			 * of the controller.  If so then update its info
-			 * if not is use.  If it does not exist then find
-			 * the first free index and add it.
-			 */
-			for (j = 0; j <= h->highest_lun; j++) {
-				if (h->drv[j].LunID == lunid) {
-					drv_index = j;
-					drv_found = 1;
-				}
+	/* Compare controller drive array to driver's drive array */
+	/* to see if any drives are missing on the controller due */
+	/* to action of Array Config Utility (user deletes drive) */
+	/* and deregister logical drives which have disappeared.  */
+	for (i = 0; i <= h->highest_lun; i++) {
+		int j;
+		drv_found = 0;
+		for (j = 0; j < num_luns; j++) {
+			memcpy(&lunid, &ld_buff->LUN[j][0], 4);
+			lunid = le32_to_cpu(lunid);
+			if (h->drv[i].LunID == lunid) {
+				drv_found = 1;
+				break;
 			}
+		}
+		if (!drv_found) {
+			/* Deregister it from the OS, it's gone. */
+			spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
+			h->drv[i].busy_configuring = 1;
+			spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+			return_code = deregister_disk(h->gendisk[i],
+				&h->drv[i], 1);
+			h->drv[i].busy_configuring = 0;
+		}
+	}
 
-			/* check if the drive was found already in the array */
-			if (!drv_found) {
-				drv_index = cciss_find_free_drive_index(ctlr);
-				if (drv_index == -1)
-					goto freeret;
+	/* Compare controller drive array to driver's drive array.
+	 * Check for updates in the drive information and any new drives
+	 * on the controller due to ACU adding logical drives, or changing
+	 * a logical drive's size, etc.  Reregister any new/changed drives
+	 */
+	for (i = 0; i < num_luns; i++) {
+		int j;
 
-				/*Check if the gendisk needs to be allocated */
+		drv_found = 0;
+
+		memcpy(&lunid, &ld_buff->LUN[i][0], 4);
+		lunid = le32_to_cpu(lunid);
+
+		/* Find if the LUN is already in the drive array
+		 * of the driver.  If so then update its info
+		 * if not in use.  If it does not exist then find
+		 * the first free index and add it.
+		 */
+		for (j = 0; j <= h->highest_lun; j++) {
+			if (h->drv[j].raid_level != -1 &&
+				h->drv[j].LunID == lunid) {
+				drv_index = j;
+				drv_found = 1;
+				break;
+			}
+		}
+
+		/* check if the drive was found already in the array */
+		if (!drv_found) {
+			drv_index = cciss_find_free_drive_index(ctlr);
+			if (drv_index == -1)
+				goto freeret;
+			/*Check if the gendisk needs to be allocated */
+			if (!h->gendisk[drv_index]) {
+				h->gendisk[drv_index] =
+					alloc_disk(1 << NWD_SHIFT);
 				if (!h->gendisk[drv_index]){
-					h->gendisk[drv_index] = alloc_disk(1 << NWD_SHIFT);
-					if (!h->gendisk[drv_index]){
-						printk(KERN_ERR "cciss: could not allocate new disk %d\n", drv_index);
-						goto mem_msg;
-					}
+					printk(KERN_ERR "cciss: could not "
+						"allocate new disk %d\n",
+						drv_index);
+					goto mem_msg;
 				}
 			}
 			h->drv[drv_index].LunID = lunid;
-			cciss_update_drive_info(ctlr, drv_index);
-		}		/* end for */
-	}			/* end else */
+
+			/* Don't need to mark this busy because nobody
+			 * else knows about this disk yet to contend
+			 * for access to it.
+			 */
+			h->drv[drv_index].busy_configuring = 0;
+			wmb();
+
+		}
+		cciss_update_drive_info(ctlr, drv_index);
+	}		/* end for */
 
       freeret:
 	kfree(ld_buff);
@@ -1597,6 +1685,7 @@
 	return -1;
       mem_msg:
 	printk(KERN_ERR "cciss: out of memory\n");
+	h->busy_configuring = 0;
 	goto freeret;
 }
 
@@ -1652,15 +1741,15 @@
 		 * other than disk 0 we will call put_disk.  We do not
 		 * do this for disk 0 as we need it to be able to
 		 * configure the controller.
-		*/
+		 */
 		if (clear_all){
 			/* This isn't pretty, but we need to find the
 			 * disk in our array and NULL our the pointer.
 			 * This is so that we will call alloc_disk if
 			 * this index is used again later.
-			*/
+			 */
 			for (i=0; i < CISS_MAX_LUN; i++){
-				if(h->gendisk[i] == disk){
+				if (h->gendisk[i] == disk) {
 					h->gendisk[i] = NULL;
 					break;
 				}
@@ -1688,7 +1777,7 @@
 		if (drv == h->drv + h->highest_lun) {
 			/* if so, find the new hightest lun */
 			int i, newhighest = -1;
-			for (i = 0; i < h->highest_lun; i++) {
+			for (i = 0; i <= h->highest_lun; i++) {
 				/* if the disk has size > 0, it is available */
 				if (h->drv[i].heads)
 					newhighest = i;
@@ -3318,6 +3407,9 @@
 			cciss_geometry_inquiry(cntl_num, i, 0, total_size,
 					       block_size, inq_buff,
 					       &hba[cntl_num]->drv[i]);
+			cciss_get_serial_no(cntl_num, i, 0,
+				hba[cntl_num]->drv[i].serial_no,
+				sizeof(hba[cntl_num]->drv[i].serial_no));
 		} else {
 			/* initialize raid_level to indicate a free space */
 			hba[cntl_num]->drv[i].raid_level = -1;