[SCSI] hpsa: complete the ioaccel raidmap code
Load balance across members of a N-way mirror set, and
handle the meta-RAID levels: R10, R50, R60.
Signed-off-by: Scott Teel <scott.teel@hp.com>
Signed-off-by: Stephen M. Cameron <scameron@beardog.cce.hp.com>
Signed-off-by: James Bottomley <JBottomley@Parallels.com>
diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index 41c7173..7cd2871 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -423,6 +423,13 @@
static const char *raid_label[] = { "0", "4", "1(1+0)", "5", "5+1", "ADG",
"1(ADM)", "UNKNOWN"
};
+#define HPSA_RAID_0 0
+#define HPSA_RAID_4 1
+#define HPSA_RAID_1 2 /* also used for RAID 10 */
+#define HPSA_RAID_5 3 /* also used for RAID 50 */
+#define HPSA_RAID_51 4
+#define HPSA_RAID_6 5 /* also used for RAID 60 */
+#define HPSA_RAID_ADM 6 /* also used for RAID 1+0 ADM */
#define RAID_UNKNOWN (ARRAY_SIZE(raid_label) - 1)
static ssize_t raid_level_show(struct device *dev,
@@ -2941,6 +2948,31 @@
cdb, cdb_len, scsi3addr);
}
+static void raid_map_helper(struct raid_map_data *map,
+ int offload_to_mirror, u32 *map_index, u32 *current_group)
+{
+ if (offload_to_mirror == 0) {
+ /* use physical disk in the first mirrored group. */
+ *map_index %= map->data_disks_per_row;
+ return;
+ }
+ do {
+ /* determine mirror group that *map_index indicates */
+ *current_group = *map_index / map->data_disks_per_row;
+ if (offload_to_mirror == *current_group)
+ continue;
+ if (*current_group < (map->layout_map_count - 1)) {
+ /* select map index from next group */
+ *map_index += map->data_disks_per_row;
+ (*current_group)++;
+ } else {
+ /* select map index from first group */
+ *map_index %= map->data_disks_per_row;
+ *current_group = 0;
+ }
+ } while (offload_to_mirror != *current_group);
+}
+
/*
* Attempt to perform offload RAID mapping for a logical volume I/O.
*/
@@ -2959,6 +2991,14 @@
u64 first_row, last_row;
u32 first_row_offset, last_row_offset;
u32 first_column, last_column;
+ u64 r0_first_row, r0_last_row;
+ u32 r5or6_blocks_per_row;
+ u64 r5or6_first_row, r5or6_last_row;
+ u32 r5or6_first_row_offset, r5or6_last_row_offset;
+ u32 r5or6_first_column, r5or6_last_column;
+ u32 total_disks_per_row;
+ u32 stripesize;
+ u32 first_group, last_group, current_group;
u32 map_row;
u32 disk_handle;
u64 disk_block;
@@ -2968,6 +3008,7 @@
#if BITS_PER_LONG == 32
u64 tmpdiv;
#endif
+ int offload_to_mirror;
BUG_ON(!(dev->offload_config && dev->offload_enabled));
@@ -3070,19 +3111,140 @@
return IO_ACCEL_INELIGIBLE;
/* proceeding with driver mapping */
+ total_disks_per_row = map->data_disks_per_row +
+ map->metadata_disks_per_row;
map_row = ((u32)(first_row >> map->parity_rotation_shift)) %
map->row_cnt;
- map_index = (map_row * (map->data_disks_per_row +
- map->metadata_disks_per_row)) + first_column;
- if (dev->raid_level == 2) {
- /* simple round-robin balancing of RAID 1+0 reads across
- * primary and mirror members. this is appropriate for SSD
- * but not optimal for HDD.
+ map_index = (map_row * total_disks_per_row) + first_column;
+
+ switch (dev->raid_level) {
+ case HPSA_RAID_0:
+ break; /* nothing special to do */
+ case HPSA_RAID_1:
+ /* Handles load balance across RAID 1 members.
+ * (2-drive R1 and R10 with even # of drives.)
+ * Appropriate for SSDs, not optimal for HDDs
*/
+ BUG_ON(map->layout_map_count != 2);
if (dev->offload_to_mirror)
map_index += map->data_disks_per_row;
dev->offload_to_mirror = !dev->offload_to_mirror;
+ break;
+ case HPSA_RAID_ADM:
+ /* Handles N-way mirrors (R1-ADM)
+ * and R10 with # of drives divisible by 3.)
+ */
+ BUG_ON(map->layout_map_count != 3);
+
+ offload_to_mirror = dev->offload_to_mirror;
+ raid_map_helper(map, offload_to_mirror,
+ &map_index, ¤t_group);
+ /* set mirror group to use next time */
+ offload_to_mirror =
+ (offload_to_mirror >= map->layout_map_count - 1)
+ ? 0 : offload_to_mirror + 1;
+ /* FIXME: remove after debug/dev */
+ BUG_ON(offload_to_mirror >= map->layout_map_count);
+ dev_warn(&h->pdev->dev,
+ "DEBUG: Using physical disk map index %d from mirror group %d\n",
+ map_index, offload_to_mirror);
+ dev->offload_to_mirror = offload_to_mirror;
+ /* Avoid direct use of dev->offload_to_mirror within this
+ * function since multiple threads might simultaneously
+ * increment it beyond the range of dev->layout_map_count -1.
+ */
+ break;
+ case HPSA_RAID_5:
+ case HPSA_RAID_6:
+ if (map->layout_map_count <= 1)
+ break;
+
+ /* Verify first and last block are in same RAID group */
+ r5or6_blocks_per_row =
+ map->strip_size * map->data_disks_per_row;
+ BUG_ON(r5or6_blocks_per_row == 0);
+ stripesize = r5or6_blocks_per_row * map->layout_map_count;
+#if BITS_PER_LONG == 32
+ tmpdiv = first_block;
+ first_group = do_div(tmpdiv, stripesize);
+ tmpdiv = first_group;
+ (void) do_div(tmpdiv, r5or6_blocks_per_row);
+ first_group = tmpdiv;
+ tmpdiv = last_block;
+ last_group = do_div(tmpdiv, stripesize);
+ tmpdiv = last_group;
+ (void) do_div(tmpdiv, r5or6_blocks_per_row);
+ last_group = tmpdiv;
+#else
+ first_group = (first_block % stripesize) / r5or6_blocks_per_row;
+ last_group = (last_block % stripesize) / r5or6_blocks_per_row;
+ if (first_group != last_group)
+#endif
+ return IO_ACCEL_INELIGIBLE;
+
+ /* Verify request is in a single row of RAID 5/6 */
+#if BITS_PER_LONG == 32
+ tmpdiv = first_block;
+ (void) do_div(tmpdiv, stripesize);
+ first_row = r5or6_first_row = r0_first_row = tmpdiv;
+ tmpdiv = last_block;
+ (void) do_div(tmpdiv, stripesize);
+ r5or6_last_row = r0_last_row = tmpdiv;
+#else
+ first_row = r5or6_first_row = r0_first_row =
+ first_block / stripesize;
+ r5or6_last_row = r0_last_row = last_block / stripesize;
+#endif
+ if (r5or6_first_row != r5or6_last_row)
+ return IO_ACCEL_INELIGIBLE;
+
+
+ /* Verify request is in a single column */
+#if BITS_PER_LONG == 32
+ tmpdiv = first_block;
+ first_row_offset = do_div(tmpdiv, stripesize);
+ tmpdiv = first_row_offset;
+ first_row_offset = (u32) do_div(tmpdiv, r5or6_blocks_per_row);
+ r5or6_first_row_offset = first_row_offset;
+ tmpdiv = last_block;
+ r5or6_last_row_offset = do_div(tmpdiv, stripesize);
+ tmpdiv = r5or6_last_row_offset;
+ r5or6_last_row_offset = do_div(tmpdiv, r5or6_blocks_per_row);
+ tmpdiv = r5or6_first_row_offset;
+ (void) do_div(tmpdiv, map->strip_size);
+ first_column = r5or6_first_column = tmpdiv;
+ tmpdiv = r5or6_last_row_offset;
+ (void) do_div(tmpdiv, map->strip_size);
+ r5or6_last_column = tmpdiv;
+#else
+ first_row_offset = r5or6_first_row_offset =
+ (u32)((first_block % stripesize) %
+ r5or6_blocks_per_row);
+
+ r5or6_last_row_offset =
+ (u32)((last_block % stripesize) %
+ r5or6_blocks_per_row);
+
+ first_column = r5or6_first_column =
+ r5or6_first_row_offset / map->strip_size;
+ r5or6_last_column =
+ r5or6_last_row_offset / map->strip_size;
+#endif
+ if (r5or6_first_column != r5or6_last_column)
+ return IO_ACCEL_INELIGIBLE;
+
+ /* Request is eligible */
+ map_row = ((u32)(first_row >> map->parity_rotation_shift)) %
+ map->row_cnt;
+
+ map_index = (first_group *
+ (map->row_cnt * total_disks_per_row)) +
+ (map_row * total_disks_per_row) + first_column;
+ break;
+ default:
+ return IO_ACCEL_INELIGIBLE;
}
+
disk_handle = dd[map_index].ioaccel_handle;
disk_block = map->disk_starting_blk + (first_row * map->strip_size) +
(first_row_offset - (first_column * map->strip_size));