/*
 *   Copyright (c) International Business Machines  Corp., 2001
 *
 *   This program is free software;  you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program;  if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * Module: mdregmgr
 * File: raid5_mgr.c
 *
 * Description: This file contains all of the required engine-plugin APIs
 *              for the Raid5 MD region manager.
 */

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <time.h>
#include <plugin.h>
#include <sys/ioctl.h>
#define MY_PLUGIN raid5_plugin
#include "md.h"
#include "xor.h"
#include "raid5_mgr.h"
#include "raid5_discover.h"



/* Global variables */

/* Engine open mode, recorded by raid5_setup_evms_plugin(). */
static engine_mode_t        open_mode;

/*
 * Head of the singly linked list of pending kill-sector requests.
 * raid5_add_sectors_to_kill_list() pushes new entries at the head;
 * kill_sectors() and forward_kill_sectors() consume and free them.
 */
kill_sectors_t * kill_sector_list_head = NULL;


/* Forward references */

/*
 * Prototype for raid5_write(), needed because kill_sectors() calls it
 * before its definition appears.
 */
static int raid5_write( storage_object_t * region,
			lsn_t              lsn,
			sector_count_t     count,
			void             * buffer );


/* Function: raid5_setup_evms_plugin
 *
 *  Called by the Engine shortly after the plugin is loaded, before the
 *  initial discovery pass.  Records the Engine's open mode and function
 *  table and then registers the MD name space.
 *
 *  Returns EINVAL when no function table is supplied, otherwise the
 *  result of md_register_name_space().
 */
static int raid5_setup_evms_plugin( engine_mode_t        mode,
				    engine_functions_t * functions) {
	int rc;

	/* Plain return here: LOG_ENTRY has not been issued yet. */
	if (functions == NULL) {
		return EINVAL;
	}

	EngFncs = functions;
	open_mode = mode;

	LOG_ENTRY;

	rc = md_register_name_space();
	if (rc) {
		LOG_SERIOUS("Failed to register the MD name space.\n");
	}

	RETURN(rc);
}


/****** Region Checking Functions ******/


/* All of the following raid5_can_ functions return 0 if they are able to
 * perform the specified action, or non-zero if they cannot.
 */


/* Function: raid5_can_delete
 *
 *  Reports whether the given MD region may be deleted.  No checks are
 *  made on the region; the answer is always 0 (yes).
 */
static int raid5_can_delete( storage_object_t * region ) {
	int rc = 0;

	LOG_ENTRY;
	RETURN(rc);
}


/* Function: raid5_can_expand
 *
 *  RAID5 regions cannot be expanded.  The region is therefore not added
 *  to the expansion point list and the request is not passed down to any
 *  child object; none of the parameters are touched.  Always returns 0.
 */
static int raid5_can_expand( storage_object_t * region,
			     u_int64_t        * expand_limit,
			     dlist_t            expansion_points ) {
	int rc = 0;

	LOG_ENTRY;
	RETURN(rc);
}


/* Function: raid5_can_expand_by
 *
 *  RAID5 cannot expand by any amount, so this always fails.
 *
 *  Returns EINVAL when the region is not owned by this plugin, and
 *  ENOSYS otherwise.  The size argument is not examined.
 */
static int raid5_can_expand_by( storage_object_t * region,
				u_int64_t        * size ) {
	int rc = ENOSYS;

	LOG_ENTRY;

	if (region->plugin != raid5_plugin) {
		LOG_ERROR("Region %s is not owned by MD.\n", region->name);
		rc = EINVAL;
	}

	RETURN(rc);
}


/* Function: raid5_can_shrink
 *
 *  RAID5 regions cannot be shrunk.  The region is therefore not added to
 *  the shrink point list and the request is not passed down to any child
 *  object; none of the parameters are touched.  Always returns 0.
 */
static int raid5_can_shrink( storage_object_t * region,
			     u_int64_t        * shrink_limit,
			     dlist_t            shrink_points ) {
	int rc = 0;

	LOG_ENTRY;
	RETURN(rc);
}


/* Function: raid5_can_shrink_by
 *
 *  RAID5 cannot shrink by any amount.  Always returns ENOSYS; neither
 *  argument is examined.
 */
static int raid5_can_shrink_by( storage_object_t * region,
				u_int64_t        * size ) {
	int rc = ENOSYS;

	LOG_ENTRY;
	RETURN(rc);
}


/* Function: raid5_can_move
 *
 *  Moving a region is not implemented.  Logs that fact and returns
 *  ENOSYS.
 */
static int raid5_can_move( storage_object_t * region ) {
	int rc = ENOSYS;

	LOG_ENTRY;
	LOG("Not yet implemented\n");
	RETURN(rc);
}


/* Function: raid5_can_set_volume
 *
 *  RAID5 places no restriction on a region being made into (or removed
 *  from) a volume.  Always returns 0.
 */
static int raid5_can_set_volume( storage_object_t * region,
				 boolean            flag ) {
	int rc = 0;

	LOG_ENTRY;
	RETURN(rc);
}


/* Function: raid5_discover
 *
 *  Discovery entry point.  First runs MD volume discovery over the
 *  input list, then builds the RAID5 regions and places them on the
 *  output list.
 *
 *  All newly created regions must be added to the output list, and all
 *  objects from the input list must either be claimed or moved to the
 *  output list.
 *
 *  Returns EFAULT when either list is missing, otherwise the count
 *  reported by raid5_discover_regions().
 */
static int raid5_discover( dlist_t input_list,
			   dlist_t output_list,
			   boolean final_call ) {
	int regions_found = 0;

	LOG_ENTRY;

	if (input_list && output_list) {
		/* PV discovery */
		md_discover_volumes(input_list, output_list);
		LOG_DETAILS("PV discovery complete.\n");

		/* LV discovery and exporting */
		raid5_discover_regions(output_list, &regions_found, final_call);
		LOG_DETAILS("Volume discovery complete.\n");

		RETURN(regions_found);
	}

	/* One of the lists was not supplied. */
	RETURN(EFAULT);
}


/*
 * Map a sector of the RAID array onto a member disk.
 *
 * Input:  vol_sector - sector number within the whole array,
 *         raid_disks - total number of disks, data_disks - disks holding
 *         data in each stripe, conf - supplies chunk size, level and
 *         parity algorithm.
 * Output: *dd_idx - index of the disk holding the data,
 *         *pd_idx - index of the disk holding the stripe's parity,
 *         return value - sector number within those disks.
 *
 * For an unsupported parity algorithm a warning is logged and *pd_idx is
 * left unset.
 */
static lsn_t raid5_compute_sector(lsn_t vol_sector,
				  unsigned int raid_disks, unsigned int data_disks,
				  unsigned int * dd_idx, unsigned int * pd_idx,
				  raid5_conf_t * conf) {

	unsigned long long chunk_sectors = (unsigned long long) (conf->chunk_size >> 9);
	unsigned long long chunk_nr;
	unsigned long long stripe_nr;
	lsn_t              offset_in_chunk;
	lsn_t              child_sector;

	LOG_ENTRY;

	/* Which chunk the sector falls in, and where inside that chunk. */
	chunk_nr        = (unsigned long long) (vol_sector / chunk_sectors);
	offset_in_chunk = (lsn_t) (vol_sector % chunk_sectors);

	/* Each stripe carries data_disks chunks of data. */
	stripe_nr = (unsigned long long) (chunk_nr / data_disks);

	/* Data disk index before the parity placement is accounted for. */
	*dd_idx = chunk_nr % data_disks;

	if (conf->level == 4) {
		/* Level 4: parity always lives on the last disk. */
		*pd_idx = data_disks;
	} else {
		/* Level 5: parity rotates according to the chosen algorithm. */
		switch (conf->algorithm) {
		case ALGORITHM_LEFT_ASYMMETRIC:
			*pd_idx = data_disks - stripe_nr % raid_disks;
			if (*dd_idx >= *pd_idx) {
				(*dd_idx)++;
			}
			break;

		case ALGORITHM_RIGHT_ASYMMETRIC:
			*pd_idx = stripe_nr % raid_disks;
			if (*dd_idx >= *pd_idx) {
				(*dd_idx)++;
			}
			break;

		case ALGORITHM_LEFT_SYMMETRIC:
			*pd_idx = data_disks - stripe_nr % raid_disks;
			*dd_idx = (*pd_idx + 1 + *dd_idx) % raid_disks;
			break;

		case ALGORITHM_RIGHT_SYMMETRIC:
			*pd_idx = stripe_nr % raid_disks;
			*dd_idx = (*pd_idx + 1 + *dd_idx) % raid_disks;
			break;

		default:
			LOG_WARNING("raid5: unsupported algorithm %d\n", conf->algorithm);
			break;
		}
	}

	/* Sector on the member disk: start of the stripe plus the chunk offset. */
	child_sector = (lsn_t) (stripe_nr * chunk_sectors + offset_in_chunk);
	LOG_PROC("Exiting: new sector is %llu.\n", child_sector);
	return child_sector;
}


/*
 * Translate the start of a run of region sectors into a run on a single
 * child object.
 *
 * On return *child_object is the member disk holding sector lsn,
 * *child_lsn is the corresponding sector on that disk, and *child_count
 * is the smaller of count and the number of sectors left in the chunk
 * starting at *child_lsn.  Always returns 0.
 */
static int get_child_run( md_volume_t       * volume,
			  lsn_t               lsn,
			  sector_count_t      count,
			  storage_object_t ** child_object,
			  lsn_t             * child_lsn,
			  sector_count_t    * child_count) {

	raid5_conf_t   * conf = mdvol_to_conf(volume);
	sector_count_t   chunk_sectors = conf->chunk_size >> EVMS_VSECTOR_SIZE_SHIFT;
	sector_count_t   left_in_chunk;
	unsigned int     dd_idx;
	unsigned int     pd_idx;

	LOG_ENTRY;

	/* All disks but one carry data in each stripe. */
	*child_lsn = raid5_compute_sector(lsn,
					  conf->raid_disks, conf->raid_disks - 1,
					  &dd_idx, &pd_idx,
					  conf);

	*child_object = conf->disks[dd_idx].dev;

	/* A run may not cross a chunk boundary on the child. */
	left_in_chunk = chunk_sectors - (*child_lsn & (chunk_sectors - 1));
	*child_count = min(count, left_in_chunk);

	RETURN(0);
}



/****** Region Functions ******/


static int raid5_get_create_options( option_array_t * options,
				     char          ** spare_disk,
				     unsigned int   * chunk_size,
				     unsigned int   * raid_level,
				     unsigned int   * parity_algorithm ) {
	int i;
	int rc = 0;

	LOG_ENTRY;

	for (i = 0; i < options->count; i++) {

		if (options->option[i].is_number_based) {

			switch (options->option[i].number) {
			
			case MD_OPTION_SPARE_DISK_INDEX:
				/*
				 * Not worth validation, will catch errors when
				 * we try to find the original.
				 */
				*spare_disk = options->option[i].value.s;
				break;

			case MD_OPTION_CHUNK_SIZE_INDEX:
				*chunk_size = options->option[i].value.ui32;
				break;

			case MD_OPTION_RAID_LEVEL_INDEX:
				if (strcmp(options->option[i].value.s, RAID4_LEVEL_NAME) == 0) {
					*raid_level = 4;
				} else if (strcmp(options->option[i].value.s, RAID5_LEVEL_NAME) == 0) {
					*raid_level = 5;
				}
				break;

			case MD_OPTION_PARITY_ALGORITHM_INDEX:
				if (strcmp(options->option[i].value.s, ALGORITHM_LEFT_ASYMMETRIC_NAME) == 0) {
					*parity_algorithm = ALGORITHM_LEFT_ASYMMETRIC;
				} else if (strcmp(options->option[i].value.s, ALGORITHM_RIGHT_ASYMMETRIC_NAME) == 0) {
					*parity_algorithm = ALGORITHM_RIGHT_ASYMMETRIC;
				} else if (strcmp(options->option[i].value.s, ALGORITHM_LEFT_SYMMETRIC_NAME) == 0) {
					*parity_algorithm = ALGORITHM_LEFT_SYMMETRIC;
				} else if (strcmp(options->option[i].value.s, ALGORITHM_RIGHT_SYMMETRIC_NAME) == 0) {
					*parity_algorithm = ALGORITHM_RIGHT_SYMMETRIC;
				}
				break;

			default:
				break;

			}

		} else {
			if (strcmp(options->option[i].name, MD_OPTION_SPARE_DISK_NAME) == 0) {
				*spare_disk = options->option[i].value.s;
			} else if (strcmp(options->option[i].name, MD_OPTION_CHUNK_SIZE_NAME) == 0) {
				*chunk_size = options->option[i].value.ui32;

			} else if (strcmp(options->option[i].name, MD_OPTION_RAID_LEVEL_NAME) == 0) {
				if (strcmp(options->option[i].value.s, RAID4_LEVEL_NAME) == 0) {
					*raid_level = 4;
				} else if (strcmp(options->option[i].value.s, RAID5_LEVEL_NAME) == 0) {
					*raid_level = 5;
				}

			} else if (strcmp(options->option[i].name, MD_OPTION_PARITY_ALGORITHM_NAME) == 0) {
				if (strcmp(options->option[i].value.s, ALGORITHM_LEFT_ASYMMETRIC_NAME) == 0) {
					*parity_algorithm = ALGORITHM_LEFT_ASYMMETRIC;
				} else if (strcmp(options->option[i].value.s, ALGORITHM_RIGHT_ASYMMETRIC_NAME) == 0) {
					*parity_algorithm = ALGORITHM_RIGHT_ASYMMETRIC;
				} else if (strcmp(options->option[i].value.s, ALGORITHM_LEFT_SYMMETRIC_NAME) == 0) {
					*parity_algorithm = ALGORITHM_LEFT_SYMMETRIC;
				} else if (strcmp(options->option[i].value.s, ALGORITHM_RIGHT_SYMMETRIC_NAME) == 0) {
					*parity_algorithm = ALGORITHM_RIGHT_SYMMETRIC;
				}
			}
		}
	}

	RETURN(rc);
}


/*
 * Look up a storage object by name.
 *
 * Fetches the Engine's list of disks, segments and regions that are valid
 * input objects and searches it for an object whose name matches (compared
 * over at most EVMS_VOLUME_NAME_SIZE + 1 characters).
 *
 * Returns the matching object, or NULL when name is NULL, the list cannot
 * be obtained, or no object matches.
 */
static storage_object_t * find_object( char * name ) {

	int rc = 0;
	storage_object_t * object = NULL;
	storage_object_t * match = NULL;
	dlist_t objects;
	int size, tag;

	LOG_ENTRY;
	if (!name) {
		RETURN(NULL);
	}

	/* Get the list of objects. */
	rc = EngFncs->get_object_list(DISK | SEGMENT | REGION,
				 DATA_TYPE,
				 NULL,
				 VALID_INPUT_OBJECT,
				 &objects);
	if (!rc) {
		while (BlindExtractObject(objects, &size, (TAG *) &tag, NULL, (void**)&object)==0) {
			if (!strncmp(object->name, name, EVMS_VOLUME_NAME_SIZE + 1)) {
				/*
				 * Remember the hit separately: "object" also holds
				 * the last non-matching entry once the list runs
				 * out, and that must not be returned.
				 */
				match = object;
				break;
			}
		}
		DestroyList(&objects, FALSE);
	} else {
		LOG_ERROR("Error getting object list = %d....\n",rc);
	}
	RETURN(match);
}

/* Function: raid5_create
 *
 *  Create a new MD RAID4/5 volume from the given objects.
 *
 *  objects         - list of objects to become array members; they are
 *                    extracted from the list.
 *  options         - create options (spare disk, chunk size, RAID level,
 *                    parity algorithm); defaults are used for any that
 *                    are absent.
 *  new_region_list - passed to raid5_create_region() for the new region.
 *
 *  Returns EFAULT for a missing parameter, EINVAL when the object count
 *  cannot be read or the objects (plus the optional spare) exceed
 *  MAX_MD_DEVICES, ENOMEM when the volume cannot be allocated, otherwise
 *  the result of raid5_create_region().
 */
static int raid5_create( dlist_t          objects,
			 option_array_t * options,
			 dlist_t          new_region_list ) {
	md_volume_t * volume = NULL;
	storage_object_t * object;
	int nr_disks;
	unsigned long size = -1;
	int tag, waste;
	int i, spare_disks=0, spare_index = 0, index = 0;
	int rc = 0;
	mdp_disk_t disk;
	storage_object_t * spare=NULL;
	char * spare_disk = NULL;
	/*
	 * Kept as int so that the arithmetic in
	 * MD_CHUNK_ALIGN_NEW_SIZE_BLOCKS() and the argument passed to
	 * md_create_first_superblock() keep their original type.
	 */
	int chunk_size = MD_DEFAULT_CHUNK_SIZE;
	unsigned int raid_level = 5;
	unsigned int parity_algorithm = ALGORITHM_LEFT_SYMMETRIC;

	LOG_ENTRY;

	/* Parameter check */
	if (!objects || !options || !new_region_list) {
		RETURN(EFAULT);
	}

	rc = GetListSize(objects, &nr_disks);
	if (rc) {
		LOG_CRITICAL("DLIST error getting number of objects rc = %d.\n",rc);
		RETURN(EINVAL);
	}

	if (nr_disks > MAX_MD_DEVICES) {
		LOG_ERROR("Too many objects (%d) given. Maximum is %d.\n", nr_disks, MAX_MD_DEVICES);
		RETURN(EINVAL);
	}

	/*
	 * Parse the options and locate the spare before anything is
	 * allocated, so that an over-full array can be rejected cleanly.
	 */
	raid5_get_create_options(options, &spare_disk, (unsigned int *)&chunk_size, &raid_level, &parity_algorithm);

	if (spare_disk) {
		spare = find_object( spare_disk);
	}

	/* The spare takes one more slot in child_object[]. */
	if (spare && (nr_disks >= MAX_MD_DEVICES)) {
		LOG_ERROR("Too many objects (%d) given. Maximum is %d.\n", nr_disks + 1, MAX_MD_DEVICES);
		RETURN(EINVAL);
	}

	if (md_allocate_memory((void**)&volume, sizeof(md_volume_t) )) {
		LOG_CRITICAL("Memory error new volume structure.\n");
		RETURN(ENOMEM);
	}

	while (!(rc = BlindExtractObject(objects, &waste, (TAG *)&tag, NULL, (void **)&object))) {
		size = min(size, object->size);	 /* Track smallest object for super block */
		volume->child_object[index] = object;
		index ++;
	}

	if (spare) {
		size = min(size, spare->size);	/* Track smallest object for super block */
		volume->child_object[index] = spare;
		nr_disks++;
		spare_disks = 1;
		spare_index = index;
	}
	disk.number = 0;
	disk.raid_disk = 0;
	disk.state = (1 << MD_DISK_ACTIVE) | (1 << MD_DISK_SYNC);

	/* Convert the smallest member size from sectors to chunk-aligned blocks. */
	size = MD_CHUNK_ALIGN_NEW_SIZE_BLOCKS(chunk_size, (size << EVMS_VSECTOR_SIZE_SHIFT) / BLOCK_SIZE);

	md_create_first_superblock(volume, disk, raid_level, chunk_size, size, nr_disks, spare_disks, 0);

	volume->super_block->level = raid_level;
	if (raid_level == 5) {
		volume->super_block->layout = parity_algorithm;
	} else {
		/* The parity algorithm only applies to level 5. */
		volume->super_block->layout = 0;
	}

	if (spare) {
		/* Set the state to inactive for the spare disk. */
		volume->super_block->disks[spare_index].state = 0;
	}

	for (i = 0; i < nr_disks; i++) {
		rc = md_clone_superblock(volume, i);
		if (rc) {
			/* Creation carries on as before; just make the failure visible. */
			LOG_ERROR("Error cloning superblock for disk %d, rc = %d.\n", i, rc);
		}
	}

	/* Link the new volume at the head of the global volume list. */
	volume->personality = RAID5;
	volume->nr_disks = nr_disks;
	volume->next = volume_list_head;
	volume_list_head = volume;
	volume->removed_disks = CreateList();
	volume->added_disks = CreateList();

	rc = raid5_create_region(volume, new_region_list, TRUE);
	RETURN(rc);
}


/*
 * Pass every pending kill-sector request on to the child objects.
 *
 * Walks the global kill-sector list from its head.  Each request is split
 * into per-child runs with get_child_run() and handed to the child via
 * KILL_SECTORS().  A request is unlinked and freed once it has been
 * processed, also when one of its runs failed; the walk stops after the
 * first failure.
 *
 * Returns 0, or the first non-zero result of KILL_SECTORS().
 */
static int forward_kill_sectors() {

	kill_sectors_t   * entry = kill_sector_list_head;
	storage_object_t * child;
	lsn_t              child_start;
	sector_count_t     run_len;
	int                rc = 0;

	LOG_ENTRY;

	while ((entry != NULL) && (rc == 0)) {
		md_volume_t * volume = (md_volume_t *) entry->region->private_data;

		/* Consume the request one child run at a time. */
		while (entry->count > 0) {
			get_child_run(volume, entry->lsn, entry->count,
				      &child, &child_start, &run_len);

			rc = KILL_SECTORS(child, child_start, run_len);
			if (rc != 0) {
				break;
			}

			entry->lsn   += run_len;
			entry->count -= run_len;
		}

		/* Pop the request off the global list. */
		kill_sector_list_head = entry->next;
		free(entry);
		entry = kill_sector_list_head;
	}

	RETURN(rc);
}


/* Function: raid5_delete
 *
 *  Delete the given region.  Pending kill-sector requests are first
 *  forwarded to the child objects; if that succeeds the parent/child
 *  links are cleared (the children go onto the supplied list), and the
 *  RAID5 configuration, the MD volume and the region itself are freed.
 *
 *  Returns 0 on success, otherwise the failure from raid5_can_delete()
 *  or forward_kill_sectors(); in that case nothing is freed.
 */
static int raid5_delete( storage_object_t * region,
			 dlist_t            children ) {

	md_volume_t  * volume = (md_volume_t *) region->private_data;
	raid5_conf_t * conf = mdvol_to_conf(volume);
	int            rc;

	LOG_ENTRY;

	/* Check that this region can be removed. */
	rc = raid5_can_delete(region);
	if (rc != 0) {
		RETURN(rc);
	}

	rc = forward_kill_sectors();
	if (rc != 0) {
		RETURN(rc);
	}

	/* Remove the parent/child associations with the PVs. */
	md_clear_child_list(region, children);

	/* Delete the volume. */
	free(conf);
	md_delete_volume(volume);
	EngFncs->free_region(region);

	RETURN(rc);
}


/* Function: raid5_expand
 *
 *  RAID5 cannot be expanded.  Always returns ENOSYS; no argument is
 *  examined.
 */
static int raid5_expand( storage_object_t    * region,
			 storage_object_t    * expand_object,
			 dlist_t               input_objects,
			 option_array_t      * options ) {
	int rc = ENOSYS;

	LOG_ENTRY;
	RETURN(rc);
}


/* Function: raid5_shrink
 *
 *  RAID5 cannot be shrunk.  Always returns ENOSYS; no argument is
 *  examined.
 */
static int raid5_shrink( storage_object_t * region,
			 storage_object_t * shrink_object,
			 dlist_t            input_objects,
			 option_array_t   * options ) {
	int rc = ENOSYS;

	LOG_ENTRY;
	RETURN(rc);
}


/* Function: raid5_move
 *
 *  Moving a region is not implemented.  Logs that fact and returns
 *  ENOSYS.
 */
static int raid5_move( storage_object_t * source,
		       storage_object_t * target,
		       option_array_t   * options ) {
	int rc = ENOSYS;

	LOG_ENTRY;
	LOG("Not yet implemented\n");
	RETURN(rc);
}


/* Function: raid5_set_volume
 *
 *  Notification that the region is being made into, or removed from, a
 *  volume.  MD has nothing to do in either case, so this only logs its
 *  entry and exit.
 */
static void raid5_set_volume( storage_object_t * region,
			      boolean            flag ) {
	LOG_ENTRY;

	/* Nothing to do. */

	LOG_EXIT(0);
}


/* Function: raid5_add_sectors_to_kill_list
 *
 *  Queue a run of sectors of the region to be zeroed during the next
 *  commit.  The request is pushed at the head of the global kill-sector
 *  list and the region is marked dirty so that it gets called to commit.
 *
 *  Returns EIO when the volume is flagged corrupt, EINVAL when the run
 *  extends past the end of the region, ENOMEM when the list entry cannot
 *  be allocated, and 0 on success.
 */
static int raid5_add_sectors_to_kill_list( storage_object_t * region,
					   lsn_t              lsn,
					   sector_count_t     count ) {

	int              rc = 0;
	md_volume_t    * volume = (md_volume_t *)region->private_data;
	kill_sectors_t * ks;

	LOG_ENTRY;

	if (volume->flags & MD_CORRUPT) {
		MESSAGE("MD Object %s is corrupt, writing data is not allowed\n ",volume->name);
		RETURN(EIO);
	}

	/*
	 * Range check written so that it cannot wrap: a plain
	 * (lsn + count) > size test passes for huge values whose sum
	 * overflows.
	 */
	if ((count > region->size) || (lsn > region->size - count)) {
		/* The sector value is 64 bits wide; print it as such. */
		LOG_ERROR("Attempt to write past end of region %s sector=%llu\n ",
			  volume->name, (unsigned long long)(lsn + count));
		RETURN(EINVAL);
	}

	ks = malloc(sizeof(kill_sectors_t));

	if (ks != NULL) {

		ks->region = region;
		ks->lsn    = lsn;
		ks->count  = count;

		/* Push at the head of the global list. */
		ks->next = kill_sector_list_head;
		kill_sector_list_head = ks;

		/*
		 * Mark the region dirty so that it will get called to commit
		 * the kill sectors.
		 */
		region->flags |= SOFLAG_DIRTY;

	} else {
		rc = ENOMEM;
	}

	RETURN(rc);
}


/*
 * Process the kill sectors list.
 */
static int kill_sectors(void) {

	int rc = 0;
	kill_sectors_t * ks;
	unsigned char  * buffer = NULL;
	sector_count_t   buffer_size = 0;

	LOG_ENTRY;

	/*
	 * Copy the kill sector list head and NULL out the gloabal variable.
	 * This function uses raid5_write() to write out the kill sectors,
	 * but raid5_write() has a check to write kill sectors before it does
	 * any writing.  We could end up in infinite recursion between
	 * kill_sectors() and raid5_write().  raid5_write() has a check to
	 * see if there are any kill sectors on the global list.  By having
	 * this function remove the kill sectors from the global list the
	 * recursion is stopped.
	 */
	ks = kill_sector_list_head;
	kill_sector_list_head = NULL;

	while ((rc == 0) && (ks != NULL)) {
		if (buffer_size < ks->count) {
			if (buffer != NULL) {
				free(buffer);
			}
			buffer = calloc(1, EVMS_VSECTOR_SIZE * ks->count);

			if (buffer != NULL) {
				buffer_size = ks->count;
			} else {
				buffer_size = 0;
				rc = ENOMEM;
			}
		}

		if (rc == 0) {
			kill_sectors_t * ks_prev = ks;

			LOG_DEBUG("Killing %lld sectors on %s at sector offset %lld.\n", ks->count, ks->region->name, ks->lsn);
			rc = raid5_write(ks->region, ks->lsn, ks->count, buffer);

			ks = ks->next;
			free(ks_prev);
		}
	}

	RETURN(rc);
}


/* Function: raid5_commit_changes
 *
 *  Commit pending changes for the region in the given commit phase.
 *
 *  SETUP                - process the volume's pending remove and add
 *                         lists.  Both are always processed; the first
 *                         failure is returned.
 *  FIRST_METADATA_WRITE - zero the queued kill sectors, write the
 *                         superblocks to disk and clear the region's
 *                         dirty flag.
 *  other phases         - nothing to do.
 *
 *  Returns EINVAL when the region does not belong to this plugin, 0 when
 *  the region is not dirty, otherwise the result of the phase's work.
 */
static int raid5_commit_changes( storage_object_t * region,
				 uint               phase ) {
	md_volume_t * volume = (md_volume_t *)region->private_data;
	int         rc = 0;
	int         rc_remove;
	int         rc_add;
	int         rc_kill;

	LOG_ENTRY;

	/* Make sure this region belongs to MD, and is dirty. */
	if (region->plugin != raid5_plugin) {
		LOG_ERROR("Region %s does not belong to MD.\n", region->name);
		RETURN(EINVAL);
	}
	if (!(region->flags & SOFLAG_DIRTY)) {
		LOG_WARNING("Region %s is not dirty - not committing.\n", region->name);
		RETURN(0);
	}

	switch (phase) {
	case SETUP:
		/*
		 * Run both passes as before, but do not let a successful
		 * ADD pass hide a failed REMOVE pass.
		 */
		rc_remove = md_process_modify_list(volume, EVMS_MD_REMOVE);
		rc_add    = md_process_modify_list(volume, EVMS_MD_ADD);
		rc = rc_remove ? rc_remove : rc_add;
		break;
	case FIRST_METADATA_WRITE:
		/* kill_sectors() works on the global list, not just this region. */
		rc_kill = kill_sectors();
		if (rc_kill != 0) {
			LOG_SERIOUS("Error %d zeroing kill sectors while committing region %s.\n", rc_kill, region->name);
		}
		rc = md_write_sbs_to_disk(volume);		 // write super blocks
		region->flags &= ~SOFLAG_DIRTY;			 // region is marked clean in this phase
		break;
	case POST_REDISCOVER:
	default :
		break;
	}


	RETURN(rc);
}


/* Function: raid5_get_option_count
 *
 *  Report how many options are available for the task's action:
 *  MD_CREATE_OPTIONS_COUNT for a create task, MD_SET_INFO_OPTIONS_COUNT
 *  for a set-info task, and -1 for any other action.
 */
static int raid5_get_option_count( task_context_t * task ) {
	int nr_options;

	LOG_ENTRY;

	if (task->action == EVMS_Task_Create) {
		nr_options = MD_CREATE_OPTIONS_COUNT;
	} else if (task->action == EVMS_Task_Set_Info) {
		nr_options = MD_SET_INFO_OPTIONS_COUNT;
	} else {
		/* Action not handled by this plugin. */
		nr_options = -1;
	}

	RETURN(nr_options);
}


/*
 * Get a list of spare disks in the array.
 *
 * Any list already at *value_list is freed and replaced by a new one whose
 * first entry is MD_NO_SELECTION, followed by the names of the child
 * objects whose superblock disk state has no bit set other than
 * MD_DISK_NEW.  The list's count is the number of entries actually filled
 * in.
 *
 * Returns 0 on success, or ENOMEM when an allocation fails (*value_list is
 * NULL if the list itself could not be allocated).
 */
static int get_spare_list( value_list_t ** value_list, task_context_t * context) {
	int rc = 0;
	int count, i, j;
	storage_object_t * region = context->object;
	md_volume_t * volume = (md_volume_t *)region->private_data;

	LOG_ENTRY;

	/* Release the previous list and its strings. */
	if (*value_list) {
		for (i = 0; i < (*value_list)->count; i++) {
			if ((*value_list)->value[i].s) {
				EngFncs->engine_free((*value_list)->value[i].s);
			}
		}
		EngFncs->engine_free(*value_list);
	}
	*value_list = NULL;

	count = volume->super_block->spare_disks + 1; /* Plus 1 for 'None' selection */
	/*
	 * If there is only one spare and the array is in degrade mode, or a
	 * disk has been selected to be marked faulty, then the spare cannot
	 * be removed.
	 */
	if (volume->super_block->spare_disks <= 1) {
		if ((volume->super_block->active_disks != volume->super_block->raid_disks) ||
		    ((context->option_descriptors->option[MD_OPTION_MARK_FAULTY_INDEX].value.s != NULL) &&
		     (strcmp(context->option_descriptors->option[MD_OPTION_MARK_FAULTY_INDEX].value.s, MD_NO_SELECTION) != 0))){
			count = 1;
		}
	}

	/* The (count - 1) sizing relies on value_list_t already holding one value_t. */
	*value_list = EngFncs->engine_alloc((count - 1) * sizeof(value_t) + sizeof(value_list_t));
	if (*value_list == NULL) {
		RETURN(ENOMEM);
	}

	(*value_list)->count = count;
	i=0;
	SET_STRING((*value_list)->value[i].s, MD_NO_SELECTION);
	i++;

	if (count > 1) {
		/* Never fill more entries than were allocated. */
		for (j = 0; (i < count) && (j < MAX_MD_DEVICES); j++ ) {
			char * name;

			/* Check for null object, if missing, skip. */
			if (!volume->child_object[j]) {
				continue;
			}
			if ((volume->super_block->disks[j].state & ~(1 << MD_DISK_NEW)) != 0) {
				continue;
			}

			name = EngFncs->engine_alloc(strlen(volume->child_object[j]->name) + 1);
			if (name == NULL) {
				rc = ENOMEM;
				break;
			}
			strcpy(name, volume->child_object[j]->name);
			(*value_list)->value[i].s = name;
			i++;
		}
	}

	/* Advertise only the entries that really hold a string. */
	(*value_list)->count = i;

	RETURN(rc);
}


/*
 * Get a list of active disks to put in the remove active list.
 *
 * Any list already at *value_list is freed and replaced by a new one whose
 * first entry is MD_NO_SELECTION.  Active disks (MD_DISK_ACTIVE set in the
 * superblock disk state) are listed after it only when the region is not
 * new, the array is not degraded, and the array has a spare (disk state 0)
 * that is not the one currently chosen in the remove-spare option.  The
 * list's count is the number of entries actually filled in.
 *
 * Returns 0 on success, or ENOMEM when an allocation fails (*value_list is
 * NULL if the list itself could not be allocated).
 */
static int get_active_list( value_list_t ** value_list, task_context_t * context) {
	int rc = 0;
	int count, i, j;
	BOOLEAN can_remove = FALSE;
	storage_object_t * region = context->object;
	md_volume_t * volume = (md_volume_t *)region->private_data;

	LOG_ENTRY;

	/* Release the previous list and its strings. */
	if (*value_list) {
		for (i = 0; i < (*value_list)->count; i++) {
			if ((*value_list)->value[i].s) {
				EngFncs->engine_free((*value_list)->value[i].s);
			}
		}
		EngFncs->engine_free(*value_list);
	}
	*value_list = NULL;

	/* Region must not be new. */
	/* Must not be running in degrade mode. */
	if (!(region->flags & SOFLAG_NEW) &&
	    (volume->super_block->active_disks == volume->super_block->raid_disks)) {

		/* Must have a spare already in the array, i.e., not new */
		for (i = 0; !can_remove && (i < MAX_MD_DEVICES); i++ ) {
			/* Check for null object, if missing, skip. */
			if (volume->child_object[i]) {
				if (volume->super_block->disks[i].state == 0) {
					/* Is the spare currently marked for removal? */
					if ((context->option_descriptors->option[MD_OPTION_REMOVE_SPARE_INDEX].value.s != NULL) &&
					    (strcmp(volume->child_object[i]->name, context->option_descriptors->option[MD_OPTION_REMOVE_SPARE_INDEX].value.s) != 0)) {
						can_remove = TRUE;
					}
				}
			}
		}
	}

	count = 1;	/* For 'None' */
	if (can_remove) {
		count += volume->super_block->active_disks;
	}

	/* The (count - 1) sizing relies on value_list_t already holding one value_t. */
	*value_list = EngFncs->engine_alloc((count - 1) * sizeof(value_t) + sizeof(value_list_t));
	if (*value_list == NULL) {
		RETURN(ENOMEM);
	}

	(*value_list)->count = count;
	i=0;
	SET_STRING((*value_list)->value[i].s,MD_NO_SELECTION);
	i++;

	if (count > 1) {
		/* Never fill more entries than were allocated. */
		for (j = 0; (i < count) && (j < MAX_MD_DEVICES); j++ ) {
			char * name;

			/* Check for null object, if missing, skip. */
			if (!volume->child_object[j]) {
				continue;
			}
			if (!(volume->super_block->disks[j].state & (1 << MD_DISK_ACTIVE))) {
				continue;
			}

			name = EngFncs->engine_alloc(strlen(volume->child_object[j]->name) + 1);
			if (name == NULL) {
				rc = ENOMEM;
				break;
			}
			strcpy(name, volume->child_object[j]->name);
			(*value_list)->value[i].s = name;
			i++;
		}
	}

	/* Advertise only the entries that really hold a string. */
	(*value_list)->count = i;

	RETURN(rc);
}


/*
 * Get a list of faulty disks to put in the faulty list.
 * This includes those disks which are active but not sync since the kernel
 * treats these just like faulty.
 *
 * Any list already at *value_list is freed and replaced by a new one whose
 * first entry is MD_NO_SELECTION, followed by the names of the matching
 * child objects.  Room is reserved for every disk in the array, but the
 * list's count is the number of entries actually filled in.
 *
 * Returns 0 on success, or ENOMEM when an allocation fails (*value_list is
 * NULL if the list itself could not be allocated).
 */
static int get_faulty_list( value_list_t ** value_list, task_context_t * context) {
	int rc = 0;
	int count, i, j;
	storage_object_t * region = context->object;
	md_volume_t * volume = (md_volume_t *)region->private_data;

	LOG_ENTRY;

	/* Release the previous list and its strings. */
	if (*value_list) {
		for (i = 0; i < (*value_list)->count; i++) {
			if ((*value_list)->value[i].s) {
				EngFncs->engine_free((*value_list)->value[i].s);
			}
		}
		EngFncs->engine_free(*value_list);
	}
	*value_list = NULL;

	count = volume->super_block->nr_disks + 1; /* Plus 1 for 'None' selection */

	/* The (count - 1) sizing relies on value_list_t already holding one value_t. */
	*value_list = EngFncs->engine_alloc((count - 1) * sizeof(value_t) + sizeof(value_list_t));
	if (*value_list == NULL) {
		RETURN(ENOMEM);
	}

	(*value_list)->count = count;
	i = 0;
	SET_STRING((*value_list)->value[i].s,MD_NO_SELECTION);
	i++;

	/* Never fill more entries than were allocated. */
	for (j = 0; (i < count) && (j < MAX_MD_DEVICES); j++ ) {
		char * name;

		/* Check for null object, if missing, skip. */
		if (!volume->child_object[j]) {
			continue;
		}

		/* Faulty, or active without being in sync. */
		if ( !((volume->super_block->disks[j].state & (1 << MD_DISK_FAULTY)) ||
		       ((volume->super_block->disks[j].state & (1 << MD_DISK_ACTIVE)) &&
			!(volume->super_block->disks[j].state & (1 << MD_DISK_SYNC) )))) {
			continue;
		}

		name = EngFncs->engine_alloc(strlen(volume->child_object[j]->name) + 1);
		if (name == NULL) {
			rc = ENOMEM;
			break;
		}
		strcpy(name, volume->child_object[j]->name);
		(*value_list)->value[i].s = name;
		i++;
	}

	/* Advertise only the entries that really hold a string. */
	(*value_list)->count = i;

	RETURN(rc);
}


/*
 * Get the list of objects on the system that we can use.
 *
 * Fetches the Engine's list of disks, segments and regions that are valid
 * input objects, drops every object that appears in selected_objects, and
 * builds a value list holding MD_NO_SELECTION followed by the name of each
 * remaining object whose size is at least min_size.  Any list already at
 * *value_list is freed first.
 *
 * Returns the Engine's error when the object list cannot be obtained,
 * ENOMEM when an allocation fails, otherwise the result of destroying the
 * temporary object list.
 */
static int get_object_list( value_list_t ** value_list,
			    dlist_t         selected_objects,
			    sector_count_t  min_size) {

	int rc = 0;
	int alloc_rc = 0;
	storage_object_t * object;
	dlist_t tmp_list, selected_tmp_list;
	int count, i, tag, size;

	LOG_ENTRY;

	rc = EngFncs->get_object_list(DISK | SEGMENT | REGION,
				      DATA_TYPE,
				      NULL,
				      VALID_INPUT_OBJECT,
				      &tmp_list);
	if (rc) {
		/* tmp_list cannot be trusted; the caller's list is left as it was. */
		LOG_ERROR("Error getting object list = %d....\n", rc);
		RETURN(rc);
	}

	/*
	 * Loop through the selected objects, removing those objects from
	 * tmp_list.  A copy is walked because extraction empties the list.
	 */
	selected_tmp_list = CreateList();
	if (!selected_tmp_list) {
		LOG_ERROR("Error DLIST Create failed, give up \n");
		DestroyList(&tmp_list, FALSE);
		RETURN(ENOMEM);
	}
	rc = CopyList(selected_tmp_list, selected_objects, InsertAtStart);
	if (rc) {
		LOG_ERROR("Error copying list\n");
	}

	GoToStartOfList(selected_tmp_list);
	while (!BlindExtractObject(selected_tmp_list,&size, (TAG *)&tag, NULL, (void **)&object)) {
		LOG_DETAILS("Object %s selected, removing from spare list\n",object->name);
		rc = DeleteObject(tmp_list, object);
		if (rc) {
			LOG_ERROR("Error removimg object %s from list of top objects rc = %d\n",object->name,rc);
			rc = 0;	  /* Needed to fudge list on modify */
		}
	}
	DestroyList(&selected_tmp_list, FALSE);

	/* Release the previous list and its strings. */
	if (*value_list) {
		for (i = 0; i < (*value_list)->count; i++) {
			if ((*value_list)->value[i].s) {
				EngFncs->engine_free((*value_list)->value[i].s);
			}
		}
		EngFncs->engine_free(*value_list);
	}
	*value_list = NULL;

	GetListSize(tmp_list, &count);
	/* Increment count to hold the 'None' selection. */
	count++;
	*value_list = EngFncs->engine_alloc(count * sizeof(value_t) + sizeof(value_list_t));  /* yeah it's too big, but so what */
	if (*value_list == NULL) {
		DestroyList(&tmp_list, FALSE);
		RETURN(ENOMEM);
	}

	/* rc is still non-zero here only if copying the selected list failed. */
	if (!rc) {
		i = 0;
		SET_STRING((*value_list)->value[i].s, MD_NO_SELECTION);
		i++;
		while (BlindExtractObject(tmp_list, &size,(TAG *) &tag, NULL, (void**)&object)==0) {
			if (object->size >= min_size) {
				char * name = EngFncs->engine_alloc(strlen(object->name) + 1);

				if (name == NULL) {
					alloc_rc = ENOMEM;
					break;
				}
				strcpy(name, object->name);
				(*value_list)->value[i].s = name;
				i++;
			}
		}
		(*value_list)->count = i;
	}
	rc = DestroyList(&tmp_list, FALSE);

	/* An allocation failure takes precedence over the DestroyList result. */
	if (alloc_rc) {
		rc = alloc_rc;
	}

	RETURN(rc);
}


/*
 * Build the list of selectable RAID levels.
 *
 * Allocates a two-entry value list holding RAID4_LEVEL_NAME and
 * RAID5_LEVEL_NAME and stores it in *raid_level_list.
 *
 * Returns ENOMEM when the list cannot be allocated (*raid_level_list is
 * then NULL), otherwise 0.
 */
static int get_raid_level_list(value_list_t * * raid_level_list) {

	int            rc = 0;
	value_list_t * levels;

	LOG_ENTRY;

	/* value_list_t plus one extra value_t, for a list of two entries. */
	levels = EngFncs->engine_alloc(sizeof(value_list_t) + sizeof(value_t));
	*raid_level_list = levels;

	if (levels == NULL) {
		RETURN(ENOMEM);
	}

	levels->count = 2;

	SET_STRING(levels->value[0].s, RAID4_LEVEL_NAME);
	SET_STRING(levels->value[1].s, RAID5_LEVEL_NAME);

	RETURN(rc);
}


/*
 * Build the list of selectable parity algorithms.
 *
 * Allocates a four-entry value list and stores it in *algorithm_list.
 * Each algorithm's name is placed at the index given by its ALGORITHM_*
 * value.
 *
 * Returns ENOMEM when the list cannot be allocated (*algorithm_list is
 * then NULL), otherwise 0.
 */
static int get_algorithm_list(value_list_t * * algorithm_list) {

	int            rc = 0;
	value_list_t * algorithms;

	LOG_ENTRY;

	/* value_list_t plus three extra value_t, for a list of four entries. */
	algorithms = EngFncs->engine_alloc(sizeof(value_list_t) + 3 * sizeof(value_t));
	*algorithm_list = algorithms;

	if (algorithms == NULL) {
		RETURN(ENOMEM);
	}

	algorithms->count = 4;

	SET_STRING(algorithms->value[ALGORITHM_LEFT_ASYMMETRIC].s,  ALGORITHM_LEFT_ASYMMETRIC_NAME);
	SET_STRING(algorithms->value[ALGORITHM_RIGHT_ASYMMETRIC].s, ALGORITHM_RIGHT_ASYMMETRIC_NAME);
	SET_STRING(algorithms->value[ALGORITHM_LEFT_SYMMETRIC].s,   ALGORITHM_LEFT_SYMMETRIC_NAME);
	SET_STRING(algorithms->value[ALGORITHM_RIGHT_SYMMETRIC].s,  ALGORITHM_RIGHT_SYMMETRIC_NAME);

	RETURN(rc);
}


/* Function: raid5_init_task
 *
 *  Determine the type of Task that is being performed, and set up the
 *  context structure with the appropriate initial values.
 */
static int raid5_init_task( task_context_t * context ) {

	int                  rc = 0;
	dlist_t              tmp_list;
	void               * waste;
	md_volume_t        * volume;
	raid5_conf_t       * conf;
	evms_md_array_info_t md_info = {0};

	LOG_ENTRY;

	/* Parameter check */
	if (!context) {
		RETURN(EFAULT);
	}

	switch (context->action) {
	
	case EVMS_Task_Create:

		context->option_descriptors->count = MD_CREATE_OPTIONS_COUNT;

		/* Spare disk option */
		context->option_descriptors->option[MD_OPTION_SPARE_DISK_INDEX].flags = EVMS_OPTION_FLAGS_NOT_REQUIRED;
		/* Get the list of disks that can be spares. */
		get_object_list((value_list_t **)&context->option_descriptors->option[MD_OPTION_SPARE_DISK_INDEX].constraint.list,
				context->selected_objects,
				0);
		context->option_descriptors->option[MD_OPTION_SPARE_DISK_INDEX].constraint_type = EVMS_Collection_List;
		context->option_descriptors->option[MD_OPTION_SPARE_DISK_INDEX].help = NULL;
		SET_STRING(context->option_descriptors->option[MD_OPTION_SPARE_DISK_INDEX].name, MD_OPTION_SPARE_DISK_NAME );
		context->option_descriptors->option[MD_OPTION_SPARE_DISK_INDEX].size = EVMS_VOLUME_NAME_SIZE + 1;
		SET_STRING(context->option_descriptors->option[MD_OPTION_SPARE_DISK_INDEX].tip, "Object to use as a spare disk in the array" );
		SET_STRING(context->option_descriptors->option[MD_OPTION_SPARE_DISK_INDEX].title, "Spare Disk" );
		context->option_descriptors->option[MD_OPTION_SPARE_DISK_INDEX].type = EVMS_Type_String;
		context->option_descriptors->option[MD_OPTION_SPARE_DISK_INDEX].unit = EVMS_Unit_None;
		context->option_descriptors->option[MD_OPTION_SPARE_DISK_INDEX].value.s = calloc(1, EVMS_VOLUME_NAME_SIZE + 1);

		/* Chunk size option */
		context->option_descriptors->option[MD_OPTION_CHUNK_SIZE_INDEX].flags = 0;
		SET_POWER2_LIST(context->option_descriptors->option[MD_OPTION_CHUNK_SIZE_INDEX].constraint.list, MD_MIN_CHUNK_SIZE, MD_MAX_CHUNK_SIZE);
		context->option_descriptors->option[MD_OPTION_CHUNK_SIZE_INDEX].constraint_type = EVMS_Collection_List;
		context->option_descriptors->option[MD_OPTION_CHUNK_SIZE_INDEX].help = NULL;
		SET_STRING(context->option_descriptors->option[MD_OPTION_CHUNK_SIZE_INDEX].name, MD_OPTION_CHUNK_SIZE_NAME );
		context->option_descriptors->option[MD_OPTION_CHUNK_SIZE_INDEX].size = sizeof(u_int32_t);
		SET_STRING(context->option_descriptors->option[MD_OPTION_CHUNK_SIZE_INDEX].tip, "Size of the chunks in the RAID array" );
		SET_STRING(context->option_descriptors->option[MD_OPTION_CHUNK_SIZE_INDEX].title, "Chunk size" );
		context->option_descriptors->option[MD_OPTION_CHUNK_SIZE_INDEX].type = EVMS_Type_Unsigned_Int32;
		context->option_descriptors->option[MD_OPTION_CHUNK_SIZE_INDEX].unit = EVMS_Unit_Kilobytes;
		context->option_descriptors->option[MD_OPTION_CHUNK_SIZE_INDEX].value.ui32 = MD_DEFAULT_CHUNK_SIZE;

		/* RAID level option */
		context->option_descriptors->option[MD_OPTION_RAID_LEVEL_INDEX].flags = 0;
		get_raid_level_list(&context->option_descriptors->option[MD_OPTION_RAID_LEVEL_INDEX].constraint.list);
		context->option_descriptors->option[MD_OPTION_RAID_LEVEL_INDEX].constraint_type = EVMS_Collection_List;
		context->option_descriptors->option[MD_OPTION_RAID_LEVEL_INDEX].help = NULL;
		SET_STRING(context->option_descriptors->option[MD_OPTION_RAID_LEVEL_INDEX].name, MD_OPTION_RAID_LEVEL_NAME);
		context->option_descriptors->option[MD_OPTION_RAID_LEVEL_INDEX].size = 19;
		SET_STRING(context->option_descriptors->option[MD_OPTION_RAID_LEVEL_INDEX].tip, "RAID Level -- RAID4 or RAID5" );
		SET_STRING(context->option_descriptors->option[MD_OPTION_RAID_LEVEL_INDEX].title, "RAID level" );
		context->option_descriptors->option[MD_OPTION_RAID_LEVEL_INDEX].type = EVMS_Type_String;
		context->option_descriptors->option[MD_OPTION_RAID_LEVEL_INDEX].unit = EVMS_Unit_None;
		SET_STRING(context->option_descriptors->option[MD_OPTION_RAID_LEVEL_INDEX].value.s, RAID5_LEVEL_NAME);

		/* Parity algorithm option */
		context->option_descriptors->option[MD_OPTION_PARITY_ALGORITHM_INDEX].flags = 0;
		get_algorithm_list(&context->option_descriptors->option[MD_OPTION_PARITY_ALGORITHM_INDEX].constraint.list);
		context->option_descriptors->option[MD_OPTION_PARITY_ALGORITHM_INDEX].constraint_type = EVMS_Collection_List;
		context->option_descriptors->option[MD_OPTION_PARITY_ALGORITHM_INDEX].help = NULL;
		SET_STRING(context->option_descriptors->option[MD_OPTION_PARITY_ALGORITHM_INDEX].name, MD_OPTION_PARITY_ALGORITHM_NAME);
		context->option_descriptors->option[MD_OPTION_PARITY_ALGORITHM_INDEX].size = 19;
		SET_STRING(context->option_descriptors->option[MD_OPTION_PARITY_ALGORITHM_INDEX].tip, "RAID5 Parity algorithm" );
		SET_STRING(context->option_descriptors->option[MD_OPTION_PARITY_ALGORITHM_INDEX].title, "RAID5 Algorithm" );
		context->option_descriptors->option[MD_OPTION_PARITY_ALGORITHM_INDEX].type = EVMS_Type_String;
		context->option_descriptors->option[MD_OPTION_PARITY_ALGORITHM_INDEX].unit = EVMS_Unit_None;
		context->option_descriptors->option[MD_OPTION_PARITY_ALGORITHM_INDEX].value.s = EngFncs->engine_alloc(20);
		if (context->option_descriptors->option[MD_OPTION_PARITY_ALGORITHM_INDEX].value.s != NULL) {
			strcpy(context->option_descriptors->option[MD_OPTION_PARITY_ALGORITHM_INDEX].value.s, ALGORITHM_LEFT_SYMMETRIC_NAME);
		} else {
			RETURN(ENOMEM);
		}

		/* Get a list of all valid input disks, segments, and regions. */
		EngFncs->get_object_list(DISK | SEGMENT | REGION,
					 DATA_TYPE,
					 NULL,
					 VALID_INPUT_OBJECT,
					 &tmp_list);

		/* Move these items to the acceptable objects list. */
		md_transfer_list(tmp_list, context->acceptable_objects);
		DestroyList(&tmp_list, FALSE);

		context->min_selected_objects = 2;
		context->max_selected_objects = MAX_MD_DEVICES;
		break;

	case EVMS_Task_Set_Info:

		volume = (md_volume_t *) context->object->private_data;
		conf = mdvol_to_conf(volume);

		if (md_get_kernel_info(volume,&md_info) == 0) {
			if (md_info.state & EVMS_MD_ARRAY_SYNCING) {
				int    answer = 0;
				char * choice_text[2] = { "OK", NULL};
				EngFncs->user_message(my_plugin, &answer,choice_text,
						      "Object %s is currently being synced.  "
						      "It can not be modified until the sync is complete.\n",
						      volume->name);
				rc = EBUSY;
				break;
			}
		}

		if (raid5_verify_and_fix_array(volume, 0, 0)) {
			int    answer = 0;
			char * choice_text[3] = { "Don't Fix", "Fix", NULL};
			EngFncs->user_message(my_plugin, &answer,choice_text,
					      "Errors have been detected on MD region %s.  "
					      "Check the message display for messages detailing the errors requiring fixing.  "
					      "Selecting \"Fix\" will permanently change the array.  "
					      "If you elect not to fix the region at this time, you may not modify the region.\n",
					      volume->name);
			if (answer) {
				raid5_verify_and_fix_array(volume, 1, 0);
			} else {
				RETURN(EINVAL);
			}
		}

		context->min_selected_objects = 1;
		context->max_selected_objects = 1;
		context->option_descriptors->count = MD_SET_INFO_OPTIONS_COUNT;

		/*
		 * Add the reference object (the region we are modifying) to the
		 * acceptable and selected objects lists to be able to parse
		 * this object out of option lists.
		 */
		InsertObject(context->acceptable_objects, sizeof(storage_object_t),context->object,
			     REGION_TAG, NULL, InsertAtStart, TRUE, &waste);
		InsertObject(context->selected_objects, sizeof(storage_object_t),context->object,
			     REGION_TAG, NULL, InsertAtStart, TRUE, &waste);

		context->option_descriptors->option[MD_OPTION_ADD_SPARE_INDEX].flags = EVMS_OPTION_FLAGS_NOT_REQUIRED;
		/* Get the list of disks that can be spares. */
		get_object_list((value_list_t **)&context->option_descriptors->option[MD_OPTION_ADD_SPARE_INDEX].constraint.list,
				context->selected_objects,
				(volume->super_block->size * BLOCK_SIZE) >> EVMS_VSECTOR_SIZE_SHIFT);
		context->option_descriptors->option[MD_OPTION_ADD_SPARE_INDEX].constraint_type = EVMS_Collection_List;
		context->option_descriptors->option[MD_OPTION_ADD_SPARE_INDEX].help = NULL;
		SET_STRING(context->option_descriptors->option[MD_OPTION_ADD_SPARE_INDEX].name, MD_OPTION_ADD_SPARE_NAME );
		context->option_descriptors->option[MD_OPTION_ADD_SPARE_INDEX].size = EVMS_VOLUME_NAME_SIZE + 1;
		SET_STRING(context->option_descriptors->option[MD_OPTION_ADD_SPARE_INDEX].tip, "Add this object to the array as a spare disk." );
		SET_STRING(context->option_descriptors->option[MD_OPTION_ADD_SPARE_INDEX].title, "Add spare disk" );
		context->option_descriptors->option[MD_OPTION_ADD_SPARE_INDEX].type = EVMS_Type_String;
		context->option_descriptors->option[MD_OPTION_ADD_SPARE_INDEX].unit = EVMS_Unit_None;
		context->option_descriptors->option[MD_OPTION_ADD_SPARE_INDEX].value.s = EngFncs->engine_alloc(EVMS_VOLUME_NAME_SIZE + 1);
		strcpy(context->option_descriptors->option[MD_OPTION_ADD_SPARE_INDEX].value.s, MD_NO_SELECTION);

		context->option_descriptors->option[MD_OPTION_REMOVE_SPARE_INDEX].flags = EVMS_OPTION_FLAGS_NOT_REQUIRED;
		/* Get the list of disks that are spares. */
		get_spare_list((value_list_t **)&context->option_descriptors->option[MD_OPTION_REMOVE_SPARE_INDEX].constraint.list, context);
		context->option_descriptors->option[MD_OPTION_REMOVE_SPARE_INDEX].constraint_type = EVMS_Collection_List;
		context->option_descriptors->option[MD_OPTION_REMOVE_SPARE_INDEX].help = NULL;
		SET_STRING(context->option_descriptors->option[MD_OPTION_REMOVE_SPARE_INDEX].name, MD_OPTION_REMOVE_SPARE_NAME );
		context->option_descriptors->option[MD_OPTION_REMOVE_SPARE_INDEX].size = EVMS_VOLUME_NAME_SIZE + 1;
		SET_STRING(context->option_descriptors->option[MD_OPTION_REMOVE_SPARE_INDEX].tip, "Remove this spare disk from the array." );
		SET_STRING(context->option_descriptors->option[MD_OPTION_REMOVE_SPARE_INDEX].title, "Remove spare disk" );
		context->option_descriptors->option[MD_OPTION_REMOVE_SPARE_INDEX].type = EVMS_Type_String;
		context->option_descriptors->option[MD_OPTION_REMOVE_SPARE_INDEX].unit = EVMS_Unit_None;
		context->option_descriptors->option[MD_OPTION_REMOVE_SPARE_INDEX].value.s = EngFncs->engine_alloc(EVMS_VOLUME_NAME_SIZE + 1);
		strcpy(context->option_descriptors->option[MD_OPTION_REMOVE_SPARE_INDEX].value.s, MD_NO_SELECTION);

		context->option_descriptors->option[MD_OPTION_REMOVE_FAULTY_INDEX].flags = EVMS_OPTION_FLAGS_NOT_REQUIRED;
		/* Get the list of disks that are faulty. */
		get_faulty_list((value_list_t **)&context->option_descriptors->option[2].constraint.list, context);
		context->option_descriptors->option[MD_OPTION_REMOVE_FAULTY_INDEX].constraint_type = EVMS_Collection_List;
		context->option_descriptors->option[MD_OPTION_REMOVE_FAULTY_INDEX].help = NULL;
		SET_STRING(context->option_descriptors->option[MD_OPTION_REMOVE_FAULTY_INDEX].name, MD_OPTION_REMOVE_FAULTY_NAME);
		context->option_descriptors->option[MD_OPTION_REMOVE_FAULTY_INDEX].size = EVMS_VOLUME_NAME_SIZE + 1;
		SET_STRING(context->option_descriptors->option[MD_OPTION_REMOVE_FAULTY_INDEX].tip, "Remove this faulty disk from the array." );
		SET_STRING(context->option_descriptors->option[MD_OPTION_REMOVE_FAULTY_INDEX].title, "Remove faulty disk" );
		context->option_descriptors->option[MD_OPTION_REMOVE_FAULTY_INDEX].type = EVMS_Type_String;
		context->option_descriptors->option[MD_OPTION_REMOVE_FAULTY_INDEX].unit = EVMS_Unit_None;
		context->option_descriptors->option[MD_OPTION_REMOVE_FAULTY_INDEX].value.s = EngFncs->engine_alloc(EVMS_VOLUME_NAME_SIZE + 1);
		strcpy(context->option_descriptors->option[MD_OPTION_REMOVE_FAULTY_INDEX].value.s, MD_NO_SELECTION);

		context->option_descriptors->option[MD_OPTION_MARK_FAULTY_INDEX].flags = EVMS_OPTION_FLAGS_NOT_REQUIRED;
		/* Get the list of disks that are active. */
		get_active_list((value_list_t **)&context->option_descriptors->option[MD_OPTION_MARK_FAULTY_INDEX].constraint.list, context);
		context->option_descriptors->option[MD_OPTION_MARK_FAULTY_INDEX].constraint_type = EVMS_Collection_List;
		context->option_descriptors->option[MD_OPTION_MARK_FAULTY_INDEX].help = NULL;
		SET_STRING(context->option_descriptors->option[MD_OPTION_MARK_FAULTY_INDEX].name, MD_OPTION_MARK_FAULTY_NAME);
		context->option_descriptors->option[MD_OPTION_MARK_FAULTY_INDEX].size = EVMS_VOLUME_NAME_SIZE + 1;
		SET_STRING(context->option_descriptors->option[MD_OPTION_MARK_FAULTY_INDEX].tip, "Mark an active disk faulty so that it will be removed from the array.  The array must not be running in degrade mode and there must be at least one spare disk available to replace it." );
		SET_STRING(context->option_descriptors->option[MD_OPTION_MARK_FAULTY_INDEX].title, "Mark faulty" );
		context->option_descriptors->option[MD_OPTION_MARK_FAULTY_INDEX].type = EVMS_Type_String;
		context->option_descriptors->option[MD_OPTION_MARK_FAULTY_INDEX].unit = EVMS_Unit_None;
		context->option_descriptors->option[MD_OPTION_MARK_FAULTY_INDEX].value.s = EngFncs->engine_alloc(EVMS_VOLUME_NAME_SIZE + 1);
		strcpy(context->option_descriptors->option[MD_OPTION_MARK_FAULTY_INDEX].value.s, MD_NO_SELECTION);

		break;

	default:
		break;
	}

	RETURN(rc);
}


#define PERCENT_WARN_THRESHOLD  5

static void warn_if_big_objects( task_context_t * context ) {

	int rc;
	storage_object_t * obj = NULL;
	storage_object_t * spare = NULL;
	u_int64_t smallest_size = 0;
	uint size;
	TAG tag;

	LOG_ENTRY;

	/* Find the smallest object. */
	GoToStartOfList(context->selected_objects);

	rc = BlindGetObject(context->selected_objects,
			    &size,
			    &tag,
			    NULL,
			    TRUE,
			    (ADDRESS *)&obj);

	if (rc == DLIST_SUCCESS) {
		smallest_size = MD_NEW_SIZE_SECTORS(obj->size);

		rc = NextItem(context->selected_objects);
	}

	while (rc == DLIST_SUCCESS) {

		rc = BlindGetObject(context->selected_objects,
				    &size,
				    &tag,
				    NULL,
				    TRUE,
				    (ADDRESS *)&obj);

		if (rc == DLIST_SUCCESS) {
			smallest_size = min(smallest_size, MD_NEW_SIZE_SECTORS(obj->size));

			rc = NextItem(context->selected_objects);
		}
	}

	/*
	 * If we got a smallest size, then check the size of the spare, if one
	 * is specified and see if it is the smallest.
	 */
	if (smallest_size != 0) {
		if (context->option_descriptors->option[MD_OPTION_SPARE_DISK_INDEX].value.s != NULL) {
			spare = find_object(context->option_descriptors->option[MD_OPTION_SPARE_DISK_INDEX].value.s);

			if (spare != NULL) {
				smallest_size = min(smallest_size, MD_NEW_SIZE_SECTORS(spare->size));
			}
		}
	}

	/*
	 * Now go through the objects again and issue a warnign message for
	 * any object whose size exceeds the threshold over the smallest
	 * object size.
	 */
	if (smallest_size != 0) {
		u_int64_t diffsize;

		GoToStartOfList(context->selected_objects);

		do {
			rc = BlindGetObject(context->selected_objects,
					    &size,
					    &tag,
					    NULL,
					    TRUE,
					    (ADDRESS *) &obj);

			if (rc == DLIST_SUCCESS) {
				diffsize = MD_NEW_SIZE_SECTORS(obj->size) - smallest_size;

				if (diffsize > (smallest_size * PERCENT_WARN_THRESHOLD) / 100) {
					EngFncs->user_message(&raid5_plugin_record,
							      NULL,
							      NULL,
							      "The %s object is %lld KB larger than the smallest object in the RAID array.  "
							      "The extra space will not be used.",
							      obj->name, diffsize * EVMS_VSECTOR_SIZE / 1024);
				}

				rc = NextItem(context->selected_objects);
			}

		} while (rc == DLIST_SUCCESS);

		/*
		 * If we have a spare, check its size too.
		 */
		if (spare != NULL) {
			diffsize = MD_NEW_SIZE_SECTORS(spare->size) - smallest_size;

			if (diffsize > (smallest_size * PERCENT_WARN_THRESHOLD) / 100) {
				EngFncs->user_message(&raid5_plugin_record,
						      NULL,
						      NULL,
						      "The %s object is %lld KB larger than the smallest object in the RAID array.  "
						      "The extra space will not be used.",
						      spare->name, diffsize * EVMS_VSECTOR_SIZE / 1024);
			}
		}
	}

	LOG_EXIT(0);
}

/* Function: raid5_set_option
 *
 *  Determine the type of Task that is being performed. Then examine the
 *  desired option (using the index), and verify that the given value is
 *  appropriate. Reset the value if necessary and possible. Adjust other
 *  options as appropriate.
 */
static int raid5_set_option( task_context_t * context,
			     u_int32_t        index,
			     value_t        * value,
			     task_effect_t  * effect ) {
	int rc = 0;

	LOG_ENTRY;

	/* Parameter check */
	if (!context || !value || !effect) {
		RETURN(EFAULT);
	}

	*effect = 0;

	switch (context->action) {
	
	case EVMS_Task_Create:
		switch (index) {
		
		case MD_OPTION_SPARE_DISK_INDEX:
			/*
			 * Not worth validation, will catch when we try to find
			 * the original.
			 */
			strcpy(context->option_descriptors->option[index].value.s, value->s);
			warn_if_big_objects(context);
			break;

		case MD_OPTION_CHUNK_SIZE_INDEX:
			if ((value->ui32 < MD_MIN_CHUNK_SIZE) ||
			    (value->ui32 > MD_MAX_CHUNK_SIZE)) {
				/* Chunk size is out of bounds. */
				rc = EINVAL;

			} else {
				/*
				 * Chunk size must be a power of 2.
				 * calc_log2 returns -1 if the number is not a
				 * power of 2.
				 */
				if (calc_log2((long) value->ui32) == -1) {
					rc = EINVAL;
				}
			}
			break;

		case MD_OPTION_RAID_LEVEL_INDEX:
			if (strcmp(value->s, RAID4_LEVEL_NAME) == 0) {
				strcpy(context->option_descriptors->option[index].value.s, value->s);
				/*
				 * RAID4 does not have a parity algorithm.
				 * Disable the algorithm option.
				 */
				context->option_descriptors->option[MD_OPTION_PARITY_ALGORITHM_INDEX].flags |= (EVMS_OPTION_FLAGS_NOT_REQUIRED | EVMS_OPTION_FLAGS_INACTIVE);

				*effect |= EVMS_Effect_Reload_Options;

			} else if (strcmp(value->s, RAID5_LEVEL_NAME) == 0) {
				strcpy(context->option_descriptors->option[index].value.s, value->s);
				/*
				 * RAID5 does have a parity algorithm.
				 * Make sure the algorthm option is active and required.
				 */
				context->option_descriptors->option[MD_OPTION_PARITY_ALGORITHM_INDEX].flags &= ~(EVMS_OPTION_FLAGS_NOT_REQUIRED | EVMS_OPTION_FLAGS_INACTIVE);

				*effect |= EVMS_Effect_Reload_Options;

			} else {
				rc = EINVAL;
			}
			break;

		case MD_OPTION_PARITY_ALGORITHM_INDEX:
			if ((strcmp(value->s,ALGORITHM_LEFT_ASYMMETRIC_NAME) == 0)  ||
			    (strcmp(value->s,ALGORITHM_RIGHT_ASYMMETRIC_NAME) == 0) ||
			    (strcmp(value->s,ALGORITHM_LEFT_SYMMETRIC_NAME) == 0)   ||
			    (strcmp(value->s,ALGORITHM_RIGHT_SYMMETRIC_NAME) == 0) ) {
				strcpy(context->option_descriptors->option[index].value.s, value->s);

			} else {
				rc = EINVAL;
			}

			break;

		default:
			break;
		}
		break;

	case EVMS_Task_Set_Info:
		switch (index) {
		case MD_OPTION_ADD_SPARE_INDEX:
		case MD_OPTION_MARK_FAULTY_INDEX:
			strcpy(context->option_descriptors->option[index].value.s, value->s);
			break;

		case MD_OPTION_REMOVE_SPARE_INDEX:
			strcpy(context->option_descriptors->option[index].value.s, value->s);
			/* This option can affect the MArk Faulty option. */
			get_active_list((value_list_t **)&context->option_descriptors->option[MD_OPTION_MARK_FAULTY_INDEX].constraint.list, context);
			*effect |= EVMS_Effect_Reload_Options;
			break;

		case MD_OPTION_REMOVE_FAULTY_INDEX:
			strcpy(context->option_descriptors->option[index].value.s, value->s);
			/* This option can affect the Remove Spare option. */
			get_spare_list((value_list_t **)&context->option_descriptors->option[MD_OPTION_REMOVE_SPARE_INDEX].constraint.list, context);
			*effect |= EVMS_Effect_Reload_Options;
			break;

		default:
			break;
		}

	default:
		break;
	}
	RETURN(rc);
}


/* Function: raid5_set_objects
 *
 *  Determine the type of task, and then validate that the objects on the
 *  "selected" list are valid for that task. If so, adjust the option
 *  descriptor as appropriate.
 *
 *  For a create task the spare disk constraint list is rebuilt from the
 *  current selection, the user is warned about oversized objects, and
 *  *effect is set to EVMS_Effect_Reload_Options.  Other actions leave the
 *  context and *effect untouched.
 *
 *  Returns EFAULT if a parameter is NULL, otherwise 0.
 */
static int raid5_set_objects( task_context_t * context,
			      dlist_t          declined_objects,
			      task_effect_t  * effect ) {
	int rc = 0;

	LOG_ENTRY;

	/* Parameter check */
	if (!context || !declined_objects || !effect) {
		RETURN(EFAULT);
	}

	switch (context->action) {

	case EVMS_Task_Create:
		/*
		 * Refresh the spare disk choices; use the named index so this
		 * stays in step with raid5_init_task.
		 */
		get_object_list((value_list_t **)&context->option_descriptors->option[MD_OPTION_SPARE_DISK_INDEX].constraint.list,
				context->selected_objects,
				0);
		warn_if_big_objects(context);
		*effect = EVMS_Effect_Reload_Options;
		break;

	default:
		break;
	}
	RETURN(rc);
}


/* Function: raid5_get_info
 *
 *  Return MD-specific information about the specified region. If the
 *  name field is set, only return the "extra" information pertaining
 *  to that name.
 *
 *  Returns EFAULT if region or info_array is NULL, EINVAL if the region
 *  is not owned by this plugin, otherwise the result of md_get_info().
 */
static int raid5_get_info( storage_object_t       * region,
			   char                   * name,
			   extended_info_array_t ** info_array ) {

	md_volume_t * volume = NULL;
	int           rc= 0;

	LOG_ENTRY;

	/* Parameter check; region is dereferenced below. */
	if (!region || !info_array) {
		RETURN(EFAULT);
	}

	/* Make sure this is an MD RAID5 region */
	if (region->plugin != raid5_plugin) {
		LOG_ERROR("Region %s is not owned by MD RAID5\n", region->name);
		RETURN(EINVAL);
	}

	volume = region->private_data;

	rc = md_get_info(volume, name, info_array);

	RETURN(rc);
}


/*
 * Scan the option array of a set-info request and hand back a copy of the
 * object name chosen for each of the four set-info options.
 *
 * An output pointer is written only when its option is present and its
 * value differs from MD_NO_SELECTION; otherwise it is left as the caller
 * initialized it.  The copies are made with SET_STRING.
 *
 * Returns rc, which is 0 unless SET_STRING changes it.
 */
static int get_set_info_options(option_array_t * options, char ** add_spare_disk, char ** remove_spare_disk,
				char ** remove_faulty_disk, char ** mark_faulty_disk) {

	int     idx;
	int     rc = 0;
	char ** target;
	char  * selection;

	LOG_ENTRY;

	for (idx = 0; idx < options->count; idx++) {

		/* Pick the output that corresponds to this option number. */
		switch (options->option[idx].number) {

		case MD_OPTION_ADD_SPARE_INDEX:
			target = add_spare_disk;
			break;

		case MD_OPTION_REMOVE_SPARE_INDEX:
			target = remove_spare_disk;
			break;

		case MD_OPTION_REMOVE_FAULTY_INDEX:
			target = remove_faulty_disk;
			break;

		case MD_OPTION_MARK_FAULTY_INDEX:
			target = mark_faulty_disk;
			break;

		default:
			/* Not an option handled here; go on to the next one. */
			continue;
		}

		selection = options->option[idx].value.s;

		if (strcmp(MD_NO_SELECTION, selection) != 0) {
			SET_STRING(*target, selection);
		}
	}
	RETURN(rc);
}


/*
 * Add the object named avail_disk to the array as a spare disk.
 *
 * The spare is placed in the slot max(volume->nr_disks, raid_disks).  A
 * superblock is cloned for that slot, the object is attached to the
 * region, a new disk entry flagged MD_DISK_NEW is written into the master
 * superblock, the RAID5 private configuration and the disk counts are
 * updated, and the region's volumes are rediscovered.
 *
 * Returns 0 on success, ENOENT if the object cannot be found, EINVAL if
 * the array has no free slot, or the error from md_clone_superblock().
 */
static int add_spare(md_volume_t * volume, char * avail_disk) {

	int                rc = 0;
	storage_object_t * object;
	mdp_disk_t         disk;
	raid5_conf_t     * conf = mdvol_to_conf(volume);
	int                spare_index;

	LOG_ENTRY;

	spare_index = max(volume->nr_disks, volume->super_block->raid_disks);

	object = find_object(avail_disk);
	if (object == NULL) {
		MESSAGE("Could not find object %s to add it to the %s array.\n", avail_disk, volume->name);
		RETURN(ENOENT);
	}

	/* The per-disk arrays are indexed below; stay inside them. */
	if (spare_index >= MAX_MD_DEVICES) {
		MESSAGE("Array %s has no free slot for spare object %s.\n", volume->name, avail_disk);
		RETURN(EINVAL);
	}

	rc = md_clone_superblock(volume, spare_index);
	if (rc != 0) {
		MESSAGE("Error code %d when creating a new superblock for object %s.\n", rc, avail_disk);
		RETURN(rc);
	}

	volume->child_object[spare_index] = object;
	md_append_region_to_object(volume->region, object);

	/*
	 * Start from an all-zero entry so that no uninitialized bytes are
	 * copied into the superblock.
	 */
	memset(&disk, 0, sizeof(disk));
	get_legacy_dev(volume, object->name, &disk.major, &disk.minor);
	disk.number = spare_index;
	disk.raid_disk = spare_index;
	disk.state = 0;

	memcpy(&volume->super_block->disks[spare_index], &disk, sizeof(mdp_disk_t));
	volume->super_block->disks[spare_index].state |= (1 << MD_DISK_NEW);

	md_add_modify_object_to_list(volume, EVMS_MD_ADD, disk.major, disk.minor);

	/* Update the RAID5 private configuration. */
	conf->disks[spare_index].dev = object;
	conf->disks[spare_index].operational = 0;
	conf->disks[spare_index].number = disk.number;
	conf->disks[spare_index].raid_disk = disk.raid_disk;
	conf->disks[spare_index].write_only = 0;
	conf->disks[spare_index].spare = 1;
	conf->disks[spare_index].used_slot = 1;

	conf->spare_disks++;
	/* Make this the configuration's current spare if it has none. */
	if (conf->spare.used_slot == 0) {
		conf->spare = conf->disks[spare_index];
	}

	/* Update the master superblock fields. */
	volume->super_block->spare_disks++;
	volume->super_block->working_disks++;
	volume->super_block->nr_disks++;

	/* Update the volume counts. */
	volume->nr_disks++;

	md_rediscover_volumes_for_region(volume->region);

	RETURN(rc);
}


/*
 * Remove the spare disk named spare_disk from the array.
 *
 * The disk must be present in volume->child_object[] and its superblock
 * state must have no bits set other than MD_DISK_NEW.  Its superblock area
 * is scheduled to be wiped (KILL_SECTORS), the per-disk arrays are
 * collapsed over the freed slot, the disk counts are decremented and the
 * region's volumes are rediscovered.
 *
 * Returns 0 on success, or EINVAL if the disk is not in the array or is
 * not a spare.
 */
static int remove_spare(md_volume_t * volume, char * spare_disk) {

	int            rc = 0;
	int            i, k;
	raid5_conf_t * conf = mdvol_to_conf(volume);

	LOG_ENTRY;

	/* Locate the slot that holds the named disk. */
	for (i = 0; i < MAX_MD_DEVICES; i++) {
		if ((volume->child_object[i] != NULL) &&
		    (strcmp(spare_disk, volume->child_object[i]->name) == 0)) {
			break;
		}
	}

	if (i >= MAX_MD_DEVICES) {
		/* Didn't find a disk with that name in the array. */
		LOG_ERROR("Disk %s was not found in array %s.\n", spare_disk, volume->name);
		RETURN(EINVAL);
	}

	/* Found the disk to remove.  Make sure it is a spare. */
	if ((volume->super_block->disks[i].state & ~(1 << MD_DISK_NEW)) != 0) {
		RETURN(EINVAL);
	}

	md_add_modify_object_to_list(volume, EVMS_MD_REMOVE,
				     volume->super_block->disks[i].major,
				     volume->super_block->disks[i].minor);

	md_remove_region_from_object(volume->region, volume->child_object[i]);
	KILL_SECTORS(volume->child_object[i],
		     MD_NEW_SIZE_SECTORS(volume->child_object[i]->size),
		     MD_RESERVED_SECTORS);
	volume->child_object[i] = NULL;
	md_deallocate_memory(volume->super_array[i]);
	volume->super_array[i] = NULL;

	/*
	 * Collapse super array, object array, and configuration
	 * array, and update the moved disks with their new
	 * numbers.
	 */
	for (k = i; k < volume->super_block->nr_disks - 1; k++) {
		volume->super_array[k]= volume->super_array[k+1];
		volume->child_object[k] = volume->child_object[k+1];
		volume->super_block->disks[k]= volume->super_block->disks[k+1];
		volume->super_block->disks[k].number = k;
		volume->super_block->disks[k].raid_disk = k;
		conf->disks[k] = conf->disks[k+1];
		conf->disks[k].number = k;
		conf->disks[k].raid_disk = k;
	}

	/*
	 * Slot k is now empty.  Clear its pointers as well: after a shift
	 * they still duplicate the entries that moved down to slot k - 1.
	 */
	volume->super_array[k] = NULL;
	volume->child_object[k] = NULL;

	/* Zero out now empty disk entry. */
	memset(&volume->super_block->disks[k], 0, sizeof(mdp_disk_t));

	volume->super_block->working_disks--;
	volume->super_block->spare_disks--;
	volume->super_block->nr_disks--;

	volume->nr_disks--;

	conf->disks[k].dev = NULL;
	conf->disks[k].operational = 0;
	conf->disks[k].used_slot = 0;

	/*
	 * The disk that now occupies the old spare's slot is
	 * either a different spare or nothing.  Either way,
	 * copy it to the spare disk in the configuration.
	 */
	conf->spare = conf->disks[i];

	md_rediscover_volumes_for_region(volume->region);

	RETURN(rc);
}


/*
 * Remove the faulty disk named faulty_disk from the array.
 *
 * The disk must be present in volume->child_object[] and its superblock
 * state must have MD_DISK_FAULTY or MD_DISK_REMOVED set.  Its superblock
 * area is scheduled to be wiped (KILL_SECTORS).  A disk inside the RAID
 * set (slot < raid_disks) keeps its slot, marked faulty and removed; a
 * disk beyond the RAID set has its slot collapsed out of the per-disk
 * arrays.  The region's volumes are then rediscovered.
 *
 * Returns 0 on success, or EINVAL if the disk is not in the array or is
 * not faulty.
 */
static int remove_faulty(md_volume_t * volume, char * faulty_disk) {

	int            i, k, rc = 0;
	raid5_conf_t * conf = mdvol_to_conf(volume);

	LOG_ENTRY;

	/* Locate the slot that holds the named disk. */
	for (i = 0; i < MAX_MD_DEVICES; i++) {
		if ((volume->child_object[i] != NULL) &&
		    (strcmp(faulty_disk, volume->child_object[i]->name) == 0)) {
			break;
		}
	}

	if (i >= MAX_MD_DEVICES) {
		/* Didn't find a disk with that name in the array. */
		LOG_ERROR("Disk %s was not found in array %s.\n", faulty_disk, volume->name);
		RETURN(EINVAL);
	}

	/* Found the disk to remove.  Make sure it is faulty.*/
	if (!(volume->super_block->disks[i].state & ((1 << MD_DISK_FAULTY) | (1 << MD_DISK_REMOVED)))) {
		RETURN(EINVAL);
	}

	md_add_modify_object_to_list(volume, EVMS_MD_REMOVE,
				     volume->super_block->disks[i].major,
				     volume->super_block->disks[i].minor);

	md_remove_region_from_object(volume->region, volume->child_object[i]);
	KILL_SECTORS(volume->child_object[i],
		     MD_NEW_SIZE_SECTORS(volume->child_object[i]->size),
		     MD_RESERVED_SECTORS);
	volume->child_object[i] = NULL;
	md_deallocate_memory(volume->super_array[i]);
	volume->super_array[i] = NULL;

	if (i < volume->super_block->raid_disks) {
		/*
		 * The faulty disk is part of the RAID array.
		 * Make sure its entry is marked faulty and
		 * removed.  The code above wiped out the
		 * superblock on the faulty disk so that the
		 * disk will no longer be discovered as part of
		 * this array.  The superblock's nr_disks and
		 * failed_disks counts stay the same since RAID5
		 * counts faulty disks in the array as being
		 * disks anyway.
		 */
		volume->super_block->disks[i].state = (1 << MD_DISK_FAULTY) | (1 << MD_DISK_REMOVED);

	} else {
		/* The faulty disk is past the end of the RAID
		 * array (e.g.  it was a failed disk that was
		 * replaced by a spare disk and the failed disk
		 * was moved to the former spare disk slot).
		 * Remove the disk and collapse any remaining
		 * entries.
		 */
		for (k = i; k < volume->super_block->nr_disks - 1; k++) {
			volume->super_array[k]= volume->super_array[k+1];
			volume->child_object[k] = volume->child_object[k+1];
			volume->super_block->disks[k]= volume->super_block->disks[k+1];
			volume->super_block->disks[k].number = k;
			volume->super_block->disks[k].raid_disk = k;
			conf->disks[k] = conf->disks[k+1];
			conf->disks[k].number = k;
			conf->disks[k].raid_disk = k;

			/* Reset the spare entry if it got moved. */
			if (conf->spare.raid_disk == k+1) {
				conf->spare = conf->disks[k];
			}
		}

		/*
		 * Slot k is now empty.  Clear its pointers as well: after a
		 * shift they still duplicate the entries that moved down to
		 * slot k - 1.
		 */
		volume->super_array[k] = NULL;
		volume->child_object[k] = NULL;

		/* Zero out now empty disk entry. */
		memset(&volume->super_block->disks[k], 0, sizeof(mdp_disk_t));

		conf->disks[k].dev = NULL;
		conf->disks[k].operational = 0;
		conf->disks[k].used_slot = 0;
		conf->failed_disks--;

		/* Update disk counts. */
		volume->super_block->nr_disks--;
		volume->super_block->failed_disks--;
	}

	/* The volume's own disk count drops in both cases. */
	volume->nr_disks--;

	md_rediscover_volumes_for_region(volume->region);

	RETURN(rc);
}


/*
 * Mark the active disk named active_disk as faulty.
 *
 * Requires that the superblock lists at least one spare disk and that the
 * named disk's state is exactly MD_DISK_ACTIVE | MD_DISK_SYNC.  The disk
 * entry is set to MD_DISK_FAULTY, its configuration entry is flagged
 * non-operational, the superblock counts are adjusted and the region's
 * volumes are rediscovered.
 *
 * Returns 0 on success, or EINVAL if there is no spare, the disk is not
 * in the array, or the disk is not active.
 */
static int mark_faulty(md_volume_t * volume, char * active_disk) {

	int            rc = 0;
	int            slot;
	int            match = -1;
	raid5_conf_t * conf = mdvol_to_conf(volume);

	LOG_ENTRY;

	/*
	 * Without a spare there would be nothing to take the place of the
	 * disk being failed.
	 */
	if (volume->super_block->spare_disks == 0) {
		RETURN(EINVAL);
	}

	/* Search the child objects for the first one with a matching name. */
	for (slot = 0; slot < MAX_MD_DEVICES; slot++) {
		if (volume->child_object[slot] == NULL) {
			continue;
		}

		if (strcmp(active_disk, volume->child_object[slot]->name) == 0) {
			match = slot;
			break;
		}
	}

	if (match < 0) {
		/* No disk with that name belongs to the array. */
		LOG_ERROR("Disk %s was not found in array %s.\n", active_disk, volume->name);
		rc = EINVAL;
		RETURN(rc);
	}

	/* Only a disk that is both active and in sync may be failed. */
	if (volume->super_block->disks[match].state != ((1 << MD_DISK_ACTIVE) | (1 << MD_DISK_SYNC))) {
		RETURN(EINVAL);
	}

	/*
	 * Flag the entry as faulty so that the MD recovery swaps it out
	 * for a spare.
	 */
	volume->super_block->disks[match].state = (1 << MD_DISK_FAULTY);

	conf->disks[volume->super_block->disks[match].raid_disk].operational = 0;

	/* Adjust the superblock's disk counts. */
	volume->super_block->active_disks--;
	volume->super_block->working_disks--;
	volume->super_block->failed_disks++;

	/* Rediscovery is what kicks off the recovery. */
	md_rediscover_volumes_for_region(volume->region);

	RETURN(rc);
}


/*
 * Function: raid5_set_info
 *
 *  Apply the disk maintenance options (remove faulty disk, remove spare,
 *  add spare, mark disk faulty) carried in "options" to the MD region.
 *
 *  The array is verified first.  If errors are found the user is asked
 *  whether to fix them; declining returns EINVAL without modifying the
 *  region.
 *
 *  The requested operations run in a fixed order and processing stops at
 *  the first one that fails, whose error code is returned.  The region is
 *  flagged dirty and changes are marked pending only when every requested
 *  operation succeeded.
 */
static int raid5_set_info(storage_object_t * region,
			  option_array_t   * options) {

	int    rc = 0;
	char * add_spare_disk     = NULL;
	char * remove_spare_disk  = NULL;
	char * remove_faulty_disk = NULL;
	char * mark_faulty_disk   = NULL;
	md_volume_t * volume = (md_volume_t *) region->private_data;

	LOG_ENTRY;

	if (raid5_verify_and_fix_array(volume, 0, 0)) {
		int    answer = 0;
		char * choice_text[3] = { "Don't Fix", "Fix", NULL};
		EngFncs->user_message(my_plugin, &answer,choice_text,
				      "Errors have been detected on MD region %s.  "
				      "Check the message display for messages detailing the errors requiring fixing.  "
				      "If you elect not to fix the region at this time, you may not modify the region.\n",
				      volume->name);
		if (answer) {
			raid5_verify_and_fix_array(volume, 1, 0);
		} else {
			RETURN(EINVAL);
		}
	}

	get_set_info_options(options, &add_spare_disk, &remove_spare_disk,
			     &remove_faulty_disk, &mark_faulty_disk);

	/*
	 * Order is important here.  An old spare should be removed before a
	 * new one is added so that the new one ends up in the old one's slot.
	 * A spare should be added before an active disk is removed so that
	 * mark_faulty() won't fail when it checks to make sure that a spare
	 * is available.
	 *
	 * Because later steps depend on earlier ones, stop at the first
	 * failure.  This also keeps an early error from being overwritten by
	 * the result of a later operation.
	 */
	if (remove_faulty_disk != NULL) {
		rc = remove_faulty(volume, remove_faulty_disk);
	}

	if ((rc == 0) && (remove_spare_disk != NULL)) {
		rc = remove_spare(volume, remove_spare_disk);
	}

	if ((rc == 0) && (add_spare_disk != NULL)) {
		rc = add_spare(volume, add_spare_disk);
	}

	if ((rc == 0) && (mark_faulty_disk != NULL)) {
		rc = mark_faulty(volume, mark_faulty_disk);
	}

	if (rc == 0) {
		EngFncs->set_changes_pending();
		region->flags |= SOFLAG_DIRTY;
	}

	RETURN(rc);
}


/* Function: raid5_get_plugin_info
 *
 *  Build an array describing this plugin: short name, long name, plugin
 *  type, plugin version and required engine API version.  There is no
 *  "extra" information about MD, so "name" must be NULL; a non-NULL name
 *  is rejected with EINVAL.
 *
 *  On success the newly allocated array is stored in *info_array and 0 is
 *  returned.  Returns EFAULT when info_array is NULL and ENOMEM when the
 *  array cannot be allocated.
 */
static int raid5_get_plugin_info( char                     * name,
				  extended_info_array_t   ** info_array ) {

	extended_info_array_t   * info = NULL;
	extended_info_t         * entry;
	char buffer[50] = {0};
	int filled = 0;
	int rc = 0;	/* NOTE(review): not used directly here; kept in case SET_STRING references it - confirm. */

	LOG_ENTRY;

	/* Parameter check */
	if (!info_array) {
		RETURN(EFAULT);
	}

	if (name) {
		LOG_ERROR("No support for extra plugin information about \"%s\"\n", name);
		RETURN(EINVAL);
	}

	/*
	 * Get memory for the info array.
	 * NOTE(review): five entries are filled in below while the size adds
	 * room for four extended_info_t; that is only sufficient if
	 * extended_info_array_t itself already holds one entry - confirm.
	 */
	info = EngFncs->engine_alloc(sizeof(extended_info_array_t) + sizeof(extended_info_t)*4);
	if (!info) {
		LOG_ERROR("Error allocating memory for info array\n");
		RETURN(ENOMEM);
	}

	entry = &info->info[filled];
	SET_STRING(entry->name, "ShortName");
	SET_STRING(entry->title, "Short Name");
	SET_STRING(entry->desc, "A short name given to this plugin");
	entry->type = EVMS_Type_String;
	SET_STRING(entry->value.s, raid5_plugin->short_name);
	filled++;

	entry = &info->info[filled];
	SET_STRING(entry->name, "LongName");
	SET_STRING(entry->title, "Long Name");
	SET_STRING(entry->desc, "A long name given to this plugin");
	entry->type = EVMS_Type_String;
	SET_STRING(entry->value.s, raid5_plugin->long_name);
	filled++;

	entry = &info->info[filled];
	SET_STRING(entry->name, "Type");
	SET_STRING(entry->title, "Plugin Type");
	SET_STRING(entry->desc, "There are various types of plugins; each responsible for some kind of storage object.");
	entry->type = EVMS_Type_String;
	SET_STRING(entry->value.s, "Region Manager");
	filled++;

	entry = &info->info[filled];
	SET_STRING(entry->name, "Version");
	SET_STRING(entry->title, "Plugin Version");
	SET_STRING(entry->desc, "This is the version number of the plugin.");
	entry->type = EVMS_Type_String;
	snprintf(buffer, sizeof(buffer), "%d.%d.%d", MAJOR_VERSION, MINOR_VERSION, PATCH_LEVEL);
	SET_STRING(entry->value.s, buffer);
	filled++;

	entry = &info->info[filled];
	SET_STRING(entry->name, "Required_Version");
	SET_STRING(entry->title, "Required Plugin API Version");
	SET_STRING(entry->desc, "This is the version of the engine that the plugin requires.It will not run on older versions of the Engine.");
	entry->type = EVMS_Type_String;
	snprintf(buffer, sizeof(buffer), "%d.%d.%d", raid5_plugin->required_api_version.major, raid5_plugin->required_api_version.minor, raid5_plugin->required_api_version.patchlevel);
	SET_STRING(entry->value.s, buffer);
	filled++;

	info->count = filled;
	*info_array = info;
	RETURN(0);
}


/*
 * Function: reconstruct_chunk
 *
 *  XOR the chunks of every disk in the stripe except dev_index into the
 *  chunk buffer belonging to dev_index.  The target buffer is the first
 *  XOR operand, so its current contents take part in the result; callers
 *  that want a pure reconstruction must hand in a zeroed target.
 *
 *  The source chunks are fed to xor_block() in batches because an
 *  xorblock_t holds at most MAX_XOR_BLOCKS buffers.  Always returns 0.
 */
static int reconstruct_chunk(raid5_conf_t * conf, stripe_t * stripe, unsigned int dev_index) {

	xorblock_t xorblock;
	int disk;
	int nr_bufs = 1;	/* Slot 0 is reserved for the target chunk. */

	LOG_ENTRY;

	xorblock.buf[0] = stripe->chunk[dev_index].data;

	for (disk = 0; disk < conf->raid_disks; disk++) {
		if (disk != dev_index) {
			xorblock.buf[nr_bufs] = stripe->chunk[disk].data;
			nr_bufs++;

			if (nr_bufs == MAX_XOR_BLOCKS) {
				/* The batch is full.  Fold it into the target and start over. */
				xor_block(nr_bufs, &xorblock, conf->chunk_size);
				nr_bufs = 1;
			}
		}
	}

	/* Fold in whatever is left in a partially filled batch. */
	if (nr_bufs > 1) {
		xor_block(nr_bufs, &xorblock, conf->chunk_size);
	}

	RETURN(0);
}


/*
 * Function: free_stripe
 *
 *  Release every chunk buffer held by the stripe and reset the stripe
 *  structure to all zeros.  A stripe whose data_size is 0 is treated as
 *  not loaded and is left untouched.  Always returns 0.
 */
static int free_stripe(stripe_t * stripe) {

	int idx;

	LOG_ENTRY;

	if (stripe->data_size == 0) {
		/* Nothing was loaded into this stripe. */
		RETURN(0);
	}

	/* Free up all the buffers in the stripe.  free(NULL) is a no-op. */
	for (idx = 0; idx < MD_SB_DISKS; idx++) {
		free(stripe->chunk[idx].data);
	}

	memset(stripe, 0, sizeof(*stripe));

	RETURN(0);
}


/*
 * Function: get_stripe
 *
 *  Load the stripe that contains "lsn" into "stripe".
 *
 *  The stripe structure is cleared and its number, starting sector and
 *  data size are filled in.  A zeroed chunk buffer is then allocated for
 *  each RAID disk, and the chunk is read from every disk that is marked
 *  operational.  When the array has a failed disk, that disk's chunk is
 *  rebuilt from the others with reconstruct_chunk().
 *
 *  Returns 0 on success, ENOMEM when a chunk buffer cannot be allocated,
 *  or the error from a failed child read.  On any error the stripe is
 *  released with free_stripe() before returning.
 */
static int get_stripe(md_volume_t * volume, lsn_t lsn, stripe_t * stripe) {

	int rc = 0;
	int disk;
	chunk_t * chunk;
	raid5_conf_t * conf = mdvol_to_conf(volume);
	sector_count_t data_stripe_size = (conf->chunk_size >> EVMS_VSECTOR_SIZE_SHIFT) * (conf->raid_disks - 1);

	LOG_ENTRY;

	memset(stripe, 0, sizeof(*stripe));

	/* Set the volume to which the stripe belongs. */
	stripe->volume = volume;

	/* Calculate stripe number and starting sector. */
	stripe->number = lsn / data_stripe_size;
	stripe->start_lsn = stripe->number * data_stripe_size;
	stripe->data_size = data_stripe_size;

	/* Set up a zeroed buffer and the device location for every chunk. */
	for (disk = 0; disk < conf->raid_disks; disk++) {
		chunk = &stripe->chunk[disk];

		chunk->data = calloc(1, conf->chunk_size);
		if (chunk->data == NULL) {
			/* Memory allocation failure */
			rc = ENOMEM;
			break;
		}

		/* Only an operational disk gets its child device filled in. */
		chunk->dev = conf->disks[disk].operational ? conf->disks[disk].dev : NULL;
		chunk->lsn_on_dev = stripe->number * (conf->chunk_size >> EVMS_VSECTOR_SIZE_SHIFT);
	}

	/* Read in the chunks from the disks that have a device. */
	for (disk = 0; (rc == 0) && (disk < conf->raid_disks); disk++) {
		chunk = &stripe->chunk[disk];

		if (chunk->dev != NULL) {
			LOG_DEBUG("Reading %d sectors from %s at sector offset %lld.\n", conf->chunk_size >> EVMS_VSECTOR_SIZE_SHIFT, chunk->dev->name, chunk->lsn_on_dev);
			rc = READ(chunk->dev, chunk->lsn_on_dev, conf->chunk_size >> EVMS_VSECTOR_SIZE_SHIFT, chunk->data);
		}
	}

	if ((rc == 0) && (conf->failed_raid_disks != 0)) {
		LOG_DEBUG("Reconstructing data for failed disk %d\n", conf->failed_disk_index);
		reconstruct_chunk(conf, stripe, conf->failed_disk_index);
	}

	if (rc != 0) {
		/* Something went wrong.  Clean up the stripe. */
		free_stripe(stripe);
	}

	RETURN(rc);
}


/* Direction of a copy between a caller's buffer and a loaded stripe. */
typedef enum {
	STRIPE_IO_READ  = 0,	/* Copy from the stripe into the buffer. */
	STRIPE_IO_WRITE = 1	/* Copy from the buffer into the stripe. */
} stripe_io_cmd_t;


/*
 * Function: stripe_io
 *
 *  Copy data between "buffer" and the in-memory chunks of an already
 *  loaded stripe.  No device I/O is done here.
 *
 *  cmd           STRIPE_IO_READ copies stripe -> buffer,
 *                STRIPE_IO_WRITE copies buffer -> stripe.
 *  volume        Volume the stripe belongs to.
 *  stripe        Loaded stripe to copy from/to.
 *  lsn           Region-relative starting sector; must lie in the stripe.
 *  sectors       Number of sectors requested.
 *  buffer        Caller's data buffer.
 *  sectors_read  Receives the number of sectors actually copied.  The
 *                copy stops at the end of the stripe, so this may be less
 *                than "sectors".  It is set to 0 on every early return so
 *                that callers never see an indeterminate count.
 *
 *  Returns 0 on success, or EINVAL for an unknown command or an lsn that
 *  is outside the stripe.
 */
static int stripe_io(stripe_io_cmd_t  cmd,
		     md_volume_t    * volume,
		     stripe_t       * stripe,
		     lsn_t            lsn,
		     sector_count_t   sectors,
		     unsigned char  * buffer,
		     sector_count_t * sectors_read) {

	lsn_t dev_offset;
	lsn_t sector_offset_in_chunk;
	unsigned int byte_offset_in_chunk;
	unsigned int chunk_index;
	unsigned int parity_index;
	sector_count_t stripe_end_lsn = stripe->start_lsn + stripe->data_size - 1;
	raid5_conf_t * conf = mdvol_to_conf(volume);
	sector_count_t chunk_size_in_sectors = conf->chunk_size >> EVMS_VSECTOR_SIZE_SHIFT;
	sector_count_t sectors_to_copy;
	unsigned int bytes_to_copy;

	LOG_ENTRY;

	/*
	 * Initialize the output before any early return.  The callers
	 * consume it regardless of the return code.
	 */
	*sectors_read = 0;

	if (cmd > STRIPE_IO_WRITE) {
		RETURN(EINVAL);
	}

	if (sectors == 0) {
		/* Nothing to read.  We're finished. */
		RETURN(0);
	}

	if ((lsn < stripe->start_lsn) ||
	    (lsn > stripe_end_lsn)) {
		RETURN(EINVAL);
	}

	do {
		/* Find the chunk that holds this sector and the offset into it. */
		dev_offset = raid5_compute_sector(lsn,
						  conf->raid_disks, conf->raid_disks - 1,
						  &chunk_index, &parity_index,
						  conf);
		/* The mask assumes the chunk size in sectors is a power of two. */
		sector_offset_in_chunk = dev_offset & (chunk_size_in_sectors - 1);
		byte_offset_in_chunk = (unsigned int) (sector_offset_in_chunk << EVMS_VSECTOR_SIZE_SHIFT);

		/* Copy no further than the end of the current chunk. */
		sectors_to_copy = min(sectors, chunk_size_in_sectors - (sector_offset_in_chunk & (chunk_size_in_sectors - 1)));
		bytes_to_copy = (unsigned int) (sectors_to_copy << EVMS_VSECTOR_SIZE_SHIFT);

		if (cmd == STRIPE_IO_READ) {
			LOG_DEBUG("Reading %d bytes from stripe %ld, chunk %d, offset %d.\n", bytes_to_copy, stripe->number, chunk_index, byte_offset_in_chunk);
			LOG_DEBUG("AKA: Reading %lld sectors from (%s) at sector offset %lld.\n", sectors_to_copy, (conf->disks[chunk_index].dev != NULL) ? conf->disks[chunk_index].dev->name : "nul", dev_offset);
			memcpy(buffer,
			       stripe->chunk[chunk_index].data + byte_offset_in_chunk,
			       bytes_to_copy);

		} else if (cmd == STRIPE_IO_WRITE) {
			LOG_DEBUG("Writing %d bytes to stripe %ld, chunk %d, offset %d.\n", bytes_to_copy, stripe->number, chunk_index, byte_offset_in_chunk);
			LOG_DEBUG("AKA: Writing %lld sectors from (%s) at sector offset %lld.\n", sectors_to_copy, (conf->disks[chunk_index].dev != NULL) ? conf->disks[chunk_index].dev->name : "nul", dev_offset);
			memcpy(stripe->chunk[chunk_index].data + byte_offset_in_chunk,
			       buffer,
			       bytes_to_copy);
		}

		/* Advance past what was just copied. */
		sectors -= sectors_to_copy;
		*sectors_read += sectors_to_copy;
		lsn += sectors_to_copy;
		buffer += bytes_to_copy;

	} while ((sectors != 0) && (lsn <= stripe_end_lsn));

	RETURN(0);
}


/* Function: raid5_read
 *
 *  Read "count" sectors starting at region sector "lsn" into "buffer".
 *
 *  When the volume is not degraded, the request is remapped run by run
 *  with get_child_run() and each run is read directly from the child
 *  object.  When the volume is degraded, whole stripes are loaded with
 *  get_stripe() (which rebuilds the failed disk's chunk) and the data is
 *  copied out of the stripe with stripe_io().
 *
 *  Returns EFAULT for a NULL buffer and EINVAL for a request that runs
 *  past the end of the region.  For a volume flagged MD_CORRUPT the
 *  buffer is zero filled and 0 is returned.  Otherwise returns 0 or the
 *  first error from the remapping or the underlying reads.
 */
static int raid5_read( storage_object_t * region,
		       lsn_t              lsn,
		       sector_count_t     count,
		       void             * buffer ) {

	int                rc = 0;
	md_volume_t *      volume = (md_volume_t *)region->private_data;
	stripe_t           stripe = {0};
	unsigned long      buffer_offset;
	unsigned char    * bytes = (unsigned char *) buffer;	/* Byte view for pointer arithmetic. */
	storage_object_t * child_object;
	lsn_t              child_lsn;
	sector_count_t     child_count = 0;

	LOG_ENTRY;

	LOG_DEBUG("Request to read %lld sectors from %s at sector offset %lld.\n", count, region->name, lsn);

	/* Parameter check */
	if (!buffer) {
		RETURN(EFAULT);
	}

	if (volume->flags & MD_CORRUPT) {
		memset(buffer, 0x0, count * EVMS_VSECTOR_SIZE);
		LOG_ERROR("MD Object %s is corrupt, data is suspect \n ",volume->name);
		RETURN(0);
	}

	if ((lsn + count) > region->size) {
		/* The sum is a 64-bit sector number; log it with %lld like the other sector values. */
		LOG_ERROR("Attempt to read past end of region %s sector=%lld\n ",volume->name,lsn+count);
		RETURN(EINVAL);
	}

	buffer_offset = 0;
	if (!(volume->flags & MD_DEGRADED)) {
		while ((count != 0) && (rc == 0)) {
			rc = get_child_run(volume, lsn, count,
					   &child_object, &child_lsn, &child_count);
			if (rc == 0) {
				LOG_DEBUG("Reading %lld sectors from %s at sector offset %lld.\n", child_count, child_object->name, child_lsn);
				rc = READ(child_object, child_lsn, child_count, bytes + buffer_offset);
				count -= child_count;
				lsn += child_count;
				buffer_offset += child_count << EVMS_VSECTOR_SIZE_SHIFT;
			}
		}

	} else {
		while ((count != 0) && (rc == 0)) {
			if ((lsn >= stripe.start_lsn) &&
			    (lsn < stripe.start_lsn + stripe.data_size)) {
				/* The sector is in the loaded stripe.  Copy out of it. */
				child_count = 0;
				rc = stripe_io(STRIPE_IO_READ,
					       volume, &stripe,
					       lsn, count,
					       bytes + buffer_offset, &child_count);
				if (rc == 0) {
					/* Only advance by a count that stripe_io() actually produced. */
					count -= child_count;
					lsn += child_count;
					buffer_offset += child_count << EVMS_VSECTOR_SIZE_SHIFT;
				}

			} else {
				free_stripe(&stripe);

				/* Read a new stripe and try again. */
				rc = get_stripe(volume, lsn, &stripe);
			}
		}
	}

	if (stripe.data_size != 0) {
		free_stripe(&stripe);
	}

	RETURN(rc);
}


/*
 * Function: write_stripe
 *
 *  Recompute the parity chunk of a loaded stripe and write every chunk
 *  that has a device back to that device.  Chunks without a device
 *  (missing or faulty disk) are skipped.
 *
 *  Returns 0 on success or the error from the first failed write; no
 *  further chunks are written after a failure.
 */
static int write_stripe(md_volume_t * volume, stripe_t * stripe) {

	int rc = 0;
	int disk;
	unsigned int data_index;	/* Required by raid5_compute_sector(); not used here. */
	unsigned int parity_index;
	raid5_conf_t * conf = mdvol_to_conf(volume);
	chunk_t * chunk;

	LOG_ENTRY;

	/* Find which one is the parity disk. */
	raid5_compute_sector(stripe->start_lsn,
			     conf->raid_disks, conf->raid_disks - 1,
			     &data_index, &parity_index,
			     conf);

	/*
	 * Recalculate the parity.  The parity chunk is zeroed first because
	 * reconstruct_chunk() XORs the other chunks into the target buffer.
	 */
	LOG_DEBUG("Reconstructing parity on disk %d.\n", parity_index);
	memset(stripe->chunk[parity_index].data, 0, conf->chunk_size);
	reconstruct_chunk(conf, stripe, parity_index);

	/* Write the stripe to the disks. */
	for (disk = 0; disk < conf->raid_disks; disk++) {
		chunk = &stripe->chunk[disk];

		/*
		 * One of the devices may be missing or faulty.  If so, its
		 * dev field won't be filled in in the chunk structure.  Skip
		 * it since there is no device to write to.
		 */
		if (chunk->dev == NULL) {
			continue;
		}

		LOG_DEBUG("Writing %d sectors to %s at sector offset %lld.\n", conf->chunk_size >> EVMS_VSECTOR_SIZE_SHIFT, chunk->dev->name, chunk->lsn_on_dev);
		rc = WRITE(chunk->dev, chunk->lsn_on_dev, conf->chunk_size >> EVMS_VSECTOR_SIZE_SHIFT, chunk->data);
		if (rc != 0) {
			break;
		}
	}

	RETURN(rc);
}


/* Function: raid5_write
 *
 *  Write "count" sectors from "buffer" to the region starting at sector
 *  "lsn".
 *
 *  Any pending kill-sector requests are processed first.  The write is
 *  then attempted through the MD kernel plugin's init-io ioctl.  If the
 *  ioctl fails with ENOPKG or ENODEV, the write is done by hand one
 *  stripe at a time: load the stripe, copy the new data into it, then
 *  recompute parity and write it back.  Any other ioctl error is
 *  returned to the caller.
 *
 *  Returns EFAULT for a NULL buffer, EIO when the volume is flagged
 *  MD_CORRUPT, EINVAL when the request runs past the end of the region,
 *  and otherwise 0 or the first error encountered.  A failure to write a
 *  stripe back to the disks is reported to the caller.
 */
static int raid5_write( storage_object_t * region,
			lsn_t              lsn,
			sector_count_t     count,
			void             * buffer ) {

	int                     rc = 0;
	int                     flush_rc;
	md_volume_t             * volume = (md_volume_t *)region->private_data;
	stripe_t                stripe = {0};
	sector_count_t          sectors_written = 0;
	unsigned char           * data = (unsigned char *) buffer;	/* Cursor for the manual path. */
	evms_plugin_ioctl_t     plugin_ioctl = {0};
	evms_md_ioctl_t         md_ioctl = {0};
	raid5_ioctl_init_io_t   init_io_ioctl = {0};

	LOG_ENTRY;

	LOG_DEBUG("Request to write %lld sectors to %s at sector offset %lld.\n", count, region->name, lsn);

	/* Parameter check */
	if (!buffer) {
		RETURN(EFAULT);
	}

	if (volume->flags & MD_CORRUPT) {
		LOG_ERROR("MD Object %s is corrupt, writing data is not allowed\n ",volume->name);
		RETURN(EIO);
	}
	if ((lsn + count) > region->size) {
		/* The sum is a 64-bit sector number; log it with %lld like the other sector values. */
		LOG_ERROR("Attempt to write past end of region %s sector=%lld\n ",volume->name,lsn+count);
		RETURN(EINVAL);
	}

	if (kill_sector_list_head != NULL) {
		kill_sectors();
	}

	/* Use the MD init io ioctl to write the data.  If the ioctl fails, do it manually. */
	plugin_ioctl.feature_id = MD_SET_PLUGIN_ID;
	plugin_ioctl.feature_command = EVMS_MD_PERS_IOCTL_CMD;
	plugin_ioctl.feature_ioctl_data = &md_ioctl;

	md_ioctl.mddev_idx = volume->super_block->md_minor;
	md_ioctl.cmd = EVMS_MD_RAID5_INIT_IO;
	md_ioctl.arg = &init_io_ioctl;

	init_io_ioctl.rw = WRITE;
	init_io_ioctl.lsn = lsn;
	init_io_ioctl.nr_sects = count;
	init_io_ioctl.data = buffer;

	rc = EngFncs->ioctl_evms_kernel(EVMS_PLUGIN_IOCTL, &plugin_ioctl);

	/* If the ioctl failed we can try to do the write by hand. */
	if (rc != 0) {
		/*
		 * Get a real error code other than the -1 which ioctl()
		 * returns.
		 */
		if (plugin_ioctl.status != 0) {
			rc = plugin_ioctl.status;
		} else {
			rc = errno;
		}

		/* Check for acceptable error codes. */
		if ((abs(rc) == ENOPKG) ||		/* MD kernel module is not loaded */
		    (abs(rc) == ENODEV)) {  /* MD region does not exist */
			rc = 0;
		}

		while ((count != 0) && (rc == 0)) {
			if ((lsn >= stripe.start_lsn) &&
			    (lsn < stripe.start_lsn + stripe.data_size)) {
				/* The sector is in the loaded stripe.  Copy the new data in. */
				sectors_written = 0;
				rc = stripe_io(STRIPE_IO_WRITE,
					       volume, &stripe,
					       lsn, count,
					       data, &sectors_written);
				if (rc == 0) {
					/* Only advance by a count that stripe_io() actually produced. */
					count -= sectors_written;
					lsn += sectors_written;
					data += sectors_written << EVMS_VSECTOR_SIZE_SHIFT;
				}

			} else {
				if (stripe.data_size != 0) {
					/*
					 * Flush the stripe we are done with.  A failed
					 * flush ends the loop and is returned.
					 */
					rc = write_stripe(volume, &stripe);
					free_stripe(&stripe);
				}

				if (rc == 0) {
					/* Read a new stripe and try again. */
					rc = get_stripe(volume, lsn, &stripe);
				}
			}
		}

		if (stripe.data_size != 0) {
			/*
			 * Flush the last stripe.  Keep an earlier error if there
			 * is one; otherwise report the result of this flush.
			 */
			flush_rc = write_stripe(volume, &stripe);
			if (rc == 0) {
				rc = flush_rc;
			}
			free_stripe(&stripe);
		}
	}

	RETURN(rc);
}


/* Function: raid5_direct_plugin_communication
 *
 *  Direct plugin communication is not implemented for this plugin.  All
 *  three arguments are ignored; the call logs a message and returns
 *  ENOSYS.
 */
static int raid5_direct_plugin_communication( void    * thing,
					      boolean   target_kernel_plugin,
					      void    * arg ) {
	int rc = ENOSYS;

	LOG_ENTRY;

	LOG("Not yet implemented\n");

	RETURN(rc);
}


/*
 * Function: free_region
 *
 *  List callback (see raid5_plugin_cleanup) that releases the private
 *  MD data hanging off one region: the RAID5 configuration, the per-disk
 *  superblocks, the master superblock, the four disk bookkeeping lists
 *  and the volume structure itself.  The volume is also taken off the
 *  global volume list.
 *
 *  Only "object" is used; the other parameters are part of the callback
 *  signature.  A region with no private data is skipped.  Always returns
 *  DLIST_SUCCESS.
 */
static int free_region (ADDRESS object,
			TAG     object_tag,
			uint    object_size,
			ADDRESS object_handle,
			ADDRESS parameters) {

	int i;
	int nr_disks = 0;
	storage_object_t * region = (storage_object_t *) object;
	md_volume_t * volume;
	raid5_conf_t * conf;

	LOG_ENTRY;

	if ((region == NULL) || (region->private_data == NULL)) {
		/* Nothing attached to this region; nothing to free. */
		RETURN(DLIST_SUCCESS);
	}

	volume = (md_volume_t *)region->private_data;
	conf = mdvol_to_conf(volume);

	md_deallocate_memory(conf);

	/* Free the per-disk superblocks; stop once all nr_disks have been seen. */
	for (i = 0; (i < MAX_MD_DEVICES) && (nr_disks < volume->nr_disks); i++) {
		if (volume->super_array[i]) {
			nr_disks++;
			md_deallocate_memory(volume->super_array[i]);
		}
	}
	md_deallocate_memory(volume->super_block);
	if (volume->added_disks != NULL) {
		DestroyList(&volume->added_disks, TRUE);
	}
	if (volume->removed_disks != NULL) {
		DestroyList(&volume->removed_disks, TRUE);
	}
	if (volume->activated_disks != NULL) {
		DestroyList(&volume->activated_disks, TRUE);
	}
	if (volume->deactivated_disks != NULL) {
		DestroyList(&volume->deactivated_disks, TRUE);
	}
	md_remove_volume_from_list(volume);
	md_deallocate_memory(volume);

	/* Don't leave the region pointing at the memory that was just freed. */
	region->private_data = NULL;

	RETURN(DLIST_SUCCESS);
}


/*
 * Function: raid5_plugin_cleanup
 *
 *  Ask the engine for the list of data regions owned by this plugin, run
 *  free_region() on each of them, and destroy the list.  If the list
 *  cannot be obtained, nothing is freed.
 */
void raid5_plugin_cleanup(void) {

	dlist_t regions = NULL;
	int rc;

	LOG_ENTRY;

	rc = EngFncs->get_object_list(REGION, DATA_TYPE, MY_PLUGIN, 0, &regions);
	if (rc == 0) {
		/* Release the private data of every region, then the list itself. */
		ForEachItem(regions, free_region, NULL, TRUE);
		DestroyList(&regions, FALSE);
	}

	LOG_EXIT(0);
}



/*
 * Function table for the MD RAID 4/5 Region Manager.  Each entry maps an
 * engine plugin entry point to the routine in this plugin that
 * implements it.
 */
static plugin_functions_t raid5_functions = {
	.cleanup_evms_plugin         = raid5_plugin_cleanup,
	.setup_evms_plugin           = raid5_setup_evms_plugin,
	.can_delete                  = raid5_can_delete,
	.can_expand                  = raid5_can_expand,
	.can_expand_by               = raid5_can_expand_by,
	.can_shrink                  = raid5_can_shrink,
	.can_shrink_by               = raid5_can_shrink_by,
	.can_move                    = raid5_can_move,
	.can_set_volume              = raid5_can_set_volume,
	.discover                    = raid5_discover,
	.create                      = raid5_create,
	.delete                      = raid5_delete,
	.expand                      = raid5_expand,
	.shrink                      = raid5_shrink,
	.move                        = raid5_move,
	.set_volume                  = raid5_set_volume,
	.add_sectors_to_kill_list    = raid5_add_sectors_to_kill_list,
	.commit_changes              = raid5_commit_changes,
	.get_option_count            = raid5_get_option_count,
	.init_task                   = raid5_init_task,
	.set_option                  = raid5_set_option,
	.set_objects                 = raid5_set_objects,
	.get_info                    = raid5_get_info,
	.set_info                    = raid5_set_info,
	.get_plugin_info             = raid5_get_plugin_info,
	.read                        = raid5_read,
	.write                       = raid5_write,
	.direct_plugin_communication = raid5_direct_plugin_communication
};



/*
 * Plugin record for the MD RAID 4/5 Region Manager.
 *
 *  Static description of this plugin: its ID, its own version, the
 *  engine API version it requires, its names, and its function table.
 */

plugin_record_t raid5_plugin_record = {
	.id = SetPluginID(EVMS_OEM_IBM, EVMS_REGION_MANAGER, 7),

	.version = {
		.major      = MAJOR_VERSION,
		.minor      = MINOR_VERSION,
		.patchlevel = PATCH_LEVEL
	},

	.required_api_version = {
		.major      = 3,
		.minor      = 0,
		.patchlevel = 0
	},

	.short_name          = "MDRaid5RegMgr",
	.long_name           = "MD RAID 4/5 Region Manager",
	.oem_name            = "IBM",
	.functions           = { .plugin = &raid5_functions },
	.container_functions = NULL
};




