/*
 *  RapidMiner
 *
 *  Copyright (C) 2001-2009 by Rapid-I and the contributors
 *
 *  Complete list of developers available at our web site:
 *
 *       http://rapid-i.com
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Affero General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program.  If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.operator.features.selection;

import java.util.LinkedList;
import java.util.List;

import com.rapidminer.example.ExampleSet;
import com.rapidminer.operator.IOObject;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.features.CoarsePopulationEvaluator;
import com.rapidminer.operator.features.FeatureOperator;
import com.rapidminer.operator.features.Individual;
import com.rapidminer.operator.features.KeepBest;
import com.rapidminer.operator.features.Population;
import com.rapidminer.operator.features.PopulationEvaluator;
import com.rapidminer.operator.features.PopulationOperator;
import com.rapidminer.operator.features.RedundanceRemoval;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeDouble;
import com.rapidminer.parameter.ParameterTypeInt;
import com.rapidminer.parameter.UndefinedParameterError;

/**
 * A faster version of the {@link FeatureSelectionOperator} designed specifically
 * for HHH. Uses the {@link CoarsePopulationEvaluator}, which assigns a performance
 * vector to each individual in the population. However, unlike the usual
 * {@link SimplePopulationEvaluator}, this evaluator evaluates an individual only
 * if it is sufficiently dissimilar to the best individual of the last
 * generation (the "parent"); otherwise, it is assigned the performance vector
 * of this parent, saving time. Thus, system calls that are extremely rare do
 * not cause expensive new evaluations. This is justified because they are very
 * unlikely to produce a different set of HHH and, in effect, a different
 * performance vector.
 *
 * <p>Simple hill climbing: to be used for forward selection, keeping only the
 * single best individual in each generation.
 *
 * @author Peter Fricke, Simon Fischer, Ingo Mierswa
 */
public class CoarseFeatureSelectionOperator extends FeatureOperator {

	/** The parameter name for &quot;Stop after n generations without improval of the performance (-1: stops if the maximum_number_of_generations is reached).&quot; */
	public static final String PARAMETER_GENERATIONS_WITHOUT_IMPROVAL = "generations_without_improval";

	/** The parameter name for &quot;Delivers the maximum amount of generations (-1: might use or deselect all features).&quot; */
	public static final String PARAMETER_MAXIMUM_NUMBER_OF_GENERATIONS = "maximum_number_of_generations";

	/** The parameter name for &quot;Minimum dissimilarity to previous best for an individual in order to be evaluated.&quot; */
	public static final String PARAMETER_THRESHOLD = "evaluation_threshold";

	/** Direction constant returned by {@link #getDefaultDirection()}. */
	public static final int FORWARD_SELECTION = 0;

	/** Stagnation limit read from {@link #PARAMETER_GENERATIONS_WITHOUT_IMPROVAL}; only honoured when positive. */
	private int generationsWOImp;

	/**
	 * Generation limit checked by {@link #solutionGoodEnough(Population)}. Holds the raw
	 * parameter value after {@link #apply()} and the effective limit once
	 * {@link #getPreEvaluationPopulationOperators(ExampleSet)} has run.
	 */
	private int maxGenerations;

	/** Value of {@link #PARAMETER_THRESHOLD}, handed to the {@link CoarsePopulationEvaluator}. */
	private double threshold;

	public CoarseFeatureSelectionOperator(OperatorDescription description) {
		super(description);
	}

	/**
	 * Caches the three operator parameters in fields and then delegates to the
	 * superclass implementation.
	 *
	 * @return whatever <tt>super.apply()</tt> returns
	 * @throws OperatorException if a parameter cannot be read or the superclass fails
	 */
	public IOObject[] apply() throws OperatorException {
		this.maxGenerations = getParameterAsInt(PARAMETER_MAXIMUM_NUMBER_OF_GENERATIONS);
		this.generationsWOImp = getParameterAsInt(PARAMETER_GENERATIONS_WITHOUT_IMPROVAL);
		this.threshold = getParameterAsDouble(PARAMETER_THRESHOLD);
		return super.apply();
	}

	/** Returns {@link #FORWARD_SELECTION}. */
	int getDefaultDirection() {
		return FORWARD_SELECTION;
	}

	/**
	 * May <tt>es</tt> have <i>n</i> features. The initial population
	 * contains <i>n</i> individuals, each with exactly one feature switched on
	 * (weight 1.0) and all other weights left at 0.0.
	 *
	 * @param es the example set whose attribute count determines the population size
	 * @return the freshly created population
	 */
	public Population createInitialPopulation(ExampleSet es) throws UndefinedParameterError {
		// The attribute count is loop-invariant, so it is looked up only once.
		final int numberOfAttributes = es.getAttributes().size();
		Population population = new Population();
		for (int index = 0; index < numberOfAttributes; index++) {
			double[] weights = new double[numberOfAttributes];
			weights[index] = 1.0d;
			population.add(new Individual(weights));
		}
		return population;
	}

	/**
	 * Returns the operators applied before each evaluation, in this order:
	 * <ol>
	 * <li>{@link KeepBest} with a size of 1 (keep only the single best individual)
	 * <li>{@link ForwardSelection}
	 * <li>{@link RedundanceRemoval}
	 * </ol>
	 * As a side effect the effective generation limit used by
	 * {@link #solutionGoodEnough(Population)} is (re)computed: a configured
	 * value of 0 or less becomes "number of attributes - 1", any other value
	 * is reduced by one. The limit is derived from the configured parameter
	 * on every call instead of decrementing the field in place, so calling
	 * this method more than once does not shrink the limit further.
	 *
	 * @param input the example set whose attribute count bounds the number of generations
	 * @return the list of pre-evaluation population operators
	 * @throws OperatorException if the generation limit parameter cannot be read
	 */
	public List<PopulationOperator> getPreEvaluationPopulationOperators(ExampleSet input) throws OperatorException {
		final int keepBest = 1;
		List<PopulationOperator> operators = new LinkedList<PopulationOperator>();
		operators.add(new KeepBest(keepBest));
		operators.add(new ForwardSelection());

		int configuredLimit = getParameterAsInt(PARAMETER_MAXIMUM_NUMBER_OF_GENERATIONS);
		if (configuredLimit <= 0) {
			this.maxGenerations = input.getAttributes().size() - 1;
		} else {
			// One less than configured to ensure the correct number of features
			// (presumably because the initial population already selects one
			// feature per individual -- confirm against FeatureOperator).
			this.maxGenerations = configuredLimit - 1;
		}

		operators.add(new RedundanceRemoval());
		return operators;
	}

	/** Returns a new, empty list: no operators are applied after evaluation. */
	public List<PopulationOperator> getPostEvaluationPopulationOperators(ExampleSet input) throws OperatorException {
		return new LinkedList<PopulationOperator>();
	}

	/**
	 * Decides whether the search should stop. Returns true if
	 * <ul>
	 * <li>the population is empty, or
	 * <li>a positive stagnation limit is set and the population has gone at
	 * least that many generations without improval, or
	 * <li>the population's generation counter has reached the generation limit.
	 * </ul>
	 */
	public boolean solutionGoodEnough(Population pop) throws OperatorException {
		if (pop.empty()) {
			return true;
		}
		if ((generationsWOImp > 0) && (pop.getGenerationsWithoutImproval() >= generationsWOImp)) {
			return true;
		}
		return pop.getGeneration() >= maxGenerations;
	}

	/**
	 * Creates the evaluator for this operator: a {@link CoarsePopulationEvaluator}
	 * constructed from this operator, its current input, the given example set and
	 * the cached evaluation threshold. This is the point where this operator
	 * plugs in the coarse evaluator.
	 */
	protected PopulationEvaluator getPopulationEvaluator(ExampleSet exampleSet) throws UndefinedParameterError {
		return new CoarsePopulationEvaluator(this, getInput(), exampleSet, threshold);
	}

	/**
	 * Extends the superclass parameter list with the stagnation limit, the
	 * generation limit and the evaluation threshold.
	 */
	public List<ParameterType> getParameterTypes() {
		List<ParameterType> types = super.getParameterTypes();
		types.add(new ParameterTypeInt(PARAMETER_GENERATIONS_WITHOUT_IMPROVAL, "Stop after n generations without improval of the performance (-1: stops if the maximum_number_of_generations is reached).", -1, Integer.MAX_VALUE, 1));
		types.add(new ParameterTypeInt(PARAMETER_MAXIMUM_NUMBER_OF_GENERATIONS, "Delivers the maximum amount of generations (-1: might use or deselect all features).", -1, Integer.MAX_VALUE, -1));
		types.add(new ParameterTypeDouble(PARAMETER_THRESHOLD, "Minimum dissimilarity to previous best for an individual in order to be evaluated.", 0, 1, 0.001));
		return types;
	}
}
