/*
 *  RapidMiner
 *
 *  Copyright (C) 2001-2010 by Rapid-I and the contributors
 *
 *  Complete list of developers available at our web site:
 *
 *       http://rapid-i.com
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Affero General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program.  If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.operator.learner.meta;

import java.util.Iterator;
import java.util.List;
import java.util.Vector;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.set.SplittedExampleSet;
import com.rapidminer.operator.IOObjectCollection;
import com.rapidminer.operator.Model;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.ValueDouble;
import com.rapidminer.operator.learner.PredictionModel;
import com.rapidminer.operator.ports.OutputPort;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeInt;
import com.rapidminer.tools.RandomGenerator;
import com.rapidminer.tools.Tools;


/**
 * <p>This operator trains an ensemble of classifiers for boolean target
 * attributes. In each iteration the training set is reweighted, so that
 * previously discovered patterns and other kinds of prior knowledge are
 * &quot;sampled out&quot; {@rapidminer.cite Scholz/2005b}. An inner classifier,
 * typically a rule or decision tree induction algorithm, is sequentially
 * applied several times, and the models are combined to a single global model.
 * The maximum number of models to be trained is specified by the parameter
 * <code>iterations</code>.</p>
 * 
 * <p>If the parameter <code>rescale_label_priors</code> is set, then the example
 * set is reweighted, so that all classes are equally probable (or frequent).
 * For two-class problems this turns the problem of fitting models to maximize
 * weighted relative accuracy into the more common task of classifier induction
 * {@rapidminer.cite Scholz/2005a}. Applying a rule induction algorithm as an inner
 * learner allows to do subgroup discovery. This option is also recommended for
 * data sets with class skew, if a &quot;very weak learner&quot; like a decision
 * stump is used. If <code>rescale_label_priors</code> is not set, then the
 * operator performs boosting based on probability estimates.</p>
 * 
 * <p>The estimates used by this operator may either be computed using the same set
 * as for training, or in each iteration the training set may be split randomly,
 * so that a model is fitted based on the first subset, and the probabilities
 * are estimated based on the second. The first solution may be advantageous in
 * situations where data is rare. Set the parameter
 * <code>ratio_internal_bootstrap</code> to 1 to use the same set for training
 * as for estimation. Set this parameter to a value of lower than 1 to use the
 * specified subset of data for training, and the remaining examples for
 * probability estimation.</p>
 * 
 * <p>If the parameter <code>allow_marginal_skews</code> is <em>not</em> set,
 * then the support of each subset defined in terms of common base model
 * predictions does not change from one iteration to the next. Analogously the
 * class priors do not change. This is the procedure originally described in
 * {@rapidminer.cite Scholz/2005b} in the context of subgroup discovery.</p>
 * 
 * <p>Setting the <code>allow_marginal_skews</code> option to <code>true</code>
 * leads to a procedure that changes the marginal weights/probabilities of
 * subsets, if this is beneficial in a boosting context, and stratifies the two
 * classes to be equally likely. As for AdaBoost, the total weight upper-bounds
 * the training error in this case. This bound is reduced more quickly by the
 * BayesianBoosting operator, however.</p>
 * 
 * <p>In sum, to reproduce the sequential sampling, or knowledge-based sampling, 
 * from {@rapidminer.cite Scholz/2005b} for subgroup discovery, two of the 
 * default parameter settings of this operator have to be changed: 
 * <code>rescale_label_priors</code> must 
 * be set to <code>true</code>, and <code>allow_marginal_skews</code> must 
 * be set to <code>false</code>. In addition, a boolean (binomial) label 
 * has to be used.</p>
 *  
 * <p>The operator requires an example set as its input. To sample out prior
 * knowledge of a different form it is possible to provide another model as an
 * optional additional input. The predictions of this model are used to
 * produce an initial weighting of the training set. The output of the operator
 * is a classification model applicable for estimating conditional class
 * probabilities or for plain crisp classification. It contains up to the
 * specified number of inner base models. In the case of an optional initial
 * model, this model will also be stored in the output model, in order to
 * produce the same initial weighting during model application.</p>
 * 
 * @author Martin Scholz, Marius Helf
 */
public class BayesianBoostingSql extends AbstractBayesianBoosting {

	/**
	 * Name of the variable specifying the maximal number of iterations of the
	 * learner.
	 */
	public static final String PARAMETER_ITERATIONS = "iterations";

	/**
	 * Integer parameter that defines how often a new sample is drawn.
	 */
	public static final String PARAMETER_NEW_SAMPLE_INTERVAL = "new_sample_interval";

	/**
	 * Each "model_output_interval" iterations the current model is output to a collection.
	 * This allows to examine the performance of different iteration counts with only one
	 * run. 
	 */
	public static final String PARAMETER_MODEL_OUTPUT_INTERVAL = "model_output_interval";

	/**
	 * Discard models with an advantage of less than the specified value.
	 * Not referenced inside this class: {@link #isModelUseful(ContingencyMatrix)}
	 * currently accepts every model.
	 */
	public static final double MIN_ADVANTAGE = 0.001;

	/** Port on which the collection of intermediate ensemble models is delivered. */
	protected OutputPort modelCollectionOutput;

	/**
	 * Constructor. Registers the "performance" value and creates the
	 * "model_collection_output" port.
	 *
	 * @param description
	 *            the operator description passed on to the super class
	 */
	public BayesianBoostingSql(OperatorDescription description) {
		super(description);

		addValue(new ValueDouble("performance", "The performance.") {
			@Override
			public double getDoubleValue() {
				return performance;
			}
		});

		modelCollectionOutput = getOutputPorts().createPort("model_collection_output");
	}

	/**
	 * Constructs a <code>Model</code> repeatedly running a weak learner,
	 * reweighting the training example set accordingly, and combining the
	 * hypothesis using the available weighted performance values. If the input
	 * contains a model, then this model is used as a starting point for
	 * weighting the examples.
	 *
	 * <p>The example weights are restored (or the weight attribute is removed
	 * again, if no old weights were recorded) before this method returns, also
	 * when training fails with an exception.</p>
	 *
	 * @param exampleSet
	 *            the training data
	 * @return an empty ensemble if only one class carries prior weight,
	 *         otherwise the result of {@link #trainBoostingModel(ExampleSet, double[])}
	 * @throws OperatorException
	 *             if preparing the weights or training fails
	 */
	public Model learn(ExampleSet exampleSet) throws OperatorException {
		// Read start model if present.
		this.readOptionalParameters();

		double[] classPriors = this.prepareWeights(exampleSet);

		try {
			// check whether only one or no class is present
			double maxPrior = Double.NEGATIVE_INFINITY;
			double sumPriors = 0;
			for (int i = 0; i < classPriors.length; i++) {
				if (classPriors[i] > maxPrior) {
					maxPrior = classPriors[i];
				}
				sumPriors += classPriors[i];
			}

			if (Tools.isEqual(sumPriors, maxPrior)) {
				// we have only one class in our example set
				// => nothing to do, return an empty ensemble model
				return new BayBoostModel(exampleSet, new Vector<BayBoostBaseModelInfo>(), classPriors);
			}

			// only in this case boosting makes sense
			return this.trainBoostingModel(exampleSet, classPriors);
		} finally {
			// Restore original weights (or delete the weight attribute, if
			// originally none was present). Done in a finally block so that a
			// failed training run does not leave the caller's example set reweighted.
			if (this.oldWeights != null) { // need to reset weights
				Iterator<Example> reader = exampleSet.iterator();
				int i = 0;
				while (reader.hasNext() && i < this.oldWeights.length) {
					reader.next().setWeight(this.oldWeights[i++]);
				}
			} else { // need to delete the weights attribute
				Attribute weight = exampleSet.getAttributes().getWeight();
				if (weight != null) {
					exampleSet.getAttributes().remove(weight);
					exampleSet.getExampleTable().removeAttribute(weight);
				}
			}
		}
	}

	/**
	 * Main method for training the ensemble classifier.
	 *
	 * <p>Runs up to <code>iterations</code> rounds. In each round a base model
	 * is trained, applied, and the examples are reweighted. Every
	 * <code>new_sample_interval</code> rounds a new training sample is fetched
	 * via <code>getNewSample</code>, and every <code>model_output_interval</code>
	 * rounds (if greater than 0) a snapshot of the ensemble is added to the
	 * collection delivered at {@link #modelCollectionOutput}.</p>
	 *
	 * @param trainingSet
	 *            the (already weighted) training data
	 * @param classPriors
	 *            the class priors handed on to the resulting model
	 * @return the ensemble built from all collected base models
	 * @throws OperatorException
	 *             if training fails or the reflective call to
	 *             <code>reweightExamples</code> cannot be performed
	 */
	@Override
	protected BayBoostModel trainBoostingModel(ExampleSet trainingSet, final double[] classPriors) throws OperatorException {
		fuzzyReweighting = getParameterAsBoolean(PARAMETER_FUZZY_EXAMPLE_REWEIGHTING);

		try {
			if (getParameterAsBoolean(PARAMETER_FUZZY_PARTITION_SIZES)) {
				pmClass = FuzzyWeightedPerformanceMeasures.class;
			} else {
				pmClass = StandardWeightedPerformanceMeasures.class;
			}
			pmConstructor = pmClass.getConstructor(ExampleSet.class);
		} catch (Exception e) {
			// Best effort: keep going without a constructor, but leave a trace
			// so that a later failure can be attributed to this lookup.
			log("Cannot look up constructor of performance measures class: " + e);
			pmConstructor = null;
		}

		// for models and their probability estimates
		Vector<BayBoostBaseModelInfo> modelInfo = new Vector<BayBoostBaseModelInfo>();

		// if present apply the start model first
		this.applyPriorModel(trainingSet, modelInfo);

		// check whether to use the complete training set for training
		final double splitRatio = this.getParameterAsDouble(PARAMETER_USE_SUBSET_FOR_TRAINING);
		final boolean bootstrap = (splitRatio > 0) && (splitRatio < 1.0);
		log(bootstrap ? "Bootstrapping enabled." : "Bootstrapping disabled.");

		final boolean allowSkew = this.getParameterAsBoolean(PARAMETER_ALLOW_MARGINAL_SKEWS);

		// NOTE(review): the split is created once from the initial training set
		// and is not rebuilt when a new sample is drawn below -- confirm that
		// this is intended.
		SplittedExampleSet splittedSet = null;
		if (bootstrap) {
			splittedSet = new SplittedExampleSet(trainingSet, splitRatio, SplittedExampleSet.SHUFFLED_SAMPLING, getParameterAsBoolean(RandomGenerator.PARAMETER_USE_LOCAL_RANDOM_SEED), getParameterAsInt(RandomGenerator.PARAMETER_LOCAL_RANDOM_SEED));
		}

		// how often shall a new random sample be drawn?
		final int newSampleInterval = getParameterAsInt(PARAMETER_NEW_SAMPLE_INTERVAL);
		final int modelOutputInterval = getParameterAsInt(PARAMETER_MODEL_OUTPUT_INTERVAL);
		IOObjectCollection<Model> modelCollection = new IOObjectCollection<Model>();

		// maximum number of iterations
		final int iterations = this.getParameterAsInt(PARAMETER_ITERATIONS);
		for (int i = 0; i < iterations; i++) {
			this.currentIteration = i;

			// should a new sample be drawn? (first iteration is 0, so we need to add +1)
			if ((currentIteration + 1) % newSampleInterval == 0) {
				trainingSet = getNewSample(modelInfo);
			}

			Model model;
			AbstractWeightedPerformanceMeasures wp;
			// this object is cloned, but nevertheless (it seems) the underlying data table
			// and thus the example weights are persistent over iterations.
			// But why then do we clone at all??
			ExampleSet iterationSet = (ExampleSet) trainingSet.clone();
			if (bootstrap) {
				splittedSet.selectSingleSubset(0); // switch to learning subset
				model = this.trainBaseModel(splittedSet);

				// apply model to all examples
				iterationSet = model.apply(iterationSet);

				// The returned measures are replaced below; the call is kept
				// because the wrapper is defined outside this class and may
				// have side effects.
				wp = reweightExamplesWrapper(splittedSet, bootstrap);

				try {
					wp = (AbstractWeightedPerformanceMeasures) pmConstructor.newInstance(splittedSet);
					// static method, hence the null receiver
					this.performance = (Double) pmClass.getMethod(
							"reweightExamples",
							ExampleSet.class,
							ContingencyMatrix.class,
							boolean.class,
							boolean.class).invoke(
									null,
									splittedSet,
									wp.getContingencyMatrix(),
									allowSkew,
									fuzzyReweighting);
				} catch (Exception e) {
					// keep the original exception as cause so that the real
					// reason (e.g. a failed constructor lookup) is not lost
					throw new OperatorException("cannot call reweightExamples", e);
				}
			} else {
				// train one model per iteration
				model = this.trainBaseModel(iterationSet);
				iterationSet = model.apply(iterationSet);
				wp = reweightExamplesWrapper(iterationSet, bootstrap);
			}

			PredictionModel.removePredictedLabel(iterationSet);

			// output model?
			// NOTE(review): the snapshot is taken before the model of the
			// current iteration is added to modelInfo -- confirm that this
			// is intended.
			if (modelOutputInterval > 0 && (currentIteration + 1) % modelOutputInterval == 0) {
				// Snapshot of the list (the contained model infos are shared,
				// not copied), so that later additions do not show up in the
				// already emitted ensemble.
				Vector<BayBoostBaseModelInfo> currentModelInfo = new Vector<BayBoostBaseModelInfo>(modelInfo);
				modelCollection.add(new BayBoostModel(trainingSet, currentModelInfo, classPriors));
			}

			// Stop if only one class is present/left.
			if (wp.getNumberOfNonEmptyClasses() < 2) {
				// Using the model here is just necessary to avoid a
				// NullPointerException if this is the first iteration.
				// One could use an empty model instead:
				modelInfo.add(new BayBoostBaseModelInfo(model, wp.getContingencyMatrix()));

				break; // No more iterations!
			}

			final ContingencyMatrix cm = wp.getContingencyMatrix();

			// Add the new model and its weights to the collection of models:
			modelInfo.add(new BayBoostBaseModelInfo(model, cm));

			if (!this.isModelUseful(cm)) {
				// If the model is not considered to be useful (low advantage)
				// then discard it and stop.
				log("Discard model because of low advantage on training data.");
				modelInfo.remove(modelInfo.size() - 1);
				break;
			}

			// Stop if weight is null, because all examples have been explained
			// "deterministically".
			if (this.performance == 0) {
				break;
			}

			inApplyLoop();
		}

		modelCollectionOutput.deliver(modelCollection);

		return new BayBoostModel(trainingSet, modelInfo, classPriors);
	}

	/**
	 * Helper method to decide whether a model improves the training error
	 * enough to be considered. The current implementation accepts every
	 * model and ignores its argument.
	 * 
	 * @param cm
	 *            the contingency matrix of the model under consideration
	 *            (currently unused)
	 * @return always <code>true</code>
	 */
	private boolean isModelUseful(ContingencyMatrix cm) {
		// should rather be decided offline by properly setting
		// the number of iterations
		return true;
	}

	/**
	 * Adds the parameters &quot;iterations&quot;, &quot;new_sample_interval&quot;
	 * and &quot;model_output_interval&quot; to the parameters of the super class.
	 *
	 * @return the parameter list of the super class extended by the three
	 *         parameters above
	 */
	@Override
	public List<ParameterType> getParameterTypes() {
		List<ParameterType> types = super.getParameterTypes();
		ParameterType type = new ParameterTypeInt(PARAMETER_ITERATIONS, "The maximum number of iterations.", 1, Integer.MAX_VALUE, 10);
		type.setExpert(false);
		types.add(type);

		types.add(new ParameterTypeInt(PARAMETER_NEW_SAMPLE_INTERVAL, "Specifies how often a new sample is drawn via the inner process (each X iterations)", 1, Integer.MAX_VALUE));
		types.add(new ParameterTypeInt(PARAMETER_MODEL_OUTPUT_INTERVAL, "Specifies how often the current ensemble is output (each X iterations, 0 for never)", 0, Integer.MAX_VALUE));

		return types;
	}
}
