/**
 * 
 */
package com.rapidminer.operator.features;

import hitters.multi.Weights;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.operator.IOContainer;
import com.rapidminer.operator.IOObject;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.performance.PerformanceVector;

/**
 * 
 * A faster version of the {@link SimplePopulationEvaluator} designed specifically
 * for HHH. To be used by {@link CoarseFeatureSelectionOperator}.
 * Assigns a performance vector to each individual in the population.
 * The individual is evaluated if it is sufficiently dissimilar from
 * the best individual of the last generation ("parent"), otherwise, it is assigned
 * the performance vector of this parent, saving time. Thus, system calls that
 * are extremely rare do not cause expensive new evaluations. This is
 * justified, because they are very unlikely to produce a different
 * set of HHH and in effect a different performance vector.
 *  
 * @author Peter Fricke, Sebastian Land, Ingo Mierswa
 *
 * @version $Id$
 */
public class CoarsePopulationEvaluator implements PopulationEvaluator {

	/** Name of the file the attribute frequency table is loaded from. */
	private static final String WEIGHTS_FILE = "Freq.txt";

	/** Feature weights at or below this value are treated as "attribute not selected". */
	private static final double SELECTION_EPSILON = 1e-12;

	/** Example set that each individual's clean clone is derived from. */
	private final ExampleSet originalSet;

	/** Input container that the cloned example set is prepended to before the inner operators run. */
	private final IOContainer input;

	/** Operator whose inner operators are applied to evaluate an individual. */
	private final FeatureOperator operator;

	/** Per-attribute frequency table, looked up by attribute name. */
	private final Weights myWeights;

	/** Minimum relative increase in summed frequency (versus the parent) that triggers a real evaluation. */
	private final double threshold;

	/**
	 * Creates an evaluator with a threshold of <code>0.0</code>.
	 *
	 * @param operator the operator whose inner operators perform the evaluation
	 * @param input the input container the cloned example set is prepended to
	 * @param originalSet the example set individuals are evaluated on
	 * @throws RuntimeException if the frequency table cannot be loaded
	 */
	public CoarsePopulationEvaluator(FeatureOperator operator, IOContainer input, ExampleSet originalSet) {
		this(operator, input, originalSet, 0.0);
	}

	/**
	 * Creates an evaluator with the given threshold and loads the frequency
	 * table from {@value #WEIGHTS_FILE}.
	 *
	 * @param operator the operator whose inner operators perform the evaluation
	 * @param input the input container the cloned example set is prepended to
	 * @param originalSet the example set individuals are evaluated on
	 * @param threshold the relative difference in summed frequency required for a new evaluation
	 * @throws RuntimeException if the frequency table cannot be loaded; the underlying
	 *         exception is attached as the cause
	 */
	public CoarsePopulationEvaluator(FeatureOperator operator, IOContainer input, ExampleSet originalSet, double threshold) {
		this.originalSet = originalSet;
		this.input = input;
		this.operator = operator;
		this.threshold = threshold;
		try {
			// NOTE(review): the meaning of the second constructor argument (0.0) is not visible here -- confirm against Weights.
			this.myWeights = new Weights(WEIGHTS_FILE, 0.0);
		} catch (Exception e) {
			// Keep the original failure as the cause so the real reason (missing file, parse error, ...) is not lost.
			throw new RuntimeException("Can't find my weights!", e);
		}
	}

	/**
	 * Evaluates a single individual and sets its performance vector. Individuals
	 * that already carry a performance vector are left untouched.
	 *
	 * @param individual the individual to evaluate
	 * @throws OperatorException if one of the inner operators fails
	 */
	private void evaluate(Individual individual) throws OperatorException {
		if (individual.getPerformance() != null) {
			return;
		}

		ExampleSet clone = FeatureOperator.createCleanClone(originalSet, individual.getWeights());

		// Run the inner operators in order on the input, with the clone put in front.
		IOContainer innerResult = input.prepend(new IOObject[] { clone });
		for (int i = 0; i < operator.getNumberOfOperators(); i++) {
			innerResult = operator.getOperator(i).apply(innerResult);
		}

		individual.setPerformance(innerResult.remove(PerformanceVector.class));
	}

	/**
	 * Sums the frequencies of all attributes that the individual has selected,
	 * i.e. whose weight exceeds {@link #SELECTION_EPSILON}.
	 *
	 * @param ind the individual whose selected attributes are summed up
	 * @return the summed frequency of the selected attributes
	 */
	private double getSum(Individual ind) {
		double[] weights = ind.getWeights();
		double sum = 0;

		// Assumes the weights array is aligned with the iteration order of the attributes -- TODO confirm.
		int index = 0;
		for (Attribute attribute : originalSet.getAttributes()) {
			if (weights[index++] > SELECTION_EPSILON) {
				sum += myWeights.getFreq(attribute.getName());
			}
		}
		return sum;
	}

	/**
	 * Assigns a performance vector to each individual in the population.
	 * An individual is evaluated if its summed attribute frequency is
	 * sufficiently larger than that of the best individual of the last
	 * generation ("parent"), or if no parent performance is available.
	 * Otherwise it is assigned the performance vector of the parent,
	 * saving the cost of an evaluation.
	 *
	 * @param population the population whose individuals receive a performance vector
	 * @throws OperatorException if evaluating an individual fails
	 * @see com.rapidminer.operator.features.PopulationEvaluator#evaluate(com.rapidminer.operator.features.Population)
	 */
	public void evaluate(Population population) throws OperatorException {
		Individual parent = population.getCurrentBest(); // best individual in last generation

		// Without a parent the value of parentSum is irrelevant: parentPerformance
		// stays null, which forces an evaluation for every individual below.
		double parentSum = (parent == null) ? Double.MIN_NORMAL : getSum(parent);
		PerformanceVector parentPerformance = (parent == null) ? null : parent.getPerformance();

		for (int i = 0; i < population.getNumberOfIndividuals(); i++) {
			Individual individual = population.get(i);
			double sum = getSum(individual);

			// NOTE(review): the relative difference is signed, so individuals whose summed
			// frequency is lower than the parent's never qualify -- confirm this is intended.
			boolean sufficientlyDifferent = sum > threshold / 1000 && (sum - parentSum) / sum >= threshold;

			if (sufficientlyDifferent || parentPerformance == null) {
				evaluate(individual);
			} else {
				// NOTE(review): this replaces any performance vector the individual already has -- confirm this is intended.
				individual.setPerformance(parentPerformance);
			}
		}
	}

}

