package com.rapidminer.operator.preprocessing.hhhitter;

import hitters.multi.AlgoType;
import hitters.multi.DimType;
import hitters.multi.SysParameter;

import java.util.List;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.table.AttributeObjectFactory;
import com.rapidminer.example.table.struct.Structures;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.ValueDouble;
import com.rapidminer.operator.ValueString;
import com.rapidminer.operator.ports.InputPort;
import com.rapidminer.operator.ports.OutputPort;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.parameter.ParameterTypeDirectory;
import com.rapidminer.parameter.ParameterTypeDouble;
import com.rapidminer.parameter.UndefinedParameterError;
import com.rapidminer.tools.LogService;
import com.rapidminer.tools.Ontology_struct;
import com.rapidminer.tools.math.similarity.DistanceMeasures;

/**
 * Abstract base class for operators that calculate the set of Hierarchical Heavy
 * Hitters (HHH) for each Example in a given ExampleSet and return an ExampleSet
 * containing the sets of HHH. The HHH are stored as Objects in the ExampleSet via
 * mapping: each Example has an attribute that maps to an Object of type
 * <code>Set</code> containing the HHH.
 * <p>
 * This class itself only provides the shared infrastructure: the ports, the common
 * parameters, the logged values and helpers for creating the object attributes.
 * Subclasses supply the hierarchy dimensions and capacities via
 * {@link #retrieveDims()} and {@link #retrieveCaps()}.
 *
 * @author Peter Fricke
 * @version $Id$
 */
public abstract class HHHExtraction extends Operator {

	// The order of the createPort calls determines the port order and must not change.
	protected InputPort exampleSetInput = getInputPorts().createPort("training set");
	protected OutputPort modelOutput = getOutputPorts().createPort("model");
	protected OutputPort resultOutput = getOutputPorts().createPort("result");
	protected OutputPort exampleSetOutput = getOutputPorts().createPort("exampleSet");

	/** The parameter name for &quot;The algorithm used to calculate the HHH.&quot; */
	public static final String ALGORITHM = "algorithm";

	/** The parameter name for &quot;The number epsilon used to calculate the HHH.&quot; */
	public static final String EPSILON = "epsilon";

	/** The parameter name for &quot;The number phi used to calculate the HHH.&quot; */
	public static final String PHI = "phi";

	/** The parameter name for &quot;Path to the directory for the files containing
	 * the cached HHHs.&quot; */
	public static final String CACHE_PATH = "cache_path";

	/** The parameter name for &quot;Operator should return the Hierarchical Heavy
	 * Hitters as a ResultObject.&quot; */
	public static final String RESULT = "result";

	/** The parameter name for &quot;Cache the HHH for each log file and parameter set
	 * for later use. Uses only a moderate amount of disk space.&quot; */
	public static final String USE_CACHE = "use_cache";

	/** The parameter name for &quot;Write the data structure obtained when calculating
	 * the HHH to the example set.&quot; */
	public static final String DATASTRUCT = "write_data_struct";

	// Statistics exposed through the logged values registered in the constructor.
	// They are never written in this class; presumably subclasses update them
	// while processing the example set.
	int cacheHits = 0;
	int sizeExSet = 0;
	String setSize = "";
	double avgSetSize = 0.0;
	double dsSize = 0.0;

	// ###########################################################################################
	// Workaround for a bug in RapidMiner: PluginInit provides the wrong ClassLoader, creating
	// a new name space and thus ClassCastExceptions when using other classes of the
	// Plugin within the distance measures. Registering the measures here makes sure they
	// are loaded through the same ClassLoader as this operator.
	static {

		System.out.println( "" );
		System.out.println( "Registering DistanceMeasures ***************************" );
		System.out.println( "" );

		DistanceMeasures.registerMeasure(DistanceMeasures.NOMINAL_MEASURES_TYPE, "OGM Similarity", OGMSimilarity.class);
		DistanceMeasures.registerMeasure(DistanceMeasures.NOMINAL_MEASURES_TYPE, "Cormode Similarity", CormodeSimilarity.class);
		DistanceMeasures.registerMeasure(DistanceMeasures.NOMINAL_MEASURES_TYPE, "BGM Similarity", BGMSimilarity.class);
		DistanceMeasures.registerMeasure(DistanceMeasures.NOMINAL_MEASURES_TYPE, "BGM2 Similarity", BGM2Similarity.class);

		DistanceMeasures.registerMeasure(DistanceMeasures.NOMINAL_MEASURES_TYPE, "Data Structure Similarity", DataStructSimilarity.class);

	}
	// ###########################################################################################

	/**
	 * Creates a new HHHExtraction operator and registers its logged values
	 * (SetSize, AvgSetSize, CacheHits and DataStructSize).
	 *
	 * @param description the operator description handed on to the super class
	 */
	public HHHExtraction(OperatorDescription description) {
		super(description);

		addValue(new ValueString("SetSize", "The size of the set of Hierarchical Heavy Hitters for each example.") {
			public String getStringValue() {
				return setSize;
			}
		});

		addValue(new ValueDouble("AvgSetSize", "The average size of the set of Hierarchical Heavy Hitters.") {
			public double getDoubleValue() {
				return avgSetSize;
			}
		});

		addValue(new ValueDouble("CacheHits", "Cache hits / number of examples.") {
			public double getDoubleValue() {
				// Floating-point division: with sizeExSet == 0 this yields NaN
				// (or Infinity if cacheHits is non-zero) instead of throwing.
				return (double) cacheHits / sizeExSet;
			}
		});

		addValue(new ValueDouble("DataStructSize", "Size of data structure obtained when calculating the HHH.") {
			public double getDoubleValue() {
				return dsSize;
			}
		});
	}

	/**
	 * Returns the object attribute named {@link Structures#ID_ATTRIBUTE} of the given
	 * example set. If the example set has no such attribute yet, it is created, added
	 * to the example table and registered as a regular attribute.
	 *
	 * @param clone the example set to look the attribute up in (and to extend if needed)
	 * @return the existing or newly created attribute, never <code>null</code>
	 */
	protected Attribute createHitterAttribute( ExampleSet clone ){

		Attribute structID = clone.getAttributes().get( Structures.ID_ATTRIBUTE );
		if ( structID == null ) {
			structID = addObjectAttribute( clone, Structures.ID_ATTRIBUTE, "Create struct attribute." );
		}
		return structID;
	}

	/**
	 * Collects the parameters needed for the HHH calculation into a
	 * {@link ParameterWrapper}: the system parameter built from the dimensions and
	 * capacities supplied by the subclass, epsilon, phi and the algorithm type.
	 * If phi is smaller than epsilon, a warning is logged and phi is raised to epsilon.
	 *
	 * @return the populated parameter wrapper
	 * @throws UserError if a parameter or the dimensions cannot be retrieved
	 */
	protected ParameterWrapper getParameterWrapper() throws UserError{

		ParameterWrapper pw = new ParameterWrapper();
		DimType[] dims = retrieveDims();
		int[] cap = retrieveCaps();
		pw.par = new SysParameter( dims, cap );

		pw.epsilon = getParameterAsDouble(EPSILON);
		pw.phi = getParameterAsDouble(PHI);
		if( pw.phi < pw.epsilon ){
			LogService.getGlobal().log( "Phi < epsilon, setting phi = epsilon", LogService.WARNING );
			pw.phi = pw.epsilon;
		}

		// Category index 0 is "Partial Ancestry", every other index "Full Ancestry"
		// (see the category list in getParameterTypes()).
		pw.algo = ( getParameterAsInt(ALGORITHM) == 0 ) ? AlgoType.PART_ANC : AlgoType.FULL_ANC;

		return pw;
	}

	/**
	 * Returns the object attribute named {@link DataStructSimilarity#DS_ATTRIBUTE} of
	 * the given example set. If it does not exist, it is only created (and added to
	 * the example table as a regular attribute) when <code>writeDs</code> is set.
	 *
	 * @param clone   the example set to look the attribute up in (and to extend if needed)
	 * @param writeDs whether a missing attribute should be created
	 * @return the existing or newly created attribute; <code>null</code> if the
	 *         attribute is missing and <code>writeDs</code> is <code>false</code>
	 */
	protected Attribute createDSAttribute( ExampleSet clone, boolean writeDs ){

		Attribute dsID = clone.getAttributes().get( DataStructSimilarity.DS_ATTRIBUTE );
		if ( dsID == null && writeDs ) {
			dsID = addObjectAttribute( clone, DataStructSimilarity.DS_ATTRIBUTE, "Create ds attribute." );
		}
		return dsID;
	}

	/**
	 * Logs the given note, then creates a new object-valued attribute, adds it to the
	 * example table of the example set and registers it as a regular attribute.
	 */
	private static Attribute addObjectAttribute( ExampleSet exampleSet, String name, String logMessage ){
		LogService.getGlobal().log( logMessage, LogService.NOTE );
		Attribute attribute = AttributeObjectFactory.createAttribute( name, Ontology_struct.OBJECT );
		exampleSet.getExampleTable().addAttribute( attribute );
		exampleSet.getAttributes().addRegular( attribute );
		return attribute;
	}

	/**
	 * Supplies the dimension types passed to the {@link SysParameter} built in
	 * {@link #getParameterWrapper()}.
	 */
	protected abstract DimType[] retrieveDims()throws UserError;

	/**
	 * Supplies the capacities passed to the {@link SysParameter} built in
	 * {@link #getParameterWrapper()}.
	 */
	protected abstract int[] retrieveCaps() throws UndefinedParameterError;

	/** Returns a list with all parameter types of this model. */
	public List<ParameterType> getParameterTypes() {

		List<ParameterType> types = super.getParameterTypes();

		// Index 0 = Partial Ancestry, index 1 = Full Ancestry (the default).
		String[] algo = { "Partial Ancestry", "Full Ancestry" };
		types.add( new ParameterTypeCategory(ALGORITHM, "The algorithm used to calculate the HHH.", algo, 1 ) );

		types.add( new ParameterTypeDouble(EPSILON, "The number epsilon used to calculate the HHH.", 0.000000000001, 0.5, 0.0005 ) );

		types.add( new ParameterTypeDouble(PHI, "The number phi used to calculate the HHH.", 0.000000000001, 0.5, 0.002 ) );

		// NOTE(review): the default is a Windows-style absolute path; a
		// platform-neutral default may be preferable. Kept for compatibility.
		types.add( new ParameterTypeDirectory(CACHE_PATH, "Path to the directory for the files containing the cached HHHs.", "D:/cache" ) );

		types.add( new ParameterTypeBoolean(RESULT, "Operator should return the Hierarchical Heavy Hitters as a ResultObject.", true) );

		types.add( new ParameterTypeBoolean(USE_CACHE,
				"Cache the HHH for each logfile and parameter set for later use. Uses only a moderate amount of disk space.", true) );

		types.add( new ParameterTypeBoolean(DATASTRUCT,
				"Write the data structure obtained when calculating the HHH to the example set.", false) );

		return types;
	}

	/**
	 * Decides whether the given output port should be connected automatically. For
	 * the example set output this depends on the boolean parameter
	 * "keep_example_set"; all other ports use the default behavior of the super class.
	 */
	@Override
	public boolean shouldAutoConnect(OutputPort outputPort) {
		if (outputPort != exampleSetOutput) {
			return super.shouldAutoConnect(outputPort);
		}
		// NOTE(review): "keep_example_set" is not registered in getParameterTypes()
		// of this class; presumably a subclass or super class defines it - confirm.
		return getParameterAsBoolean("keep_example_set");
	}

}
