package com.rapidminer.operator.preprocessing.hhhitter;

import hitters.multi.AbstractComplexHHH;
import hitters.multi.AlgoType;
import hitters.multi.DimType;
import hitters.multi.Element;
import hitters.multi.FullAncHHH;
import hitters.multi.MultiDatabase;
import hitters.multi.PartAncHHH;
import hitters.multi.SysParameter;
import hitters.tools.Utils;

import java.io.File;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Vector;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.Attributes;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.table.ObjectAttribute;
import com.rapidminer.operator.IOObject;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.ValueDouble;
import com.rapidminer.operator.ValueString;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeInt;
import com.rapidminer.parameter.UndefinedParameterError;
import com.rapidminer.tools.LogService;

/**
 * 
 * This operator calculates the set of Hierarchical Heavy Hitters for each Example in
 * the given ExampleSet and returns an ExampleSet containing the sets of HHH. The HHH
 * are stored as Objects in the ExampleSet via mapping: Each Example has
 * an attribute that maps to an Object of Type <code>Set</code> containing 
 * the HHH.  
 * 
 * @author Peter Fricke 
 * @version $Id$
 *  
 */
public class HHHExtractionPlain extends HHHExtraction {
	
	/** The parameter name for &quot;Use the paths as a 
	 * hierarchical variable.&quot; */
	public static final String USE_PATH = "use_path";
	
	/** The parameter name for &quot;Use the system calls 
	 * as a hierarchical variable.&quot; */
	public static final String USE_CALL = "use_calls";
	
	/** The parameter name for &quot;Use the sequence of 
	 * (flat) calls as a hierarchical variable.&quot; */
	public static final String USE_SEQ = "use_sequence";
	
	/** The parameter name for &quot;Use the return value 
	 * of the system calls as a hierarchical variable.&quot; */
	public static final String USE_RETURN = "use_returnvalue";

	/** The parameter name for &quot;The limit on the path depth.&quot; */
	public static final String PATH_CAP = "path_cap";

	/** The parameter name for &quot;The limit on the system call depth.&quot; */
	public static final String CALL_CAP = "call_cap";

	/** The parameter name for &quot;The limit on the sequence length.&quot; */
	public static final String SEQ_CAP = "sequence_cap";

	String callString = "";
	double hitterTime;
	

	/** Creates a new HHHExtraction operator. */
	public HHHExtractionPlain(OperatorDescription description) {
		super(description);

		addValue( new ValueString("UsedCalls", "The systemcalls that are used, all other calls are ignored."){
			public String getStringValue(){
				return callString;
			}
		});
		
		addValue( new ValueDouble("HitterTime", "The time spent to calculate the Hierarchical Heavy Hitters."){
			public double getDoubleValue(){
				return new Double( hitterTime );
			}
		});
	}


	public void doWork() throws OperatorException {
				
		super.doWork();
		
		LogService logService = LogService.getGlobal();
		//logService.setVerbosityLevel( LogService.STATUS );		
		logService.log( "Start apply   ", LogService.NOTE );		
		
		ExampleSet exampleSet = exampleSetInput.getData();
		ExampleSet clone = (ExampleSet) exampleSet.clone();
		clone.recalculateAllAttributeStatistics();
		sizeExSet = exampleSet.size();
		cacheHits = 0;
		setSize = "\"";
		avgSetSize = 0.0;
		dsSize = 0;
			
		Attribute idAttribute = getIdAttribute( clone );
		ParameterWrapper pw = getParameterWrapper(); 
			
		boolean writeDs = getParameterAsBoolean(DATASTRUCT);
		String cachePrefix = getParameterAsString(CACHE_PATH);
		
		HashSet<String> calls = new HashSet<String>(); 				
		for( Attribute attr : clone.getAttributes() ){
			calls.add(attr.getName());
		}
		callString = makeString( calls );		
		logService.log( "" + calls, LogService.NOTE );
		pw.par.setUsedCalls(calls);
		
		Attribute structID = createHitterAttribute(clone); 
		Attribute dsID = createDSAttribute(clone, writeDs);
			
		List<Set<Element>> allHHH = new ArrayList<Set<Element>>(); 
		List<String> names = new ArrayList<String>();
		
		double val;
		hitterTime = - System.currentTimeMillis();
		for (Example example : clone) {
		
			double startTime = System.currentTimeMillis();
				
			checkForStop();			
			Set<Element> hhh;
			
			int tmpDsSize = 0;
			
			if( writeDs ){
				AbstractComplexHHH hitterAlgo = getAlgo(example, pw);
				hhh = hitterAlgo.outputSet( pw.phi ).keySet();
				Map<Element, Integer> ds = hitterAlgo.dumpf();
				dsSize += ds.size();
				tmpDsSize = ds.size();
				val = ((ObjectAttribute<?>)dsID).getMapping().mapString( ds );		
				example.setValue(dsID, val);
			}
			else{
				hhh = calcHitters( example, pw, cachePrefix, getParameterAsBoolean(USE_CACHE) );	
			}
			
			setSize += hhh.size() + "; ";
			avgSetSize += hhh.size();
			
			allHHH.add(hhh);
			names.add( example.getValueAsString(idAttribute, 3, false) );
	
			val = ((ObjectAttribute<?>)structID).getMapping().mapString( hhh );		
			example.setValue(structID, val);		
			
			double endTime = System.currentTimeMillis();
			
			logService.log( "  ds: " + tmpDsSize + ", hhh: " + hhh.size() + ", time: " +
			   (endTime - startTime), LogService.NOTE );
		}
		hitterTime += System.currentTimeMillis();
		
		avgSetSize = avgSetSize / (double)clone.size();
		dsSize = dsSize / (double)clone.size();
		setSize += "\"";
		
		HHHResult result = new HHHResult( pw, allHHH, names );
		if( getParameterAsBoolean(RESULT) ) {
			//return new IOObject[] { clone, result };
			exampleSetOutput.deliver( clone );
			resultOutput.deliver( result );
		    }
		else
			//return new IOObject[] { clone };
			exampleSetOutput.deliver( clone );
	}

	/** Calculates the Hierarchical Heavy Hitters for the given Example, 
	 * ParameterWrapper and cache-directory.
	 */
	private Set<Element> calcHitters( Example example, ParameterWrapper pw, 
			String cachePrefix, boolean shouldUseCache ) throws OperatorException{

		String id = null;
		String cacheName = null;		
		Set<Element> hhh = null;
		
		LogService logService = LogService.getGlobal();
		
		AbstractComplexHHH hitterAlgo;
		Attribute idAttribute = example.getAttributes().getSpecial(Attributes.ID_NAME);		
		id = example.getValueAsString(idAttribute, 3, false);							
		id = id.substring( id.lastIndexOf( "/" ) );
		
		if( shouldUseCache ){
			
			// Create path to cachefile step by step.
			// The path to the file contains the parameters 
			// used to create the file.
			String cacheDir = cachePrefix + "/" + pw.algo + "/eps" + 
			Utils.format(pw.epsilon) + "#phi" + Utils.format(pw.phi);

			SysParameter sysPar = (SysParameter)pw.par;
			for( int i = 0; i < pw.par.getDim(); i++ ){
				cacheDir += "#" + sysPar.dimContents(i) + "#" + sysPar.getCap()[i];
			}

			// Create a code stating which system calls were used
			HashSet<String> calls = pw.par.usedCalls();
			String callCode = Utils.createCallCode( calls );
			cacheDir += "/" + callCode;

			// Create the directories containing the parameters
			// if they do not already exist.
			File tmpDir = new File( cacheDir ); 
			if( ! tmpDir.exists() ) 
				tmpDir.mkdirs();
			
			cacheName = cacheDir + id;		

			try{
				hhh = MultiDatabase.readHHH( cacheName );
				//logService.log( "Cache: " + cacheName, LogService.STATUS );
				cacheHits++;				
			}catch( Exception e ){ }//logService.log( "No cache " + cacheName, LogService.STATUS); }
		}// END if( shouldUseCache ){

		if( hhh == null ){
			//logService.log( "Extracting HHH for " + id.substring(1) + "...", LogService.STATUS );
			
			hitterAlgo = getAlgo( example, pw );
			hhh = hitterAlgo.outputSet( pw.phi ).keySet();
			
			//logService.log( "...done." );

			if( shouldUseCache ){
				try{
					MultiDatabase.saveHHH(hhh, cacheName, id, pw.par , pw.epsilon, pw.phi, pw.algo);
				}catch( Exception e ){ logService.log( "Can't write cache: " + cacheName + " " + e, LogService.WARNING); }
			}			
		}
		return hhh;
	}

	
	/** Inserts the elements for the log file specified by the given example into an 
	 * instance of a hitter algorithm.
	 */
	private AbstractComplexHHH getAlgo( Example example, ParameterWrapper pw ){
		
		LogService logService = LogService.getGlobal();		
		AbstractComplexHHH hitterAlgo;		
		if( pw.algo == AlgoType.PART_ANC ) hitterAlgo = new PartAncHHH( pw.epsilon, pw.par );
		else  hitterAlgo = new FullAncHHH( pw.epsilon, pw.par );			

		Attribute idAttribute = example.getAttributes().getSpecial(Attributes.ID_NAME);		
		String id = example.getValueAsString(idAttribute, 3, false);					
		MultiDatabase d = new MultiDatabase( id, pw.par );		
		id = id.substring( id.lastIndexOf( "/" ) );
		logService.log( id.substring(1) + "...", LogService.NOTE );
		
		//logService.log( "Still here!" );
		
		d.openRead();
		Vector<Element> elements = d.readSystemCalls();
		d.closeRead();
		
		for( Element e : elements ) 
			hitterAlgo.insert( e, 1 );

		return hitterAlgo;
	}
	
	
	private Attribute getIdAttribute( ExampleSet exampleSet ) throws UserError{
		boolean found = false;
		String id;
		Attribute idAttribute = exampleSet.getAttributes().getSpecial(Attributes.ID_NAME);
		if( idAttribute == null ){
			throw new UserError(this, 117, "ID (containing the name of a logfile)");
		}
		else{
			if ( exampleSet.size() == 0) {	
				throw new UserError(this, 117);
			}
			else{
				id = exampleSet.getExample(0).getValueAsString(idAttribute, 3, false);
				File f = new File( id );
				found = f.exists();
			}
			if( ! found ){			
				throw new UserError(this, 127, "An ID attribute (containing the name" +
						" of a logfile) is required. This logfile does not exist!");
			}
		}
		
		return idAttribute;		
	}

		
	/**
	 * Fetches the hierarchical variable (Path, Call, etc.) the user
	 * has selected in the GUI for each dimension. 
	 * 
	 * @return for each dimension, the hierarchical variable the user 
	 * has selected in the GUI.
	 */
	protected DimType[] retrieveDims() throws UserError {		

		List<DimType> dimList = new ArrayList<DimType>();
		if( getParameterAsBoolean(USE_PATH) )   dimList.add( DimType.PATH );
		if( getParameterAsBoolean(USE_CALL) )   dimList.add( DimType.CALL );
		if( getParameterAsBoolean(USE_SEQ) )    dimList.add( DimType.SEQUENCE );
		if( getParameterAsBoolean(USE_RETURN) ) dimList.add( DimType.RETURN );
		DimType[] dims = dimList.toArray(new DimType[0]);

		if( dims.length < 1 ) 
			throw new UserError(this, 202, USE_PATH + ", " + USE_CALL, USE_SEQ, USE_RETURN );
		
		return dims;
	}
	
	
	/**
	 * Fetches the limit on the depth of the hierarchy for each dimension
	 * that the user has selected in the GUI.
	 * 
	 * @return for each dimension, limit on the depth of the hierarchy
	 *  the user has selected in the GUI
	 */
	protected int[] retrieveCaps() throws UndefinedParameterError{		

		List<Integer> capList = new ArrayList<Integer>();	

		if( getParameterAsBoolean(USE_PATH) )   
			capList.add( getParameterAsInt(PATH_CAP) );
		if( getParameterAsBoolean(USE_CALL) )   
			capList.add( getParameterAsInt(CALL_CAP) );
		if( getParameterAsBoolean(USE_SEQ) )   
			capList.add( getParameterAsInt(SEQ_CAP) );   

		//Return: Switch it off or use it, limiting depth does
		//not make sense.
		if( getParameterAsBoolean(USE_RETURN) )   
			capList.add( 2 );
		
		int[] cap = new int[capList.size()];
		for( int i = 0; i < cap.length; i++ ) cap[i] = capList.get(i);

		return cap;
	}

	
	private String makeString( HashSet<String> calls ){
		String s = "";
		for( String c : calls ) s += ", " + c;
		return s.substring(2);		
	}
	
	
	/** Returns a list with all parameter types of this model. */
	public List<ParameterType> getParameterTypes() {
		
		List<ParameterType> types = super.getParameterTypes();
		ParameterType type;
			
		type = new ParameterTypeBoolean(USE_PATH, "Use the paths as a hierarchical variable.", true);
		types.add(type);
		type = new ParameterTypeInt(PATH_CAP, "The limit on the path depth.", 1, Integer.MAX_VALUE, 12);
		types.add(type);
		
		type = new ParameterTypeBoolean(USE_CALL, "Use the system calls as a hierarchical variable.", true);
		types.add(type);
		type = new ParameterTypeInt(CALL_CAP, "The limit on the system call depth.", 1, 6, 6);
		types.add(type);

		type = new ParameterTypeBoolean(USE_SEQ, "Use the sequence of (flat) calls as a hierarchical variable.", false);
		types.add(type);
		type = new ParameterTypeInt(SEQ_CAP, "The limit on the sequence length.", 1, 4, 4);
		types.add(type);

		type = new ParameterTypeBoolean(USE_RETURN, "Use the return value of the system calls as a hierarchical variable.", false);
		types.add(type);
		
		return types;
	}
			
}
