package com.rapidminer.operator.preprocessing.hhhitter;

import hitters.multi.AbstractComplexHHH;
import hitters.multi.AlgoType;
import hitters.multi.DimType;
import hitters.multi.Element;
import hitters.multi.FullAncHHH;
import hitters.multi.MultiDatabase;
import hitters.multi.MultiHitterInfo;
import hitters.multi.PartAncHHH;
import hitters.multi.SysParameter;
import hitters.tools.Utils;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.table.ObjectAttribute;
import com.rapidminer.operator.IOObject;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeDirectory;
import com.rapidminer.parameter.ParameterTypeInt;
import com.rapidminer.parameter.UndefinedParameterError;
import com.rapidminer.tools.LogService;

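/**
 * 
 * This operator calculates the set of Hierarchical Heavy Hitters (HHH) for each
 * Example in the given ExampleSet and returns an ExampleSet containing the sets
 * of HHH. The sets are computed per <code>uid</code>: the operator reads the
 * numbered logfiles found in the configured data directory and feeds the file
 * paths of the <code>close</code> system calls of each uid into an HHH algorithm.
 * The HHH are stored as Objects in the ExampleSet via mapping: each Example has
 * an attribute that maps to an Object of type <code>Set</code> containing
 * the HHH.
 * 
 * @author Peter Fricke 
 * @version $Id$
 *  
 */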
public class HHHExtractionSpinczyk extends HHHExtraction {
	
	

	/** The parameter name for &quot;Path to the directory containing 
	 * the logfiles.&quot; */
	public static final String DATA_PATH = "data_path";

	/** The parameter name for &quot;Number of the first logfile to use.&quot; */
	public static final String FIRST_FILE = "first_file";

	/** The parameter name for &quot;Number of the last logfile to use.&quot; */
	public static final String LAST_FILE = "last_file";
		
	// Patterns used by parseLine() to pull single fields out of one logged call.
	// A record is expected to look like "syscall: <name>, ..., uid: <n>, ..., gid: <n>".

	/** Matches "syscall: <name>" (lower-case letters, digits, '_') directly followed by ", ". */
	static Pattern callP = Pattern.compile( "syscall:\\s[a-z_0-9]+(?=,\\s)" );
	/** Matches "gid: <1-5 digits>" preceded by ", " and located at the very end of the record. */
	static Pattern gidP  = Pattern.compile( "(?<=,\\s)gid:\\s[0-9]{1,5}(?=$)" );
	/** Matches "uid: <1-5 digits>" preceded by ", " and followed by ", ". */
	static Pattern uidP  = Pattern.compile( "(?<=,\\s)uid:\\s[0-9]{1,5}(?=,\\s)" );
	/** Matches "file: <text without ',' or newline>" preceded by ", " and followed by ",". */
	static Pattern fileP = Pattern.compile( "(?<=,\\s)file:\\s[^,\\n]+(?=,)" );	
	
	
	/**
	 * Creates a new HHHExtractionSpinczyk operator.
	 *
	 * @param description the operator description, passed on unchanged to the
	 *                    HHHExtraction superclass constructor
	 */
	public HHHExtractionSpinczyk(OperatorDescription description) {
		super(description);
	}


	/**
	 * Computes one set of Hierarchical Heavy Hitters per uid and attaches it to
	 * every Example of a clone of the input ExampleSet.
	 * <p>
	 * If caching is enabled (and the data structure is not requested) the hitter
	 * sets are read from the cache. Otherwise the logfiles
	 * <code>file&lt;7-digit number&gt;.log</code> with numbers from
	 * {@link #FIRST_FILE} to {@link #LAST_FILE} are read from {@link #DATA_PATH},
	 * and the file path of every <code>close</code> call is inserted into the
	 * algorithm belonging to the uid of the call.
	 *
	 * @return the annotated clone of the input ExampleSet, followed by an
	 *         HHHResult if the RESULT parameter is set
	 * @throws OperatorException if the input or a parameter cannot be retrieved
	 *         or the operator is stopped
	 * @throws RuntimeException if a logfile cannot be read
	 */
	public IOObject[] apply() throws OperatorException {

		LogService logService = LogService.getGlobal();
		logService.setVerbosityLevel( LogService.STATUS );
		logService.log( "Start apply   ", LogService.STATUS );

		ExampleSet exampleSet = getInput( ExampleSet.class );
		ExampleSet clone = (ExampleSet) exampleSet.clone();
		clone.recalculateAllAttributeStatistics();
		sizeExSet = exampleSet.size();
		cacheHits = 0;
		setSize = "\"";
		avgSetSize = 0.0;
		dsSize = 0;

		ParameterWrapper pw = getParameterWrapper();
		boolean writeDs = getParameterAsBoolean(DATASTRUCT);
		boolean useCache = getParameterAsBoolean(USE_CACHE);
		String cachePrefix = getParameterAsString(CACHE_PATH);

		Attribute structID = createHitterAttribute(clone);
		Attribute dsID = createDSAttribute(clone, writeDs);

		List<Set<Element>> allHHH = new ArrayList<Set<Element>>();
		List<String> names = new ArrayList<String>();
		int firstFile = getParameterAsInt(FIRST_FILE);
		int lastFile = getParameterAsInt(LAST_FILE);

		HashMap<String, String> parsed = new HashMap<String, String>();
		HashMap< Integer, AbstractComplexHHH > algos = null;
		HashMap< Integer, Set<Element> > hitterSets = null;

		// The cache only holds the hitter sets, not the data structures,
		// so it cannot be used when the data structure has to be written.
		if( useCache && ( ! writeDs ) ){
			logService.log( "Searching for cache.", LogService.STATUS );
			hitterSets = readCache( clone, cachePrefix, pw, firstFile, lastFile );
			if( hitterSets == null ) logService.log( "Cache not found.", LogService.WARNING );
		}

		if( hitterSets == null ){
			// One hitter algorithm for each uid
			algos = new HashMap< Integer, AbstractComplexHHH >();
			fillAlgoMap( algos, clone, pw );

			String dataPath = getParameterAsString(DATA_PATH);
			StringBuilder record = new StringBuilder();

			for( int i = firstFile; i <= lastFile; i++ ){
				checkForStop();
				String number = "" + i;
				while( number.length() < 7 ) number = "0" + number;
				String file = dataPath + "/file" + number + ".log";

				// Start every file with an empty record. Otherwise the last
				// (already counted) record of the previous file would be glued
				// in front of the first record of this file.
				record.setLength( 0 );
				boolean first = true;
				BufferedReader in = null;
				try {
					in = new BufferedReader( new FileReader( file ) );
					String line;
					while( (line = in.readLine()) != null ){
						line = line.trim();
						// skip (almost) empty lines and comments
						if( line.length() <= 2 || line.startsWith("#") ) continue;

						// We have ugly data, some calls take up several lines.
						// So we add lines until we find "syscall:", indicating that the
						// previous call is complete.
						if( line.startsWith( "syscall: " ) ){
							if( first ){
								first = false;
							}
							else{
								insertCloseCall( record.toString(), parsed, file, algos, pw );
								record.setLength( 0 );
							}
						}
						record.append( line );
					}
					// Don't forget the last call of the file
					if( ! first ) insertCloseCall( record.toString(), parsed, file, algos, pw );
				} catch( IOException e ) {
					throw new RuntimeException( "Cannot read logfile " + file, e );
				} finally {
					if( in != null ){
						try {
							in.close();
						} catch( IOException ignored ) {
							// nothing useful can be done if closing a reader fails
						}
					}
				}
			}
			writeCacheMarcoEilig( algos, cachePrefix, pw, firstFile, lastFile );
			if( useCache ) writeCache( algos, cachePrefix, pw, firstFile, lastFile );
			hitterSets = calcHitterSets( clone, algos, pw );
		}

		Attribute uidAttr = clone.getAttributes().get( "uid" );
		StringBuilder sizes = new StringBuilder( "\"" );
		for (Example example : clone) {

			checkForStop();

			int uid = (int) example.getNumericalValue( uidAttr );

			Set<Element> hhh = hitterSets.get( uid );
			sizes.append( hhh.size() ).append( "; " );
			avgSetSize += hhh.size();
			allHHH.add( hhh );
			names.add( uid + "" );

			double val = ((ObjectAttribute<?>)structID).getMapping().mapString( hhh );
			example.setValue( structID, val );

			if( writeDs ){
				// algos is never null here: the cache is bypassed whenever writeDs is set
				AbstractComplexHHH hitterAlgo = algos.get( uid );
				Map<Element, Integer> ds = hitterAlgo.dumpf();
				dsSize += ds.size();
				val = ((ObjectAttribute<?>)dsID).getMapping().mapString( ds );
				example.setValue( dsID, val );
			}
		}

		avgSetSize = avgSetSize / (double)clone.size();
		dsSize = dsSize / (double)clone.size();
		setSize = sizes.append( "\"" ).toString();

		HHHResult result = new HHHResult( pw, allHHH, names );
		if( getParameterAsBoolean(RESULT) )
			return new IOObject[] { clone, result };
		else
			return new IOObject[] { clone };
	}


	/**
	 * Parses one complete logged call and, if it is a <code>close</code> call of
	 * a uid that has an algorithm in <code>algos</code>, inserts the cleaned file
	 * path of the call into that algorithm with count 1.
	 *
	 * @param record the complete (possibly multi-line, already joined) call
	 * @param parsed scratch map that receives the parsed fields
	 * @param file   name of the logfile the record comes from, used for diagnostics
	 * @param algos  the hitter algorithm of each uid
	 * @param pw     parameters used to create and cap the element
	 */
	private void insertCloseCall( String record, HashMap<String, String> parsed, String file,
			HashMap<Integer, AbstractComplexHHH> algos, ParameterWrapper pw ) {

		if( ! parseLine( record, parsed, file ) ) return;

		AbstractComplexHHH al = algos.get( Integer.parseInt( parsed.get("uid") ) );
		if( al == null || ! parsed.get("call").equals("close") ) return;

		String[] s = { Utils.cleanPath2( parsed.get("file") ) };
		if( s[0] == null ) return;

		Element e = Element.createElement( s, pw.par );
		if( e != null ) e.capHierarchy( pw.par.getCap() );
		// NOTE(review): as before, insert() is also called when createElement()
		// returned null - confirm that insert() is meant to accept null.
		al.insert( e, 1 );
	}

	
	/**
	 * Reads the cached hitter set of every uid occurring in the ExampleSet.
	 *
	 * @param clone       the ExampleSet providing the "uid" attribute
	 * @param cachePrefix root directory of the cache
	 * @param pw          parameters that are encoded in the cache directory name
	 * @param firstFile   number of the first logfile, part of the cache directory name
	 * @param lastFile    number of the last logfile, part of the cache directory name
	 * @return the hitter set of each uid, or <code>null</code> as soon as the
	 *         cache file of one uid is missing
	 */
	private HashMap<Integer, Set<Element>> readCache(ExampleSet clone, 
			String cachePrefix, ParameterWrapper pw, int firstFile, int lastFile) {

		HashMap< Integer, Set<Element> > hitterSets = 
			new HashMap< Integer, Set<Element> >();
		String cacheDir = createCacheName( cachePrefix, pw, firstFile, lastFile );
		Attribute uidAttr = clone.getAttributes().get( "uid" );

		for (Example example : clone) {

			int uid = (int)example.getNumericalValue( uidAttr );
			// several examples may share a uid; read each cache file only once
			if( hitterSets.containsKey( uid ) ) continue;

			String cacheName = cacheDir + "/" + uid;
			System.out.println( "Trying " + cacheName );

			File cacheFile = new File( cacheName + ".hhh" );
			if( ! cacheFile.exists() ) return null;

			System.out.println( "Read " + cacheName );

			hitterSets.put( uid, MultiDatabase.readHHH( cacheName ) );
		}

		return hitterSets;
	}


	/**
	 * Computes the hitter set of every uid occurring in the ExampleSet by asking
	 * the algorithm of that uid for its output at threshold <code>pw.phi</code>.
	 *
	 * @param clone the ExampleSet providing the "uid" attribute
	 * @param algos the hitter algorithm of each uid; must contain every uid of
	 *              <code>clone</code>
	 * @param pw    parameters, of which only <code>phi</code> is used here
	 * @return the hitter set of each uid
	 */
	private HashMap< Integer, Set<Element> > calcHitterSets(ExampleSet clone,
			HashMap<Integer, AbstractComplexHHH> algos, ParameterWrapper pw) {

		HashMap< Integer, Set<Element> > hitterSets = 
			new HashMap< Integer, Set<Element> >();
		Attribute uidAttr = clone.getAttributes().get( "uid" );

		for (Example example : clone) {

			int uid = (int)example.getNumericalValue( uidAttr );
			// several examples may share a uid; compute each output only once
			if( hitterSets.containsKey( uid ) ) continue;

			hitterSets.put( uid, algos.get( uid ).outputSet( pw.phi ).keySet() );
		}

		return hitterSets;
	}


	/**
	 * Writes the hitter set of every algorithm to the cache. Failing to write
	 * one entry is only logged as a warning; the remaining entries are still
	 * written.
	 *
	 * @param algos       the hitter algorithm of each uid
	 * @param cachePrefix root directory of the cache
	 * @param pw          parameters used to create the hitter sets
	 * @param firstFile   number of the first logfile, part of the cache directory name
	 * @param lastFile    number of the last logfile, part of the cache directory name
	 */
	private void writeCache(HashMap<Integer, AbstractComplexHHH> algos,
			String cachePrefix, ParameterWrapper pw, int firstFile, int lastFile ) {

		// Create path to cachefile.
		// The path to the file contains the parameters 
		// used to create the hitterset.
		String cacheDir = createCacheName( cachePrefix, pw, firstFile, lastFile );

		for( Map.Entry<Integer, AbstractComplexHHH> entry : algos.entrySet() ){

			String cacheName = cacheDir + "/" + entry.getKey();
			Set<Element> hhh = entry.getValue().outputSet( pw.phi ).keySet();
			try{
				MultiDatabase.saveHHH( hhh, cacheName, null, pw.par, pw.epsilon, pw.phi, pw.algo );
			}catch( Exception e ){
				// report the path that was actually used for writing
				LogService.getGlobal().log( "Can't write cache: " + cacheName + " " + e, LogService.WARNING );
			}
		}
	}

/**
 * Dumps the hitters of all algorithms into the two tab-separated text files
 * <code>J:/outEilig</code> (uid, fmin, fmax, F, element) and
 * <code>J:/outEiligShort</code> (uid, frequency estimate, element, cleaned path).
 * I/O problems are only printed; they never abort the operator.
 *
 * @param algos       the hitter algorithm of each uid
 * @param cachePrefix unused
 * @param pw          parameters providing the dimensions and the threshold phi
 * @param firstFile   unused
 * @param lastFile    unused
 */
private void writeCacheMarcoEilig(HashMap<Integer, AbstractComplexHHH> algos,
			String cachePrefix, ParameterWrapper pw, int firstFile, int lastFile ) {

		String cr = com.rapidminer.tools.Tools.getLineSeparator();
		BufferedWriter out = null, out2 = null;

		try{
			out = new BufferedWriter( new FileWriter( "J:/outEilig" ) );
			out2 = new BufferedWriter( new FileWriter( "J:/outEiligShort" ) );
			out.write( pw.par.dimString() + cr );

			for( Map.Entry<Integer, AbstractComplexHHH> algoEntry : algos.entrySet() ){
				Integer i = algoEntry.getKey();
				HashMap<Element, MultiHitterInfo> hhhMap = algoEntry.getValue().outputSet( pw.phi );

				for( Map.Entry<Element, MultiHitterInfo> hitter : hhhMap.entrySet() ){
					MultiHitterInfo info = hitter.getValue();
					String shortName = hitter.getKey().toShortString();

					out.write( i + "\t" + info.fmin + "\t" + info.fmax + "\t" + info.F + "\t" + shortName + cr );
					// midpoint of the frequency bounds, limited by F
					int f = (int) (0.5 * (info.fmin + info.fmax));
					out2.write( i + "\t" + Math.min(f, info.F ) + "\t" + shortName + "\t" + Utils.cleanPath(shortName) + cr );
				}
			}
			out.write( cr );
		}
		catch (IOException e){
			// also covers FileNotFoundException, e.g. when drive J: does not exist
			System.out.print( e.getMessage() );
		}
		finally{
			// A writer is null if it could not be opened. Close each one on its
			// own so that a failure of the first does not leak the second.
			for( BufferedWriter writer : new BufferedWriter[]{ out, out2 } ){
				if( writer == null ) continue;
				try{
					writer.close();
				}
				catch (IOException e) {
					System.out.println("CloseWrite: Error: " + e.getMessage());
				}
			}
		}
	}



	/**
	 * Builds the name of the cache directory, which encodes the algorithm,
	 * epsilon, phi, the contents and cap of every dimension and the range of
	 * logfiles. The directory is created if it does not exist yet.
	 *
	 * @param cachePrefix root directory of the cache
	 * @param pw          parameters to encode; <code>pw.par</code> must be a SysParameter
	 * @param firstFile   number of the first logfile
	 * @param lastFile    number of the last logfile
	 * @return the path of the cache directory
	 */
	private String createCacheName(String cachePrefix, ParameterWrapper pw, int firstFile, int lastFile) {

		String name = cachePrefix + "/Spinc" + pw.algo
			+ "/eps" + Utils.format(pw.epsilon)
			+ "#phi" + Utils.format(pw.phi);

		SysParameter sysPar = (SysParameter)pw.par;
		int dim = 0;
		while( dim < pw.par.getDim() ){
			name = name + "#" + sysPar.dimContents(dim) + "#" + sysPar.getCap()[dim];
			dim++;
		}
		name = name + "#" + firstFile + "#" + lastFile;

		// Make sure the directory named after the parameters exists.
		File dir = new File( name );
		if( dir.exists() )
			return name;

		dir.mkdirs();
		return name;
	}


	/**
	 * Registers a fresh hitter algorithm under the uid of every Example. The
	 * algorithm is a PartAncHHH if <code>pw.algo</code> is
	 * <code>AlgoType.PART_ANC</code>, otherwise a FullAncHHH.
	 *
	 * @param algos the map to fill, keyed by uid
	 * @param clone the ExampleSet providing the "uid" attribute
	 * @param pw    parameters used to construct the algorithms
	 */
	private void fillAlgoMap(HashMap<Integer, AbstractComplexHHH> algos,
			ExampleSet clone, ParameterWrapper pw) {

		Attribute uidAttr = clone.getAttributes().get( "uid" );

		for (Example row : clone) {
			AbstractComplexHHH fresh = ( pw.algo == AlgoType.PART_ANC )
				? new PartAncHHH( pw.epsilon, pw.par )
				: new FullAncHHH( pw.epsilon, pw.par );

			algos.put( (int)row.getNumericalValue( uidAttr ), fresh );
		}
	}

	
	/**
	 * Returns the dimensions used by this operator.
	 *
	 * @return a one-element array containing <code>DimType.PATH</code>
	 */
	protected DimType[] retrieveDims() throws UserError {
		return new DimType[] { DimType.PATH };
	}
	
	
	/**
	 * Returns the caps used by this operator.
	 *
	 * @return a one-element array containing the cap 12
	 */
	protected int[] retrieveCaps() throws UndefinedParameterError{
		return new int[] { 12 };
	}

	
	/**
	 * Extracts the fields of one logged call. The map is cleared first and then
	 * receives the keys "call", "uid" and "file"; a field that was not found is
	 * stored as <code>null</code>. The gid is only checked for presence and is
	 * not stored.
	 *
	 * @param oldline the complete call, joined into a single line
	 * @param parsed  receives the parsed fields
	 * @param itFile  name of the logfile, only used in the diagnostic output
	 * @return <code>true</code> if call, uid and gid were all found
	 */
	private boolean parseLine( String oldline, HashMap<String, String> parsed, String itFile ){
		parsed.clear();

		String call = extractValue( callP, oldline );
		String gid = extractValue( gidP, oldline );
		String uid = extractValue( uidP, oldline );
		// not every call carries a file (the pattern fails e.g. for exec)
		String file = extractValue( fileP, oldline );
		if( file != null ) file = cleanPath( file );

		parsed.put("call", call);
		parsed.put("uid", uid);
		parsed.put("file", file);

		if( uid == null || gid == null || call == null ){
			System.out.println( "NO WAY: " + itFile + "  " + oldline );
			return false;
		}
		return true;
	}


	/**
	 * Returns the trimmed value of the first "name: value" field matched by the
	 * pattern. Only the first ':' separates name and value, so a value that
	 * itself contains ':' (such as a file path) is returned completely.
	 *
	 * @param pattern one of the field patterns, whose match contains a ':'
	 * @param record  the text to search
	 * @return the value, or <code>null</code> if the pattern does not match
	 */
	private static String extractValue( Pattern pattern, String record ){
		Matcher m = pattern.matcher( record );
		if( ! m.find() ) return null;

		String field = m.group();
		return field.substring( field.indexOf( ':' ) + 1 ).trim();
	}
	
	
	/**
	 * Normalizes the slashes of a path: all trailing '/' are removed and every
	 * run of consecutive '/' is collapsed into a single one, e.g.
	 * "/etc//////////sound/" becomes "/etc/sound". A path consisting only of
	 * slashes becomes the empty string.
	 *
	 * @param file the path to clean, must not be <code>null</code>
	 * @return the cleaned path
	 */
	private String cleanPath( String file ){

		// ignore trailing slashes
		int end = file.length();
		while( end > 0 && file.charAt( end - 1 ) == '/' )
			end--;

		// get rid of stuff like "/etc//////////sound"
		StringBuilder cleaned = new StringBuilder( end );
		boolean previousWasSlash = false;
		for( int i = 0; i < end; i++ ){
			char c = file.charAt( i );
			boolean isSlash = ( c == '/' );
			if( ! ( isSlash && previousWasSlash ) )
				cleaned.append( c );
			previousWasSlash = isSlash;
		}
		return cleaned.toString();
	}	


	/**
	 * Returns the parameter types of the superclass extended by the logfile
	 * directory and the numbers of the first and last logfile to use.
	 */
	public List<ParameterType> getParameterTypes() {

		List<ParameterType> types = super.getParameterTypes();

		types.add( new ParameterTypeDirectory(DATA_PATH, "Path to the directory containing the logfiles.", "J:/unzipped" ) );
		types.add( new ParameterTypeInt(FIRST_FILE, "Number of the first logfile to use.", 0, 437, 0) );
		types.add( new ParameterTypeInt(LAST_FILE, "Number of the last logfile to use.", 0, 437, 0) );

		return types;
	}

}
