/**
 * @author Peter Fricke 
 */
package hitters.tools;

import hitters.multi.SysParameter;
import hitters.multi.Taxonomy;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Random;

/**
 * Command-line tool that scans a directory and writes a semicolon-separated
 * example set: a header line <code>lab;id;&lt;call&gt;...</code> followed by one
 * row per file containing the label (first two characters of the file name),
 * the file path (with forward slashes) and a <code>1</code> for every call column.
 *
 * <p>Arguments:
 * <ol>
 *   <li>directory to scan (optional, a built-in default is used otherwise)</li>
 *   <li>output CSV file (optional, a built-in default is used otherwise)</li>
 *   <li>mode (optional): <code>PATH</code>, <code>FEATURE</code> or <code>CLASS</code></li>
 *   <li>for <code>CLASS</code>: the labels to include</li>
 * </ol>
 *
 * <p>Example argument lists:
 * <pre>
 * /home/fricke/log30 /home/fricke/batch/in/hi5split30.csv CLASS tb tp vg xe xt
 * /dev/shm /home/fricke/batch/in/ram.csv
 * /dev/shm/logPLUS30 /home/fricke/batch/in/ram-allPLUSsplit30.csv
 * </pre>
 *
 * @author Peter Fricke
 */
public class ExSetCreate {

	/** Directory scanned when no first argument is given. */
	private static final String DEFAULT_PATH = "D:/ws/HHH/resources/data";	//   /home/fricke/log

	/** Output file used when no second argument is given. */
	private static final String DEFAULT_OUTPUT = "D:/rapidworkspace-44/DMV/da/tst.csv";	// /home/fricke/rm_workspace

	/** Fixed seed so that the shuffled file order is reproducible between runs. */
	private static final long SHUFFLE_SEED = 2001L;

	/** Number of leading file-name characters that form the label. */
	private static final int LABEL_LENGTH = 2;

	/**
	 * Scans the input directory and writes the example set.
	 *
	 * @param args see the class description
	 * @throws IllegalArgumentException if the input path cannot be listed as a directory
	 * @throws RuntimeException if the taxonomy or the call list file cannot be loaded
	 */
	public static void main(String[] args){

		String path = args.length > 0 ? args[0] : DEFAULT_PATH;
		String outputfile = args.length > 1 ? args[1] : DEFAULT_OUTPUT;
		String mode = args.length > 2 ? args[2] : "";

		File dir = new File(path);
		System.out.println( "Dir = " + dir );
		File[] files = dir.listFiles();
		if( files == null ){
			// listFiles() returns null when the path is not a directory or cannot be read.
			throw new IllegalArgumentException( "Not a readable directory: " + path );
		}

		// The files are always shuffled with a fixed seed. Arrays.asList is a view
		// on the array, so shuffling the view reorders the array itself.
		Collections.shuffle( Arrays.asList(files), new Random(SHUFFLE_SEED) );

		// Load the call lists before the output file is opened, so that a load
		// failure does not leave a truncated output file behind.
		List<String> calls = loadAllCalls();
		List<String> calls2 = loadCallsImpl();
		Collections.sort(calls);
		Collections.sort(calls2);

		List<String> wanted = new ArrayList<String>();
		boolean selectClasses = false;

		//If the hierarchical variable "PATH" is used, we have to
		//translate file descriptors to filenames. In order to do so,
		//we have to log all calls that manipulate file descriptors in some way.
		//If the hierarchical variable "PATH" is used, logging these calls
		//can't be switched off, they will be logged automatically.
		//Therefore, they are removed from the set of attributes:
		//We don't want forward selection to waste time trying to
		//switch these attributes on and off when that has no effect at all.
		if( mode.equals("PATH") ){
			calls.removeAll( SysParameter.getMinCalls() );
		}
		else if( mode.equals("FEATURE") ){  //Reduced feature set for testing
			// Keeps the sorted calls at indices 1..7 (index 0 is skipped),
			// bounded by the number of calls actually available.
			// NOTE(review): confirm that skipping index 0 is intended.
			int end = Math.min( 8, calls.size() );
			int start = Math.min( 1, end );
			calls = new ArrayList<String>( calls.subList( start, end ) );
		}
		else if( mode.equals("CLASS") ){  //Include only some applications
			//  ff  gy  ka  ne  rb tb  tp  vg  xe  xt
			selectClasses = true;
			wanted.addAll( Arrays.asList(args).subList( 3, args.length ) );
			System.out.println( "Wanted: " + wanted );
		}

		HashSet<String> labels = new HashSet<String>();
		boolean written = false;
		BufferedWriter out = null;
		try{
			out = new BufferedWriter( new FileWriter( outputfile ) );

			// Header line: label, id, then one column per call.
			out.write( "lab;id" );
			for( int k = 0; k < calls.size(); k++ ){
				String call = calls.get(k);
				out.write( ";" + call );
				// Console diagnostic only: prints each call next to the entry at the
				// same position of the second call list, if there is one.
				System.out.print( call + "  " );
				System.out.println( k < calls2.size() ? calls2.get(k) : "" );
			}
			out.newLine();

			// One row per file: label;path;1;1;...
			for( File f : files ){
				String name = f.getName();
				if( name.length() < LABEL_LENGTH ) continue;	// too short to carry a label
				String aLabel = name.substring( 0, LABEL_LENGTH );
				if( selectClasses && ! wanted.contains( aLabel ) ) continue;
				labels.add( aLabel );
				out.write( aLabel + ";" + f.getPath().replace("\\", "/") );
				for( int j = 0; j < calls.size(); j++ )
					out.write( ";1" );
				out.newLine();
			}
			written = true;
		}
		catch( IOException e ){
			// Also covers FileNotFoundException from opening the output file.
			System.err.println( "Error writing " + outputfile + ": " + e.getMessage() );
		}
		finally{
			if( out != null ){
				try{
					out.close();
				}
				catch( IOException e ){
					// close() flushes the buffer; a failure means the file is incomplete.
					System.err.println( "CloseWrite: Error: " + e.getMessage() );
					written = false;
				}
			}
		}

		System.out.println( "Scanned directory " + path + ": " );
		System.out.println( files.length + " files, " + labels.size() + " labels. ");
		System.out.println( calls.size() + " calls." );
		if( written ){
			System.out.println( "Output written to " + outputfile + "." );
		}
	}


	/**
	 * Reads the file <code>resources/callsImpl</code> line by line.
	 *
	 * @return the trimmed lines of the file, excluding lines that start with <code>#</code>
	 * @throws RuntimeException wrapping the {@link IOException} if the file cannot be read
	 */
	private static List<String> loadCallsImpl(){

		List<String> list = new ArrayList<String>();
		String line;
		String file = "resources/callsImpl";

		BufferedReader in = null;
		try {
			in = new BufferedReader( new FileReader(file) );
			while( (line = in.readLine()) != null ) {
				if(  ! line.startsWith("#") ) {
					list.add( line.trim() );
				}
			}
		} catch( IOException e ) {
			throw new RuntimeException(e);
		} finally {
			if( in != null ){
				try{
					in.close();
				} catch( IOException ignored ) {
					// Reading has already finished or failed; a close error on a
					// read-only stream cannot affect the result.
				}
			}
		}

		return list;
	}


	/**
	 * Creates the taxonomy named <code>Taxonomy3</code>.
	 *
	 * @return the taxonomy, never <code>null</code>
	 * @throws RuntimeException wrapping the original exception if the taxonomy cannot be created
	 */
	private static Taxonomy loadTaxonomy(){

		try{
			return new Taxonomy("Taxonomy3");
		}catch(Exception e) {
			// Fail here with the real cause instead of returning null and
			// triggering a NullPointerException in the caller.
			throw new RuntimeException( "Could not load taxonomy \"Taxonomy3\"", e );
		}
	}


	/**
	 * Returns the calls reported by the taxonomy as a new, modifiable list.
	 *
	 * @return a fresh list holding the result of <code>Taxonomy.getAllCalls()</code>
	 * @throws RuntimeException if the taxonomy cannot be loaded
	 */
	private static List<String> loadAllCalls(){

		Taxonomy taxonomy = loadTaxonomy();
		return new ArrayList<String>( taxonomy.getAllCalls() );
	}

}
