/*
 * MiningMart Version 1.1
 * 
 * Copyright (C) 2006 Martin Scholz, Timm Euler, 
 *                    Daniel Hakenjos, Katharina Morik
 *
 * Contact: miningmart@ls8.cs.uni-dortmund.de
 *
 * A list of contributing developers (other than the copyright 
 * holders) can be found at
 * http://mmart.cs.uni-dortmund.de/downloads/download.html
 * 
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program, see the file MM_HOME/LICENSE; if not, write
 * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
 * Floor, Boston, MA 02110-1301, USA.
 */
package edu.udo.cs.miningmart.compiler.wrapper.dista.C45;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.PrintWriter;
import java.io.Writer;
import java.sql.ResultSet;
import java.util.Vector;

import edu.udo.cs.miningmart.operator.ConceptOperator;
import edu.udo.cs.miningmart.operator.ExecutableOperator;
import edu.udo.cs.miningmart.operator.FeatureConstruction;
import edu.udo.cs.miningmart.compiler.utils.DrawSample;
import edu.udo.cs.miningmart.compiler.wrapper.dista.ColumnInfo;
import edu.udo.cs.miningmart.compiler.wrapper.dista.DMException;
import edu.udo.cs.miningmart.compiler.wrapper.dista.Wrapper;

import edu.udo.cs.miningmart.compiler.SystemPropertyInterface;
import edu.udo.cs.miningmart.db.DB;
import edu.udo.cs.miningmart.exception.DbConnectionClosed;
import edu.udo.cs.miningmart.exception.M4CompilerError;
import edu.udo.cs.miningmart.exception.M4Exception;
import edu.udo.cs.miningmart.m4.Columnset;
import edu.udo.cs.miningmart.m4.Concept;
import edu.udo.cs.miningmart.m4.Step;

/**
 * Superclass for the wrappers that use C4.5.
 * <p>
 * The class checks the meta data of the input, draws a sample of the input
 * column set into a table, and exports that sample into the two text files
 * <code>&lt;base&gt;.data</code> and <code>&lt;base&gt;.names</code>, where
 * <code>&lt;base&gt;</code> is {@link #m_file_name}.
 * <p>
 * SQL NULL values of attributes are written as <code>?</code> into the
 * .data file (assumed to be the C4.5 marker for an unknown value).
 * 
 * @author Alessandro Serra, Timm Euler
 * @version $Id: C45Wrapper.java,v 1.5 2006/09/27 14:59:54 euler Exp $
 */
public abstract class C45Wrapper extends Wrapper {
	
	/** Token written to the .data file in place of an SQL NULL. */
	private static final String UNKNOWN_VALUE = "?";

	/** Path prefix (directory + "c45_" + table name) of the exported files; set by {@link #exportTable()}. */
	protected String m_file_name;
	/** Writer for the .data file; only non-null while {@link #exportTable()} is running. */
	protected PrintWriter m_data;
	/** Writer for the .names file; only non-null while {@link #exportTable()} is running. */
	protected PrintWriter m_names;

	/** Name of the sample table created by {@link #makeQuery()}. */
	private String sampleTableName;

	public C45Wrapper(Step step, ExecutableOperator exOp) throws Exception {
		super(step, exOp);
	}

	/**
	 * Verifies that the meta data can be handled by C4.5: a column to learn
	 * must exist and have a CATEGORIAL, NOMINAL or BINARY conceptual type,
	 * and every column with data type STRING must have a NOMINAL, ORDINAL,
	 * BINARY or CATEGORIAL conceptual type.
	 * 
	 * @throws DMException if one of these conditions is violated
	 */
	public void checkMetaData() throws Exception {
		ColumnInfo target = m_md.getColumnToLearn();
		if (target == null) {
			throw new DMException("C45 is a supervised learning "+
				"algorithm. It requires a column to learn");
		}
		int targetType = target.getConceptType();
		if (targetType != ColumnInfo.CATEGORIAL &&
			targetType != ColumnInfo.NOMINAL &&
			targetType != ColumnInfo.BINARY) {
			throw new DMException("C45 can learn only a"+
					" CATEGORICAL/NOMINAL/BINARY conceptual datatype");
		}
		for (int i = 0; i < m_md.getColumnInfos().size(); ++i) {
			ColumnInfo ci = (ColumnInfo) m_md.getColumnInfos().elementAt(i);
			if (ci.getDataType() != ColumnInfo.STRING) {
				continue;
			}
			int conceptType = ci.getConceptType();
			if (conceptType != ColumnInfo.NOMINAL &&
				conceptType != ColumnInfo.ORDINAL &&
				conceptType != ColumnInfo.BINARY &&
				conceptType != ColumnInfo.CATEGORIAL) {
				throw new DMException("C45 requires that"+
					" the column "+ ci.getName()+
					" that is a string has a Nominal,"+
					" Categorial, Ordinal or Binary conceptual datatype");
			}
		}
	}
	
	/**
	 * Materializes a sample of the input column set in the table
	 * <code>SAMPLE_&lt;step id&gt;</code> (registered for later removal via
	 * <code>addTableToTrash</code>) and builds the SELECT statement that
	 * reads all attribute columns, followed by the column to learn if there
	 * is one, from that sample. Rows in which the column to learn is NULL
	 * are excluded.
	 * 
	 * @return the SQL query, without leading or trailing blanks
	 * @throws DMException if the column set cannot be determined or the
	 *         sampling fails with an M4CompilerError
	 */
	protected String makeQuery() throws DMException, DbConnectionClosed, M4CompilerError
	{
		final Columnset cs = this.findInputColumnset();

		// *** Sampling: ***
		final long stepId = this.getStep().getId();
		final String sampleTable = "SAMPLE_" + stepId;
		final String tempTable = "TMP_" + stepId;
		this.setSampleTableName(sampleTable);
		try {
			// the constructor is called for its effect only (it receives the sample and temp table names)
			new DrawSample(cs, null, sampleTable, tempTable, null, m_sample_size, null, this.getM4Db());
			this.getM4Db().addTableToTrash(this.getSampleTableName(), cs.getSchema(), stepId);
		}
		catch (M4CompilerError e) {
			throw new DMException(e.getMessage());
		}

		// *** Construct the query based on the materialized sample: ***
		StringBuffer query = new StringBuffer("SELECT ");
		int numberOfColumns = m_md.getColumnInfos().size();
		for (int i = 0; i < numberOfColumns; ++i) {
			if (i != 0) {
				query.append(", ");
			}
			query.append(selectExpression((ColumnInfo) m_md.getColumnInfos().elementAt(i)));
		}
		
		ColumnInfo target = m_md.getColumnToLearn();
		if (target != null) {
			// no separator if the target is the only selected column
			if (numberOfColumns > 0) {
				query.append(", ");
			}
			query.append(target.getName());
		}
		
		query.append(" FROM ").append(sampleTable).append(" ");
		if (target != null) {
			query.append("WHERE ").append(target.getName()).append(" is not null ");
		}
		return query.toString().trim();
	}

	/**
	 * Determines the current column set of the input concept of the operator.
	 * Only FeatureConstruction and ConceptOperator operators provide one.
	 * 
	 * @return the column set, never <code>null</code>
	 * @throws M4CompilerError if reading the column set raises an M4Exception
	 * @throws DMException if no column set could be determined
	 */
	private Columnset findInputColumnset() throws DMException, DbConnectionClosed, M4CompilerError {
		Columnset found = null;
		if (this.getStep() != null) {
			ExecutableOperator op = this.getExecutableOperator();
			Concept inputCon = null;
			// instanceof is false for null, so no separate null check is needed
			if (op instanceof FeatureConstruction) {
				inputCon = ((FeatureConstruction) op).getTheInputConcept();
			}
			else if (op instanceof ConceptOperator) {
				inputCon = ((ConceptOperator) op).getInputConcept();
			}
			
			if (inputCon != null) {
				try {
					found = (Columnset) inputCon.getCurrentColumnSet();
				}
				catch (M4Exception m4e) {
					throw new M4CompilerError("M4 interface error in C45Wrapper: " + m4e.getMessage());
				}
			}
		}
		if (found == null) {
			throw new DMException(
				"Could not determine Columnset in class miningmart.compiler.wrapper.dista.C45Wrapper!"
			);
		}
		return found;
	}

	/**
	 * Returns the SELECT expression for one attribute: DATE and TIME columns
	 * are wrapped in <code>m4_date_to_timestamp(...)</code>, all other
	 * columns are selected by name.
	 */
	private String selectExpression(ColumnInfo ci) {
		int dataType = ci.getDataType();
		if (dataType == ColumnInfo.DATE || dataType == ColumnInfo.TIME) {
			return "m4_date_to_timestamp(" + ci.getName() + ")";
		}
		return ci.getName();
	}

	/**
	 * Loads the values of every column that is ORDINAL and of data type
	 * STRING; see {@link #getValues(ColumnInfo)}.
	 */
	protected void getValues() throws Exception
	{
		for (int i = 0; i < m_md.getColumnInfos().size(); ++i) {
			ColumnInfo ci = (ColumnInfo) m_md.getColumnInfos().elementAt(i);
			boolean ordinalString =
				ci.getConceptType() == ColumnInfo.ORDINAL &&
				ci.getDataType() == ColumnInfo.STRING;
			if (ordinalString) {
				getValues(ci);
			}
		}
	}

	/**
	 * Reads the given column from the table named in the meta data, ordered
	 * by the column itself, and passes every value to
	 * <code>ci.addValue</code>.
	 * 
	 * @param ci the column whose values are to be collected
	 */
	protected void getValues(ColumnInfo ci) throws Exception {
		String q =
			"select "
				+ ci.getSqlName()
				+ " from "
				+ m_md.getTableName()
				+ " order by  "
				+ ci.getSqlName();
		ResultSet res = null;
		try {
			res = this.getM4Db().executeBusinessSqlRead(q);
			while (res.next()) {
				ci.addValue(res.getString(1));
			}
		}
		finally {
			DB.closeResultSet(res);
		}
	}

	/**
	 * Writes the files <code>c45_&lt;table&gt;.data</code> and
	 * <code>c45_&lt;table&gt;.names</code> into the MiningMart temp directory.
	 * Both writers are closed and reset to <code>null</code> even if the
	 * export fails.
	 * 
	 * @throws DMException if a write error occurred on one of the files
	 */
	protected void exportTable() throws Exception {
		String temporaryDir = SystemPropertyInterface.getProperty(SystemPropertyInterface.NAME_MM_TEMP);
		m_file_name = temporaryDir + File.separator + "c45_"+ m_md.getTableName();
		try {
			Writer w = new BufferedWriter(new FileWriter(m_file_name+".data"));
			m_data = new PrintWriter(w);
			w = new BufferedWriter(new FileWriter(m_file_name+".names"));
			m_names = new PrintWriter(w);
			getValues();
			exportData();
			exportNames();
			// PrintWriter never throws IOException; checkError() flushes and
			// reports whether any write has failed.
			boolean dataFailed = m_data.checkError();
			boolean namesFailed = m_names.checkError();
			if (dataFailed || namesFailed) {
				throw new DMException("Error while writing the C45 input files "
					+ m_file_name + ".data / " + m_file_name + ".names");
			}
		}
		finally {
			if (m_data != null) {
				m_data.close();
				m_data = null;
			}
			if (m_names != null) {
				m_names.close();
				m_names = null;
			}
		}
	}

	/**
	 * Runs the query of {@link #makeQuery()} and writes one comma-separated
	 * line per row to the .data file: all attributes in meta data order,
	 * then the column to learn, or the constant <code>X</code> if there is
	 * no column to learn.
	 */
	protected void exportData() throws Exception {
		ResultSet res = null;
		try {
			res = this.getM4Db().executeBusinessSqlRead(makeQuery());
			int col_num = m_md.getColumnInfos().size();
			while (res.next()) {
				// JDBC columns are 1-based, the meta data vector is 0-based
				for (int col_i = 1; col_i <= col_num; ++col_i) {
					if (col_i != 1) {
						m_data.print(",");
					}
					ColumnInfo ci =
						(ColumnInfo) m_md.getColumnInfos().elementAt(col_i - 1);
					exportValue(res, col_i, ci);
				}
				if (m_md.getColumnToLearn() != null) {
					m_data.print(",");
					exportValue(res, col_num + 1, m_md.getColumnToLearn());
				}
				else {
					m_data.print(",X");
				}
				m_data.print("\n");
			}
		}
		finally {
			DB.closeResultSet(res);
		}
	}
	
	/**
	 * Writes one value of the current row to the .data file.
	 * <ul>
	 * <li>NUMERIC, TIME, SCALAR: DATE/TIME data as long, NUMBER as double;
	 *     STRING data is not expected here and writes nothing.</li>
	 * <li>ORDINAL: STRING data as its value index, otherwise as above.</li>
	 * <li>CATEGORIAL, BINARY, NOMINAL: STRING data as its value index;
	 *     DATE/TIME/NUMBER data as text, which is also registered with
	 *     <code>ci.addValue</code> so that it appears in the .names file.</li>
	 * </ul>
	 * An SQL NULL is written as <code>?</code> and is not registered as a value.
	 * 
	 * @param res   result set positioned on the row to export
	 * @param col_i 1-based index of the column in <code>res</code>
	 * @param ci    meta data of that column
	 */
	protected void exportValue(ResultSet res, int col_i, ColumnInfo ci)
				throws Exception {
		int dataType = ci.getDataType();
		switch(ci.getConceptType()) {
			case ColumnInfo.NUMERIC:
			case ColumnInfo.TIME:
			case ColumnInfo.SCALAR:
				// a STRING data type matches no case in printNumber: nothing is written
				printNumber(res, col_i, dataType);
				break;
			case ColumnInfo.ORDINAL:
				if (dataType == ColumnInfo.STRING) {
					// values were loaded in sorted order by getValues(ColumnInfo)
					printValueIndex(res, col_i, ci);
				} else {
					printNumber(res, col_i, dataType);
				}
				break;
			case ColumnInfo.CATEGORIAL:
			case ColumnInfo.BINARY:
			case ColumnInfo.NOMINAL:
				if (dataType == ColumnInfo.STRING) {
					printValueIndex(res, col_i, ci);
				} else {
					printCategory(res, col_i, ci, dataType);
				}
				break;
		}
	}

	/** Writes a DATE/TIME column as long and a NUMBER column as double; NULL becomes the unknown marker. */
	private void printNumber(ResultSet res, int col_i, int dataType) throws Exception {
		switch(dataType) {
			case ColumnInfo.DATE:
			case ColumnInfo.TIME: {
				long value = res.getLong(col_i);
				// getLong returns 0 for NULL, so wasNull() must be consulted
				if (res.wasNull()) {
					m_data.print(UNKNOWN_VALUE);
				} else {
					m_data.print(value);
				}
				break;
			}
			case ColumnInfo.NUMBER: {
				double value = res.getDouble(col_i);
				// getDouble returns 0 for NULL, so wasNull() must be consulted
				if (res.wasNull()) {
					m_data.print(UNKNOWN_VALUE);
				} else {
					m_data.print(value);
				}
				break;
			}
		}
	}

	/** Writes the index that <code>ci.getValueIndex</code> reports for the string value; NULL becomes the unknown marker. */
	private void printValueIndex(ResultSet res, int col_i, ColumnInfo ci) throws Exception {
		String value = res.getString(col_i);
		if (value == null) {
			m_data.print(UNKNOWN_VALUE);
		} else {
			m_data.print(ci.getValueIndex(value));
		}
	}

	/** Writes a DATE/TIME/NUMBER category as text and registers it as a value of the column; NULL becomes the unknown marker. */
	private void printCategory(ResultSet res, int col_i, ColumnInfo ci, int dataType) throws Exception {
		switch(dataType) {
			case ColumnInfo.DATE:
			case ColumnInfo.TIME:
			case ColumnInfo.NUMBER: {
				String value = res.getString(col_i);
				if (value == null) {
					m_data.print(UNKNOWN_VALUE);
				} else {
					ci.addValue(value);
					m_data.print(value);
				}
				break;
			}
		}
	}

	/**
	 * Writes the .names file: first the list of class labels, then one line
	 * <code>name: type.</code> per attribute, where the type is
	 * <code>continuous</code> for SCALAR, NUMERIC, TIME and ORDINAL columns
	 * and the list of values for CATEGORIAL, BINARY and NOMINAL columns.
	 * Without a column to learn the class line is <code>X,Y .</code>.
	 * <p>
	 * Must run after {@link #exportData()}, because the values of non-string
	 * categories are collected while the data is exported.
	 */
	protected void exportNames() throws Exception {
		ColumnInfo target = m_md.getColumnToLearn();
		if (target != null) {
			// the labels must use the same encoding exportValue used in the .data file
			printValueList(target);
			m_names.print(".\n\n");
		} else {
			m_names.print("X,Y .\n\n");
		}
		for (int i = 0; i < m_md.getColumnInfos().size(); ++i) {
			ColumnInfo ci = (ColumnInfo) m_md.getColumnInfos().elementAt(i);
			m_names.print(ci.getName()+": ");
			switch(ci.getConceptType()) {
				case ColumnInfo.SCALAR:
				case ColumnInfo.NUMERIC:
				case ColumnInfo.TIME:
				case ColumnInfo.ORDINAL:
					m_names.print("continuous");
					break;
				case ColumnInfo.CATEGORIAL:
				case ColumnInfo.BINARY:
				case ColumnInfo.NOMINAL:
					printValueList(ci);
					break;
			}
			m_names.print(".\n");
		}
	}

	/**
	 * Writes the comma-separated values of a discrete column to the .names
	 * file: indices 0..n-1 for STRING columns (exported by index), the
	 * values themselves for all other data types (exported as text).
	 */
	private void printValueList(ColumnInfo ci) {
		Vector values = ci.getValues();
		boolean byIndex = (ci.getDataType() == ColumnInfo.STRING);
		for (int j = 0; j < values.size(); ++j) {
			if (j != 0) {
				m_names.print(",");
			}
			if (byIndex) {
				m_names.print(j);
			} else {
				m_names.print(values.elementAt(j));
			}
		}
	}
	
	/**
	 * Removes the exported files and all files sharing their name prefix by
	 * passing an <code>rm</code> command to <code>callScript</code>. Does
	 * nothing if no export has set the file name yet.
	 */
	protected void clean() throws Exception {
		if (m_file_name == null) {
			return;
		}
		callScript("rm "+ m_file_name+".* "+ m_file_name+"_*");
	}

	/** Stores the name of the sample table. */
	protected void setSampleTableName(String sampleTableName) {
		this.sampleTableName = sampleTableName;		
	}
	
	/** @return the name of the sample table, as set by {@link #setSampleTableName(String)} */
	protected String getSampleTableName() {
		return this.sampleTableName;
	}
	
}

/*
 * History
 * -------
 * 
 * $Log: C45Wrapper.java,v $
 * Revision 1.5  2006/09/27 14:59:54  euler
 * New version 1.1
 *
 * Revision 1.4  2006/04/11 14:10:09  euler
 * Updated license text.
 *
 * Revision 1.3  2006/04/06 16:31:09  euler
 * Prepended license remark.
 *
 * Revision 1.2  2006/03/30 16:07:12  scholz
 * fixed author tags for release
 *
 * Revision 1.1  2006/01/03 09:54:02  hakenjos
 * Initial version!
 *
 */
