/*
 * MiningMart Version 1.0
 * 
 * Copyright (C) 2006 Martin Scholz, Timm Euler, 
 *                    Daniel Hakenjos, Katharina Morik
 *
 * Contact: miningmart@ls8.cs.uni-dortmund.de
 *
 * A list of contributing developers (other than the copyright 
 * holders) can be found at
 * http://mmart.cs.uni-dortmund.de/downloads/download.html
 * 
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program, see the file MM_HOME/LICENSE; if not, write
 * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
 * Floor, Boston, MA 02110-1301, USA.
 */
package edu.udo.cs.miningmart.operator;

import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Vector;

import edu.udo.cs.miningmart.db.DB;
import edu.udo.cs.miningmart.exception.M4CompilerError;
import edu.udo.cs.miningmart.exception.M4Exception;
import edu.udo.cs.miningmart.m4.BaseAttribute;
import edu.udo.cs.miningmart.m4.Concept;
import edu.udo.cs.miningmart.m4.EstimatedStatistics;
import edu.udo.cs.miningmart.m4.Feature;
import edu.udo.cs.miningmart.m4.Step;
import edu.udo.cs.miningmart.m4.utils.Print;

/**
 * Operator that prints a hint on whether transforming the selected attributes
 * of the input concept into TF/IDF features is likely to pay off.
 * <p>
 * For every key value the rows are visited in time stamp order and, for each
 * selected attribute, the number of value changes between consecutive rows is
 * counted. The Euclidean length of this vector of change counts is computed per
 * key. The largest such length is then compared with
 * <code>sqrt(n) * maximum change count</code>, where <code>n</code> is the
 * number of selected attributes. The outcome is reported via
 * <code>doPrint</code> and on standard output; nothing is written to the
 * database by this class.
 *
 * @author Hanna Koepcke
 * @version $Id: EvaluateAdvantageOfTFIDFTransformation.java,v 1.6 2006/04/11 14:10:12 euler Exp $
 */
public class EvaluateAdvantageOfTFIDFTransformation extends ExecutableOperator {

	/**
	 * Reads the key and the selected attributes of the input concept, ordered by
	 * key and time stamp, and prints the evaluation described in the class comment.
	 *
	 * @param lazy not used by this implementation
	 * @throws SQLException declared by the overridden method
	 * @throws M4CompilerError if a parameter or column cannot be resolved, or if
	 *         reading from the database fails
	 * @see edu.udo.cs.miningmart.operator.ExecutableOperator
	 */
	public void createStatement(boolean lazy)
		throws SQLException, M4CompilerError
	{
		String inputConcept, key, timeStamp;
		String[] featureColumns;
		StringBuffer colNames = new StringBuffer();
		try {
			inputConcept =
				this.getInputConcept().getCurrentColumnSet().getSQLDefinition();
			key = this.getTheKey().getCurrentColumn().getName();
			timeStamp = this.getTheTimeStamp().getCurrentColumn().getName();
			colNames.append(this.getTheKey().getCurrentColumn().getSQLDefinition());

			// Resolve the column names once; they do not change while the
			// result set is being read.
			Feature[] features = this.getTheSelectedAttributes();
			featureColumns = new String[features.length];
			for (int i = 0; i < features.length; i++) {
				BaseAttribute ba = (BaseAttribute) features[i];
				edu.udo.cs.miningmart.m4.Column col = ba.getCurrentColumn();
				featureColumns[i] = col.getName();
				colNames.append(", ").append(featureColumns[i]);
			}
		}
		catch (M4Exception e) {
			throw new M4CompilerError(
				"M4Exception during EvaluateAdvantageOfTFIDFTransformation.createStatement:\n"
				+ e.getMessage());
		}

		// Maps each key value (Integer) to a Vector of String[] rows; the rows
		// of one key arrive in time stamp order because of the ORDER BY clause.
		Hashtable indexes = new Hashtable();

		ResultSet ergebnis = null;
		try {
			String query =
				"SELECT "
					+ colNames
					+ " FROM "
					+ inputConcept
					+ " ORDER BY "
					+ key
					+ ", "
					+ timeStamp;
			ergebnis = this.getM4Db().executeBusinessSqlRead(query);

			while (ergebnis.next()) {
				Integer id = new Integer(ergebnis.getInt(key));
				Vector rowsOfKey = (Vector) indexes.get(id);
				if (rowsOfKey == null) {
					rowsOfKey = new Vector();
					indexes.put(id, rowsOfKey);
				}
				rowsOfKey.add(readFeatureValues(ergebnis, featureColumns));
			}
			// The result set is closed in the finally block below.

			int n = featureColumns.length;
			double maximum_euklidian_length = 0;
			double maximum_frequency = 0;

			for (Enumeration e = indexes.keys(); e.hasMoreElements();) {
				Vector aktuell = (Vector) indexes.get(e.nextElement());
				if (aktuell.size() <= 1) {
					// A single row cannot contain any value change.
					continue;
				}

				// frequencies[j] = number of value changes of attribute j
				// between consecutive rows of this key.
				int[] frequencies = new int[n];
				for (int i = 1; i < aktuell.size(); i++) {
					String[] currentValues = (String[]) aktuell.get(i);
					String[] previousValues = (String[]) aktuell.get(i - 1);
					for (int j = 0; j < n; j++) {
						if (valueChanged(previousValues[j], currentValues[j])) {
							frequencies[j]++;
						}
					}
				}

				double sum = 0;
				for (int i = 0; i < n; i++) {
					sum += Math.pow(frequencies[i], 2);
					if (frequencies[i] > maximum_frequency) {
						maximum_frequency = frequencies[i];
					}
				}

				double euklidian_length = Math.sqrt(sum);
				if (euklidian_length > maximum_euklidian_length) {
					maximum_euklidian_length = euklidian_length;
				}
			}

			double nm = Math.sqrt((double) n) * maximum_frequency;
			boolean worthATry = maximum_euklidian_length < nm;

			String report =
				"The maximum Euklidian length is: "
					+ maximum_euklidian_length
					+ "\n"
					+ "The maximum frequency is: "
					+ maximum_frequency
					+ "\n"
					+ "As "
					+ maximum_euklidian_length
					+ (worthATry ? " is < sqrt(" : " is >= sqrt(")
					+ n
					+ ") * "
					+ maximum_frequency
					+ (worthATry
						? " the transformation into TF/IDF features is worth a try!"
						: " the transformation into TF/IDF features has little chances of success!");

			this.doPrint(Print.MAX, report);
			System.out.println(report);
		}
		catch (SQLException sqle) {
			throw new M4CompilerError(
				"EvaluateAdvantageOFTFIDFTransformation: an error occurred: "
					+ sqle.getMessage());
		}
		finally {
			DB.closeResultSet(ergebnis);
		}
	}

	/**
	 * Reads the values of the given columns from the current row of the result set.
	 *
	 * @param row a result set positioned on a valid row
	 * @param columnNames the names of the columns to read
	 * @return the column values as strings, in the order of <code>columnNames</code>;
	 *         an entry is <code>null</code> where the database value is SQL NULL
	 * @throws SQLException if a column cannot be read
	 */
	private String[] readFeatureValues(ResultSet row, String[] columnNames)
		throws SQLException
	{
		String[] werte = new String[columnNames.length];
		for (int i = 0; i < columnNames.length; i++) {
			werte[i] = row.getString(columnNames[i]);
		}
		return werte;
	}

	/**
	 * Null-safe inequality test for two column values.
	 *
	 * @param previous the value in the preceding row, may be <code>null</code>
	 * @param current the value in the current row, may be <code>null</code>
	 * @return <code>true</code> if exactly one value is <code>null</code> or if
	 *         both are non-null and not equal
	 */
	private static boolean valueChanged(String previous, String current) {
		if (current == null) {
			return previous != null;
		}
		return !current.equals(previous);
	}

	/**
	 * No statistics are estimated by this operator.
	 *
	 * @param theStep not used by this implementation
	 * @return always <code>null</code>
	 * @see edu.udo.cs.miningmart.operator.ExecutableOperator#estimateStatistics(Step)
	 */
	public EstimatedStatistics estimateStatistics(Step theStep) throws M4Exception {
		return null;
	}

	/**
	 * Does nothing in this operator.
	 *
	 * @see edu.udo.cs.miningmart.operator.ExecutableOperator#compileStatement()
	 */
	public void compileStatement() throws SQLException, M4CompilerError {
	}

	/**
	 * Does nothing in this operator.
	 *
	 * @see edu.udo.cs.miningmart.operator.ExecutableOperator#writeResults()
	 */
	public void writeResults() throws SQLException, M4CompilerError {
	}

	/**
	 * Gets the input Concept.
	 * 
	 * @return the Concept specified by parameter "TheInputConcept"
	 */
	public Concept getInputConcept() throws M4CompilerError {
		return (Concept) this.getSingleParameter("TheInputConcept");
	}

	/**
	 * Parameter getter.
	 * 
	 * @return BaseAttribute as specified by parameter "TheTimeStamp"
	 */
	public BaseAttribute getTheTimeStamp() throws M4CompilerError {
		return (BaseAttribute) this.getSingleParameter("TheTimeStamp");
	}

	/**
	 * Parameter getter.
	 * 
	 * @return BaseAttribute as specified by parameter "TheKey"
	 */
	public BaseAttribute getTheKey() throws M4CompilerError {
		return (BaseAttribute) this.getSingleParameter("TheKey");
	}

	/**
	 * Getter method for the parameter "TheSelectedAttributes". This parameter specifies
	 * the set of features for which TFIDF should be computed.
	 * 
	 * @return an array of Feature objects
	 */
	protected Feature[] getTheSelectedAttributes() throws M4CompilerError {
		return (Feature[]) this.getParameter("TheSelectedAttributes");
	}

}
/*
 * History
 * -------
 *
 * $Log: EvaluateAdvantageOfTFIDFTransformation.java,v $
 * Revision 1.6  2006/04/11 14:10:12  euler
 * Updated license text.
 *
 * Revision 1.5  2006/04/06 16:31:11  euler
 * Prepended license remark.
 *
 * Revision 1.4  2006/03/30 16:07:13  scholz
 * fixed author tags for release
 *
 * Revision 1.3  2006/03/23 11:13:44  euler
 * Improved exception handling.
 *
 * Revision 1.2  2006/01/18 16:58:58  euler
 * Added some basic estimations of statistics.
 * Will need improvements.
 *
 * Revision 1.1  2006/01/03 09:54:22  hakenjos
 * Initial version!
 *
 */
