/*
 * MiningMart Version 1.0
 * 
 * Copyright (C) 2006 Martin Scholz, Timm Euler, 
 *                    Daniel Hakenjos, Katharina Morik
 *
 * Contact: miningmart@ls8.cs.uni-dortmund.de
 *
 * A list of contributing developers (other than the copyright 
 * holders) can be found at
 * http://mmart.cs.uni-dortmund.de/downloads/download.html
 * 
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program, see the file MM_HOME/LICENSE; if not, write
 * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
 * Floor, Boston, MA 02110-1301, USA.
 */
package edu.udo.cs.miningmart.operator;

import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.Vector;

import edu.udo.cs.miningmart.db.DB;
import edu.udo.cs.miningmart.exception.M4CompilerError;
import edu.udo.cs.miningmart.exception.M4Exception;
import edu.udo.cs.miningmart.m4.BaseAttribute;
import edu.udo.cs.miningmart.m4.Column;
import edu.udo.cs.miningmart.m4.Columnset;
import edu.udo.cs.miningmart.m4.Feature;

/**
 * @author Hanna Koepcke
 * @version $Id: FeatureConstructionWithTFIDF.java,v 1.6 2006/04/11 14:10:11 euler Exp $
 */
public class FeatureConstructionWithTFIDF extends SingleCSOperator {

	/*
	 * Comma-separated column lists built by makeListsOfColumns():
	 * colNames is used as the SELECT list when reading the input, and
	 * colNamesWithDefs (column name plus datatype name) is used as the
	 * column definition list of the CREATE TABLE statement.
	 */
	private String colNames, colNamesWithDefs;
	/*
	 * Datatype of the "_tfidf" columns created by this operator:
	 * NEW_COLDT_NAME is used as the SQL type in the table definition and as
	 * the datatype name of the new M4 column, NEW_COL_DT is passed to
	 * setColumnDataType() of the new M4 column.
	 * NOTE(review): 12 is presumably the M4 datatype id that belongs to
	 * "NUMBER" - confirm against the M4 datatype table.
	 */
	private final String NEW_COLDT_NAME = "NUMBER";
	private final short NEW_COL_DT = 12;
	
	/**
	 * Reports the kind of column set this operator produces. The operator
	 * materializes its result, so the type is always
	 * <code>Columnset.CS_TYPE_TABLE</code>.
	 *
	 * @return the constant <code>Columnset.CS_TYPE_TABLE</code>
	 * @see SingleCSOperator#getTypeOfNewColumnSet()
	 */
	public String getTypeOfNewColumnSet() {
		final String tableType = Columnset.CS_TYPE_TABLE;
		return tableType;
	}

	/**
	 * Creates the output table of this operator and fills it with one row
	 * per key value.
	 * <p>
	 * The method works in these steps:
	 * <ol>
	 * <li>Determine the physical column names of the key, the time stamp
	 *     and the selected attributes in the input concept.</li>
	 * <li>Read the input ordered by key and time stamp and collect, per key
	 *     value, the sequence of values of the selected attributes.</li>
	 * <li>Per key and attribute, count how often the value changes between
	 *     consecutive rows (the "frequency"), and per attribute count the
	 *     number of keys with at least one change (the "document
	 *     frequency").</li>
	 * <li>Create the new table, insert
	 *     <code>frequency * ln(numberOfKeys / documentFrequency)</code> for
	 *     every key and attribute, and commit.</li>
	 * <li>Register the new table in the trash of the current step and
	 *     commit the M4 transactions.</li>
	 * </ol>
	 *
	 * @param selectPart not used by this operator
	 * @return the name of the newly created table
	 * @throws M4CompilerError if a needed column cannot be found or if any
	 *         database access fails
	 * @see SingleCSOperator#generateSQLDefinition(String)
	 */
	public String generateSQLDefinition(String selectPart)
		throws M4CompilerError {

		final String where =
			"FeatureConstructionWithTFIDF.generateSQLDefinition(String):\n";

		Feature[] features = this.getTheSelectedAttributes();
		String[] columnNames = new String[features.length];
		String inputConcept;
		String columnSetName;
		String key = null;
		String keyDefinition = null;
		String timeStamp = null;

		try {
			this.makeListsOfColumns();
			columnSetName = this.getNewCSName();
			inputConcept =
				this.getInputConcept().getCurrentColumnSet().getName();
			BaseAttribute keyBA = this.getTheKey();
			BaseAttribute timeStampBA = this.getTheTimeStamp();

			Iterator it = this.getInputConcept().getFeatures().iterator();
			while (it.hasNext()) {
				Feature inF = (Feature) it.next();
				if (inF.correspondsTo(keyBA)) {
					key = ((BaseAttribute) inF).getCurrentColumn().getName();
					// The new table names its key column by the SQL
					// definition (see makeListsOfColumns()), so remember it
					// for the column list of the INSERT statements.
					keyDefinition =
						((BaseAttribute) inF)
							.getCurrentColumn()
							.getSQLDefinition();
				}
				if (inF.correspondsTo(timeStampBA)) {
					timeStamp =
						((BaseAttribute) inF).getCurrentColumn().getName();
				}
				for (int i = 0; i < features.length; i++) {
					BaseAttribute ba = (BaseAttribute) features[i];
					if (inF.correspondsTo(ba)) {
						columnNames[i] =
							((BaseAttribute) inF).getCurrentColumn().getName();
					}
				}
			}
		} catch (M4Exception e) {
			throw new M4CompilerError(where + e.getMessage());
		}

		// Fail early with a readable message instead of sending malformed
		// SQL to the database.
		if (key == null || keyDefinition == null) {
			throw new M4CompilerError(
				where
					+ "no column of the input concept corresponds to parameter 'TheKey'!");
		}
		if (timeStamp == null) {
			throw new M4CompilerError(
				where
					+ "no column of the input concept corresponds to parameter 'TheTimeStamp'!");
		}
		for (int i = 0; i < columnNames.length; i++) {
			if (columnNames[i] == null) {
				throw new M4CompilerError(
					where
						+ "no column of the input concept corresponds to selected attribute number "
						+ (i + 1)
						+ "!");
			}
		}

		/* Step 2: key value -> Vector of String[] (one array per input row) */
		Hashtable indexes = new Hashtable();
		ResultSet ergebnis = null;
		try {
			String query =
				"SELECT "
					+ this.colNames
					+ " FROM "
					+ inputConcept
					+ " ORDER BY "
					+ key
					+ ", "
					+ timeStamp;
			ergebnis = this.getM4Db().executeBusinessSqlRead(query);

			while (ergebnis.next()) {
				Integer id = new Integer(ergebnis.getInt(key));
				Vector werteListe = (Vector) indexes.get(id);
				if (werteListe == null) {
					werteListe = new Vector();
					indexes.put(id, werteListe);
				}
				werteListe.addElement(
					readSelectedValues(ergebnis, columnNames));
			}
		} catch (SQLException sqle) {
			throw new M4CompilerError(
				"FeatureConstructionWithTFIDF: An error occurred: "
					+ sqle.getMessage());
		} finally {
			// the only place where the result set is closed
			DB.closeResultSet(ergebnis);
		}

		/* Step 3: key value -> int[] with the number of value changes */
		Hashtable tfidf = new Hashtable();
		int[] df = new int[features.length];

		for (Enumeration e = indexes.keys(); e.hasMoreElements();) {
			Integer index = (Integer) e.nextElement();
			Vector aktuell = (Vector) indexes.get(index);

			int[] frequencies = new int[features.length];
			tfidf.put(index, frequencies);

			for (int i = 1; i < aktuell.size(); i++) {
				String[] currentValues = (String[]) aktuell.get(i);
				String[] previousValues = (String[]) aktuell.get(i - 1);
				for (int j = 0; j < features.length; j++) {
					// null-safe "value differs from the previous row"
					boolean changed =
						(currentValues[j] == null)
							? (previousValues[j] != null)
							: !currentValues[j].equals(previousValues[j]);
					if (changed) {
						frequencies[j]++;
					}
				}
			}

			for (int k = 0; k < features.length; k++) {
				if (frequencies[k] > 0) {
					df[k]++;
				}
			}
		}

		/* Step 4: create and fill the new table */
		try {
			this.createTable(columnSetName, this.colNamesWithDefs);
		} catch (SQLException sqle) {
			throw new M4CompilerError(
				"FeatureConstructionWithTFIDF: could not create table: "
					+ sqle.getMessage());
		}

		int total = 0;
		try {
			total =
				this
					.getM4Db()
					.executeBusinessSingleValueSqlReadL(
						"SELECT COUNT(DISTINCT "
							+ key
							+ ") FROM "
							+ inputConcept)
					.intValue();
		} catch (SQLException sqle) {
			throw new M4CompilerError(
				"FeatureConstructionWithTFIDF: An error occurred: "
					+ sqle.getMessage());
		}

		// The columns of the new table were created in the order of the
		// input concept's features, while the values below are in the
		// order of the selected attributes. Naming the target columns
		// explicitly makes the INSERT independent of that order.
		StringBuffer insertPrefix = new StringBuffer("INSERT INTO ");
		insertPrefix.append(columnSetName).append(" (").append(keyDefinition);
		for (int i = 0; i < columnNames.length; i++) {
			insertPrefix.append(", ").append(columnNames[i]).append("_tfidf");
		}
		insertPrefix.append(") VALUES (");

		for (Enumeration en = tfidf.keys(); en.hasMoreElements();) {
			Integer aktuelleId = (Integer) en.nextElement();
			int[] frequencyValues = (int[]) tfidf.get(aktuelleId);

			StringBuffer insert = new StringBuffer(insertPrefix.toString());
			insert.append(aktuelleId.intValue());
			for (int i = 0; i < frequencyValues.length; i++) {
				insert.append(", ").append(
					computeTfidfWeight(frequencyValues[i], total, df[i]));
			}
			insert.append(")");

			try {
				this.getM4Db().executeBusinessSqlWrite(insert.toString());
			} catch (SQLException sqle) {
				throw new M4CompilerError(
					"FeatureConstructionWithTFIDF: could not insert values: "
						+ sqle.getMessage());
			}
		}

		try {
			this.getM4Db().commitBusinessTransactions();
		} catch (SQLException sqle) {
			throw new M4CompilerError(
				"FeatureConstructionWithTFIDF: could not commit to business: "
					+ sqle.getMessage());
		}

		/* Step 5: register the table for garbage collection */
		try {
			this.getM4Db().addTableToTrash(
				this.getNewCSName(),
				this.getInputConcept().getCurrentColumnSet().getSchema(),
				this.getStep().getId());
		} catch (M4Exception e) {
			throw new M4CompilerError(where + e.getMessage());
		}

		try {
			this.getM4Db().commitM4Transactions();
		} catch (SQLException sqle) {
			throw new M4CompilerError(
				"FeatureConstructionWithTFIDF: could not commit to M4: "
					+ sqle.getMessage());
		}

		return columnSetName;
	}

	/**
	 * Reads the values of the given columns from the current row of the
	 * result set.
	 *
	 * @param rs a result set positioned on a row
	 * @param columnNames the names of the columns to read
	 * @return the column values as strings, in the order of
	 *         <code>columnNames</code>; entries may be <code>null</code>
	 * @throws SQLException if a column cannot be read
	 */
	private static String[] readSelectedValues(
		ResultSet rs,
		String[] columnNames)
		throws SQLException {
		String[] werte = new String[columnNames.length];
		for (int i = 0; i < columnNames.length; i++) {
			werte[i] = rs.getString(columnNames[i]);
		}
		return werte;
	}

	/**
	 * Computes <code>frequency * ln(total / documentFrequency)</code> using
	 * floating point division.
	 *
	 * @param frequency number of value changes for one key and attribute
	 * @param total number of distinct key values
	 * @param documentFrequency number of keys with at least one change of
	 *        the attribute
	 * @return the weight; <code>0</code> if the frequency is zero or if the
	 *         quotient is undefined (no key with a change, or no keys)
	 */
	private static double computeTfidfWeight(
		int frequency,
		int total,
		int documentFrequency) {
		// A zero document frequency implies a zero frequency for every key;
		// returning 0 here avoids a division by zero.
		if (frequency == 0 || documentFrequency <= 0 || total <= 0) {
			return 0.0;
		}
		return frequency * Math.log((double) total / documentFrequency);
	}

	/**
	 * Tells the caller that no feature is to be copied: this implementation
	 * answers <code>false</code> for every feature name.
	 *
	 * @param nameOfFeature name of the feature in question (ignored)
	 * @return always <code>false</code>
	 * @see ConceptOperator#mustCopyFeature(String)
	 */
	protected boolean mustCopyFeature(String nameOfFeature)	throws M4CompilerError {
		final boolean copyFeature = false;
		return copyFeature;
	}

	/**
	 * Creates the M4 columns of the output column set. According to the
	 * original documentation this overrides the method in
	 * <code>ConceptOperator</code>.
	 * <p>
	 * For the input feature that corresponds to the key, its current column
	 * is copied to the output column set. For every input feature that
	 * corresponds to one of the selected attributes, a new column named
	 * <code>&lt;input column name&gt;_tfidf</code> is created and attached
	 * to the corresponding base attribute of the output concept.
	 *
	 * @param csForOutputConcept the column set the columns are created in
	 * @return a comma-separated list made of the SQL definition of the key
	 *         column and the names of the new columns
	 * @throws M4CompilerError if no column could be created, if an output
	 *         attribute is missing, or if an M4 access fails
	 * @see ConceptOperator#generateColumns(Columnset)
	 */
	protected String generateColumns(Columnset csForOutputConcept)
		throws M4CompilerError {
		StringBuffer columnExpr = new StringBuffer();
		try {
			// parameters do not change while iterating, so look them up once
			BaseAttribute keyBA = this.getTheKey();
			Feature[] selectedFeatures = this.getTheSelectedAttributes();

			Iterator it = this.getInputConcept().getFeatures().iterator();
			while (it.hasNext()) {
				Feature inF = (Feature) it.next();
				if (inF.correspondsTo(keyBA)) {
					Column keyColumn =
						((BaseAttribute) inF).getCurrentColumn().copyColToCS(
							csForOutputConcept);
					columnExpr.append(keyColumn.getSQLDefinition()).append(", ");
				}
				for (int i = 0; i < selectedFeatures.length; i++) {
					BaseAttribute ba = (BaseAttribute) selectedFeatures[i];
					if (inF.correspondsTo(ba)) {
						BaseAttribute outB = this.findOutputAttribute(inF);
						Column col =
							this.createM4Column(
								outB,
								csForOutputConcept,
								((BaseAttribute) inF)
									.getCurrentColumn()
									.getName()
									+ "_tfidf");
						columnExpr.append(col.getName()).append(", ");
					}
				}
			}
		} catch (M4Exception e) {
			throw new M4CompilerError(
				"FeatureConstructionWithTFIDF.generateColumns(Columnset):\n"
					+ e.getMessage());
		}

		if (columnExpr.length() == 0) {
			throw new M4CompilerError(
				"ConceptOperator ("
					+ this.getName()
					+ "): No columns for output concept created!");
		}

		// cut off the trailing ", "
		columnExpr.setLength(columnExpr.length() - 2);
		return columnExpr.toString();
	}

	/**
	 * Finds the first feature of the output concept that corresponds to the
	 * given input feature.
	 *
	 * @param inF a feature of the input concept
	 * @return the corresponding base attribute of the output concept
	 * @throws M4CompilerError if the output concept has no corresponding
	 *         feature; previously the last output feature was silently used
	 *         in that case, which attached the new column to an unrelated
	 *         attribute
	 * @throws M4Exception if an M4 access fails
	 */
	private BaseAttribute findOutputAttribute(Feature inF)
		throws M4CompilerError, M4Exception {
		for (int i = 0; i < getOutputConcept().getNumberOfFeatures(); i++) {
			BaseAttribute candidate =
				(BaseAttribute) getOutputConcept().getFeature(i);
			if (inF.correspondsTo(candidate)) {
				return candidate;
			}
		}
		throw new M4CompilerError(
			"FeatureConstructionWithTFIDF ("
				+ this.getName()
				+ "): the output concept has no feature corresponding to a selected attribute of the input concept!");
	}

	/**
	 * Builds a new M4 column object and links it to both the given base
	 * attribute and the given column set. The column gets the id 0, the
	 * datatype given by <code>NEW_COL_DT</code> / <code>NEW_COLDT_NAME</code>,
	 * and its own name as SQL definition.
	 *
	 * @param outputBA the base attribute of the output concept the
	 *        new column should belong to
	 * @param newCS the new ColumnSet created by the operator
	 * @param newColumnName the name of the new column
	 * @return the new Column
	 */
	protected Column createM4Column(
		BaseAttribute outputBA,
		Columnset newCS,
		String newColumnName)
		throws M4CompilerError, M4Exception {
		final Column tfidfColumn =
			new edu.udo.cs.miningmart.m4.core.Column(this.getM4Db());

		// describe the column itself
		tfidfColumn.setId(0);
		tfidfColumn.setName(newColumnName);
		tfidfColumn.setColumnset(newCS);
		tfidfColumn.setBaseAttribute(outputBA);
		tfidfColumn.setColumnDataType(this.NEW_COL_DT);
		tfidfColumn.setColumnDataTypeName(this.NEW_COLDT_NAME);
		tfidfColumn.setSQLDefinition(newColumnName);

		// make the column known to its owners
		outputBA.addColumn(tfidfColumn);
		newCS.addColumn(tfidfColumn);

		return tfidfColumn;
	}

	/**
	 * Fills the fields <code>colNames</code> and
	 * <code>colNamesWithDefs</code>.
	 * <p>
	 * Both are comma-separated lists in the order of the features of the
	 * input concept. For the feature corresponding to the key,
	 * <code>colNames</code> gets the SQL definition of its current column
	 * and <code>colNamesWithDefs</code> gets that SQL definition followed by
	 * the column's datatype name. For every feature corresponding to a
	 * selected attribute, <code>colNames</code> gets the column name and
	 * <code>colNamesWithDefs</code> gets
	 * <code>&lt;column name&gt;_tfidf NUMBER</code>.
	 *
	 * @throws M4CompilerError if an M4 access fails or if no input feature
	 *         corresponds to the key or to a selected attribute
	 */
	private void makeListsOfColumns() throws M4CompilerError {
		final String where =
			"FeatureConstructionWithTFIDF.makeListsOfColumns():\n";
		StringBuffer names = new StringBuffer();
		StringBuffer namesWithDefs = new StringBuffer();

		try {
			// parameters do not change while iterating, so look them up once
			BaseAttribute keyBA = this.getTheKey();
			Feature[] selectedFeatures = this.getTheSelectedAttributes();

			Iterator it = this.getInputConcept().getFeatures().iterator();
			while (it.hasNext()) {
				Feature inF = (Feature) it.next();
				if (inF.correspondsTo(keyBA)) {
					names
						.append(
							((BaseAttribute) inF)
								.getCurrentColumn()
								.getSQLDefinition())
						.append(", ");
					namesWithDefs
						.append(
							((BaseAttribute) inF)
								.getCurrentColumn()
								.getSQLDefinition())
						.append(" ")
						.append(
							((BaseAttribute) inF)
								.getCurrentColumn()
								.getColumnDataTypeName())
						.append(", ");
				}
				for (int i = 0; i < selectedFeatures.length; i++) {
					BaseAttribute ba = (BaseAttribute) selectedFeatures[i];
					if (inF.correspondsTo(ba)) {
						String columnName =
							((BaseAttribute) inF).getCurrentColumn().getName();
						names.append(columnName).append(", ");
						namesWithDefs
							.append(columnName)
							.append("_tfidf ")
							.append(this.NEW_COLDT_NAME)
							.append(", ");
					}
				}
			}
		} catch (M4Exception e) {
			throw new M4CompilerError(where + e.getMessage());
		}

		// Without this check, cutting off the trailing ", " below would
		// fail with a StringIndexOutOfBoundsException on empty lists.
		if (names.length() == 0) {
			throw new M4CompilerError(
				where
					+ "no column of the input concept corresponds to the key or to a selected attribute!");
		}

		names.setLength(names.length() - 2);
		namesWithDefs.setLength(namesWithDefs.length() - 2);
		this.colNames = names.toString();
		this.colNamesWithDefs = namesWithDefs.toString();
	}

	/**
	 * (Re-)creates a table in the business data schema: a table with the
	 * given name is dropped first, then a new one is created with the given
	 * column definitions.
	 *
	 * @param tableName name of the table to create
	 * @param columnDefs comma-separated column definitions, placed between
	 *        the parentheses of the CREATE TABLE statement
	 * @throws SQLException if the CREATE TABLE statement fails
	 * @throws M4CompilerError if dropping the old table fails
	 */
	private void createTable(String tableName, String columnDefs)
		throws SQLException, M4CompilerError {
		// Step 1: get rid of a table of the same name, if there is one.
		try {
			this.getM4Db().dropBusinessTable(tableName);
		} catch (M4Exception dropFailure) {
			throw new M4CompilerError(dropFailure.getMessage());
		}

		// Step 2: assemble and run the DDL statement.
		StringBuffer ddl = new StringBuffer("CREATE TABLE ");
		ddl.append(tableName).append(" (").append(columnDefs).append(")");
		this.getM4Db().executeBusinessSqlWrite(ddl.toString());
	}

	/**
	 * Reads the parameter "TheSelectedAttributes", i.e. the set of features
	 * for which TFIDF should be computed.
	 *
	 * @return an array of Feature objects
	 * @throws M4CompilerError if the parameter cannot be read
	 */
	protected Feature[] getTheSelectedAttributes() throws M4CompilerError {
		final Object selected = this.getParameter("TheSelectedAttributes");
		return (Feature[]) selected;
	}

	/**
	 * Reads the single-valued parameter "TheTimeStamp".
	 *
	 * @return BaseAttribute as specified by parameter "TheTimeStamp"
	 * @throws M4CompilerError if the parameter cannot be read
	 */
	public BaseAttribute getTheTimeStamp() throws M4CompilerError {
		final Object timeStamp = this.getSingleParameter("TheTimeStamp");
		return (BaseAttribute) timeStamp;
	}

	/**
	 * Reads the single-valued parameter "TheKey".
	 *
	 * @return BaseAttribute as specified by parameter "TheKey"
	 * @throws M4CompilerError if the parameter cannot be read
	 */
	public BaseAttribute getTheKey() throws M4CompilerError {
		final Object keyAttribute = this.getSingleParameter("TheKey");
		return (BaseAttribute) keyAttribute;
	}

}
/*
 * History
 * --------
 *
 * $Log: FeatureConstructionWithTFIDF.java,v $
 * Revision 1.6  2006/04/11 14:10:11  euler
 * Updated license text.
 *
 * Revision 1.5  2006/04/06 16:31:11  euler
 * Prepended license remark.
 *
 * Revision 1.4  2006/03/30 16:07:12  scholz
 * fixed author tags for release
 *
 * Revision 1.3  2006/03/29 09:50:47  euler
 * Added installation robustness.
 *
 * Revision 1.2  2006/03/23 11:13:45  euler
 * Improved exception handling.
 *
 * Revision 1.1  2006/01/03 09:54:21  hakenjos
 * Initial version!
 *
 */
