/*
 * MiningMart Version 1.0
 * 
 * Copyright (C) 2006 Martin Scholz, Timm Euler, 
 *                    Daniel Hakenjos, Katharina Morik
 *
 * Contact: miningmart@ls8.cs.uni-dortmund.de
 *
 * A list of contributing developers (other than the copyright 
 * holders) can be found at
 * http://mmart.cs.uni-dortmund.de/downloads/download.html
 * 
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program, see the file MM_HOME/LICENSE; if not, write
 * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
 * Floor, Boston, MA 02110-1301, USA.
 */
package edu.udo.cs.miningmart.operator;

import java.sql.SQLException;
import java.util.Iterator;
import java.util.StringTokenizer;
import java.util.Vector;

import edu.udo.cs.miningmart.exception.M4CompilerError;
import edu.udo.cs.miningmart.exception.M4Exception;
import edu.udo.cs.miningmart.m4.BaseAttribute;
import edu.udo.cs.miningmart.m4.Column;
import edu.udo.cs.miningmart.m4.Columnset;
import edu.udo.cs.miningmart.m4.Concept;
import edu.udo.cs.miningmart.m4.Feature;
import edu.udo.cs.miningmart.m4.MultiColumnFeature;
import edu.udo.cs.miningmart.m4.RelationalDatatypes;
import edu.udo.cs.miningmart.m4.Value;
import edu.udo.cs.miningmart.m4.utils.Print;

/**
 * Creates a table in the business data schema that holds the
 * specified statistics values.
 * 
 * @author Timm Euler
 * @version $Id: SpecifiedStatistics.java,v 1.5 2006/04/11 14:10:11 euler Exp $
 */
public class SpecifiedStatistics extends SingleCSOperator {

	// Suffixes that are appended to the name of an input attribute to form the
	// name of the output attribute (and table column) holding that statistic.
	public static final String SUM_SUFFIX    = "_SUM";
	public static final String COUNT_SUFFIX  = "_COUNT";
	public static final String UNIQUE_SUFFIX = "_UNIQUE";
	public static final String MIN_SUFFIX    = "_MIN";
	public static final String MAX_SUFFIX    = "_MAX";
	public static final String AVG_SUFFIX    = "_AVG";

	// Names of the operator parameters that are looked up via getParameter().
	// All of them are read as BaseAttribute arrays, except PARAMETER_DISTRIB_VAL,
	// which is read as a Value array.
	public static final String PARAMETER_ATTR_SUM     = "AttributesComputeSum";
	public static final String PARAMETER_ATTR_GROUPBY = "GroupBy";
	public static final String PARAMETER_ATTR_AVG     = "AttributesComputeAvg";
	public static final String PARAMETER_ATTR_MIN     = "AttributesComputeMin";
	public static final String PARAMETER_ATTR_MAX     = "AttributesComputeMax";
	public static final String PARAMETER_ATTR_COUNT   = "AttributesComputeCount";
	public static final String PARAMETER_ATTR_UNIQUE  = "AttributesComputeUnique";
	public static final String PARAMETER_ATTR_DISTRIB = "AttributesComputeDistrib";
	public static final String PARAMETER_DISTRIB_VAL  = "DistribValues";
	
	// Description of the target table, collected while the SQL is generated:
	// columnNames and columnTypes are filled by addColumnToTable(); sqlDefs
	// receives the SELECT expression for a column right after that call.
	private final Vector columnNames = new Vector();
	private final Vector columnTypes = new Vector();
	private final Vector sqlDefs = new Vector();
	
	/**
	 * Reports the kind of columnset this operator produces, which is
	 * always a table.
	 *
	 * @return the constant <code>Columnset.CS_TYPE_TABLE</code>
	 * @see SingleCSOperator#getTypeOfNewColumnSet()
	 */
	public String getTypeOfNewColumnSet() {
		final String typeOfResult = Columnset.CS_TYPE_TABLE;
		return typeOfResult;
	}

	/**
	 * Creates the column metadata of the output columnset.
	 * <p>
	 * For every feature of the output concept the first input feature that
	 * {@link #correspondsTo(Feature, Feature) corresponds} to it is looked up.
	 * The current column of that input feature is copied into the given
	 * columnset, linked to the output BaseAttribute, and named after it.
	 * Output features without a counterpart, or whose counterpart has been
	 * deselected, are skipped with a log message.
	 * <p>
	 * This replaces the generic behaviour because output feature names may
	 * carry statistic suffixes that the input feature names do not have.
	 *
	 * @param csForOutputConcept the columnset that receives the new columns
	 * @return always the empty String, because this operator creates a table
	 *         and therefore needs no column expression
	 * @throws M4CompilerError if a feature has an unknown type, if the
	 *         MultiColumnFeatures of input and output do not match, or if
	 *         the M4 interface reports an error
	 */
	protected String generateColumns(Columnset csForOutputConcept) throws M4CompilerError
	{
		try {
			// the features of the output concept are the selected features
			final Iterator outputFeatures = this.getOutputConcept().getFeatures().iterator();
			while (outputFeatures.hasNext())
			{
				final Feature outFeature = (Feature) outputFeatures.next();
				if (!mustCopyFeature(outFeature.getName()))
				{
					continue;
				}

				// walk through the input features in order and stop at the
				// first one that matches (or at the last one if none does)
				Feature inFeature = null;
				int position = 0;
				do {
					inFeature = getInputConcept().getFeature(position);
					position++;
				}
				while ((position < getInputConcept().getNumberOfFeatures())
					&& (!this.correspondsTo(outFeature, inFeature)));

				// the scan may have ended on a non-matching last feature
				if (!this.correspondsTo(outFeature, inFeature))
				{
					this.doPrint(
						Print.OPERATOR,
						"Output Concept '"
							+ getOutputConcept().getName()
							+ "': skipped feature '"
							+ outFeature.getName()
							+ "' because no corresponding input feature was found.");
					continue;
				}

				if (this.isDeselectedParameter(inFeature))
				{
					this.doPrint(
						Print.PARAM,
						"Output Concept '"
							+ getOutputConcept().getName()
							+ "': skipped feature '"
							+ outFeature.getName()
							+ "' because the corresponding input feature was deselected by "
							+ "a FeatureSelection operator.");
					continue;
				}

				// simple case: a single BaseAttribute
				if (outFeature instanceof BaseAttribute)
				{
					final BaseAttribute sourceBA = (BaseAttribute) inFeature;
					final BaseAttribute targetBA = (BaseAttribute) outFeature;
					final Column sourceColumn = sourceBA.getCurrentColumn();
					final Column targetColumn = sourceColumn.copyColToCS(csForOutputConcept);
					this.getStep().addToTrash(targetColumn);
					targetColumn.setBaseAttribute(targetBA);
					targetColumn.setSQLDefinition(targetBA.getName());
					targetColumn.setName(targetBA.getName());
					continue;
				}

				if (!(outFeature instanceof MultiColumnFeature))
				{
					throw new M4CompilerError(
						"Unknown Feature type found in Concept with id: "
							+ getOutputConcept().getId()
							+ "; Feature id: "
							+ outFeature.getId());
				}

				// MultiColumnFeature: copy the metadata of each contained BaseAttribute
				final MultiColumnFeature targetMCF = (MultiColumnFeature) outFeature;
				final MultiColumnFeature sourceMCF = (MultiColumnFeature) inFeature;
				final Iterator memberBAs = targetMCF.getBaseAttributes().iterator();
				if (memberBAs != null)
				{
					try
					{
						while (memberBAs.hasNext())
						{
							final BaseAttribute targetBA = (BaseAttribute) memberBAs.next();
							final BaseAttribute sourceBA = sourceMCF.getBaseAttributeByName(targetBA.getName());
							final Column sourceColumn = sourceBA.getCurrentColumn();
							final Column targetColumn = sourceColumn.copyColToCS(csForOutputConcept);
							targetColumn.setBaseAttribute(targetBA);
							targetColumn.setSQLDefinition(targetBA.getName());
							targetColumn.setName(targetBA.getName());
						}
					}
					catch (NullPointerException npe)
					{
						throw new M4CompilerError("ConceptOperator: Mismatch between MultiColumnFeatures in in- and output concept!");
					}
					catch (ArrayIndexOutOfBoundsException aioobe)
					{
						throw new M4CompilerError("ConceptOperator: Mismatch between MultiColumnFeatures in in- and output concept!");
					}
				}
			} // end loop through output features

			// no column expression needed, as this operator creates a new table anyway
			return "";
		}
		catch (M4Exception m4e)
		{
			throw new M4CompilerError("M4 interface error in " + this.getName() + ": " + m4e.getMessage());
		}
	} // end protected String generateColumns

	/**
	 * Decides whether an output feature has been derived from an input feature.
	 * <p>
	 * Both features must be of the same class. Their names correspond if they
	 * are equal (ignoring case), or if the output name becomes equal to the
	 * input name after removing operator-specific endings from it: the
	 * statistic suffixes (<code>_SUM</code>, <code>_COUNT</code>,
	 * <code>_UNIQUE</code>, <code>_MIN</code>, <code>_MAX</code>,
	 * <code>_AVG</code>) and <code>_&lt;value&gt;</code> for the values given
	 * in the parameter <code>DistribValues</code>.
	 * <p>
	 * The comparison is made after every removal step, not only at the end.
	 * Otherwise an input attribute whose own name ends in one of the suffixes
	 * (for example a group-by attribute <code>CUSTOMER_COUNT</code>, or
	 * <code>PRICE_MAX</code> for the output <code>PRICE_MAX_SUM</code>) could
	 * never be matched, because its own ending would be stripped as well.
	 *
	 * @param out the feature of the output concept, may be <code>null</code>
	 * @param in  the candidate feature of the input concept, may be <code>null</code>
	 * @return <code>true</code> iff the two features correspond;
	 *         <code>false</code> if either of them (or its name) is <code>null</code>
	 * @throws M4CompilerError if the parameter <code>DistribValues</code>
	 *         cannot be read
	 */
	private boolean correspondsTo(Feature out, Feature in)
	throws M4CompilerError {
		if (out == null || in == null)
		{    return false;  }
		if (out.getClass() != in.getClass())
		{    return false;  }

		String outName = out.getName();
		final String inName = in.getName();
		if (outName == null || inName == null)
		{    return false;  }

		// identical names always correspond, even if they end in a suffix
		if (outName.equalsIgnoreCase(inName))
		{    return true;  }

		final String[] statisticSuffixes =
			{ SUM_SUFFIX, COUNT_SUFFIX, UNIQUE_SUFFIX, MIN_SUFFIX, MAX_SUFFIX, AVG_SUFFIX };
		for (int i = 0; i < statisticSuffixes.length; i++) {
			if (endsWithIgnoreCase(outName, statisticSuffixes[i])) {
				outName = outName.substring(0, outName.length() - statisticSuffixes[i].length());
				if (outName.equalsIgnoreCase(inName))
				{    return true;  }
			}
		}

		Value[] theDistribValues = (Value[]) this.getParameter(PARAMETER_DISTRIB_VAL);
		if (theDistribValues != null) {
			for (int i = 0; i < theDistribValues.length; i++) {
				// an empty parameter entry contributes no endings
				if (theDistribValues[i] == null || theDistribValues[i].getValue() == null)
				{    continue;  }
				String[] vals = getSingleValues(theDistribValues[i]);
				for (int j = 0; j < vals.length; j++) {
					String valueSuffix = "_" + vals[j];
					if (endsWithIgnoreCase(outName, valueSuffix)) {
						outName = outName.substring(0, outName.length() - valueSuffix.length());
						if (outName.equalsIgnoreCase(inName))
						{    return true;  }
					}
				}
			}
		}
		return false;
	} // end private boolean correspondsTo

	/*
	 * Case-insensitive variant of String.endsWith(). Works on the original
	 * strings, so the length of the matched ending is always the length of
	 * the given suffix (case conversion may change the length of a string).
	 */
	private static boolean endsWithIgnoreCase(String text, String suffix) {
		// a negative offset (suffix longer than text) makes regionMatches return false
		return text.regionMatches(true, text.length() - suffix.length(), suffix, 0, suffix.length());
	}

	/**
	 * Computes the requested statistics and stores them in a newly created
	 * table of the business data schema.
	 * <p>
	 * The method collects the target columns and their SQL expressions for
	 * the group-by attributes and for all statistic parameters, creates the
	 * target table, and fills it with a single
	 * <code>INSERT INTO ... ( SELECT ... FROM ... GROUP BY ... )</code>
	 * statement. The distribution counts are part of that statement, too.
	 * <p>
	 * The collected column information is reset at the beginning, so that
	 * calling this method more than once on the same operator object does
	 * not accumulate the columns of earlier calls.
	 *
	 * @param selectPart ignored; it is only needed for views, but this
	 *        operator creates a table
	 * @return the name of the new table, which serves as its SQL definition
	 * @throws M4CompilerError if no columns were specified, if the input
	 *         columnset cannot be determined, or if creating or filling the
	 *         table fails
	 * @see SingleCSOperator#generateSQLDefinition(String)
	 */
	public String generateSQLDefinition(String selectPart)
		throws M4CompilerError
	{
		// computation may take some time:
		this.doPrint(Print.OPERATOR, "Operator SpecifiedStatistics is computing...");

		// forget the columns of any previous invocation:
		this.columnNames.clear();
		this.columnTypes.clear();
		this.sqlDefs.clear();

		// gets the GROUP BY expression for later usage and adds the attributes
		// to the Vectors columnNames and sqlDefs:
		final String groupBy = this.getGroupByExpr();

		// compute all sums:
		this.getStatisticsSQLFor(PARAMETER_ATTR_SUM, "SUM(", SUM_SUFFIX);

		// compute all counts:
		this.getStatisticsSQLFor(PARAMETER_ATTR_COUNT, "COUNT(", COUNT_SUFFIX);

		// compute all numbers of distinct values:
		this.getStatisticsSQLFor(PARAMETER_ATTR_UNIQUE, "COUNT(DISTINCT ", UNIQUE_SUFFIX);

		// compute all minimum values:
		this.getStatisticsSQLFor(PARAMETER_ATTR_MIN, "MIN(", MIN_SUFFIX);

		// compute all maximum values:
		this.getStatisticsSQLFor(PARAMETER_ATTR_MAX, "MAX(", MAX_SUFFIX);

		// compute all average values:
		this.getStatisticsSQLFor(PARAMETER_ATTR_AVG, "AVG(", AVG_SUFFIX);

		// get the distribution counts:
		this.getStatisticsSQLFor(PARAMETER_ATTR_DISTRIB, null, null);

		// Without any column neither the CREATE TABLE nor the INSERT statement
		// would be valid SQL, so fail early with a meaningful message:
		if (this.columnNames.isEmpty() || this.sqlDefs.isEmpty()) {
			throw new M4CompilerError(
				"Operator SpecifiedStatistics: no attributes were specified, "
				+ "so there are no statistics to compute!");
		}
		// every target column needs exactly one expression in the SELECT part:
		if (this.columnNames.size() != this.sqlDefs.size()) {
			throw new M4CompilerError(
				"Operator SpecifiedStatistics: found " + this.columnNames.size()
				+ " target columns but " + this.sqlDefs.size() + " SQL expressions!");
		}

		// read the target table name:
		final String tableName = this.getNewCSName();

		// the statistics are computed on the current columnset of the input concept:
		Columnset inCs;
		try {
			inCs = this.getInputConcept().getCurrentColumnSet();
		}
		catch (M4Exception e) {
			throw new M4CompilerError(
				"Specified Statistics: Could not get current Columnset of Concept '"
				+ this.getInputConcept().getName() + "'!");
		}
		if (inCs == null) {
			throw new M4CompilerError(
				"Specified Statistics: Could not get current Columnset of Concept '"
				+ this.getInputConcept().getName() + "'!");
		}

		// Prepare the SQL statement that inserts all statistics:
		StringBuffer query = new StringBuffer("INSERT INTO " + tableName + " ( ");
		Iterator it = this.columnNames.iterator();
		while (it.hasNext()) {
			query.append((String) it.next());
			if (it.hasNext())
				query.append(", ");
		}
		query.append(" ) ( SELECT ");

		it = this.sqlDefs.iterator();
		while (it.hasNext()) {
			query.append("(" + (String) it.next() + ")");
			if (it.hasNext())
				query.append(", ");
		}
		query.append(" FROM " + inCs.getSchemaPlusName() + groupBy + " )");
		final String inserts = query.toString();

		// Create the target table with all the necessary attributes:
		createTable(tableName);

		// Insert all statistics:
		try {
			this.executeBusinessSqlWrite(inserts);
		}
		catch (SQLException e) {
			// include the message of the database, otherwise the cause is lost
			throw new M4CompilerError("SQLException for M4 write statement: " + inserts
				+ "\nSQL error: " + e.getMessage());
		}

		this.doPrint(Print.OPERATOR, "Operator SpecifiedStatistics has finished.");

		// name of table is its SQL definition:
		return this.getNewCSName();
	}

	/*
	 * Builds the GROUP BY clause from the attributes given in the parameter
	 * 'GroupBy'. Each non-null attribute is also registered as a column of
	 * the target table, and its SQL definition is added to the Vector of
	 * SELECT expressions. Returns the clause including a leading
	 * " GROUP BY ", or an empty String if there is nothing to group by.
	 */
	private String getGroupByExpr()
		throws M4CompilerError
	{
		final BaseAttribute[] groupingAttributes =
			(BaseAttribute[]) this.getParameter(PARAMETER_ATTR_GROUPBY);
		if (groupingAttributes == null) {
			return "";
		}

		final StringBuffer expressions = new StringBuffer();
		for (int k = 0; k < groupingAttributes.length; k++) {
			final BaseAttribute attribute = groupingAttributes[k];
			if (attribute == null) {
				continue;
			}
			final String columnName = attribute.getName();
			final String definition = this.getColumnSQLForBaseAttribute(attribute);
			expressions.append(definition).append(", ");
			this.addColumnToTable(columnName);
			this.sqlDefs.add(definition);
		}

		// anything longer than a lone separator holds at least one expression
		if (expressions.length() <= 2) {
			return "";
		}
		// cut off the trailing separator
		return " GROUP BY " + expressions.substring(0, expressions.length() - 2);
	}

	/*
	 * Registers the target columns and their SQL expressions for one kind of
	 * statistic.
	 *
	 * m4ParameterName: name of the parameter listing the attributes.
	 * operator:        the opening part of the aggregate call, e.g. "SUM(";
	 *                  one closing bracket is appended after the attribute.
	 *                  If null, the distribution counts are generated instead.
	 * attributeSuffix: appended to the attribute name to form the column name
	 *                  (not used for distributions).
	 *
	 * For distributions, the i-th entry of the parameter 'DistribValues' lists
	 * the values to count for the i-th attribute; for each value a column
	 * <attribute>_<value> with the expression
	 * SUM(CASE WHEN <attribute> = <value> THEN 1 ELSE 0 END) is added.
	 *
	 * Null attributes are skipped in both cases. An M4CompilerError is thrown
	 * if the number of 'DistribValues' entries does not match the number of
	 * distribution attributes, or if the entry for an attribute is empty.
	 */
	private void getStatisticsSQLFor(String m4ParameterName, String operator, String attributeSuffix)
		throws M4CompilerError
	{
		BaseAttribute[] theBAs = (BaseAttribute[]) this.getParameter(m4ParameterName);
		if (theBAs == null)
			return;

		if (operator != null) {
			for (int i = 0; i < theBAs.length; i++) {
				BaseAttribute curBa = theBAs[i];
				if (curBa != null) {
					String name = curBa.getName() + attributeSuffix;
					String sql  = operator + this.getColumnSQLForBaseAttribute(curBa) + ")";
					this.addColumnToTable(name);
					this.sqlDefs.add(sql);
				}
			}
			return;
		}

		// special case: distributions!
		Value[] theDistribValues = (Value[]) this.getParameter(PARAMETER_DISTRIB_VAL);

		if (
		      ((theBAs.length > 0) && (theDistribValues == null))
		   	  ||
		      ((theDistribValues != null) && (theBAs.length != theDistribValues.length))
		   )
		{ throw new M4CompilerError("Operator SpecifiedStatistics: expected as many parameters '" +
				PARAMETER_DISTRIB_VAL + "' as '" + PARAMETER_ATTR_DISTRIB + "'!"); }

		for (int i = 0; i < theBAs.length; i++)	{
			BaseAttribute curBa = theBAs[i];
			if (curBa == null) {
				continue; // same treatment as for the other statistics
			}
			Value curValue = theDistribValues[i];
			if (curValue == null || curValue.getValue() == null) {
				throw new M4CompilerError("Operator SpecifiedStatistics: parameter '" +
						PARAMETER_DISTRIB_VAL + "' is empty for BaseAttribute '" +
						curBa.getName() + "'!");
			}
			String[] vals = getSingleValues(curValue);
			boolean useQuotesForValues = this.needsQuotes(curBa);
			for (int j = 0; j < vals.length; j++) {
				String name = curBa.getName() + "_" + vals[j];
				this.addColumnToTable(name);
				// a single quote inside a string literal must be doubled in SQL
				String quotedValue = (useQuotesForValues
						? "'" + vals[j].replaceAll("'", "''") + "'"
						: vals[j]);
				String sql = "SUM(CASE WHEN " + this.getColumnSQLForBaseAttribute(curBa) +
							 " = " + quotedValue + " THEN 1 ELSE 0 END)";
				this.sqlDefs.add(sql);
			}
		}
	}

	/*
	 * Returns true if the values of the given BaseAttribute must be quoted in
	 * SQL, which is the case if the data type of its current column is the
	 * relational string type. Returns false for a null BaseAttribute and for
	 * a column without a data type name.
	 *
	 * Throws an M4CompilerError if the column cannot be accessed or does not
	 * exist (instead of failing with a NullPointerException).
	 */
	private boolean needsQuotes(BaseAttribute ba) throws M4CompilerError {
		if (ba == null) {
			return false;
		}
		final Column c;
		String typeName = null;
		try {
			c = ba.getCurrentColumn();
			if (c != null) {
				typeName = c.getColumnDataTypeName();
			}
		}
		catch (M4Exception m4e) {
			throw new M4CompilerError("Operator SpecifiedStatistics: could not access column data type of BaseAttribute '" +
					ba.getName() + "'!");
		}
		if (c == null) {
			throw new M4CompilerError("Operator SpecifiedStatistics: could not access column data type of BaseAttribute '" +
					ba.getName() + "'!");
		}
		// constant first, so that a missing type name simply means "no quotes"
		return RelationalDatatypes.RELATIONAL_DATATYPE_STRING.equalsIgnoreCase(typeName);
	}
	
	/*
	 * Helper method calculating the distributions with the statistics
	 * facility of the database object. For every distribution attribute and
	 * each of its values given in 'DistribValues', a column
	 * <attribute>_<value> is registered and an assignment
	 * "<column>=<count>" is generated.
	 *
	 * Returns the comma-separated assignments of ALL attributes, or null if
	 * there are none. (The result used to be returned from inside the loop,
	 * so that only the first attribute with values was taken into account.)
	 *
	 * Throws an M4CompilerError if the number of 'DistribValues' entries does
	 * not match the number of distribution attributes.
	 */
	private String computeDistributions() throws M4CompilerError {
		BaseAttribute[] theBAs = (BaseAttribute[]) this.getParameter(PARAMETER_ATTR_DISTRIB);
		Value[] theDistribValues = (Value[]) this.getParameter(PARAMETER_DISTRIB_VAL);

		if (theBAs == null) {
			return null;
		}

		if (
		      ((theBAs.length > 0) && (theDistribValues == null))
	    	  ||
		      ((theDistribValues != null) && (theBAs.length != theDistribValues.length))
		   )
		{ throw new M4CompilerError("Operator SpecifiedStatistics: expected as many parameters '" +
				PARAMETER_DISTRIB_VAL + "' as '" + PARAMETER_ATTR_DISTRIB + "'!"); }

		StringBuffer distrib = new StringBuffer();
		for (int i = 0; i < theBAs.length; i++)
		{
			if (theBAs[i] == null) {
				continue; // nothing to count for a missing attribute
			}
			String[] vals = getSingleValues(theDistribValues[i]);
			for (int j = 0; j < vals.length; j++)
			{
				String count = computeDistCount(theBAs[i], vals[j]);
				String name = theBAs[i].getName() + "_" + vals[j];
				this.addColumnToTable(name);
				distrib.append(name + "=" + count + ", ");
			}
		}

		// only now, after all attributes have been handled, cut off the
		// trailing separator and return the result
		if (distrib.length() > 2) {
			return distrib.substring(0, distrib.length() - 2);
		}
		return null;
	}

	/*
	 * Helper method: fetches the current Column of the given BaseAttribute.
	 * An M4Exception raised by that access is converted into an
	 * M4CompilerError naming the BaseAttribute.
	 */
	private Column getColumnForBaseAttribute(BaseAttribute ba) throws M4CompilerError {
		try {
			return ba.getCurrentColumn();
		}
		catch (M4Exception e) {
			final String message =
				"SpecifiedStatistics: Exception when trying to get current column for"
				+ " BaseAttribute '" + ba.getName() + "'!";
			throw new M4CompilerError(message);
		}
	}
	
	/*
	 * Helper method: looks up the current Column of the given BaseAttribute
	 * and returns its SQL definition. If there is no Column, or the Column
	 * has no SQL definition, an M4CompilerError is thrown.
	 */
	private String getColumnSQLForBaseAttribute(BaseAttribute ba) throws M4CompilerError {
		final Column currentColumn = this.getColumnForBaseAttribute(ba);
		final String definition =
			(currentColumn == null) ? null : currentColumn.getSQLDefinition();
		if (definition != null) {
			return definition;
		}
		throw new M4CompilerError(
			"SpecifiedStatistics: Found <null> Column for BaseAttribute that is NOT deselected!"
			+ "\nBaseAttribute name is " + ba.getName());
	}

	/**
	 * Tells {@link #generateColumns(Columnset)} whether the output feature
	 * with the given name is to be handled. This operator handles every
	 * feature, regardless of its name.
	 *
	 * @param nameOfFeature the name of the output feature (not evaluated)
	 * @return always <code>true</code>
	 * @see edu.udo.cs.miningmart.m4.core.operator.ConceptOperator#mustCopyFeature(String)
	 */
	protected boolean mustCopyFeature(String nameOfFeature) {
		// no feature is ever excluded
		return true;
	}

	/*
	 * This method is called once for each attribute to be part of the target
	 * table. It checks that TheOutputConcept contains a BaseAttribute with
	 * the given name (ignoring case) and stores the column name together with
	 * the data type of that BaseAttribute's current column. If several
	 * features carry the name, the data type of the last one is used.
	 *
	 * A null columnName is ignored. An M4CompilerError is thrown if the
	 * matching feature is not a BaseAttribute, if it has no current column
	 * (the data type cannot be determined then), if there is no matching
	 * feature at all, or if the M4 interface reports an error.
	 */
	private void addColumnToTable(String columnName) throws M4CompilerError
	{
		if (columnName == null)
			return;

		try {
			// check that a BA with name == columnName
			// exists in TheOutputConcept:
			Concept outC = this.getOutputConcept();

			Iterator it = outC.getFeatures().iterator();
			boolean found = false;
			String columnType = null;
			while (it.hasNext()) {
				Feature f = (Feature) it.next();
				// called on columnName, which is known to be non-null
				if (columnName.equalsIgnoreCase(f.getName())) {
					if ( ! (f instanceof BaseAttribute)) {
						throw new M4CompilerError("Operator SpecifiedStatistics: Found a MCF, cannot handle it.");
					}
					Column outputColumn = ((BaseAttribute) f).getCurrentColumn();
					if (outputColumn == null) {
						throw new M4CompilerError(
							"Operator SpecifiedStatistics: BaseAttribute '" + f.getName()
							+ "' of TheOutputConcept has no column, cannot determine its data type!");
					}
					columnType = outputColumn.getColumnDataTypeName();
					found = true;
				}
			}

			if ( ! found) {
				throw new M4CompilerError(
					"Operator SpecifiedStatistics: TheOutputConcept must contain a BaseAttribute "
					+ "named '" + columnName + "'!");
			}
			this.columnNames.add(columnName);
			this.columnTypes.add(columnType);
		}
		catch (M4Exception m4e)
		{   throw new M4CompilerError("M4 interface error in " + this.getName() + ": " + m4e.getMessage());  }
	}

	/*
	 * Creates the target table and cares about some overhead: a table of the
	 * same name is dropped first, the column definitions are derived from the
	 * collected column names and types, and the new table is added to the
	 * trash index for this step.
	 *
	 * Throws an M4CompilerError if no columns have been collected (the
	 * statement would not be valid SQL), if a column's data type cannot be
	 * translated, or if the database or the M4 interface reports an error.
	 */
	private void createTable(String tableName) throws M4CompilerError {
		if (this.columnNames.isEmpty()) {
			throw new M4CompilerError(
				"Operator SpecifiedStatistics: cannot create table '" + tableName
				+ "' because no columns were specified!");
		}

		// declared outside the try block to be available for the error message
		StringBuffer create = new StringBuffer();
		try	{
			// try to drop the table to make sure it does not exist
			this.getM4Db().dropBusinessTable(tableName);

		    String numericDatatype = this.getM4Db().getNameOfNumericDatatype();

			create.append("create table " + tableName + " (");
			for (int i = 0; i < columnNames.size(); i ++) {
				// TODO: We do not know the right sizes for the column datatypes...
				int columnTypeSize = 100;
				String dataType = this.getM4Db().getDbNameOfM4Datatype((String) columnTypes.get(i), columnTypeSize, true);
				if (dataType == null) {
					throw new M4CompilerError(
						"Operator SpecifiedStatistics: could not determine the database type for column '"
						+ columnNames.get(i) + "'!");
				}
				if (numericDatatype != null && dataType.startsWith(numericDatatype)) {
					dataType = numericDatatype; // for NUMBER, use just NUMBER, not NUMBER(100) (would be too large)
				}
				if (i > 0) {
					create.append(", ");
				}
				create.append((String) columnNames.get(i)).append(" ").append(dataType);
			}
			create.append(")");

			this.getM4Db().executeBusinessSqlWrite(create.toString());
		}
		catch (SQLException sqle)
		{  throw new M4CompilerError("Operator SpecifiedStatistics: could not create table with the computed values, SQL error: "
						+ sqle.getMessage() +"\n" + create);  }
		catch (M4Exception m4e) {
			throw new M4CompilerError(m4e.getMessage());
		}

		// add the new table to the trash index:
		try {
			this.getM4Db().addTableToTrash(tableName, this.getInputConcept().getCurrentColumnSet().getSchema(), this.getStep().getId());
		}
		catch (M4Exception m4e)
		{   throw new M4CompilerError("M4 interface error in " + this.getName() + ": " + m4e.getMessage());  }

	}

	/*
	 * Decomposes the values of the distribution statistics: the String held
	 * by the given Value is split at commas and blanks. Consecutive
	 * delimiters do not produce empty entries.
	 *
	 * Returns an empty array if the Value or its String is null, so that
	 * callers can iterate over the result without further checks.
	 */
	private String[] getSingleValues(Value v)
	{
		String s = (v == null) ? null : v.getValue();
		if (s == null) {
			return new String[0];
		}
		StringTokenizer st = new StringTokenizer(s, ", "); // comma and blank as delimiters
		String[] ret = new String[st.countTokens()];
		for (int i = 0; st.hasMoreTokens(); i++)
		{
			ret[i] = st.nextToken();
		}
		return ret;
	}

	/*
	 * Asks the database object how many elements of the given
	 * BaseAttribute's column have the given value. The answer is returned
	 * unchanged; if it is null, a warning is printed before returning null.
	 */
	private String computeDistCount(BaseAttribute distBA, String value) throws M4CompilerError
	{
		final Column column = this.getColumnForBaseAttribute(distBA);
		final String count = this.getM4Db().computeNumberOfElementsForValue(column, value);
		if (count != null) {
			return count;
		}
		this.doPrint(Print.OPERATOR,
			"Warning: Operator SpecifiedStatistics: output table contains NULL values!");
		return null;
	}
}
/*
 * History
 * --------
 *
 * $Log: SpecifiedStatistics.java,v $
 * Revision 1.5  2006/04/11 14:10:11  euler
 * Updated license text.
 *
 * Revision 1.4  2006/04/06 16:31:10  euler
 * Prepended license remark.
 *
 * Revision 1.3  2006/03/29 09:50:47  euler
 * Added installation robustness.
 *
 * Revision 1.2  2006/03/23 11:13:45  euler
 * Improved exception handling.
 *
 * Revision 1.1  2006/01/03 09:54:21  hakenjos
 * Initial version!
 *
 */
