/*
 * MiningMart Version 1.1
 * 
 * Copyright (C) 2006 Martin Scholz, Timm Euler, 
 *                    Daniel Hakenjos, Katharina Morik
 *
 * Contact: miningmart@ls8.cs.uni-dortmund.de
 *
 * A list of contributing developers (other than the copyright 
 * holders) can be found at
 * http://mmart.cs.uni-dortmund.de/downloads/download.html
 * 
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program, see the file MM_HOME/LICENSE; if not, write
 * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
 * Floor, Boston, MA 02110-1301, USA.
 */
package edu.udo.cs.miningmart.operator;

import java.sql.SQLException;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Set;
import java.util.TreeMap;
import java.util.Vector;

import edu.udo.cs.miningmart.exception.M4CompilerError;
import edu.udo.cs.miningmart.exception.M4CompilerWarning;
import edu.udo.cs.miningmart.exception.M4Exception;
import edu.udo.cs.miningmart.exception.UserError;
import edu.udo.cs.miningmart.m4.BaseAttribute;
import edu.udo.cs.miningmart.m4.Column;
import edu.udo.cs.miningmart.m4.Columnset;
import edu.udo.cs.miningmart.m4.Concept;
import edu.udo.cs.miningmart.m4.RelationalDatatypes;

/**
 * This class realizes an operator for grouping segments of a <code>Concept</code>
 * together. Applications of the operator <code>SegmentationStratified</code> usually
 * result in several <code>Columnset</code>s for a single <code>Concept</code>.
 * The entry in the field <i>CS_MSBRANCH</i> of table <i>COLUMNSET_T</i> specifies
 * how each of the <code>Columnset</code>s was generated.
 * If a set of <code>Columnset</code>s was generated by a segmentation based on a
 * <code>BaseAttribute</code> <i>b1</i>, then the <code>Unsegment</code> operator 
 * performs a union operation for all <code>Columnsets</code> with the same
 * segmentation history stored in <i>CS_MSBRANCH</i>, ignoring the information given
 * for <i>b1</i>. Therefore <i>b1</i> needs to be specified as a <code>Parameter</code>
 * of the <code>Step</code>. Other kinds of <code>Segmentation</code> are not based on
 * a <code>BaseAttribute</code>. These may be reversed by using predefined
 * pseudo-attributes, like <code>(Random)</code> and <code>(KMeans)</code>, instead.
 * 
 * @author Martin Scholz
 * @version $Id: Unsegment.java,v 1.6 2006/09/27 14:59:56 euler Exp $
 */
public class Unsegment extends MultipleCSOperator {

	/** Name of the step parameter that holds the attribute to unsegment by. */
	private static final String UNSEGMENT_ATTRIB_PARAM = "UnsegmentAttribute";

	/** Type of every <code>Columnset</code> created by this operator: a view. */
	private static final String TYPE_OF_OUTPUT_CS      = Columnset.CS_TYPE_VIEW;

	/**
	 * Length threshold (in characters) for a single union statement. If adding
	 * another sub-select would reach this length, the statement built so far
	 * is stored as an intermediate view in the database.
	 */
	private static final int    VIEW_SIZE_RESTRICTION  = 2000;

	/**
	 * Lazily built map from the multi step branch information (without the
	 * part for the unsegment attribute) to the <code>Vector</code> of input
	 * <code>Columnset</code>s sharing that information. It is filled once by
	 * <code>aggregateColsets()</code> and never modified afterwards, so the
	 * iteration order of its key set stays stable for this instance.
	 */
	private HashMap colsetsByMsbInfo = null;

    
	/**
	 * @return the number of distinct groups of input <code>Columnset</code>s,
	 * which is the number of <code>Columnset</code>s to be created.
	 * @see edu.udo.cs.miningmart.m4.core.operator.MultipleCSOperator#numberOfColumnSets()
	 */
	public int numberOfColumnSets() throws M4CompilerError {
		return this.getColsetsKeySet().size();
	}

    /**
     * @return an array of all the distinct <code>String</code>s found
     * in the <i>CS_MSBRANCH</i> field of the <code>Columnset</code>s of
     * the <i>InputConcept</i> <b>after removing</b> the part containing
     * the attribute specified for unsegmentation!
     */
	private String[] getDistinctMsbInfos() throws M4CompilerError {
		final Set keys = this.getColsetsKeySet();
		return (String[]) keys.toArray(new String[keys.size()]);
	}

	/**
	 * The operator creates a view.
	 * 
	 * @param index ignored, all output <code>Columnset</code>s have the same type
	 * @return the type constant <code>Columnset.CS_TYPE_VIEW</code>
	 * @see edu.udo.cs.miningmart.m4.core.operator.ConceptOperator#getTypeOfNewColumnSet(int)
	 */
	public String getTypeOfNewColumnSet(int index) {
		return TYPE_OF_OUTPUT_CS;
	}

	/**
	 * All attributes of a view realizing a <code>Feature</code> are copied,
	 * except for the one specified for Unsegmentation.
	 * 
	 * @param nameOfFeature the name of the feature in question
	 * @return <code>false</code> iff the name equals (case sensitively) the
	 * name of the unsegment attribute
	 * @throws M4CompilerError if the unsegment attribute cannot be read
	 * @see edu.udo.cs.miningmart.m4.core.operator.ConceptOperator#mustCopyFeature(String)
	 */
	protected boolean mustCopyFeature(String nameOfFeature) throws M4CompilerError {
		return (!nameOfFeature.equals(this.getUnsegmentAttributeName()));
	}
    
	/**
	 * Generates an SQL-definition unifying all <code>Columnset</code>s which
	 * belong to the same segment after unsegmenting. If the statement becomes
	 * too long, then intermediate views are created in the database and used
	 * by views of higher level.
	 * 
	 * @param selectPart not used by this operator
	 * @param index the number of the output <code>Columnset</code>, must be
	 * in the range from 0 to <code>numberOfColumnSets() - 1</code>
	 * @return the SQL definition for the output <code>Columnset</code>
	 * @throws M4CompilerError if the index is out of range, if no input
	 * <code>Columnset</code>s are found, or if an intermediate view cannot
	 * be created
	 * @see edu.udo.cs.miningmart.m4.core.operator.ConceptOperator#generateSQLDefinition(String, int)
	 */
	public String generateSQLDefinition(String selectPart, int index) throws M4CompilerError
	{
		final String key = this.getMsbKey(index);
		final Vector theColSets = this.getColsetVectorByKey(key);
		return this.sqlUnionOfColsets(theColSets, index);
	}

	/**
	 * Maps the index of an output <code>Columnset</code> to its key in
	 * <code>colsetsByMsbInfo</code>, using the iteration order of the key set.
	 * 
	 * @param index the number of the output <code>Columnset</code>
	 * @return the multi step branch information used as the key
	 * @throws M4CompilerError if the index does not denote an existing key
	 */
	private String getMsbKey(int index) throws M4CompilerError {
		final Object[] keys = this.getColsetsKeySet().toArray();
		if (index < 0 || index >= keys.length) {
			// Report a compiler error instead of an ArrayIndexOutOfBoundsException.
			throw new M4CompilerError("Unsegment: Columnset index " + index +
									  " is out of range, there are only " + keys.length +
									  " group(s) of Columnsets to be unified.");
		}
		return (String) keys[index];
	}

	/**
	 * Builds the union over the refined SQL definitions of all given
	 * <code>Columnset</code>s.
	 * 
	 * @param theColsets the <code>Columnset</code>s to be unified
	 * @param index the number of the output <code>Columnset</code>, used
	 * for naming intermediate views
	 * @return the SQL definition of the union
	 * @throws M4CompilerError if there is no <code>Columnset</code> to unify
	 */
	private String sqlUnionOfColsets(Vector theColsets, int index) throws M4CompilerError {
		if (theColsets == null || theColsets.size() == 0)
			throw new M4CompilerError("Unsegment: No Columnsets found when trying to " +
									  "generate an SQL-view for union of ColumnSets.");

		final Vector sqlDefs = new Vector();
		for (Iterator it = theColsets.iterator(); it.hasNext(); ) {
			sqlDefs.add(this.refineSqlDef((Columnset) it.next()));
		}
		return this.concatenateOrSubView(sqlDefs, index, 1);
	}

	/**
	 * Joins the given sub-selects with <code>union</code>. Whenever adding the
	 * next sub-select would reach <code>VIEW_SIZE_RESTRICTION</code>, the
	 * statement built so far is stored as an intermediate view and replaced
	 * by a select on that view. The replacements are then joined recursively
	 * in the same way.
	 * 
	 * @param viewDefs the sub-selects (<code>String</code>s) to be joined
	 * @param index the number of the output <code>Columnset</code>, part of
	 * the names of intermediate views
	 * @param nameSuffix the next free number for naming intermediate views
	 * @return the joined statement, or <code>null</code> if there are no
	 * sub-selects
	 * @throws M4CompilerError if an intermediate view cannot be created
	 */
	private String concatenateOrSubView(Vector viewDefs, final int index, int nameSuffix)
	throws M4CompilerError
	{
		if (viewDefs == null || viewDefs.size() < 1)
			return null;
		final Iterator it = viewDefs.iterator();
		StringBuffer sbuf = new StringBuffer((String) it.next());
		final String viewNamePre = this.getNewCSName(index) + "_V_" + index + "_";
		final Vector meta = new Vector(); // selects on the intermediate views
		while (it.hasNext()) {
			final String nextView = (String) it.next();
			if (sbuf.length() + nextView.length() < VIEW_SIZE_RESTRICTION) {
				sbuf.append(" union ").append(nextView);
			}
			else {
				// Statement would get too long: store what we have and start anew.
				meta.add(this.wrapInIntermediateView(sbuf.toString(), viewNamePre + nameSuffix++));
				sbuf = new StringBuffer(nextView);
			}
		}
		if (meta.isEmpty()) {
			// Everything fits into a single statement.
			if (viewDefs.size() == 1)
				return sbuf.toString();
			return "(" + sbuf.toString() + ")";
		}
		// Store the remainder as well, then unify the intermediate views.
		meta.add(this.wrapInIntermediateView(sbuf.toString(), viewNamePre + nameSuffix++));
		return this.concatenateOrSubView(meta, index, nameSuffix);
	}

	/**
	 * Stores the given statement as an intermediate view.
	 * 
	 * @param unionSql the statement to be stored
	 * @param viewName the name of the view to be created
	 * @return a sub-select reading everything from the new view
	 * @throws M4CompilerError if the view cannot be created
	 */
	private String wrapInIntermediateView(String unionSql, String viewName) throws M4CompilerError {
		this.createView("(" + unionSql + ")", viewName);
		return "(select * from " + viewName + ")";
	}

	/**
	 * Creates a view in the schema of the current <code>Columnset</code> of
	 * the input concept and registers it as garbage of this <code>Step</code>.
	 * 
	 * @param viewDef the SQL definition of the view
	 * @param viewName the name of the view, without schema
	 * @throws M4CompilerError if the view cannot be created or registered
	 */
	private void createView(String viewDef, String viewName) throws M4CompilerError 
	{
		String sql = null; // declared here to be available for the error message
		try {
			final String schema = this.getInputConcept().getCurrentColumnSet().getSchema();
			sql = "create view " + schema + "." + viewName + " as (" + viewDef + ")";
		
			this.getM4Db().executeBusinessSqlWrite(sql);
			this.getM4Db().addViewToTrash(viewName, schema, this.getStep().getId());
		}
		catch (SQLException e) {
			throw new M4CompilerError("Unsegment: Could not create intermediate view. SQL:\n" +
									  sql + "\n" + e.getMessage());
		}
   		catch (M4Exception m4e)
   		{   throw new M4CompilerError("M4 interface error in " + this.getName() + ": " + m4e.getMessage());  } 
	}

	/** 
	 * This method reads the SQL definition of the given <code>Columnset</code>
	 * and adds the segmentation information found in the <i>CS_MSBRANCH</i> 
	 * for the unsegment-attribute (if it is no pseudo-attribute!).
	 * The <code>Column</code>s are selected in the order of their names, so
	 * the selects for different <code>Columnset</code>s list their columns
	 * in the same order.
	 * 
	 * @param c The <code>Columnset</code> to refine the SQL-definition for.
	 * @return a complete SQL statement yielding the view corresponding to the
	 * extended <code>Columnset</code>. Thus all <code>Column</code>s may just
	 * have their names entered as SQL definitions.
	 * @throws M4CompilerError if no value for the unsegment attribute is found
	 * for the <code>Columnset</code>, or if there is nothing to select
	 */
	private String refineSqlDef(Columnset c) throws M4CompilerError 
	{
		try {
			final String srcDef = c.getSchemaPlusName();
			final Column[] columns = Unsegment.sortColsByName(c.getColumns());

			// The separator is written in front of each entry but the first,
			// so nothing needs to be cut off afterwards.
			final StringBuffer sql = new StringBuffer("(select ");
			for (int i=0; i<columns.length; i++) {
				if (i > 0)
					sql.append(", ");
				final String sqlDef  = columns[i].getSQLDefinition();
				final String colName = columns[i].getName();
				if (sqlDef.equals(colName))
					sql.append(colName);
				else sql.append(sqlDef + " AS " + colName);
			}
			
			final String unsegAttrName = this.getUnsegmentAttributeName();
			final String value = c.getMSBranchSelectionValue(unsegAttrName);
			if (value == null || value.length() == 0)
				throw new M4CompilerError("Unsegment: Could not find information for unsegment-attribute " +
											unsegAttrName + " in SQL definition of Columnset " + c.getName() +
											", id: " + c.getId() + "\nSQL definition is:\n" + c.getSQLDefinition());

			if (this.unsegmentBaToBeWritten()) {
				if (columns.length > 0)
					sql.append(", ");
				sql.append(value + " AS " + unsegAttrName);
			}
			else if (columns.length == 0) {
				// An empty select list would yield invalid SQL.
				throw new M4CompilerError("Unsegment: Columnset " + c.getName() + ", id: " + c.getId() +
											" has no Columns, so no SQL definition can be generated for it.");
			}
			return sql.toString() + " from " + srcDef + ")";
		}
   		catch (M4Exception m4e)
   		{   throw new M4CompilerError("M4 interface error in " + this.getName() + ": " + m4e.getMessage());  } 
	}

	/**
	 * Helper method to sort a Column collection by Column names, ignoring case.
	 * Exceptions are thrown, if the input collection is null, contains
	 * a null value, contains a Column without a name, or contains two different
	 * Columns whose names differ in case only.
	 * 
	 * @param cols the <code>Column</code>s to be sorted
	 * @return an array of the given <code>Column</code>s, sorted by their
	 * names in upper case, without null entries
	 * @throws M4CompilerError in the cases listed above
	 */
	private static Column[] sortColsByName(Collection cols) throws M4CompilerError {
		if (cols == null) {
			throw new M4CompilerError(
				"Error in Unsegment operator: Found null value instead "
			  + "of input Column[] when trying to sortColsByName!");
		}
		// Sort by entering values into a TreeMap with the Column name as the key.
		final TreeMap tm = new TreeMap();
		final Iterator it = cols.iterator();
		while (it.hasNext()) {
			final Column c = (Column) it.next();
			if (c == null) {
				throw new M4CompilerError(
					"Error in Unsegment operator: Found null value in "
				  + "Column[] input when trying to sortColsByName!");
			}
			final String name = c.getName();
			if (name == null) {
				throw new M4CompilerError(
					"Error in Unsegment operator: Found a Column without "
				  + "a name when trying to sortColsByName!");
			}
			final Object displaced = tm.put(name.toUpperCase(), c);
			if (displaced != null && displaced != c) {
				// A silently dropped Column would lead to a wrong select list.
				throw new M4CompilerError(
					"Error in Unsegment operator: Found the Columns '"
				  + ((Column) displaced).getName() + "' and '" + name
				  + "' with names differing in case only when trying to sortColsByName!");
			}
		}
		// Gets the values in the right order by using the values() method.
		// The array is sized by the map, so it cannot end with null entries.
		return (Column[]) tm.values().toArray(new Column[tm.size()]);
	}

	/**
	 * Gets the unsegmentAttribute.
	 * 
	 * @return Returns a BaseAttribute
	 */
	public BaseAttribute getUnsegmentAttribute() throws M4CompilerError {
		return (BaseAttribute) this.getSingleParameter(Unsegment.UNSEGMENT_ATTRIB_PARAM);
	}

	/**
	 * Gets the name of the unsegment attribute.
	 * 
	 * @return the name as returned by the <code>BaseAttribute</code>
	 * @throws M4CompilerError if the parameter for the unsegment attribute
	 * is not set
	 */
	private String getUnsegmentAttributeName() throws M4CompilerError {
		final BaseAttribute unsegmentBa = this.getUnsegmentAttribute();
		if (unsegmentBa == null) {
			// Report a compiler error instead of a NullPointerException.
			throw new M4CompilerError("Unsegment: Parameter '" + Unsegment.UNSEGMENT_ATTRIB_PARAM +
									  "' is missing, no attribute for unsegmentation is specified.");
		}
		return unsegmentBa.getName();
	}

	/**
	 * Extends the column expression of the superclass by the unsegment
	 * attribute, unless it is a pseudo-attribute. For this a new
	 * <code>Column</code> is created, linked to the unsegment attribute and
	 * to the given <code>Columnset</code>, and registered as garbage of this
	 * <code>Step</code>.
	 * 
	 * @param csForOutputConcept the <code>Columnset</code> to create the
	 * <code>Column</code>s for
	 * @return the column expression for the select statement
	 * @throws M4CompilerError if the M4 interface reports an error
	 * @see miningmart.compiler.operator.ConceptOperator#generateColumns(Columnset)
	 */
	protected String generateColumns(Columnset csForOutputConcept) throws M4CompilerError 
	{
		try {
			String columnExpr = super.generateColumns(csForOutputConcept);
			if (this.unsegmentBaToBeWritten()) {
				// Setting up and linking a Column for the unsegment-attribute:
			    final BaseAttribute outBA = this.getUnsegmentAttribute();
	            Column outputColumn = (Column) this.getM4Db().createNewInstance(edu.udo.cs.miningmart.m4.core.Column.class);
		    	outputColumn.setId(0);
		    	outputColumn.setName(outBA.getName());
		    	outputColumn.setColumnset(csForOutputConcept);
	        	outputColumn.setBaseAttribute(outBA);
	            // The refined SQL definitions already provide a column with this name.
	            outputColumn.setSQLDefinition(outBA.getName());
	
				// This could also be derived from the original Segmentation Step/BA.
				// NOTE(review): 13 is presumably the id belonging to the string
				// datatype named in the next line - confirm.
				outputColumn.setColumnDataType(13);
				outputColumn.setColumnDataTypeName(RelationalDatatypes.RELATIONAL_DATATYPE_STRING);
				
				// Extending the columnExpr:
				columnExpr = columnExpr + ", " + outBA.getName() + " AS " + outBA.getName();

				this.getStep().addToTrash(outputColumn);
			}
	        return columnExpr;
		}
   		catch (M4Exception m4e)
   		{   throw new M4CompilerError("M4 interface error in " + this.getName() + ": " + m4e.getMessage());  } 
	}

	/**
	 * This method checks if the specified unsegment-attribute is a pseudo-attribute
	 * or a new <code>Feature</code> of the <code>OutputConcept</code>.
	 * Pseudo-attributes are recognized by a name starting with &quot;(&quot;.
	 * 
	 * @return <code>true</code> iff the unsegment attribute is a &quot;real&quot;
	 * <code>BaseAttribute</code>.
	 * @throws M4CompilerError if the unsegment attribute cannot be read
	 */
	private boolean unsegmentBaToBeWritten() throws M4CompilerError {
		return (! this.getUnsegmentAttributeName().startsWith("(") );
	}

	/**
	 * @return the distinct multi step branch informations of the input
	 * <code>Columnset</code>s, without the part for the unsegment attribute
	 */
	private Set getColsetsKeySet() throws M4CompilerError {
		this.aggregateColsets();
		return this.colsetsByMsbInfo.keySet();
	}

	/**
	 * @param key a multi step branch information from <code>getColsetsKeySet()</code>
	 * @return the input <code>Columnset</code>s stored for the key, or
	 * <code>null</code> if there are none
	 */
	private Vector getColsetVectorByKey(String key) throws M4CompilerError {
		this.aggregateColsets();
		return (Vector) this.colsetsByMsbInfo.get(key);
	}

	/**
	 * Aggregates all columnsets with the same MultiStepBranch-Info String
	 * (after removing the "baName" substring).
	 * Each of the resulting sets is stored in a Vector.
	 * Each such Vector is stored in a HashMap with its String as its key.
	 * The result is stored in the private field <code>colsetsByMsbInfo</code>,
	 * which should be accessed using the methods <code>getColsetsKeySet</code>,
	 * and <code>getColsetVectorByKey</code>.
	 * The aggregation is done only once, later calls return immediately.
	 * 
	 * @throws M4CompilerError if the input concept has no collection of
	 * <code>Columnset</code>s, if the unsegment attribute is missing, or if
	 * the M4 interface reports an error
	*/
	private void aggregateColsets() throws M4CompilerError {
		if (this.colsetsByMsbInfo != null)
			return;
		
		try {
			final Collection colsets = this.getInputConcept().getColumnSets();
			if (colsets == null)
				throw new M4CompilerError("InputConcept " + this.getInputConcept().getId() +
										" of operator Unsegment has no Columnset(s) !");
	        final String baName = this.getUnsegmentAttributeName();
	
			final HashMap hm = new HashMap();
			for (Iterator it = colsets.iterator(); it.hasNext(); ) {
				final Columnset cs = (Columnset) it.next();
				final String msbDef = cs.getMsbInfoWithoutAttrib(baName);
				Vector group = (Vector) hm.get(msbDef);
				if (group == null) {
					group = new Vector();
					hm.put(msbDef, group);
				}
				group.add(cs);
			}
			
			// Assigned at the end only, so a failure leaves the field unset.
			this.colsetsByMsbInfo = hm;
		}
   		catch (M4Exception m4e)
   		{   throw new M4CompilerError("M4 interface error in " + this.getName() + ": " + m4e.getMessage());  } 
	}

	/**
	 * This method is overridden in order to switch off multiple runs of the operator
	 * due to multistep support in the compiler's control structure.
	 * After the execution by the superclass and unless <code>lazy</code> is
	 * set, all remaining <code>Columnset</code>s of the input concept are
	 * skipped.
	 * You should not change this part, unless you know what you are doing!
	 * 
	 * @param lazy passed on to the superclass, <code>true</code> switches off
	 * the skipping of <code>Columnset</code>s
	 * @throws UserError
	 * @throws M4CompilerError
	 * @throws SQLException
	 * @throws M4Exception
	 * @see edu.udo.cs.miningmart.m4.core.operator.Operator#execute(boolean)
	 */
	public void execute(boolean lazy) throws UserError, M4CompilerWarning, M4CompilerError, M4Exception, SQLException {
		super.execute(lazy);
		try {
			if (!lazy) {
				final Concept inputConcept = this.getInputConcept();
				while (inputConcept.hasNextColumnSet())
					inputConcept.getNextColumnSet();
			}
		}
   		catch (M4Exception m4e)
   		{   throw new M4CompilerError("M4 interface error in " + this.getName() + ": " + m4e.getMessage());  } 
	}

	/**
	 * Overrides the corresponding method of class <code>ConceptOperator</code>.
	 * This is done in order to replace the information on multi step branching,
	 * underlying the generation of each <code>Columnset</code>.
	 * The new information is the one shared by the unified input
	 * <code>Columnset</code>s, without the part for the unsegment attribute.
	 *
	 * @param newCS the output <code>Columnset</code> to set the information for
	 * @param index the number of the output <code>Columnset</code>
	 * @throws M4CompilerError if the index is out of range
	 * @see edu.udo.cs.miningmart.m4.core.operator.ConceptOperator#setNewCSMultiStepBranch(Columnset, int)
	 */
	protected void setNewCSMultiStepBranch(final Columnset newCS, int index)
		throws M4CompilerError
	{
		newCS.setMultiStepBranch(this.getMsbKey(index));
	}

	/**
	 * Overrides the corresponding method in <code>ConceptOperator</code>.
	 * The name is built from the prefix &quot;CS_&quot;, the id of the
	 * <code>Step</code> and the given index.
	 * 
	 * @param index the number of the output <code>Columnset</code>
	 * @return the name for the output <code>Columnset</code>
	 * @see edu.udo.cs.miningmart.m4.core.operator.ConceptOperator#getNewCSName(int)
	 */
	public String getNewCSName(int index) {
		return "CS_" + this.getStep().getId() + "_" + index;
	}
}
/*
 * History
 * --------
 *
 * $Log: Unsegment.java,v $
 * Revision 1.6  2006/09/27 14:59:56  euler
 * New version 1.1
 *
 * Revision 1.5  2006/04/11 14:10:11  euler
 * Updated license text.
 *
 * Revision 1.4  2006/04/06 16:31:10  euler
 * Prepended license remark.
 *
 * Revision 1.3  2006/03/23 11:13:44  euler
 * Improved exception handling.
 *
 * Revision 1.2  2006/03/19 21:17:13  scholz
 * refactoring
 *
 * Revision 1.1  2006/01/03 09:54:21  hakenjos
 * Initial version!
 *
 */
