/*
 * MiningMart Version 1.1
 * 
 * Copyright (C) 2006 Martin Scholz, Timm Euler, 
 *                    Daniel Hakenjos, Katharina Morik
 *
 * Contact: miningmart@ls8.cs.uni-dortmund.de
 *
 * A list of contributing developers (other than the copyright 
 * holders) can be found at
 * http://mmart.cs.uni-dortmund.de/downloads/download.html
 * 
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program, see the file MM_HOME/LICENSE; if not, write
 * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
 * Floor, Boston, MA 02110-1301, USA.
 */
package edu.udo.cs.miningmart.operator;

import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import java.util.Vector;

import edu.udo.cs.miningmart.exception.M4CompilerError;
import edu.udo.cs.miningmart.exception.M4Exception;
import edu.udo.cs.miningmart.exception.ParameterDeselectedError;
import edu.udo.cs.miningmart.m4.BaseAttribute;
import edu.udo.cs.miningmart.m4.Column;
import edu.udo.cs.miningmart.m4.Columnset;
import edu.udo.cs.miningmart.m4.Concept;
import edu.udo.cs.miningmart.m4.Feature;
import edu.udo.cs.miningmart.m4.Step;
import edu.udo.cs.miningmart.m4.utils.Print;

/**
 * This operator joins several concepts using their specified keys.
 * The output is one concept.
 * 
 * @author Timm Euler
 * @version $Id: JoinByKey.java,v 1.7 2006/09/27 14:59:57 euler Exp $
 */
public class JoinByKey extends SingleCSOperator 
{
	/** Name of the operator parameter from which relateKeysToConcepts() reads the key attributes. */
	public static final String PARAMETER_KEYS = "TheKeys";
	
	/**
	 * The key attributes, reordered by relateKeysToConcepts() so that the entry at
	 * position i is the key found for sortedConcepts[i]. Stays null until that method has run.
	 */
	private BaseAttribute[] sortedKeys;
	/**
	 * The concepts to join, as read from the parameter "TheConcepts" by
	 * relateKeysToConcepts(). Stays null until that method has run.
	 */
	private Concept[] sortedConcepts;

	/**
	 * Reports the type of the column set that this operator creates for its
	 * output concept.
	 *
	 * @return always the constant <code>Columnset.CS_TYPE_VIEW</code>
	 * @see edu.udo.cs.miningmart.m4.core.operator.SingleCSOperator#getTypeOfNewColumnSet()
	 */
	public String getTypeOfNewColumnSet() {
		final String typeOfOutput = Columnset.CS_TYPE_VIEW;
		return typeOfOutput;
	}

	/**
	 * Assembles the SQL definition of the output column set: a parenthesised
	 * SELECT statement over all joined column sets, restricted by the key
	 * equality condition.
	 *
	 * @param selectPart the list of column expressions to put after "select"
	 * @return the string "(select ... from ... where ...)"
	 * @throws M4CompilerError if the FROM-part or the WHERE-part cannot be created
	 * @see edu.udo.cs.miningmart.m4.core.operator.SingleCSOperator#generateSQLDefinition(String)
	 */
    public String generateSQLDefinition(String selectPart) throws M4CompilerError
    {
    	// The FROM-part is computed before the WHERE-part.
    	final String fromPart = this.createListOfColumnSets();
    	final String wherePart = this.createCondition();

    	return "(select " + selectPart + " from " + fromPart + " where " + wherePart + ")";
    }

	/**
	 * Creates the columns of the output column set and returns the list of
	 * column expressions for the SELECT-part of the join.
	 * <p>
	 * This method overrides the inherited implementation because the copying
	 * of columns from input to output concept works differently for this
	 * operator: there are several input concepts, and the user may map input
	 * features to output features explicitly.
	 * <ol>
	 * <li>Every output feature that occurs in the "MapOutput" part of the
	 *     mapping is created from the input feature mapped to it.</li>
	 * <li>Every remaining output feature that is visible in this step is
	 *     created from the first visible input feature (searching the input
	 *     concepts in their order) that corresponds to it. Output features
	 *     without such an input feature, or whose input feature is a
	 *     deselected parameter, are skipped with a log message.</li>
	 * </ol>
	 *
	 * @param csForOutputConcept the column set that receives the new columns
	 * @return the column expressions, separated by ", " (without a trailing separator)
	 * @throws M4CompilerError if an M4 access fails or if not a single column
	 *         expression could be created
	 */
    protected String generateColumns(Columnset csForOutputConcept) throws M4CompilerError
    {
		String columnExpr = ""; // to be returned
		try
		{
			final Feature[][] mapping = this.getInOutMap();

			// 1. loop through features of output concept, these are the selected features
			final Vector outputFeaturesNotInMapping = new Vector();
			Iterator it = getOutputConcept().getFeatures().iterator();
			while (it.hasNext())
			{
				final Feature outF = (Feature) it.next();

				// if the output feature occurs in the map, select input feature
				// from the map, otherwise later via names from the input concepts:
				final int mapPos = this.checkOccurrence(mapping, outF);
				if (mapPos > -1) {
					columnExpr = this.createMetadata(mapping[0][mapPos], outF, csForOutputConcept, columnExpr);
				}
				else {
					outputFeaturesNotInMapping.add(outF);
				}
			}

			// 2. now that the mapped features are dealt with, go through those which are
			// not explicitly mapped, and map them by names:
			it = outputFeaturesNotInMapping.iterator();
			while (it.hasNext()) {
				final Feature outF = (Feature) it.next();

				if ( ! this.getStep().isVisible(outF)) {
					continue;
				}

				final Feature inF = this.findVisibleInputFeature(outF);

				// some checks:
				if (inF == null) {
					this.doPrint(Print.OPERATOR, "Output Concept '"	+ getOutputConcept().getName()
												+ "': skipped feature '" + outF.getName()
												+ "' because no corresponding input feature was found.");
					continue;
				}
				if (this.isDeselectedParameter(inF)) {
					this.doPrint(Print.PARAM, "Output Concept '" + getOutputConcept().getName()
											+ "': skipped feature '" + outF.getName()
											+ "' because the corresponding input feature was deselected by "
											+ "a FeatureSelection operator.");
					continue;
				}
				// the checks are done, so create output column for this feature:
				columnExpr = this.createMetadata(inF, outF, csForOutputConcept, columnExpr);
			}
		}
   		catch (M4Exception m4e)
   		{   throw new M4CompilerError("M4 interface error in " + this.getName() + ": " + m4e.getMessage());  } 

		// Without any column there is no trailing ", " to cut off; report this
		// as a compiler error rather than failing with an index exception.
		if (columnExpr.length() < 2) {
			throw new M4CompilerError("Operator JoinByKey: no column could be created for the output concept; "
									+ "check the feature mapping and the visible input features!");
		}

		// Delete the last ", "
		return columnExpr.substring(0, columnExpr.length() - 2);
    } // end protected String generateColumns()

	/**
	 * Searches the input concepts, in their order, for the first feature that
	 * is visible in this step and corresponds to the given output feature.
	 * Concepts without features are simply passed over.
	 *
	 * @param outF the output feature for which an input feature is wanted
	 * @return the matching input feature, or <code>null</code> if there is none
	 */
	private Feature findVisibleInputFeature(Feature outF) throws M4Exception, M4CompilerError
	{
		final Concept[] inputConcepts = this.getSortedConcepts();
		for (int c = 0; c < inputConcepts.length; c++) {
			final Concept currentConcept = inputConcepts[c];
			for (int f = 0; f < currentConcept.getNumberOfFeatures(); f++) {
				final Feature candidate = currentConcept.getFeature(f);
				if (this.getStep().isVisible(candidate) && outF.correspondsTo(candidate)) {
					return candidate;
				}
			}
		}
		return null;
	}

	/**
	 * Creates the output column for one pair of input/output BaseAttributes and
	 * appends the matching SELECT expression to the given column list.
	 * Overrides the superclass method because here a mapping is used.
	 * <p>
	 * The current column of the input BaseAttribute is copied into the output
	 * column set, the copy is added to the trash of the step, linked to the
	 * output BaseAttribute, and gets the name of the output BaseAttribute both
	 * as its name and as its SQL definition.
	 *
	 * @param inBA the input BaseAttribute whose current column is copied
	 * @param outBA the output BaseAttribute that names the new column
	 * @param csForOutputConcept the column set receiving the copy
	 * @param columnExpr the column list built so far
	 * @return <code>columnExpr</code> extended by
	 *         "<i>definition</i> AS <i>name</i>, "; the definition is prefixed
	 *         with the input column set's schema-qualified name when it equals
	 *         the input column's own name (ignoring case)
	 * @throws M4CompilerError if the M4 access fails while creating the column
	 * @see miningmart.compiler.operator.ConceptOperator#createMetadataForOneBA(BaseAttribute,BaseAttribute,Columnset,String)
	 */
    protected String createMetadataForOneBA( BaseAttribute inBA, 
                                             BaseAttribute outBA, 
                                             Columnset csForOutputConcept,
                                             String columnExpr) throws M4CompilerError {

        Column sourceCol;
        Column targetCol;
        try {
            sourceCol = inBA.getCurrentColumn();
            targetCol = sourceCol.copyColToCS(csForOutputConcept);
            // the copy is registered in the trash of the step
            this.getStep().addToTrash(targetCol);
            targetCol.setBaseAttribute(outBA);
        }
        catch (M4Exception m4e) {
            throw new M4CompilerError("JoinByKey: M4 Interface error occurred when creating metadata for " +
                                      "input BaseAttribute '" + inBA.getName() + "' (Id: " + inBA.getId() +
                                      ") and output BaseAttribute '" + outBA.getName() + "' (Id: " + outBA.getId() +
                                                  "): " + m4e.getMessage());
        }

        // In the output column set the column is simply addressed by its new name.
        targetCol.setSQLDefinition(outBA.getName());
        targetCol.setName(outBA.getName());

        // A definition that is just the column's own name is qualified with
        // the schema-qualified name of the input column set.
        final boolean isPlainColumn =
            sourceCol.getSQLDefinition().equalsIgnoreCase(sourceCol.getName());
        final String qualifier = isPlainColumn
            ? sourceCol.getColumnset().getSchemaPlusName() + "."
            : "";
        final String selectItem = qualifier + sourceCol.getSQLDefinition() + " AS " + outBA.getName();

        return columnExpr + selectItem + ", ";
    }

	/**
	 * Merges the MultiStepBranch information of all joined concepts into the
	 * new column set.
	 * In order to reduce the risk of different permutations of entries, which
	 * might be problematic with the current implementation of the <code>Unsegment</code>
	 * operator, the MultiStepBranch-<code>String</code>s of all <code>Columnset</code>s
	 * are sorted before merging them. Multiple occurrences of the same attribute are
	 * not supported; after the first occurrence of an attribute each further occurrence
	 * is removed.
	 * <p>
	 * NOTE(review): besides one entry per sorted concept, the entry of
	 * {@link #getInputConcept()} is added as well; in this class that is the
	 * first sorted concept, so its entry is handed over twice.
	 *
	 * @param newCS the column set whose MultiStepBranch information is set
	 * @param index not used by this implementation
	 * @throws M4CompilerError if the M4 access fails
	 * @see ConceptOperator#setNewCSMultiStepBranch(Columnset, int)
	 * @see MultiRelationalFeatureConstruction#createSingleColumnSet(int)
	 */
	protected void setNewCSMultiStepBranch(final Columnset newCS, int index)
		throws M4CompilerError
	{
		try {
			final Concept[] joined = this.getSortedConcepts();
			final int noOfConcepts = joined.length;

			// one slot per joined concept plus one for the input concept
			final String[] branches = new String[noOfConcepts + 1];
			for (int pos = 0; pos < noOfConcepts; pos++) {
				branches[pos] = nullToEmpty(joined[pos].getCurrentColumnSet().getMultiStepBranch());
			}
			branches[noOfConcepts] =
				nullToEmpty(this.getInputConcept().getCurrentColumnSet().getMultiStepBranch());

			Arrays.sort(branches);

			// start from an empty entry and add the sorted parts one by one
			newCS.setMultiStepBranch("");
			for (int pos = 0; pos < branches.length; pos++) {
				newCS.addMultiStepBranch(branches[pos]);
			}
		}
   		catch (M4Exception m4e)
   		{   throw new M4CompilerError("M4 interface error in " + this.getName() + ": " + m4e.getMessage());  } 
	}	

	/**
	 * Overrides the superclass method so that it remains usable although this
	 * operator has more than one input concept.
	 *
	 * @return the first of the sorted concepts
	 */
	public Concept getInputConcept()
	{
		final Concept[] concepts = this.getSortedConcepts();
		return concepts[0];
	}	

	/**
	 * Replaces a <code>null</code> reference by the empty string.
	 *
	 * @param s any string, possibly <code>null</code>
	 * @return <code>s</code> itself if it is not <code>null</code>, otherwise ""
	 */
	private static String nullToEmpty(String s) {
		if (s != null) {
			return s;
		}
		return "";
	}

	/**
	 * Answers whether the feature with the given name must be copied.
	 * This operator answers "no" for every name.
	 *
	 * @param nameOfFeature the name of the feature in question (not evaluated)
	 * @return always <code>false</code>
	 * @see edu.udo.cs.miningmart.m4.core.operator.ConceptOperator#mustCopyFeature(String)
	 */
	protected boolean mustCopyFeature(String nameOfFeature) {
		final boolean mustCopy = false;
		return mustCopy;
	}

	/**
	 * Returns the concepts to be joined, i.e. the value of the parameter
	 * "TheConcepts" as stored when the keys were related to the concepts.
	 *
	 * @return a Concept[]; <code>null</code> if the parameter has not been read yet
	 */
	protected Concept[] getSortedConcepts() {
		return this.sortedConcepts;
	}

	/**
	 * Returns the keys of the concepts to be joined, ordered so that the key
	 * at position i belongs to the concept at position i of
	 * {@link #getSortedConcepts()}.
	 *
	 * @return a BaseAttribute[]; <code>null</code> if the keys have not been sorted yet
	 */
	protected BaseAttribute[] getSortedKeys() {
		return this.sortedKeys;
	}

	/**
	 * Getter method for the mapping parameters "MapInput" and "MapOutput".
	 * <p>
	 * If "MapInput" is not given, an empty mapping is returned. A missing
	 * "MapOutput" is treated like an empty one, so it is only accepted together
	 * with an empty "MapInput"; any other combination is reported as a
	 * compiler error instead of failing on a <code>null</code> reference.
	 * 
	 * @return A two-dimensional array of Features.
	 *         At position [0][i] is the ith "MapInput",
	 *         at position [1][i] is the ith "MapOutput".
	 * @throws M4CompilerError if the two parameters differ in length or if
	 *         one of their entries is <code>null</code>
	 */
	protected Feature[][] getInOutMap() throws M4CompilerError {
		final Feature[] inputF = (Feature[]) this.getParameter("MapInput");
		final Feature[] outputF = (Feature[]) this.getParameter("MapOutput");

		if (inputF == null) {
			// no explicit mapping was specified
			return new Feature[2][0];
		}

		final int noOfMappings = inputF.length;
		final int noOfOutputs = (outputF == null) ? 0 : outputF.length;

		// this should have been checked already, but let's be safe:
		if (noOfOutputs != noOfMappings) {
			throw new M4CompilerError("Operator JoinByKey: need same number of MapInput and MapOutput attributes!");
		}

		final Feature[][] theMap = new Feature[2][noOfMappings];
		for (int l = 0; l < noOfMappings; l++) {
			if ((inputF[l] == null) || (outputF[l] == null)) {
				throw new M4CompilerError("Operator JoinByKey: mapping information is incomplete!");
			}
			theMap[0][l] = inputF[l];
			theMap[1][l] = outputF[l];
		}
		return theMap;
	}
	
	// Looks for the given feature in the output part (row 1) of the map and
	// returns the first position at which it occurs, or -1 if it does not
	// occur. Features are compared via their M4 ids.
	private int checkOccurrence(Feature[][] map, Feature f)
	{
		final Feature[] outputPart = map[1];
		int pos = 0;
		while (pos < outputPart.length) {
			if (outputPart[pos].getId() == f.getId()) {
				return pos;
			}
			pos++;
		}
		return -1;
	}
	
	// Builds the FROM-part for the SQL definition: the schema-qualified names
	// of the current column sets of all concepts, separated by ", ".
	private String createListOfColumnSets() throws M4CompilerError
	{
		try {
			final Concept[] concepts = this.getSortedConcepts();
			final StringBuffer names = new StringBuffer();

			for (int pos = 0; pos < concepts.length; pos++)
			{
				final edu.udo.cs.miningmart.m4.Columnset current = concepts[pos].getCurrentColumnSet();
				names.append(current.getSchemaPlusName());
				names.append(", ");
			}

			// cut off the separator that follows the last name:
			return names.substring(0, names.length() - 2);
		}
   		catch (M4Exception m4e)
   		{   throw new M4CompilerError("M4 interface error in " + this.getName() + ": " + m4e.getMessage());  } 
	} // end private String createListOfColumnSets()
	
	// Builds the WHERE-part for the SQL definition: a chain of equalities
	// between the keys of neighbouring concepts, e.g. for three concepts
	// "A.k1 = B.k2 AND B.k2 = C.k3". Each key is written as
	// <schema-qualified column set name>.<SQL definition of the key column>.
	private String createCondition() throws M4CompilerError
	{			
		try {
			final int lastPos = this.getSortedConcepts().length - 1;
			final StringBuffer cond = new StringBuffer();

			// left-hand side of the first equality:
			cond.append(this.qualifiedKeyAt(0));
			cond.append(" = ");

			// every inner key closes one equality and opens the next one:
			for (int pos = 1; pos < lastPos; pos++)
			{
				final String innerKey = this.qualifiedKeyAt(pos);
				cond.append(innerKey);
				cond.append(" AND ");
				cond.append(innerKey);
				cond.append(" = ");
			}

			// right-hand side of the last equality:
			cond.append(this.qualifiedKeyAt(lastPos));

			return cond.toString();
		}
   		catch (M4Exception m4e)
   		{   throw new M4CompilerError("M4 interface error in " + this.getName() + ": " + m4e.getMessage());  } 
	} // end private String createCondition()

	// Returns the key of the concept at the given position, qualified with the
	// schema-qualified name of that concept's current column set.
	private String qualifiedKeyAt(int pos) throws M4Exception, M4CompilerError
	{
		final edu.udo.cs.miningmart.m4.Columnset cs = this.getSortedConcepts()[pos].getCurrentColumnSet();
		final BaseAttribute key = this.getSortedKeys()[pos];
		return cs.getSchemaPlusName() + "." + key.getCurrentColumn().getSQLDefinition();
	}
	
	/*
	 * Reads the parameters "TheKeys" and "TheConcepts" and arranges the keys
	 * so that for every position i, sortedKeys[i] is a BaseAttribute that is
	 * actually part of sortedConcepts[i]. An exception is thrown if a
	 * parameter is missing or empty, if the numbers of keys and concepts
	 * differ, or if a concept contains none of the keys.
	 */
	private void relateKeysToConcepts() throws M4CompilerError
	{
		final BaseAttribute[] givenKeys = (BaseAttribute[]) this.getParameter(PARAMETER_KEYS);
		this.sortedConcepts = (Concept[]) this.getParameter("TheConcepts");

		final boolean keysMissing = (givenKeys == null) || (givenKeys.length == 0);
		if (keysMissing) {
			throw new M4CompilerError("Operator JoinByKey: No keys found!");
		}
		final boolean conceptsMissing = (this.sortedConcepts == null) || (this.sortedConcepts.length == 0);
		if (conceptsMissing) {
			throw new M4CompilerError("Operator JoinByKey: No concepts found!");
		}
		if (this.sortedConcepts.length != givenKeys.length) {
			throw new M4CompilerError("Operator JoinByKey: Number of keys must be equal to number of concepts!");
		}

		// bring the keys into the order of the concepts:
		this.sortedKeys = new BaseAttribute[givenKeys.length];
		int conceptPos = 0;
		while (conceptPos < this.sortedConcepts.length) {
			final int keyPos = this.findKeyPosition(this.sortedConcepts[conceptPos], givenKeys);
			this.sortedKeys[conceptPos] = givenKeys[keyPos];
			conceptPos++;
		}
	} // end private relateKeysToConcepts
	
	// Determines which of the given BaseAttributes is a feature of the given
	// concept (compared via M4 ids) and returns the position of the first such
	// BaseAttribute in the array. Throws an exception if the concept has no
	// features or contains none of the BaseAttributes.
	private int findKeyPosition(Concept c, BaseAttribute[] bas) throws M4CompilerError
	{
		final Collection featuresOfConcept;
		try {
			featuresOfConcept = c.getFeatures();
		}
   		catch (M4Exception m4e)
   		{   throw new M4CompilerError("M4 interface error in " + this.getName() + ": " + m4e.getMessage());  } 

		if ((featuresOfConcept == null) || (featuresOfConcept.size() == 0)) {
			throw new M4CompilerError("Operator JoinByKey: concept '" + c.getName() + 
									 "' has no features!");
		}

		for (int pos = 0; pos < bas.length; pos++) {
			// compare this candidate key with every feature of the concept
			for (Iterator features = featuresOfConcept.iterator(); features.hasNext(); ) {
				if (bas[pos].getId() == ((Feature) features.next()).getId()) {
					return pos;
				}
			}
		}
		throw new M4CompilerError("Operator JoinByKey: concept '" + c.getName() + 
		                          "' does not contain any of the given keys!");
	} // end private findKeyPosition
	
	/**
	 * Loads this operator for the given step and afterwards brings the keys
	 * into the order of the concepts.
	 *
	 * @param st the step to load this operator for
	 * @throws ParameterDeselectedError if the superclass loading reports it
	 * @throws M4CompilerError if loading fails or if keys and concepts cannot
	 *         be related to each other
	 * @see Operator#load(Step)
	 */
	public void load(Step st) throws ParameterDeselectedError, M4CompilerError {
		// the superclass is loaded first, before the parameters are read below
		super.load(st);
		relateKeysToConcepts();
	}
}
/*
 * History
 * --------
 * 
 * $Log: JoinByKey.java,v $
 * Revision 1.7  2006/09/27 14:59:57  euler
 * New version 1.1
 *
 * Revision 1.6  2006/09/12 11:42:55  euler
 * bugs fixed
 *
 * Revision 1.5  2006/08/25 13:06:23  euler
 * *** empty log message ***
 *
 * Revision 1.4  2006/04/11 14:10:12  euler
 * Updated license text.
 *
 * Revision 1.3  2006/04/06 16:31:11  euler
 * Prepended license remark.
 *
 * Revision 1.2  2006/02/02 09:14:25  euler
 * Removed unused variables.
 *
 * Revision 1.1  2006/01/03 09:54:22  hakenjos
 * Initial version!
 *
 */
