/*
 * MiningMart Version 1.0
 * 
 * Copyright (C) 2006 Martin Scholz, Timm Euler, 
 *                    Daniel Hakenjos, Katharina Morik
 *
 * Contact: miningmart@ls8.cs.uni-dortmund.de
 *
 * A list of contributing developers (other than the copyright 
 * holders) can be found at
 * http://mmart.cs.uni-dortmund.de/downloads/download.html
 * 
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program, see the file MM_HOME/LICENSE; if not, write
 * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
 * Floor, Boston, MA 02110-1301, USA.
 */
package edu.udo.cs.miningmart.operator;

import java.util.Collection;
import java.util.Iterator;

import edu.udo.cs.miningmart.db.DB;
import edu.udo.cs.miningmart.exception.M4CompilerError;
import edu.udo.cs.miningmart.exception.M4Exception;
import edu.udo.cs.miningmart.m4.BaseAttribute;
import edu.udo.cs.miningmart.m4.Column;
import edu.udo.cs.miningmart.m4.ColumnStatistics2;
import edu.udo.cs.miningmart.m4.Columnset;
import edu.udo.cs.miningmart.m4.utils.Print;

/**
 * This operator segments an input concept according to the different values
 * of a specified attribute, such that each segment contains the rows where
 * this attribute has the same value.
 * 
 * @author Timm Euler
 * @version $Id: SegmentationStratified.java,v 1.6 2006/04/11 14:10:11 euler Exp $
 */
public final class SegmentationStratified extends Segmentation {

    private String[] distribution; // the different values of the target attribute

	/**
	 * @see edu.udo.cs.miningmart.m4.core.operator.Segmentation#generateConditionForOp
	 */
    public String generateConditionForOp(int columnSetIndex) throws M4CompilerError
    {
    	Column theColumn;
        try {
        	theColumn = getTheAttribute().getCurrentColumn();
        }
   		catch (M4Exception m4e)
   		{   throw new M4CompilerError("M4 interface error in " + this.getName() + ": " + m4e.getMessage());  } 

        if (distribution == null)
        {  
        	// if the attribute is not numeric, compute the distinct elements
        	// by a simple SQL query; otherwise use the statistics functions:
        	if ((getTheAttribute().getConceptualDataType() != 7) &&
        	    (getTheAttribute().getConceptualDataType() != 9))
        	{ 	
				distribution = this.getM4Db().getDistinctElements(theColumn);  }
        	else
        	{   
				try
				{
					this.doPrint(Print.OPERATOR, "Computing value distribution for TheAttribute for SegmentationStratified...");
					theColumn.updateStatistics();				
					Collection distribC = theColumn.getDistributionStatistics();
					Iterator it = distribC.iterator();
					distribution = new String[distribC.size()];
					int i = 0;
					while (it.hasNext()) {	
						ColumnStatistics2 cstat = (ColumnStatistics2) it.next();
						distribution[i] = cstat.getDistributionValue();
						i++;
					}
				}
		   		catch (M4Exception m4e)
		   		{   throw new M4CompilerError("M4 interface error in " + this.getName() + ": " + m4e.getMessage());  } 
        	}
        }

        if (distribution.length == 0)
        {   throw new M4CompilerError("Error in SegmentationStratified: no values found for segmenting column!");  }

		if ((columnSetIndex < 0) || (columnSetIndex >= distribution.length))
        {  throw new M4CompilerError("Error in SegmentationStratified: wrong index for ColumnSet condition!");  }

        String curr = distribution[columnSetIndex];

        if ((getTheAttribute().getConceptualDataType() != 7) &&
            (getTheAttribute().getConceptualDataType() != 9))
        {   curr = DB.quote(curr);   }

        String sqlCondition = theColumn.getSQLDefinition() + " = " + curr;

	    return sqlCondition;
    } // end public String generateConditionForOp

	/**
	 * Getter method for the parameter "TheAttribute".
	 * 
	 * @return parameter "TheAttribute" as a BaseAttribute
	 */
    public BaseAttribute getTheAttribute() throws M4CompilerError {
    	return (BaseAttribute) this.getSingleParameter("TheAttribute");
    }

	/**
	 * @see edu.udo.cs.miningmart.m4.core.operator.Segmentation#numberOfSegments
	 */
    public int numberOfSegments() throws M4CompilerError
    {
    	edu.udo.cs.miningmart.m4.Columnset inCS;
    	Column theColumn;
    	try {
	        inCS = getInputConcept().getCurrentColumnSet();
	        theColumn = getTheAttribute().getCurrentColumn();
    	}
   		catch (M4Exception m4e)
   		{   throw new M4CompilerError("M4 interface error in " + this.getName() + ": " + m4e.getMessage());  } 

        // seems to occur frequently...:
        if (theColumn == null)
        {  throw new M4CompilerError("Operator SegmentationStratified: could not find the Column for parameter TheAttribute ('" +
                                     getTheAttribute().getName() + "') that belongs to the current ColumnSet '" +
                                     inCS.getName() + "'!");  }

        distribution = this.getM4Db().getDistinctElements(theColumn);
        if (distribution == null)
        {   return 0;   }

        return distribution.length;
    }
    
	/**
	 * The value embedded in the SQL-statement (WHERE attribute = value)
	 * is found by parsing the statement found in the <code>Columnset</code>.
	 * Together with the <code>BaseAttribute</code> it is added to the field
	 * <i>CS_MSBRANCH</i> in the table <i>COLUMNSET_T</i>.
	 * 
	 * @see edu.udo.cs.miningmart.m4.core.operator.Segmentation#setCSSegmentInfo(String, Columnset, int)
	 */
	public void setCSSegmentInfo(String inputMultiStepBranch, Columnset cs, int index)
	throws M4CompilerError
	{
		final String sqlDef = unBracket(cs.getSQLDefinition());
		final String value  = lastAssignmentValue(sqlDef);
		try {
			cs.addMultiStepBranchInfo(inputMultiStepBranch, this.getTheAttribute().getName(), value);
		}
   		catch (M4Exception m4e)
   		{   throw new M4CompilerError("M4 interface error in " + this.getName() + ": " + m4e.getMessage());  } 
	}

	/** Removes brackets from a string:
	 * If the first and last non-whitespace characters are '(' and ')'
	 * this methods returns the substring in between, otherwise the original
	 * <code>String</code> after a <code>trim()</code> is returned. */
	private static String unBracket(String s) {
		if (s == null)
			return null;
		s = s.trim();
		if (s.startsWith("(") && s.endsWith(")"))
			s = s.substring(1, s.length() - 1);
		return s;
	}

	/** This method finds the attribute value by which the corresponding
	 * columnset_t was generated. */
	private static String lastAssignmentValue(String s) {
		if (s == null)
			return null;
		int eqIdx = s.lastIndexOf('=');
		if (eqIdx != -1)
			s = s.substring(eqIdx + 1).trim();
		else s = "";
		return s;
	}
}
/*
 * History
 * -------
 *
 * $Log: SegmentationStratified.java,v $
 * Revision 1.6  2006/04/11 14:10:11  euler
 * Updated license text.
 *
 * Revision 1.5  2006/04/06 16:31:11  euler
 * Prepended license remark.
 *
 * Revision 1.4  2006/03/23 11:13:45  euler
 * Improved exception handling.
 *
 * Revision 1.3  2006/01/12 20:35:18  scholz
 * bugfix statistics
 *
 * Revision 1.2  2006/01/06 16:28:50  euler
 * Bugfixes
 *
 * Revision 1.1  2006/01/03 09:54:21  hakenjos
 * Initial version!
 *
 */

/*
 * Old History
 * -----------
 *
 * Revision 1.15  2002/11/19 13:08:42  euler
 * Added use of statistics function for
 * numeric TheAttribute.
 *
 * Revision 1.14  2002/10/25 13:43:25  euler
 * Changed Sql Definitions that FeatureConstruction
 * operators create. Adapted the Join operators.
 *
 * Revision 1.13  2002/10/24 10:20:05  euler
 * New Javadoc comments.
 *
 * Revision 1.12  2002/10/08 18:07:56  scholz
 * Prepared code for parallel execution of multiple compile-Threads.
 * Calls to static fields like DB.m4Db were removed. Now each
 * Thread has its own DB object, reachable via
 * CompilerAccessLogic or Case.
 * The methods getCase() and getM4Db() were added to M4Object.
 * The static methods of Parameter now need an additional
 * parameter of type DB.
 * All direct calls from Operators to these Parameter methods were
 * removed.
 * All old load() and print() routines were removed.
 * The static calls to Print were removed. Now CompilerAccessLogic
 * references a valid Print object for the current Thread. This is
 * reachable via Case. The methods doPrint for messages and
 * Exceptions were added to M4Object.
 * The Print mechanism is not fully functional, yet.
 * A getStatus method was added to the Interface. It is not
 * functional yet for multiple Threads.
 *
 * Status: Compiles.
 *
 * Revision 1.11  2002/08/29 16:23:19  scholz
 * Unsegmentation support for MRFC.
 * Bugfixes (e.g. "Unsegment" now finally in repository ;-) )
 *
 * Revision 1.10  2002/08/28 19:25:05  scholz
 * First step for automatically loading parameters and checking
 * their validity:
 * Information stored in OP_PARAM_T is represented in an own
 * class and automatically loaded by class Operator.
 *
 * Unsegment:
 * The field CS_MSBRANCH (table COLUMNSET_T) is now
 * set during operator execution.
 * Each Columnset has the information attached, by which
 * segmentations (e.g.' item=19; ' or '(KMeans)=1' ) it was
 * created. This information is used by the Unsegment operator
 * to perform a UNION on subsets of all Columnsets for a
 * Concept. Additionally the segmentation value is restored
 * when "reversing" SegmentationStratified.
 * Status of Unsegment:
 * - Sources compile
 * - CS_MSBRANCH is set correctly, except for MRFC.
 *
 * Revision 1.9  2002/08/05 10:35:48  euler
 * Restructured the operator hierarchy: introduction of SingleCSOperator
 * and MultipleCSOperator. Changed this class accordingly.
 *
 * Revision 1.8  2002/07/10 08:27:49  euler
 * Throws an exception now if the column for the segmenting
 * attribute cannot be found.
 *
 * Revision 1.7  2002/05/31 12:35:01  euler
 * *** empty log message ***
 *
 * Revision 1.6  2002/05/23 11:35:00  euler
 * A few tests worked, others still to be done.
 *
 * Revision 1.5  2002/05/21 12:51:01  euler
 * First beta test ok.
 *
 * Revision 1.4  2002/05/15 13:05:02  euler
 * First version that compiles.
 *
 * Revision 1.3  2002/05/15 10:36:43  euler
 * First version that compiles.
 *
 * Revision 1.2  2002/05/07 13:06:57  wiese
 * get+set methods
 *
 * Revision 1.1  2002/04/30 13:00:30  wiese
 * compileable version
 *
 * Revision 1.6  2002/04/19 15:23:12  wiese
 * Initial version after the handover
 *
 */


