/*
 * MiningMart Version 1.1
 * 
 * Copyright (C) 2006 Martin Scholz, Timm Euler, 
 *                    Daniel Hakenjos, Katharina Morik
 *
 * Contact: miningmart@ls8.cs.uni-dortmund.de
 *
 * A list of contributing developers (other than the copyright 
 * holders) can be found at
 * http://mmart.cs.uni-dortmund.de/downloads/download.html
 * 
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program, see the file MM_HOME/LICENSE; if not, write
 * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
 * Floor, Boston, MA 02110-1301, USA.
 */
package edu.udo.cs.miningmart.operator;

import java.sql.SQLException;
import java.util.Collection;
import java.util.Iterator;

import edu.udo.cs.miningmart.db.DB;
import edu.udo.cs.miningmart.exception.M4CompilerError;
import edu.udo.cs.miningmart.exception.M4Exception;
import edu.udo.cs.miningmart.m4.Column;
import edu.udo.cs.miningmart.m4.ColumnStatistics2;
import edu.udo.cs.miningmart.m4.utils.Print;

/**
 * This operator uses statistical information about the distribution of 
 * the values in the target attribute to randomly choose replacements for
 * the missing values such that the distribution is not expected to change.
 * <p>
 * The replacement is realised by installing a database function (via
 * <code>CREATE OR REPLACE FUNCTION</code>) that maps a row number to one of
 * the values found in the column's distribution statistics. The operator
 * refuses to run on any DBMS other than Oracle.
 * 
 * @author  Timm Euler
 * @version $Id: AssignStochasticValue.java,v 1.6 2006/09/27 14:59:57 euler Exp $
 */ 
public class AssignStochasticValue extends MissingValues {

    /**
     * Creates and installs the database function that delivers the stochastic
     * replacement values, and returns the SQL expression that calls it.
     * <p>
     * If the column has no distribution statistics yet, they are updated once
     * before giving up. The installed function is also registered with the
     * M4 database object's function trash for the current step.
     * 
     * @param columnWithMissingValues the column whose missing values are to
     *        be replaced; it must be connected to a columnset and a base
     *        attribute
     * @return the call expression <code>&lt;functionName&gt;(ROWNUM)</code>
     * @throws M4CompilerError if the DBMS is not Oracle, the column is not
     *         properly connected, no distribution values are available, the
     *         row count cannot be read as a number, the function cannot be
     *         created, or any other M4 exception occurs
     * @see MissingValues#generateValueForOp
     */
    public String generateValueForOp(Column columnWithMissingValues)
		throws M4CompilerError
	{
		
		if (this.getM4Dbms() != DB.ORACLE) {
			throw new M4CompilerError("Sorry, 'AssignStochasticValue' currently only available for ORACLE!");
		}
		
		try {
			final edu.udo.cs.miningmart.m4.Columnset cs = columnWithMissingValues.getColumnset();
			final edu.udo.cs.miningmart.m4.BaseAttribute ba = columnWithMissingValues.getTheBaseAttribute();

			if (cs == null || ba == null) {
				throw new M4CompilerError("'AssignStochasticValue': TargetAttribute not properly connected!");
			}

			// Use existing statistics if present; otherwise compute them once.
			Collection theValues = columnWithMissingValues.getDistributionStatistics();
			if (theValues == null || theValues.size() == 0) {
				columnWithMissingValues.updateStatistics();
				theValues = columnWithMissingValues.getDistributionStatistics();
			}
		
			if ((theValues == null) || (theValues.size() == 0))
			{	throw new M4CompilerError("Operator AssignStochasticValue: TheTargetAttribute ("
					 					  + this.getTheTargetAttribute().getName()+ ") has no values!"); 
			}

			// The row count arrives as a string; report an unparsable value
			// as a compiler error instead of leaking a NumberFormatException.
			final String rowCount = cs.readOrComputeCount();
			final long noOfRows;
			try {
				noOfRows = Long.parseLong(rowCount);
			}
			catch (NumberFormatException nfe) {
				throw new M4CompilerError(
					"AssignStochasticValue: could not read the number of rows, got '"
						+ rowCount + "'");
			}

			final String missingValueFunction =
				this.buildFunctionSource(columnWithMissingValues, theValues, noOfRows);
				
			this.doPrint(Print.OPERATOR, "Installing Stochastic Missing Value Function:");
			try
			{
				this.getM4Db().executeBusinessSqlWrite(missingValueFunction);
				this.getM4Db().addFunctionToTrash( this.getFunctionName(), cs.getSchema(), this.getStep().getId());
			}
			catch (SQLException sqle)
			{
				throw new M4CompilerError(
					"AssignStochasticValue: could not create the following sql function: "
						+ missingValueFunction
						+ "; got SQL error: "
						+ sqle.getMessage());
			}
		}
		catch (M4Exception e) {
			throw new M4CompilerError(e.getMessage());	
		}
		
		return this.getFunctionName() + "(ROWNUM)";
	}

	/**
	 * Assembles the source text of the database function.
	 * <p>
	 * The function draws a number <code>r</code> from <code>m4randomnr</code>
	 * (seed 0, the row number, and the number of rows) and compares it with
	 * the running sum of the distribution counts: every value except the last
	 * gets an <code>IF</code>/<code>ELSIF r &lt; sum</code> branch, and the
	 * last value is the <code>ELSE</code> branch. If there is only one value,
	 * no conditional is emitted at all and the value is assigned directly.
	 * <p>
	 * NOTE(review): distribution values are inserted into the SQL text as
	 * delivered by <code>getDistributionValue()</code>; whether they are
	 * already quoted for non-numeric columns should be confirmed.
	 * 
	 * @param columnWithMissingValues the column providing the return data type
	 * @param theValues non-empty collection of <code>ColumnStatistics2</code>
	 * @param noOfRows the number of rows passed to <code>m4randomnr</code>
	 * @return the complete <code>CREATE OR REPLACE FUNCTION</code> statement
	 */
	private String buildFunctionSource(Column columnWithMissingValues, Collection theValues, long noOfRows)
		throws M4CompilerError, M4Exception
	{
		// StringBuffer rather than StringBuilder: nothing in this file
		// relies on Java 5, so stay compatible with older compilers.
		final StringBuffer sql = new StringBuffer();
		sql.append("CREATE OR REPLACE FUNCTION ");
		sql.append(this.getFunctionName());
		sql.append(" (cROWNUM IN NUMBER) ");
		sql.append(" RETURN ");
		sql.append(columnWithMissingValues.getColumnDataTypeName());
		sql.append(" AS \n");
		sql.append("  returnVal ");
		sql.append(columnWithMissingValues.getColumnDataTypeName());
		sql.append(";\n");
		sql.append("  r NUMBER(38,10);\n");
		sql.append("begin\n");
		sql.append(" --- Generate a Random Number between 1 and nr of rows\n");
		sql.append("  r := m4randomnr(");
		sql.append(0); // this.getM4Db().getRandomSeedNr()
		sql.append(", cROWNUM, ");
		sql.append(noOfRows);
		sql.append(");\n");

		long covered = 0;             // running sum of the distribution counts
		boolean branchOpened = false; // true once an "IF ..." has been emitted

		final Iterator it = theValues.iterator();
		while (it.hasNext()) {
			final ColumnStatistics2 stat = (ColumnStatistics2) it.next();
			if (it.hasNext()) {
				// Not the last entry: emit "IF"/"ELSIF" with the cumulative
				// threshold. The trailing " ELS" is completed either by the
				// next "IF" (-> ELSIF) or by the final "E" (-> ELSE).
				covered = covered + stat.getDistributionCount();
				sql.append("IF r < ");
				sql.append(covered);
				sql.append(" THEN returnVal := ");
				sql.append(stat.getDistributionValue());
				sql.append(";\n");
				sql.append(" ELS");
				branchOpened = true;
			}
			else if (branchOpened) {
				// Last entry closes the conditional as the ELSE branch.
				sql.append("E returnVal := ");
				sql.append(stat.getDistributionValue());
				sql.append(";\n");
				sql.append("END IF; ");
			}
			else {
				// Only one distinct value: there is no IF to close, so a
				// plain assignment is the only valid form.
				sql.append("  returnVal := ");
				sql.append(stat.getDistributionValue());
				sql.append(";\n");
			}
		}

		sql.append(" return(returnVal);\n");
		sql.append("end;\n");
		return sql.toString();
	}

	/**
	 * Builds the name of the database function from the prefix "mv_", the id
	 * of the output attribute and the current loop number.
	 * 
	 * @return the function name used for creation, trash registration and
	 *         the returned call expression
	 */
	private String getFunctionName() throws M4CompilerError	{
		return "mv_" + this.getTheOutputAttribute().getId() + "_" + this.getCurrentLoopNumber();
	}	
}
/*
 * Historie
 * --------
 *
 * $Log: AssignStochasticValue.java,v $
 * Revision 1.6  2006/09/27 14:59:57  euler
 * New version 1.1
 *
 * Revision 1.5  2006/04/11 14:10:12  euler
 * Updated license text.
 *
 * Revision 1.4  2006/04/06 16:31:11  euler
 * Prepended license remark.
 *
 * Revision 1.3  2006/03/23 11:13:45  euler
 * Improved exception handling.
 *
 * Revision 1.2  2006/01/12 20:35:18  scholz
 * bugfix statistics
 *
 * Revision 1.1  2006/01/03 09:54:21  hakenjos
 * Initial version!
 *
 */

/*
 * Old Historie
 * -----------
 *
 * Revision 1.11  2003/07/16 09:45:23  euler
 * bugfix.
 *
 * Revision 1.10  2002/11/11 11:50:36  euler
 * Added computation of statistics if they
 * are not up-to-date.
 *
 * Revision 1.9  2002/10/21 16:17:21  scholz
 * javadoc comments: fixed some invalid tags
 *
 * Revision 1.8  2002/10/08 18:07:55  scholz
 * Prepared code for parallel execution of multiple compile-Threads.
 * Calls to static fields like DB.m4Db were removed. Now each
 * Thread has its own DB object, reachable via
 * CompilerAccessLogic or Case.
 * The methods getCase() and getM4Db() were added to M4Object.
 * The static methods of Parameter now need an additional
 * parameter of type DB.
 * All direct calls from Operators to these Parameter methods were
 * removed.
 * All old load() and print() routines were removed.
 * The static calls to Print were removed. Now CompilerAccessLogic
 * references a valid Print object for the current Thread. This is
 * reachable via Case. The methods doPrint for messages and
 * Exceptions were added to M4Object.
 * The Print mechanism is not fully functional, yet.
 * A getStatus method was added to the Interface. It is not
 * functional yet for multiple Threads.
 *
 * Status: Compiles.
 *
 * Revision 1.7  2002/09/05 17:06:29  scholz
 * cleaned up some more operators (wrt. autoload), e.g.
 * Scaling and sub-classes and some MissingValues operators.
 *
 * Revision 1.6  2002/08/05 10:35:46  euler
 * Restructured the operator hierarchy: introduction of SingleCSOperator
 * and MultipleCSOperator. Changed this class accordingly.
 *
 * Revision 1.5  2002/07/11 08:36:34  euler
 * Changed printing of messages.
 *
 * Revision 1.4  2002/06/03 12:02:36  euler
 * Restructured FeatureConstruction.
 *
 * Revision 1.3  2002/05/15 10:36:42  euler
 * First version that compiles.
 *
 * Revision 1.2  2002/05/14 14:05:49  bauschul
 * errors corrected, but not the errors concerning the baseattributes
 *
 * Revision 1.1  2002/04/30 13:00:27  wiese
 * compileable version
 *
 * Revision 1.6  2002/04/19 15:23:12  wiese
 * Initial version after the handover
 *
 */
