/*
 * MiningMart Version 1.1
 * 
 * Copyright (C) 2006 Martin Scholz, Timm Euler, 
 *                    Daniel Hakenjos, Katharina Morik
 *
 * Contact: miningmart@ls8.cs.uni-dortmund.de
 *
 * A list of contributing developers (other than the copyright 
 * holders) can be found at
 * http://mmart.cs.uni-dortmund.de/downloads/download.html
 * 
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program, see the file MM_HOME/LICENSE; if not, write
 * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
 * Floor, Boston, MA 02110-1301, USA.
 */
package edu.udo.cs.miningmart.operator;

import java.sql.SQLException;

import edu.udo.cs.miningmart.compiler.utils.DrawSample;

import edu.udo.cs.miningmart.exception.M4CompilerError;
import edu.udo.cs.miningmart.exception.M4Exception;
import edu.udo.cs.miningmart.m4.Columnset;
import edu.udo.cs.miningmart.m4.Value;

/**
 * This operator randomly selects rows with a probability that is computed
 * such that roughly as many rows are selected as are given in the parameter HowMany.
 * 
 * @author Timm Euler
 * @version $Id: RowSelectionByRandomSampling.java,v 1.5 2006/09/27 14:59:55 euler Exp $
 */
public final class RowSelectionByRandomSampling extends RowSelection {
	
	/**
	 * Overrides the method from RowSelection because an own definition format is needed.
	 * 
	 * @see edu.udo.cs.miningmart.m4.core.operator.SingleCSOperator#generateSQLDefinition(String)
	 * @see edu.udo.cs.miningmart.m4.core.operator.RowSelection#generateSQLDefinition(String)
	 */
    public String generateSQLDefinition(String selectPart) throws M4CompilerError
    {
    	try {
			long size = this.getHowMany();
			Columnset cs = this.getInputConcept().getCurrentColumnSet();
			new DrawSample(cs, this.getNewCSName(), "tmp_" + this.getStep().getId(), null, size, null, this.getM4Db());
    	
			this.getM4Db().commitBusinessTransactions();
			this.getM4Db().addTableToTrash(this.getNewCSName(), this.getInputConcept().getCurrentColumnSet().getSchema(), this.getStep().getId());
			this.getM4Db().commitM4Transactions();
    	}
    	catch (SQLException sqle) {
			throw new M4CompilerError(
				"RowSelectionByRandomSampling: could not commit changes to the database:\n"
				+ sqle.getMessage());
    	}
   		catch (M4Exception m4e)
   		{   throw new M4CompilerError("M4 interface error in " + this.getName() + ": " + m4e.getMessage());  } 
		return this.getNewCSName();
    } // end public Columnset generateSQLDefinition
    
    // only needed for java compiler...
    public String generateConditionForOp()
    {   return null;   }    

    /**
     * Overrides the method from RowSelection because for Sampling a table is
     * more efficient.
     * 
     * @see edu.udo.cs.miningmart.m4.core.operator.ConceptOperator#getTypeOfNewColumnSet(int)
     */
    public String getTypeOfNewColumnSet()
    {  return Columnset.CS_TYPE_TABLE;  }
    
    public int getHowMany() throws M4CompilerError {
		Value v = (Value) this.getSingleParameter("HowMany");
		try {
			return Integer.parseInt(v.getValue());
		}
		catch (NumberFormatException e) {
			throw new M4CompilerError
			("Parameter 'HowMany' of Operator RowSelectionByRandomSampling does not"
			+ " contain an integer value: " + v.getValue());
		}
		catch (NullPointerException e) {
			throw new M4CompilerError
			("Parameter 'HowMany' of Operator RowSelectionByRandomSampling not found!");
		}
    }

}
/*
 * History
 * -------
 * 
 * $Log: RowSelectionByRandomSampling.java,v $
 * Revision 1.5  2006/09/27 14:59:55  euler
 * New version 1.1
 *
 * Revision 1.4  2006/04/11 14:10:10  euler
 * Updated license text.
 *
 * Revision 1.3  2006/04/06 16:31:10  euler
 * Prepended license remark.
 *
 * Revision 1.2  2006/03/23 11:13:45  euler
 * Improved exception handling.
 *
 * Revision 1.1  2006/01/03 09:54:21  hakenjos
 * Initial version!
 *
 */

/*
 * Old History
 * -----------
 *
 * Revision 1.19  2003/07/11 10:59:12  euler
 * Organized Imports.
 *
 * Revision 1.18  2003/06/24 13:25:42  scholz
 * Now this operator simply makes use of edu.udo.cs.miningmart.m4.core.utils.DrawSample to draw a sample of the specified size.
 *
 * Revision 1.17  2003/06/05 09:52:02  euler
 * Organised imports.
 *
 * Revision 1.16  2003/01/28 09:43:43  euler
 * Bugfix: tries to remove sample table now
 * before it is created.
 *
 * Revision 1.15  2003/01/21 15:01:53  scholz
 * replaced direct Statement-based database accesses by use of new java.utils.DB methods
 *
 * Revision 1.14  2002/12/03 18:14:00  euler
 * Bugfix.
 *
 * Revision 1.13  2002/11/29 17:38:32  euler
 * Updated random sampling.
 *
 * Revision 1.12  2002/11/29 13:55:52  euler
 * Bugfix.
 *
 * Revision 1.11  2002/11/28 11:13:23  euler
 * Creates a table instead of a view now.
 *
 * Revision 1.10  2002/11/26 17:57:47  euler
 * Updates on Feature Selection and SVMs.
 *
 * Revision 1.9  2002/11/26 14:20:38  euler
 * Updates for random sampling.
 *
 * Revision 1.8  2002/11/25 08:31:41  euler
 * Bugfix.
 *
 * Revision 1.7  2002/11/22 13:37:41  euler
 * Bugfix: compute count for columnset
 * if it does not exist.
 *
 * Revision 1.6  2002/10/08 18:07:56  scholz
 * Prepared code for parallel execution of multiple compile-Threads.
 * Calls to static fields like DB.m4Db were removed. Now each
 * Thread has its own DB object, reachable via
 * CompilerAccessLogic or Case.
 * The methods getCase() and getM4Db() were added to M4Object.
 * The static methods of Parameter now need an additional
 * parameter of type DB.
 * All direct calls from Operators to these Parameter methods were
 * removed.
 * All old load() and print() routines were removed.
 * The static calls to Print were removed. Now CompilerAccessLogic
 * references a valid Print object for the current Thread. This is
 * reachable via Case. The methods doPrint for messages and
 * Exceptions were added to M4Object.
 * The Print mechanism is not fully functional, yet.
 * A getStatus method was added to the Interface. It is not
 * functional yet for multiple Threads.
 *
 * Status: Compiles.
 *
 * Revision 1.5  2002/08/05 10:35:48  euler
 * Restructured the operator hierarchy: introduction of SingleCSOperator
 * and MultipleCSOperator. Changed this class accordingly.
 *
 * Revision 1.4  2002/05/21 12:51:01  euler
 * First beta test ok.
 *
 * Revision 1.3  2002/05/15 10:36:43  euler
 * First version that compiles.
 *
 * Revision 1.2  2002/05/07 13:06:56  wiese
 * get+set methods
 *
 * Revision 1.1  2002/04/30 13:00:29  wiese
 * compileable version
 *
 * Revision 1.6  2002/04/19 15:23:12  wiese
 * Initialversion nach der uebergabe
 *
 */


