/*
 * MiningMart Version 1.0
 * 
 * Copyright (C) 2006 Martin Scholz, Timm Euler, 
 *                    Daniel Hakenjos, Katharina Morik
 *
 * Contact: miningmart@ls8.cs.uni-dortmund.de
 *
 * A list of contributing developers (other than the copyright 
 * holders) can be found at
 * http://mmart.cs.uni-dortmund.de/downloads/download.html
 * 
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program, see the file MM_HOME/LICENSE; if not, write
 * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
 * Floor, Boston, MA 02110-1301, USA.
 */
package edu.udo.cs.miningmart.compiler.utils;

import java.sql.SQLException;
import java.util.Collection;
import java.util.Iterator;

import edu.udo.cs.miningmart.db.CompilerDatabaseService;
import edu.udo.cs.miningmart.exception.M4CompilerError;
import edu.udo.cs.miningmart.m4.Columnset;


/**
 * This class encapsulates the frequently occurring task of sampling data
 * from a database table. It can for instance be instantiated from
 * operators working on samples. Different constructors allow to
 * leave the calculation of unknown values to this class, so it may
 * for instance be invoked either with a sample size or with a sample
 * ratio.
 * <p>
 * All work is done by the constructors: they fill a temporary table with
 * the row numbers of the selected tuples, join it with the source into the
 * destination table, drop the temporary table again and commit. Example:
 * <pre>
 *   new DrawSample(cs, "V3", "V3_TMP", null, 0.00001, null, db);
 * </pre>
 * 
 * @author Martin Scholz
 * @version $Id: DrawSample.java,v 1.3 2006/04/11 14:10:18 euler Exp $
 */
public class DrawSample extends Sampling {

	/** Name of the table the sample is written to. */
	private final String destTable;

	/** Sample ratio: the threshold each random number is compared against in {@link #getNextBoolean()}. */
	private final double ratio;

	/**
	 * Default version of the constructor:
	 * <ul>
	 * <li>Random numbers are not fixed by specifying the random seed.</li>
	 * <li>The number of rows is not known in advance.</li>
	 * <li>A ratio is given rather than a sample size.</li>
	 * </ul>
	 * @param sourceCs the source <code>Columnset</code> to draw a sample from
	 * @param destTable the name of the output table
	 * @param tempTable the name of the temporary table used by this class
	 * @param ratio the sample ratio, a value in <code>[0, 1]</code>.
	 * @param db the <code>CompilerDatabaseService</code> that is handed to the
	 *        superclass and used to commit the result.
	 * @throws M4CompilerError if the sampling fails.
	 * */
	public DrawSample( Columnset sourceCs,
					   String    destTable,
					   String    tempTable,
					   double    ratio,
					   CompilerDatabaseService db )
		throws M4CompilerError
	{
		this(sourceCs, destTable, tempTable, null, ratio, null, db);
	}


	/**
	 * Draws a sample with a given sample ratio.
	 * <p>
	 * Note that this overload differs from the sample-size overload only in
	 * the type of the fifth parameter (<code>double</code> vs. <code>long</code>);
	 * passing an integral value there selects the sample-size constructor.
	 * 
	 * @param sourceCs the source <code>Columnset</code> to draw a sample from
	 * @param destTable the name of the output table
	 * @param tempTable the name of the temporary table used by this class
	 * @param rowcount the number of rows in the source <code>Columnset</code>,
	 * 		  or <code>null</code>, if this value is not known in advance.
	 * 		  The number of rows is calculated by the class in the latter case.
	 * @param ratio the sample ratio, a value in <code>[0, 1]</code>.
	 * @param seed the random seed to be used or <code>null</code> to use a
	 * 		  &quot;random&quot; random seed.
	 * @param db the <code>CompilerDatabaseService</code> that is handed to the
	 *        superclass and used to commit the result.
	 * @throws M4CompilerError if the sampling fails.
	 * */
	public DrawSample( Columnset sourceCs,
					   String destTable,
					   String tempTable,
					   Long rowcount,
					   double ratio,
					   Long seed,
					   CompilerDatabaseService db )
		throws M4CompilerError
	{
		super(sourceCs, null, tempTable, rowcount, seed, db);
		this.destTable = destTable;
		this.ratio     = ratio;

		this.drawSample(db);
	}

	/**
	 * Draws a sample of approximately the given size from all columns of the
	 * source <code>Columnset</code>.
	 * 
	 * @param sourceCs the source <code>Columnset</code> to draw a sample from
	 * @param destTable the name of the output table
	 * @param tempTable the name of the temporary table used by this class
	 * @param rowcount the number of rows in the source <code>Columnset</code>,
	 * 		  or <code>null</code>, if this value is not known in advance.
	 * 		  The number of rows is calculated by the class in the latter case.
	 * @param sampleSize the number of tuples the sample is approximately going to have
	 * @param seed the random seed to be used or <code>null</code> to use a
	 * 		  &quot;random&quot; random seed.
	 * @param db the <code>CompilerDatabaseService</code> that is handed to the
	 *        superclass and used to commit the result.
	 * @throws M4CompilerError if the sampling fails.
	 * */
	public DrawSample( Columnset sourceCs,
					   String destTable,
					   String tempTable,
					   Long rowcount,
					   long sampleSize,
					   Long seed,
					   CompilerDatabaseService db )
		throws M4CompilerError
	{
		this(sourceCs, null, destTable, tempTable, rowcount,
			 sampleSize, seed, db);
	}

	/**
	 * Draws a sample of approximately the given size, restricted to a subset
	 * of the source columns. The sample ratio is derived as
	 * <code>sampleSize / rowcount</code>.
	 * 
	 * @param sourceCs the source <code>Columnset</code> to draw a sample from
	 * @param selectedColumns a <code>Collection</code> with column names <b>in upper
	 *        case letters</b>. Specifies the subset of columns of the source columnset
	 *        to be contained in the sample table.
	 *        <code>null</code> indicates to select all columns.
	 * @param destTable the name of the output table
	 * @param tempTable the name of the temporary table used by this class
	 * @param rowcount the number of rows in the source <code>Columnset</code>,
	 * 		  or <code>null</code>, if this value is not known in advance.
	 * 		  The number of rows is calculated by the class in the latter case.
	 * @param sampleSize the number of tuples the sample is approximately going to have
	 * @param seed the random seed to be used or <code>null</code> to use a
	 * 		  &quot;random&quot; random seed.
	 * @param db the <code>CompilerDatabaseService</code> that is handed to the
	 *        superclass and used to commit the result.
	 * @throws M4CompilerError if the sampling fails.
	 * */
	public DrawSample( Columnset  sourceCs,
					   Collection selectedColumns,
					   String     destTable,
					   String     tempTable,
					   Long       rowcount,
					   long       sampleSize,
					   Long       seed,
					   CompilerDatabaseService         db )
		throws M4CompilerError
	{
		super(sourceCs, selectedColumns, tempTable, rowcount, seed, db);
		this.destTable = destTable;
		// Floating point division: an empty source yields NaN or Infinity
		// here, which is harmless because no row is visited in that case.
		this.ratio     = ((double) sampleSize) / this.getRowCount();

		this.drawSample(db);
	}


	/** @return name of the destination table */
	public String getDestTableName() {
		return this.destTable;
	}

	/**
	 * Decides for a single tuple whether it becomes part of the sample.
	 * 
	 * @return <code>true</code> iff the next random double delivered by
	 * <code>getNextRandomDouble()</code> does not exceed the sample ratio
	 * fixed by the constructor.
	 * */
	public boolean getNextBoolean() {
		return (this.getNextRandomDouble() <= this.ratio);
	}

	/**
	 * Runs the complete sampling procedure shared by all constructors:
	 * creates the temporary table, joins it with the source, drops the
	 * temporary table and commits. If an SQL error occurs, the temporary and
	 * the destination table are dropped and the error is reported as an
	 * <code>M4CompilerError</code>. In any case, a source table that is
	 * flagged as materialized (<code>materializedInput</code>) is dropped
	 * afterwards, so that it does not leak.
	 * 
	 * @param db the database service used to commit the result
	 * @throws M4CompilerError if the sampling fails
	 */
	private void drawSample(CompilerDatabaseService db)
		throws M4CompilerError
	{
		try {
			// Create a table with the rownumbers of the selected tuples:
			this.createTempTable();

			// Join this temporary table with the source columnset:
			this.joinSourceWithTempTable();

			// Delete the temporary table:
			this.deleteTable(this.getTempTableName());

			// Commit the changes.
			db.commitBusinessTransactions();
		}
		catch (SQLException e) {
			// Do not leave half-built tables behind:
			this.deleteTable(this.getTempTableName());
			this.deleteTable(this.getDestTableName());

			throw new M4CompilerError(
				"miningmart.compiler.utils.DrawSample:\n"
				+ e.getMessage());
		}
		finally {
			if (this.materializedInput) {
				this.deleteTable(this.getSourceTableName());
			}
		}
	}

	/** 
	 * This method creates a temporary table (name is provided as a parameter to the constructor),
	 * which has a single attribute - the_Id. The table contains the row numbers of the tuples
	 * selected by this operator. One <code>INSERT</code> is issued per selected tuple, and the
	 * inserts are committed whenever <code>COMMIT_LIMIT</code> of them have accumulated.
	 * 
	 * @throws M4CompilerError if a database helper of the superclass fails
	 * @throws SQLException if one of the SQL statements fails
	 * */
	private void createTempTable()
		throws M4CompilerError, SQLException
	{
		// create empty temporary table for selected row numbers
		this.deleteTable(this.getTempTableName());
		this.dbWrite("CREATE TABLE " + this.getTempTableName() + " ( the_Id " + this.numericDatatypeName + " )");

		// Prepare SQL string for inserting numbers into table:
		final String prefix = "INSERT INTO " + this.getTempTableName() + " VALUES ( ";
		final String suffix = " )";

		final long loops = this.getRowCount();
		long commitCounter = 0;

		// With Postgres the row ids are taken from allRowIds instead of
		// simply counting from 1 to the number of rows.
		Iterator it = null;
		if (this.usingPostgres) {
			it = this.allRowIds.iterator();
		}

		for (long i = 1; i <= loops; i++) {
			// The iterator has to advance for every row, selected or not,
			// to stay in step with the random decisions.
			final Long postgresRowId = (this.usingPostgres ? ((Long) it.next()) : null);

			if (this.getNextBoolean()) {
				final String rowId =
					(this.usingPostgres ? postgresRowId.toString() : Long.toString(i));

				// Insert selected numbers into temporary table:
				this.dbWrite(prefix + rowId + suffix);
				if (++commitCounter >= COMMIT_LIMIT) {
					this.commit();
					commitCounter = 0;
				}
			}
		}
		this.commit();
	}

	/** 
	 * This method creates the destination table, the name of which is provided as a parameter
	 * to the constructor. The table results from a join of the formerly created temporary table
	 * and the source table. An already existing destination table is dropped first.
	 * 
	 * @throws M4CompilerError if a database helper of the superclass fails
	 * @throws SQLException if one of the SQL statements fails
	 * */
	private void joinSourceWithTempTable()
		throws M4CompilerError, SQLException
	{
		final String target   = this.getDestTableName();
		final String sqlDefs  = this.getSourceAttributeDefinitions();
		final String colNames = this.getSourceAttributes();

		// Remove target table, if it already exists:
		this.deleteTable(target);

		// Find attribute name not yet defined in source columnset:
		String idAttr = "ROW_NUM_";
		while (colNames.indexOf(idAttr) >= 0) {
			idAttr += "Z"; // append Zs until the name is unique
		}

		// The source is always wrapped in a sub-select that exposes the row
		// identifier under the unique name found above.
		final String innerQuery =
			"( SELECT " + this.rowIdentifierName + " AS " + idAttr + ", "
			+ sqlDefs + " FROM "
			+ this.getSourceTableName() + " )";

		// Construct query that joins the tables and materializes the result:
		final String query =
			"CREATE TABLE " + target
			+ " AS SELECT " + colNames
			+ " FROM " + this.getTempTableName() + " T, "
			+ innerQuery + " S"
			+" WHERE s." + idAttr + " = t.the_Id";

		this.dbWrite(query);
	}
}
/*
 * History
 * -------
 *
 * $Log: DrawSample.java,v $
 * Revision 1.3  2006/04/11 14:10:18  euler
 * Updated license text.
 *
 * Revision 1.2  2006/04/06 16:31:18  euler
 * Prepended license remark.
 *
 * Revision 1.1  2006/01/03 09:54:35  hakenjos
 * Initial version!
 *
 */
