/*
 * MiningMart Version 1.1
 * 
 * Copyright (C) 2006 Martin Scholz, Timm Euler, 
 *                    Daniel Hakenjos, Katharina Morik
 *
 * Contact: miningmart@ls8.cs.uni-dortmund.de
 *
 * A list of contributing developers (other than the copyright 
 * holders) can be found at
 * http://mmart.cs.uni-dortmund.de/downloads/download.html
 * 
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program, see the file MM_HOME/LICENSE; if not, write
 * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
 * Floor, Boston, MA 02110-1301, USA.
 */
package edu.udo.cs.miningmart.compiler.utils;

import java.sql.SQLException;
import java.util.Collection;
import java.util.Iterator;

import edu.udo.cs.miningmart.db.CompilerDatabaseService;
import edu.udo.cs.miningmart.exception.M4CompilerError;
import edu.udo.cs.miningmart.m4.Columnset;

/**
 * Objects of this class randomly split a source columnset into partitions of approximately
 * equal size. Every row of the source is assigned to one of the partitions by a random
 * draw; the assignment is first stored in a temporary table and then (optionally)
 * materialized as one destination table per partition.
 * @author Martin Scholz
 * @version $Id: RandomPartition.java,v 1.5 2006/09/29 17:20:01 euler Exp $
 */
public class RandomPartition extends Sampling {

	/** Names of the output tables, one per segment; the array length is the number of segments. */
	private final String    destTableNames[];
	/** If true, only the temporary assignment table is produced. */
	private final boolean   stopWithTempTable;

	/**
	 * This constructor does not draw a single sample; instead the
	 * source columnset is randomly split into partitions of approximately equal size.
	 * All database work (creating the temporary table, creating the partition
	 * tables, committing) is done inside this constructor.
	 * @param sourceCs the source <code>Columnset</code> to be partitioned
	 * @param selectedColumns if not <code>null</code> then this <code>Collection</code>
	 *        of <code>Column</code> names in uppercase letters specifies, which
	 *        <code>Column</code>s to copy into the partitioned tables.
	 *        <code>null</code> means to copy all <code>Column</code>s of the
	 *        <code>Columnset</code>.
	 * @param destTableNames an array specifying the names of the output tables.
	 *        Must not be <code>null</code>, because its length specifies the
	 *        number of segments.
	 * @param stopWithTempTable
	 *        If this value is true, then the output of the object is the temporary
	 *        table, rather than the segmented tables. The array destTableNames
	 *        must not be <code>null</code> in this case either, because it implicitly
	 *        specifies the number of segments.
	 * @param tempTable the name of the temporary table used by this class
	 * @param rowcount the number of rows in the source <code>Columnset</code>,
	 *        or <code>null</code>, if this value is not known in advance.
	 *        The number of rows is calculated by the class in the latter case.
	 * @param seed the random seed to be used or <code>null</code> to use a
	 *        &quot;random&quot; random seed.
	 * @param db a reference to the thread's <code>edu.udo.cs.miningmart.m4.core.utils.DB</code>
	 *        object.
	 * @throws M4CompilerError if the sampling fails or <code>destTableNames</code>
	 *         is <code>null</code>.
	 * */
	public RandomPartition( Columnset sourceCs,
							Collection selectedColumns,
					        String[] destTableNames,
					        boolean stopWithTempTable,
						    String tempTable,
					   	    Long rowcount,
					        Long seed,
					        CompilerDatabaseService db )
		throws M4CompilerError
	{
		super(sourceCs, selectedColumns, tempTable, rowcount, seed, db);

		// Fail fast with a meaningful error instead of a NullPointerException
		// later on, when the number of segments is derived from the array.
		if (destTableNames == null) {
			throw new M4CompilerError(
				"miningmart.compiler.utils.RandomPartition:\n"
				+ "The array of destination table names must not be null!");
		}
		this.destTableNames = destTableNames;
		this.stopWithTempTable = stopWithTempTable;

		try {
			// Create a table with the row numbers and their randomly chosen segments:
			createTempTable();
			
			if (!this.isStopWithTempTable()) {
				// Join this temporary table's segments with the source columnset.
				joinSourceWithTempTable();
			
				// Delete the temporary table:
				this.deleteTable(this.getTempTableName());
			}
			
			// Commit the changes.
			db.commitBusinessTransactions();
		}
		catch (SQLException e) {
			throw new M4CompilerError(
				"miningmart.compiler.utils.RandomPartition:\n"
				+ e.getMessage());
		}
	}

	/**
	 * @param i the number of the segment; segment numbers start with 0
	 * @return the destination table name for the given segment or
	 *         <code>null</code>, if the index is out of bound or the
	 *         array is <code>null</code>.
	 * */
	private String getDestTableName(int i) {
		String[] dtm = this.destTableNames;
		// Segment 0 is a valid segment, so the lower bound is inclusive.
		if ((dtm != null) && (i >= 0) && (i < dtm.length)) {
			return dtm[i];
		}
		else {
			return null;	
		}
	}
	
	/** @return number of segments, i.e. the number of destination table names */
	private int getHowMany() {
		return this.destTableNames.length;	
	}

	/**
	 * Draws the next random number and scales it by the number of segments.
	 * @return the truncated product of the next random double and the number of segments
	 */
	private int getNextRandomPartition() {
		double d = this.getNextRandomDouble();
		d *= this.getHowMany();
		return ((int) d);
	}

	/** 
	 * This method creates a temporary table (name is provided as a parameter to the constructor),
	 * which has two attributes - the_Id and the_Segment. The table contains the row numbers of
	 * the source columnset together with the partition they are in. Partition numbers start with 0.
	 * One INSERT statement is issued per source row; a commit is triggered every
	 * COMMIT_LIMIT inserts.
	 * @throws M4CompilerError if a database helper reports a compiler error
	 * @throws SQLException if one of the SQL statements fails
	 * */
	private void createTempTable()
		throws M4CompilerError, SQLException
	{
		// create empty temporary table for selected row numbers
		this.deleteTable(this.getTempTableName());
		
		this.dbWrite("CREATE TABLE " + this.getTempTableName() + 
		             " ( the_Id " + this.numericDatatypeName + 
		             ", the_Segment " + this.numericDatatypeName + " )");

		// Prepare SQL string for inserting numbers into table:
		final String prefix = "INSERT INTO " + this.getTempTableName() + " VALUES ( ";
		final String separ  = ", ";
		final String suffix = " )";

		// Loop over number of rows:	
		long loops = this.getRowCount();
		long commitCounter = 0;
		// With Postgres the row ids are taken from the inherited collection
		// allRowIds; otherwise the running numbers 1..rowcount are used.
		Iterator it = null;
		if (this.usingPostgres) {
			it = this.allRowIds.iterator();
		}
		
		for (long i=1; i<=loops; i++) {
			Long nextNumber = (this.usingPostgres ? ((Long) it.next()) : new Long(i));
			int partition = this.getNextRandomPartition();
			// Insert partition information into temporary table:
			this.dbWrite(prefix + nextNumber + separ + partition + suffix);
			if (++commitCounter >= COMMIT_LIMIT) {
					this.commit();
					commitCounter = 0;	
			}
		}
	}
	
	/**
	 * Materializes one destination table per segment by joining the temporary
	 * assignment table with the source table on the row identifier. An already
	 * existing destination table of the same name is removed first.
	 * @throws M4CompilerError if no row identifier is available or a database
	 *         helper reports a compiler error
	 * @throws SQLException if one of the SQL statements fails
	 */
	private void joinSourceWithTempTable()
		throws M4CompilerError, SQLException
	{
		if (this.rowIdentifierName == null) {
			throw new M4CompilerError("Cannot use a unique row identifier NULL (MySql does not support this operation)!");
		}
		int howMany = this.getHowMany();
		for (int segmentNumber=0; segmentNumber<howMany; segmentNumber++) {
	
			final String destTable = this.getDestTableName(segmentNumber);
			final String sqlDefs   = this.getSourceAttributeDefinitions();
			final String colNames  = this.getSourceAttributes();

			// Remove target table, if it already exists:
			this.deleteTable(destTable);

			// Find attribute name not yet defined in source columnset:
			String idAttr = "ROW_NUM_";
			while (colNames.indexOf(idAttr) >= 0) {
				idAttr += "Z"; // append Zs until the name is unique
			}

			// Construct query that joins the tables and materializes the result:
			String innerQuery =
				"( SELECT " + this.rowIdentifierName + " AS " + idAttr + ", "
				+ sqlDefs + " FROM "
				+ this.getSourceTableName() + " )";
		
			// The aliases T and S are referenced in exactly the case in which
			// they are declared, because some DBMS (e.g. MySQL on Unix) treat
			// table aliases as case-sensitive.
			String query =
					"CREATE TABLE " + destTable
					+ " AS SELECT " + colNames
					+ " FROM " + this.getTempTableName() + " T, "
					+ innerQuery + " S"
					+ " WHERE T.the_Segment = " + segmentNumber
					+ " AND S." + idAttr + " = T.the_Id";
	
			this.dbWrite(query);
		}
	}
	
	/**
	 * @return true, if the object's result should be the temporary table
	 */
	public boolean isStopWithTempTable() {
		return stopWithTempTable;
	}

}
/*
 * History
 * --------
 *
 * $Log: RandomPartition.java,v $
 * Revision 1.5  2006/09/29 17:20:01  euler
 * Still some mysql bugs
 *
 * Revision 1.4  2006/09/27 15:00:04  euler
 * New version 1.1
 *
 * Revision 1.3  2006/04/11 14:10:19  euler
 * Updated license text.
 *
 * Revision 1.2  2006/04/06 16:31:18  euler
 * Prepended license remark.
 *
 * Revision 1.1  2006/01/03 09:54:35  hakenjos
 * Initial version!
 *
 */
