/*
 * MiningMart Version 1.0
 * 
 * Copyright (C) 2006 Martin Scholz, Timm Euler, 
 *                    Daniel Hakenjos, Katharina Morik
 *
 * Contact: miningmart@ls8.cs.uni-dortmund.de
 *
 * A list of contributing developers (other than the copyright 
 * holders) can be found at
 * http://mmart.cs.uni-dortmund.de/downloads/download.html
 * 
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program, see the file MM_HOME/LICENSE; if not, write
 * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
 * Floor, Boston, MA 02110-1301, USA.
 */
package edu.udo.cs.miningmart.operator;
import java.io.File;
import java.io.IOException;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Vector;

import edu.udo.cs.yale.example.Attribute;
import edu.udo.cs.yale.example.DataRow;
import edu.udo.cs.yale.example.DatabaseDataRowReader;
import edu.udo.cs.yale.example.DoubleArrayDataRow;
import edu.udo.cs.yale.example.Example;
import edu.udo.cs.yale.example.ExampleReader;
import edu.udo.cs.yale.example.ExampleSet;
import edu.udo.cs.yale.example.ExampleTable;
import edu.udo.cs.yale.example.MemoryExampleTable;
import edu.udo.cs.yale.example.SimpleExampleSet;
import edu.udo.cs.yale.operator.OperatorException;
import edu.udo.cs.yale.operator.learner.Model;
import edu.udo.cs.yale.tools.DatabaseHandler;
import edu.udo.cs.yale.tools.Ontology;
import edu.udo.cs.yale.tools.att.AttributeSet;
import edu.udo.cs.miningmart.exception.DbConnectionClosed;
import edu.udo.cs.miningmart.exception.M4CompilerError;
import edu.udo.cs.miningmart.exception.M4Exception;
import edu.udo.cs.miningmart.m4.BaseAttribute;
import edu.udo.cs.miningmart.m4.Columnset;
import edu.udo.cs.miningmart.m4.RelationalDatatypes;
import edu.udo.cs.miningmart.m4.Value;
import edu.udo.cs.miningmart.m4.utils.Print;

/**
 * This operator applies a model written by YALE to a set of examples.
 * 
 * Please note:<ul>
 * <li>Up to now the order and the names of the predicting attributes
 * have to be identical during learning and application! The relevant order is given by
 * the operator's parameter array; the relevant names in MiningMart are the attribute names
 * at database (SQL) level.</li>
 * <li> The key attribute is ignored by the ModelApplier of YALE. It has to be a unique
 * attribute, otherwise the operator will not work correctly and exit with an exception!
 * It is not necessary for the applier to work to have an ID attribute during learning.
 * </li>
 * </ul>
 * 
 * @author Martin Scholz
 * @version $Id: YaleModelApplier.java,v 1.4 2006/04/11 14:10:11 euler Exp $
 */
public class YaleModelApplier extends ModelApplier {

	/** Number of tuples the model is applied to per block. */
	private static final int BLOCKSIZE = 100000;

	/** The Yale model currently in use. */	
	private Model yaleModel;

	/**
	 * Determines the M4 relational datatype for the predicted Column(s).
	 * The type is numerical only if a YALE model is currently loaded and its
	 * label is a numerical attribute; in every other case the string type is
	 * used as the default.
	 *
	 * @param ba the predicted <code>BaseAttribute</code> (not inspected here)
	 * @return M4 relational datatype for the predicted Column(s)
	 */
	protected String getRelationalDtForNewColumn(BaseAttribute ba) {
		boolean numericLabel = false;
		if (this.yaleModel != null) {
			numericLabel = isNumericalYaleAttribute(this.yaleModel.getLabel());
		}
		return numericLabel
			? RelationalDatatypes.RELATIONAL_DATATYPE_NUMBER
			: RelationalDatatypes.RELATIONAL_DATATYPE_STRING; // default value
	}

	/**
	 * Determines the database level datatype of the predicted Column(s),
	 * as needed for creating the table.
	 * <ul>
	 * <li>Numerical label of the current model: the database's number type.</li>
	 * <li>Other label with a known value collection: a string type that is as
	 * long as the longest label value (at least 1).</li>
	 * <li>No model, no label or no value collection: a string type of length 30.</li>
	 * </ul>
	 *
	 * @param ba the predicted <code>BaseAttribute</code> (not inspected here)
	 * @return the database level datatype of the predicted Column(s)
	 */
	protected String getDatabaseDtForNewColumn(BaseAttribute ba) throws M4CompilerError {
		final boolean useBusinessDbCore = true;
		final Attribute label = (this.yaleModel == null) ? null : this.yaleModel.getLabel();

		if (label != null) {
			if (isNumericalYaleAttribute(label)) {
				// no need to find a length for a numerical attribute
				return this.getM4Db().getDbNameOfM4Datatype(
					RelationalDatatypes.RELATIONAL_DATATYPE_NUMBER, 0, useBusinessDbCore);
			}

			Collection labelValues = label.getValues();
			if (labelValues != null) {
				// The column has to be able to hold the longest of the label's values.
				int longest = 1;
				for (Iterator values = labelValues.iterator(); values.hasNext(); ) {
					String candidate = (String) values.next();
					if (candidate != null) {
						longest = Math.max(longest, candidate.length());
					}
				}
				return this.getM4Db().getDbNameOfM4Datatype(
					RelationalDatatypes.RELATIONAL_DATATYPE_STRING, longest, useBusinessDbCore);
			}
		}

		// Fallback length of 30; this may turn out to be too short.
		return this.getM4Db().getDbNameOfM4Datatype(
			RelationalDatatypes.RELATIONAL_DATATYPE_STRING, 30, useBusinessDbCore);
	}

	/**
	 * Predicts the values applying the specified models. 
	 * The models are applied to larger blocks of data, trading off performance when complex
	 * models are applied (which need to be parsed and loaded for each block) against memory
	 * consumption when huge amounts of data need to be processed.
	 * <p>
	 * The number of input tuples is counted first; afterwards one query per block of
	 * <code>BLOCKSIZE</code> rows is requested from <code>getDatabaseQueryForRows</code>
	 * and handed to <code>predictionForBlock</code>. The lower bound of the first block
	 * is passed as <code>null</code>, each following block starts at the upper bound
	 * of its predecessor.
	 *
	 * @throws M4CompilerError if the row count cannot be determined or an
	 * <code>SQLException</code> occurs while processing a block
	 */
	protected void predict() throws M4Exception, M4CompilerError {
		try {
			Columnset inputCs = this.getInputConcept().getCurrentColumnSet();
			String countSql = "SELECT COUNT(*) FROM " + inputCs.getSQLDefinition();
			Long countL = this.executeBusinessSingleValueSqlReadL(countSql);
			if (countL == null) {
				throw new M4CompilerError(
					"YaleModelApplier: Could not determine the number of input tuples!");
			}
			final long count = countL.longValue();

			// The offset is a long on purpose: the count is a long, and an int offset
			// would overflow (and never terminate properly) for very large tables.
			Long toRowNum = null;
			for (long offset = 0; offset < count; offset += BLOCKSIZE) {
				Long fromRowNum = toRowNum;
				toRowNum = new Long(offset + BLOCKSIZE);
				String sql = getDatabaseQueryForRows(fromRowNum, toRowNum);
				this.predictionForBlock(sql);
			}
		}
		catch (SQLException e) {
			throw new M4CompilerError("YaleModelApplier: SQLException!\n" + e.getMessage());
		}
	}
	
	/** 
	 * Does the work of <code>predict()</code> for a single block: loads the block
	 * into a YALE table, applies one model per loop and writes the predictions
	 * back to the database.
	 *
	 * @param sql a <code>String</code> with the SQL statement incorporating the
	 * block restrictions. The current implementation might be ORACLE specific!
	 */
	private void predictionForBlock(String sql)
    	throws SQLException, DbConnectionClosed, M4CompilerError	
	{
		// Load the tuples of this block into a YALE table in main memory.
		ExampleTable blockTable = createDatabaseExampleTable(sql);

		// Models can only be applied to an ExampleSet view of that table.
		ExampleSet blockExamples;
		try {
			blockExamples = this.exampleTableToExampleSet(blockTable);
		}
		catch (OperatorException e) {
			throw new M4CompilerError(
				"YaleModelApplier: Could not turn ExampleTable into ExampleSet!\n"
				+ e.getMessage());
		}

		// Database names and YALE attributes of the predictions, in loop order.
		List outputColumnNames = new Vector();
		List outputYaleAttributes = new Vector();

		int loop = 0;
		while (loop < this.getNumberOfLoops()) {
			// Name of the attribute predicted in this loop:
			String outputColumn = this.getPredictedAttribute(loop).getName();
			outputColumnNames.add(outputColumn);

			// Model file configured for this loop:
			File modelFile = this.getModelFile(loop);

			// Each model is applied to its own clone of the example set; the
			// attribute it creates is remembered for reading the values later.
			ExampleSet workingCopy = (ExampleSet) blockExamples.clone();
			outputYaleAttributes.add(
				this.applyModel(modelFile, workingCopy, outputColumn, loop));

			loop++;
		}

		// YALE attributes of the step's keys:
		List keyYaleAttributes = getKeyAttributesForExampleSet(blockExamples);

		// An example set over keys plus predictions allows fetching all values
		// that have to be written during one simple iteration.
		List resultAttributes = new Vector(keyYaleAttributes);
		resultAttributes.addAll(outputYaleAttributes);
		SimpleExampleSet resultView = new SimpleExampleSet(blockTable, resultAttributes);

		// Store the predictions in the new database table.
		this.writeResultsToDb(resultView, outputColumnNames, keyYaleAttributes, outputYaleAttributes);
	}

	/**
	 * Helper method collecting, for each of the step's key column names, the
	 * attribute the specified example set returns for that name.
	 *
	 * @param exampleSet the <code>ExampleSet</code> to look the keys up in
	 * @return a <code>List</code> of attributes in the order of <code>getKeyColumnNames()</code>
	 */
	private List getKeyAttributesForExampleSet(ExampleSet exampleSet) {
		Vector keyAttributes = new Vector();
		for (Iterator names = this.getKeyColumnNames().iterator(); names.hasNext(); ) {
			String keyName = (String) names.next();
			keyAttributes.add(exampleSet.getAttribute(keyName));
		}
		return keyAttributes;
	}


	/**
	 * This method calls YALE interface methods to read data from the database
	 * into main memory using the specified query. YALE cares about conversion
	 * issues and a service method creates the attributes list needed later on
	 * by YALE.
	 * <p>
	 * The <code>ResultSet</code> of the query is closed in any case, also if
	 * creating the attributes or loading the table fails.
	 * 
	 * @param sqlQuery the query to be used when reading data from the database
	 * @return an <code>ExampleTable</code> built from the rows of the query
	 * @throws SQLException if reading from or closing the <code>ResultSet</code> fails
	 */
    private ExampleTable createDatabaseExampleTable(String sqlQuery)
    	throws SQLException, DbConnectionClosed, M4CompilerError
    {
		// Create the ResultSet:
		ResultSet resultSet = this.executeBusinessSqlRead(sqlQuery);

		boolean loaded = false;
		try {
			// Read the data into the right Yale class:
			List attributesL = DatabaseHandler.createAttributes(resultSet);
			final Attribute[] attributes =
				(Attribute[]) attributesL.toArray(new Attribute[attributesL.size()]);

			DatabaseDataRowReader ddrr = new DatabaseDataRowReader(attributes, resultSet) {
				/** 
				 * The method <code>next()</code> of class <code>DatabaseDataRowReader</code>
				 * simply returns a <code>Row</code> from the <code>ResultSet</code>. This
				 * implementation converts this tuple into a memory representation, so that
				 * subsequent accesses will work as supposed!
				 */
				public DataRow next() {
					DataRow row = super.next();
					if (row != null) {
						double[] rowValues = new double[attributes.length];
						for (int i = 0; i < rowValues.length; i++) {
							rowValues[i] = row.get(attributes[i]);
						}
						row = new DoubleArrayDataRow(rowValues);
					}
					return row;
				}
			};

			// Creates a table that is completely stored in main memory:
			ExampleTable table = new MemoryExampleTable(attributesL, ddrr);
			loaded = true;
			return table;
		}
		finally {
			// The ResultSet is no longer necessary, whether loading succeeded or not.
			if (resultSet != null) {
				if (loaded) {
					resultSet.close();
				}
				else {
					// An exception is already on its way to the caller; a failure
					// to close must not replace it.
					try {
						resultSet.close();
					}
					catch (SQLException ignored) {
						// deliberately ignored in favour of the primary exception
					}
				}
			}
		}
    }
	
	/**
	 * Helper method, creates an ExampleSet from a given ExampleTable, which
	 * are both internal YALE data structures. The keys are interpreted
	 * as special purpose attributes, while attributes available for making the
	 * prediction are the regular attributes.
	 * <p>
	 * A prediction column for which the lookup yields <code>null</code> is
	 * reported with a warning and left out; a key column for which the lookup
	 * yields <code>null</code> is left out silently.
	 * 
	 * @param yaleTable an <code>ExampleTable</code>
	 * @return a corresponding <code>ExampleSet</code>
	 * @throws OperatorException if an attribute lookup fails for the name as
	 * given and for its upper case form, or if YALE cannot create the example set
	 */
	private ExampleSet exampleTableToExampleSet(ExampleTable yaleTable) 
		throws OperatorException
	{
		// Create a List of all "regular attributes".
		List regularAttributes = new Vector();
		Iterator it = this.getPredictionColumnNames().iterator();
		while (it.hasNext()) {
			String attribName = (String) it.next();
			Attribute attrib = findAttributeByColumnName(yaleTable, attribName);
			if (attrib == null) {
				this.doPrint(Print.COMPILER_OP_CONTROL, "Warning: Attribute '" + attribName + "' not found!");
			}
			else regularAttributes.add(attrib);
		}

		// Keys are special attributes, so we set them on the appropriate list.
		Map specialAttributes = new HashMap();			
		it = this.getKeyColumnNames().iterator();
		while (it.hasNext()) {
			String attribName = (String) it.next();
			Attribute attrib = findAttributeByColumnName(yaleTable, attribName);
			if (attrib != null) {
				specialAttributes.put(attribName, attrib);
			}
		}

		// Creates an ExampleSet from the ExampleTable using the
		// attribute descriptions created above.
		AttributeSet attributeSet = new AttributeSet(regularAttributes, specialAttributes);
		return yaleTable.createExampleSet(attributeSet);		
	}

	/**
	 * Looks up an attribute of the table by its column name. If the lookup with
	 * the name as given raises an <code>OperatorException</code>, it is repeated
	 * with the upper case name, because upper case names may be needed for
	 * attributes that come from a database. The conversion uses a fixed locale,
	 * so that the result does not depend on the default locale of the JVM.
	 *
	 * @param yaleTable the table to search
	 * @param columnName the column name as known to MiningMart
	 * @return whatever <code>findAttribute</code> returns for the matching name
	 * @throws OperatorException if the upper case lookup fails as well
	 */
	private static Attribute findAttributeByColumnName(ExampleTable yaleTable, String columnName)
		throws OperatorException
	{
		try {
			return yaleTable.findAttribute(columnName);
		}
		catch (OperatorException oe) {
			// for attributes from databases upper cases may be needed:
			return yaleTable.findAttribute(columnName.toUpperCase(java.util.Locale.ENGLISH));
		}
	}


	/** 
	 * This is the main applier method. It reads a model from a flat file, remembers
	 * it as the model currently in use and applies it to a given
	 * <code>ExampleSet</code> of YALE, creating a new attribute.
	 * 
	 * @param modelFile a <code>File</code> pointing to a YALE model file
	 * @param exampleSet an <code>ExampleSet</code> as used by YALE
	 * @param targetAttribute the name of the output <code>Column</code> in the database
	 * @param loop the current loop number
	 * @return the predicted YALE <code>Attribute</code> created for the model application
	 * @throws M4CompilerError if the model cannot be read or YALE reports an
	 * <code>OperatorException</code>
	 */
	protected Attribute applyModel(File modelFile, ExampleSet exampleSet, String targetAttribute, int loop) 
		throws M4CompilerError
	{
		Attribute prediction;
		try {
			// Reading comes first, so an unreadable model is detected before anything else happens.
			this.yaleModel = Model.readModel(modelFile);

			// The new database table gets a column for the predictions of this loop.
			this.createPredictedAttributeInDb(loop);

			// The model itself creates the in-memory attribute for the predicted label
			// (an earlier, manual construction of this attribute is no longer in use).
			prediction = this.yaleModel.createPredictedLabel(exampleSet, targetAttribute);

			// Now the model can be applied to the example set.
			this.yaleModel.apply(exampleSet);
		}
		catch (IOException e) {
			throw new M4CompilerError("YaleModelApplier: Could not load model!\n" + e.getMessage());
		}
		catch (OperatorException e) {
			throw new M4CompilerError(
				"YaleModelApplier: YALE's OperatorException occured during 'applyModel'!\n"
				+ e.getMessage());
		}

		// Hand the predicted attribute back for further processing.
		return prediction;
	}

	/** 
	 * After the attributes to be predicted have been filled this method writes
	 * the results to the target table. This table has been created before and it
	 * consists of the primary key attributes of the original table and the newly
	 * predicted attributes.
	 * <p>
	 * One UPDATE statement is issued per example that has a value to set;
	 * the business transactions are committed once after all examples of
	 * the block have been processed.
	 * 
	 * @param exampleSet the <code>ExampleSet</code> holding the results of prediction
	 * @param predictedAttribs a <code>List</code> of <code>String</code>s containing
	 * the names of the predicted attributes in the database
	 * @param yaleKeyAttribs a <code>List</code> of Yale's key <code>Attribute</code>s
	 * in the same order as the List returned by <code>getKeyColumnNames()</code>.
	 * @param yalePredAttribs a <code>List</code> of Yale's predicted <code>Attribute</code>s
	 * in the order as they are looped.
	 * @throws M4CompilerError if the lists do not match, if a tuple has no key value
	 * to identify its row, or if an SQL error occurs while updating or committing
	 */
	private void writeResultsToDb(ExampleSet exampleSet, List predictedAttribs, List yaleKeyAttribs, List yalePredAttribs)
		throws M4CompilerError
	{
		// *** Gather attribute names of keys ***
		List keyNameCol = this.getKeyColumnNames();

		// The underlying List of MiningMart attribute names and Yale attributes are expected to match!
		if (predictedAttribs == null || yaleKeyAttribs == null || yalePredAttribs == null
		   || yalePredAttribs.size() != predictedAttribs.size()
		   || yaleKeyAttribs.size() != keyNameCol.size())
		{
			throw new M4CompilerError("Operator 'YaleModelApplier':"
				+ " Yale and Database attributes did not match in method 'writeResults'!");
		}
			
		// *** Prepare SQL statement for updates ***
		final String targetTable = this.getTargetTableName();
		final String sqlPre = "UPDATE " + targetTable + " SET "; // + predictedAttribs
		final String sqlCond = " WHERE "; // + keyNames
			
		// *** Write results to target table: ***	
		ExampleReader er = exampleSet.getExampleReader();
		while (er.hasNext()) {
			Example example = er.next();
			
			// Creates the attribute-value pairs of the predicted attributes to be set, or null if no attribute
			// needs to be set.
			String setPart = this.createSqlAttributeValueString(example, predictedAttribs, yalePredAttribs, ", ");
			
			// We do not continue if no attribute needs to be updated for the current tuple!
			if (setPart != null) {
				String condPart = this.createSqlAttributeValueString(example, keyNameCol, yaleKeyAttribs, " AND ");
				if (condPart == null) {
					// Without any key value the statement would end in "WHERE null"
					// and could not address the row that belongs to this example.
					throw new M4CompilerError(
						"YaleModelApplier: No key value available for a tuple, cannot identify"
						+ " the row to update in table " + targetTable + "!");
				}
				String sql = sqlPre + setPart + sqlCond + condPart;

				try {	
					this.executeBusinessSqlWrite(sql);
				}
				catch (SQLException e) {
					throw new M4CompilerError(
						"YaleModelApplier: SQLException occured when trying to insert tuple into table,\n"
						+ "SQL statement was: " + sql + "\n" + e.getMessage());
				}
			}
		}
		try {
			this.getM4Db().commitBusinessTransactions();
		}
		catch (SQLException sqle) {
			throw new M4CompilerError("YaleModelApplier: SQL error when committing a block: " + sqle.getMessage());
		}
	}

	/**
	 * Helper method, builds the part "predictedAttribute1='A', predictedAttribute2=5, ..." 
	 * or "key1=10 AND key2=20". Returns <code>null</code> if only NULL values appear.
	 * <p>
	 * Names and YALE attributes are matched by position. An attribute whose value is
	 * <code>null</code> is left out together with its name. Values of non-numerical
	 * attributes are put into single quotes, and single quotes inside such a value
	 * are doubled so that the value stays one SQL string literal.
	 *
	 * @param example the example to read the values from
	 * @param predictedAttribs a <code>List</code> of <code>String</code>s with the
	 * database names, in the same order as <code>yaleAttributes</code>
	 * @param yaleAttributes a <code>List</code> of the YALE <code>Attribute</code>s
	 * whose values are read
	 * @param separator the text put between two "name=value" pairs
	 * @return the joined pairs, or <code>null</code> if there is no pair at all
	 */
	private String createSqlAttributeValueString(Example example, List predictedAttribs, List yaleAttributes, String separator)
	{
		StringBuffer setList = new StringBuffer();
		Iterator yaleIt = yaleAttributes.iterator();
		Iterator mmIt = predictedAttribs.iterator();
		while (yaleIt.hasNext()) {
			// Both iterators advance in every round. Otherwise a skipped NULL value
			// would pair all following values with the wrong names.
			Attribute curAttrib = (Attribute) yaleIt.next();
			String columnName = (String) mmIt.next();

			// Fetch the current attribute value and convert it to the String used in the query:
			String valueS = example.getValueAsString(curAttrib);
			if (valueS == null) {
				continue; // we omit setting new database fields to NULL
			}
			if ( ! isNumericalYaleAttribute(curAttrib)) {
				// Double embedded quotes, the value must not terminate its own literal.
				valueS = "'" + valueS.replaceAll("'", "''") + "'";
			}

			if (setList.length() > 0) {
				setList.append(separator);
			}
			setList.append(columnName).append('=').append(valueS);
		}
		return (setList.length() > 0) ? setList.toString() : null;
	}

	/**
	 * Helper method testing a YALE attribute for a numerical value type.
	 *
	 * @param attribute the attribute to test, may be <code>null</code>
	 * @return <code>true</code> if the attribute is not <code>null</code> and its value
	 * type is classified as <code>Ontology.NUMERICAL</code> by the value type ontology
	 */
	private static boolean isNumericalYaleAttribute(Attribute attribute) {
		if (attribute == null) {
			return false;
		}
		return Ontology.ATTRIBUTE_VALUE_TYPE.isA(attribute.getValueType(), Ontology.NUMERICAL);
	}

	// *** Parameter specific for the YaleModelApplier: ****

	/**
	 * Reads the parameter 'ModelFile' of the given loop and checks that it
	 * points to an existing regular file.
	 *
	 * @param loop the loop number to read the parameter for
	 * @return the path to the YALE model file to be used.
	 * @throws M4CompilerError if the parameter or its value is missing, or if
	 * the path does not denote an existing file
	 */
	public File getModelFile(int loop) throws M4CompilerError {
		Value modelFileParam = (Value) this.getSingleParameter("ModelFile", loop);
		boolean missing = (modelFileParam == null) || (modelFileParam.getValue() == null);
		if (missing) {
			throw new M4CompilerError(
				"YaleModelApplier: Mandatory parameter 'ModelFile' missing (loop number: "
				+ loop + ") !");
		}

		String path = modelFileParam.getValue();
		File candidate = new File(path);
		if ( ! (candidate.exists() && candidate.isFile())) {
			throw new M4CompilerError(
				"YaleModelApplier: ModelFile not found (loop number: " + loop + ")!\n"
				+ "Specified path was: '" + path + "'"
			);
		}
		return candidate;
	}

}
/*
 * Historie
 * --------
 *
 * $Log: YaleModelApplier.java,v $
 * Revision 1.4  2006/04/11 14:10:11  euler
 * Updated license text.
 *
 * Revision 1.3  2006/04/06 16:31:11  euler
 * Prepended license remark.
 *
 * Revision 1.2  2006/01/05 10:27:38  hakenjos
 * Removed Javadoc Warnings!
 *
 * Revision 1.1  2006/01/03 09:54:21  hakenjos
 * Initial version!
 *
 */
