/*
 * MiningMart Version 1.0
 * 
 * Copyright (C) 2006 Martin Scholz, Timm Euler, 
 *                    Daniel Hakenjos, Katharina Morik
 *
 * Contact: miningmart@ls8.cs.uni-dortmund.de
 *
 * A list of contributing developers (other than the copyright 
 * holders) can be found at
 * http://mmart.cs.uni-dortmund.de/downloads/download.html
 * 
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program, see the file MM_HOME/LICENSE; if not, write
 * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
 * Floor, Boston, MA 02110-1301, USA.
 */
package edu.udo.cs.miningmart.operator;

import java.util.Iterator;
import java.util.StringTokenizer;
import java.util.Vector;

import edu.udo.cs.miningmart.db.DB;
import edu.udo.cs.miningmart.exception.M4CompilerError;
import edu.udo.cs.miningmart.exception.M4Exception;
import edu.udo.cs.miningmart.m4.BaseAttribute;
import edu.udo.cs.miningmart.m4.EstimatedStatistics;
import edu.udo.cs.miningmart.m4.Step;
import edu.udo.cs.miningmart.m4.Value;

/**
 * @author Timm Euler
 * @version $Id: RowSelectionByQuery.java,v 1.6 2006/04/11 14:10:12 euler Exp $
 */
public final class RowSelectionByQuery extends RowSelection {

    public String generateConditionForOp() throws M4CompilerError {       
    	try {
    		String sqlCondition = "";
            Value rightCond;
            String rightExpr;
	        for (int i = 0; i < this.getNumberOfLoops(); i++)
            {
                rightCond = this.getTheRightCondition(i);
                if ((rightCond.isLong()) ||
                    ( ! this.getTheConditionOperator(i).equalsIgnoreCase("=")))
                {   rightExpr = rightCond.getValue();  }
                else
                {   rightExpr = DB.quote(rightCond.getValue());   }


                sqlCondition = sqlCondition + "(" +
		                         this.getTheLeftCondition(i).getCurrentColumn().getSQLDefinition() + " " +
		                         this.getTheConditionOperator(i) + " " + rightExpr + ") AND ";
            }
	        // Delete the last "AND "
	        sqlCondition = sqlCondition.substring(0, sqlCondition.length() - 4);
	        return sqlCondition;
    	}
   		catch (M4Exception m4e)
   		{   throw new M4CompilerError("M4 interface error in " + this.getName() + ": " + m4e.getMessage());  } 
    }

    /**
     * @see edu.udo.cs.miningmart.operator.ExecutableOperator#estimateStatistics(Step)
     */
    public EstimatedStatistics estimateStatistics(Step theStep) throws M4Exception {
    	EstimatedStatistics myEstStat = super.estimateStatistics(theStep);
    	
    	try {
    		// attempt to correct some values:
    		for (int i = 0; i < this.getNumberOfLoops(); i++) {
    			String targetName = this.getTheLeftCondition(i).getName();
    			String operator = this.getTheConditionOperator(i).trim();
    			String right = this.getTheRightCondition(i).getValue().trim();
    			Vector valueList = myEstStat.getValueList(targetName);
    			
    			if (operator.equals("=")) {
    				// only one value remains
    				if (right.startsWith("'")) {
    					right = right.substring(1);
    					if (right.endsWith("'")) {
    						right = right.substring(0, right.length()-1);
    					}
    				}
    				if (valueList.contains(right)) {
    					int noOfOcc = myEstStat.getNumberOfOccurrences(targetName, right);
    					if (noOfOcc != EstimatedStatistics.VALUE_INT_UNKNOWN && 
    							noOfOcc > 0) {
    						myEstStat.setNumberOfRows(noOfOcc);
    					}
    					Vector newValueList = new Vector();
    					newValueList.add(right);
    					myEstStat.setValueList(targetName, newValueList);
    					myEstStat.setNumberOfOccurrences(targetName, right, noOfOcc);
    				}
    			}
    			else {        		
    				Vector remainingValues = this.findRemainingValues(valueList, operator, right);
    				if (remainingValues != null) { // if null they could not be found
    					// several values remain
    					Iterator allValIt = valueList.iterator();
    					Integer numberOfRemainingRows = new Integer(0);
    					while (allValIt.hasNext()) {
    						String aValue = (String) allValIt.next();
    						int noOfOcc = EstimatedStatistics.VALUE_INT_UNKNOWN;
    						if (remainingValues.contains(aValue)) {
    							// this value is still there. the number of occurrences is unchanged
    							noOfOcc = myEstStat.getNumberOfOccurrences(targetName, aValue);
    							if (noOfOcc != EstimatedStatistics.VALUE_INT_UNKNOWN && 
    									noOfOcc > 0) {
    								if (numberOfRemainingRows != null) {
    									numberOfRemainingRows = new Integer(numberOfRemainingRows.intValue() + noOfOcc);
    								}		        				
    							}
    							else {
    								// number of remaining rows cannot be known
    								numberOfRemainingRows = null;
    							}
    						}
    						else {
    							// this value is no longer there
    							myEstStat.removeValue(aValue, targetName);
    						}
    					}
    					if (numberOfRemainingRows != null) {
    						myEstStat.setNumberOfRows(numberOfRemainingRows.intValue());
    					}
    				}
    			}
    		}    		
        }
    	catch (M4CompilerError mce) {
    		throw new M4Exception("Error when estimating statistics: " + mce.getMessage());
    	}
    	
    	return myEstStat;
    }

    private Vector findRemainingValues(Vector listOfValues, String operator, String right) {
    	if (operator.equalsIgnoreCase("in")) {
    		if (right.startsWith("(")) {
    			right = right.substring(1);
    			if (right.endsWith(")")) {
    				right = right.substring(0, right.length()-1);
    			}
    		}
    		right = right.trim();
    		StringTokenizer st = new StringTokenizer(right, ",");
    		Vector theValues = new Vector();
    		while (st.hasMoreTokens()) {
    			String oneValue = st.nextToken();
    			if (oneValue.startsWith("'")) {
    				oneValue = oneValue.substring(1);
    				if (oneValue.endsWith("'")) {
    					oneValue = oneValue.substring(0, oneValue.length()-1);
    				}
    			}
    			oneValue = oneValue.trim();
    			if (listOfValues.contains(oneValue)) {
    				theValues.add(oneValue);
    			}
    		}
    		if (theValues.isEmpty()) {
    			return null;
    		}
    		else {
    			return theValues;
    		}
    	}
    	boolean smaller = operator.startsWith("<");
    	boolean bigger = operator.startsWith(">");
    	boolean orEqual = operator.endsWith("=");
    	if (smaller || bigger) {
    		String compareToValue = right;
    		if (compareToValue.startsWith("'")) {
    			compareToValue = compareToValue.substring(1);
    			if (compareToValue.endsWith("'")) {
    				compareToValue = compareToValue.substring(0, compareToValue.length()-1);
    			}
    		}
    		compareToValue = compareToValue.trim();
    		double compareToD = EstimatedStatistics.VALUE_DOUBLE_UNKNOWN;
    		try {
    			compareToD = Double.parseDouble(compareToValue);
    		}
    		catch (NumberFormatException e) {
    			// can't do anything:
    			return null;
    		}
    		Vector remainingValues = new Vector();
    		
    		// find the values that fulfill the condition:
    		Iterator it = listOfValues.iterator();
    		while (it.hasNext()) {
				String testValue = (String) it.next();
				double testD = EstimatedStatistics.VALUE_DOUBLE_UNKNOWN;
				try {
					testD = Double.parseDouble(testValue);
				}
				catch (NumberFormatException nfe) {
					// some error 
					return null;
				}
				if ( (bigger && testD > compareToD) ||
					 (smaller && testD < compareToD) ||
					 (bigger && orEqual && testD >= compareToD) ||
					 (smaller && orEqual && testD <= compareToD) ) {
					remainingValues.add(testValue);
				}
			}
    		
    		if (remainingValues.isEmpty()) {
    			return null;
    		}
    		else {
    			return remainingValues;
    		}
    	}
    	return null;
    }
    
    public BaseAttribute getTheLeftCondition(int loopNr) throws M4CompilerError {
    	return (BaseAttribute) this.getSingleParameter("TheLeftCondition", loopNr);
    }
    
    public String getTheConditionOperator(int loopNr) throws M4CompilerError {
    	return ((Value) this.getSingleParameter("TheConditionOperator", loopNr)).getValue();
    }
    
    public Value getTheRightCondition(int loopNr) throws M4CompilerError {
    	return (Value) this.getSingleParameter("TheRightCondition", loopNr);
    }    
}
/*
 * History
 * -------
 * 
 * $Log: RowSelectionByQuery.java,v $
 * Revision 1.6  2006/04/11 14:10:12  euler
 * Updated license text.
 *
 * Revision 1.5  2006/04/06 16:31:11  euler
 * Prepended license remark.
 *
 * Revision 1.4  2006/03/30 16:07:12  scholz
 * fixed author tags for release
 *
 * Revision 1.3  2006/03/23 11:13:45  euler
 * Improved exception handling.
 *
 * Revision 1.2  2006/01/18 16:58:58  euler
 * Added some basic estimations of statistics.
 * Will need improvements.
 *
 * Revision 1.1  2006/01/03 09:54:22  hakenjos
 * Initial version!
 *
 */

/*
 * Old History
 * -----------
 *
 * Revision 1.11  2003/06/05 09:52:02  euler
 * Organised imports.
 *
 * Revision 1.10  2002/10/08 18:07:56  scholz
 * Prepared code for parallel execution of multiple compile-Threads.
 * Calls to static fields like DB.m4Db were removed. Now each
 * Thread has its own DB object, reachable via
 * CompilerAccessLogic or Case.
 * The methods getCase() and getM4Db() were added to M4Object.
 * The static methods of Parameter now need an additional
 * parameter of type DB.
 * All direct calls from Operators to these Parameter methods were
 * removed.
 * All old load() and print() routines were removed.
 * The static calls to Print were removed. Now CompilerAccessLogic
 * references a valid Print object for the current Thread. This is
 * reachable via Case. The methods doPrint for messages and
 * Exceptions were added to M4Object.
 * The Print mechanism is not fully functional, yet.
 * A getStatus method was added to the Interface. It is not
 * functional yet for multiple Threads.
 *
 * Status: Compiles.
 *
 * Revision 1.9  2002/08/05 10:35:48  euler
 * Restructured the operator hierarchy: introduction of SingleCSOperator
 * and MultipleCSOperator. Changed this class accordingly.
 *
 * Revision 1.8  2002/07/11 08:36:35  euler
 * Changed printing of messages.
 *
 * Revision 1.7  2002/06/13 12:56:38  euler
 * Fixed a bug.
 *
 * Revision 1.6  2002/06/06 09:59:59  euler
 * Made operator loopable. Not tested yet.
 *
 * Revision 1.5  2002/05/31 12:35:01  euler
 * *** empty log message ***
 *
 * Revision 1.4  2002/05/21 12:51:01  euler
 * First beta test ok.
 *
 * Revision 1.3  2002/05/15 10:36:43  euler
 * First version that compiles.
 *
 * Revision 1.2  2002/05/07 13:06:56  wiese
 * get+set methods
 *
 * Revision 1.1  2002/04/30 13:00:29  wiese
 * compileable version
 *
 * Revision 1.6  2002/04/19 15:23:12  wiese
 * Initial version after the handover
 *
 */


