/*
 * MiningMart Version 1.1
 * 
 * Copyright (C) 2006 Martin Scholz, Timm Euler, 
 *                    Daniel Hakenjos, Katharina Morik
 *
 * Contact: miningmart@ls8.cs.uni-dortmund.de
 *
 * A list of contributing developers (other than the copyright 
 * holders) can be found at
 * http://mmart.cs.uni-dortmund.de/downloads/download.html
 * 
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program, see the file MM_HOME/LICENSE; if not, write
 * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
 * Floor, Boston, MA 02110-1301, USA.
 */
package edu.udo.cs.miningmart.operator.uep;

import java.util.ArrayList;
import java.util.StringTokenizer;
import java.util.TreeMap;

import edu.udo.cs.miningmart.exception.M4CompilerError;
import edu.udo.cs.miningmart.exception.M4Exception;
import edu.udo.cs.miningmart.m4.BaseAttribute;
import edu.udo.cs.miningmart.m4.Column;
import edu.udo.cs.miningmart.m4.Value;
import edu.udo.cs.miningmart.m4.utils.Print;

/**
 * <p>M4 Compiler</p>
 * <p>Copyright: Copyright (c) 2002</p>
 * <p>Company: University Dortmund</p>
 *
 * <p>Discretization operator that derives the cut points of a target attribute
 * from the class values observed together with each target value.</p>
 *
 * <p>Outline of {@link #generateCutPoints(Column)}:</p>
 * <ol>
 *   <li>Every distinct (non-null) class value gets an index 1..n; index k is
 *       represented by bit (k - 1) of a <code>long</code> bit mask.</li>
 *   <li>The rows delivered by the database (count, class, target value) are turned
 *       into one interval per run of equal target values; the interval label is
 *       the bit mask of the classes with the highest count for that value.</li>
 *   <li>"Easy merge": neighbouring intervals with identical labels are merged.</li>
 *   <li>If the parameter "FullMerge" equals "yes" (ignoring case), two further
 *       merge passes combine intervals whose labels share at least one class.</li>
 *   <li>The upper bounds of all intervals except the last one are returned.</li>
 * </ol>
 * 
 * @author Felix Koschin
 * @version $Id: ImplicitErrorBasedDiscretization.java,v 1.7 2006/09/27 15:00:04 euler Exp $
 */
public class ImplicitErrorBasedDiscretization extends Discretization
{
	/** Sample size used when the parameter "SampleSize" is not given. */
	private static final long DEFAULT_SAMPLE_SIZE = 10000;

	/** Class labels are stored as bits of a <code>long</code>, so at most 64 classes fit. */
	private static final int MAX_CLASSES = 64;

	/* Parameters */

	/**
	 * @return the parameter "FullMerge" of the current loop; the value "yes"
	 *         (ignoring case) switches on the additional merge passes
	 */
	public Value getFullMerge() throws M4CompilerError
	{
		return (Value) this.getSingleParameter("FullMerge", this.getCurrentLoopNumber());
	}

	/**
	 * @return the parameter "ClosedTo" of the current loop (not read inside this class)
	 */
	public Value getClosedTo() throws M4CompilerError
	{
		return (Value) this.getSingleParameter("ClosedTo", this.getCurrentLoopNumber());
	}
	
	/**
	 * @return the values of the parameter "SampleSize" of the current loop;
	 *         see {@link #sampleSize()} for how a missing value is treated
	 */
	public Value[] getSampleSize() throws M4CompilerError
	{
		return (Value[]) this.getParameter("SampleSize", this.getCurrentLoopNumber());
	}
	
	/**
	 * Resolves the sample size handed to the database query.
	 *
	 * @return the first "SampleSize" value parsed as a long, or
	 *         {@link #DEFAULT_SAMPLE_SIZE} if the parameter is absent or empty
	 * @throws M4CompilerError if the parameter value is not a valid long
	 */
	private long sampleSize() throws M4CompilerError
	{
		Value[] tmp = getSampleSize();
		if (tmp == null || tmp.length == 0)
			return DEFAULT_SAMPLE_SIZE;

		String raw = tmp[0].getValue();
		try {
			return Long.parseLong(raw);
		}
		catch (NumberFormatException nfe)
		{   throw new M4CompilerError("Parameter 'SampleSize' of " + this.getName() + " is not a valid integer: '" + raw + "'");  }
	}	

	/**
	 * @return the parameter "TheClassAttribute" of the current loop
	 */
	public BaseAttribute getTheClassAttribute() throws M4CompilerError
	{
		return (BaseAttribute) this.getSingleParameter("TheClassAttribute", this.getCurrentLoopNumber());
	}
	

	/* Basic methods */

	/**
	 * Computes the cut points for the given target column.
	 *
	 * @param theTargetAttributeColumn column whose values are to be discretized
	 * @return the upper bounds of all resulting intervals except the last one;
	 *         an empty array if the database delivered no rows
	 * @throws M4CompilerError if the class column cannot be accessed, if there are
	 *         more than {@value #MAX_CLASSES} distinct classes, or if a row delivered
	 *         by the database cannot be interpreted
	 */
	public double[] generateCutPoints(Column theTargetAttributeColumn) throws M4CompilerError	{
		doPrint(Print.OPERATOR,"ImplicitErrorBasedDiscretization:" );
		
		Column theClassAttributeColumn;
		try {
			theClassAttributeColumn = getTheClassAttribute().getCurrentColumn();
		}
		catch (M4Exception m4e)
		{   throw new M4CompilerError("M4 interface error in " + this.getName() + ": " + m4e.getMessage());  }

		/* Exploring data: give every class value an index 1..n */
		String[] classes = this.getM4Db().getDistinctElementsWithoutNull(theClassAttributeColumn);
		if (classes.length > MAX_CLASSES)
		{
			/* more classes than bits: different classes would share a bit and be merged wrongly */
			throw new M4CompilerError(this.getName() + ": the class attribute has " + classes.length
					+ " distinct values, but at most " + MAX_CLASSES + " are supported.");
		}
		TreeMap classIndex = new TreeMap();
		for (int c = 0; c < classes.length; c++) {
			classIndex.put(classes[c], new Long(c + 1));
		}
		
		/* One string per row, parsed below as "count,class,value" */
		String[] elements = this.getM4Db().getCountOfElements(theClassAttributeColumn, theTargetAttributeColumn, sampleSize());

		ArrayList intervals = parseCountRows(elements, classIndex);

		/* Easy merge */
		mergeEqualLabelNeighbours(intervals);
		
		/* Full merge */
		if (getFullMerge().getValue().equalsIgnoreCase("yes"))
		{
			doPrint(Print.OPERATOR, "Satrting FullMerge ...");
			fullMergeEnclosed(intervals);
			fullMergeOverlapping(intervals);
		}
		
		doPrint(Print.OPERATOR, "\nNew intervals:");
		for (int k = 0; k < intervals.size(); k++)
		{
			doPrint(Print.OPERATOR, ((myData)intervals.get(k)).Text());
		}
		
		/* no rows means no intervals and therefore no cut points */
		if (intervals.isEmpty())
			return new double[0];

		/* the upper bound of the last interval is not a cut point */
		double[] cp = new double[intervals.size() - 1];
		for (int k = 0; k < cp.length; k++)
			cp[k] = ((myData)intervals.get(k)).UBound;
		return cp;
	}

	/**
	 * Builds the initial intervals from the rows "count,class,value".
	 * A new interval is started whenever the value differs from the value of the
	 * previous row; the border between two intervals is the midpoint of their values.
	 * NOTE(review): this presumably relies on the rows being ordered by value - confirm
	 * against getCountOfElements.
	 *
	 * @param elements   rows as delivered by the database
	 * @param classIndex maps a class value (String) to its index (Long, starting at 1)
	 * @return list of {@link myData}, in row order
	 * @throws M4CompilerError if a row is malformed or names an unknown class
	 */
	private ArrayList parseCountRows(String[] elements, TreeMap classIndex) throws M4CompilerError
	{
		ArrayList intervals = new ArrayList();
		myData current = null;

		for (int r = 0; r < elements.length; r++)
		{
			StringTokenizer t = new StringTokenizer(elements[r], ",");
			if (t.countTokens() < 3)
				throw new M4CompilerError(this.getName() + ": expected 'count,class,value' but found '" + elements[r] + "'");

			long cnt;
			String className;
			double v;
			try {
				cnt = Long.parseLong(t.nextToken());
				className = t.nextToken();
				v = Double.parseDouble(t.nextToken());
			}
			catch (NumberFormatException nfe)
			{   throw new M4CompilerError(this.getName() + ": cannot parse the numbers in '" + elements[r] + "'");  }

			if (current == null)
			{
				/* very first row: the outer lower bound is an arbitrary value - 1 */
				current = createInterval(v, v - 1);
				intervals.add(current);
			}
			else if (v != current.value)
			{
				/* value changed: close the old interval at the midpoint, open a new one there */
				double border = (v + current.value) / 2;
				current.UBound = border;
				current = createInterval(v, border);
				intervals.add(current);
			}

			current.count = current.count + cnt;
			if (cnt >= current.maxfreq)
			{
				long bit = classBit(classIndex, className);
				if (cnt == current.maxfreq)
				{
					/* tie: this class shares the maximum, add it to the label */
					current.label = current.label | bit;
				}
				else
				{
					/* new maximum: this class alone is the label */
					current.maxfreq = cnt;
					current.label = bit;
				}
			}
		}
		return intervals;
	}

	/**
	 * Creates an interval for a single value. The upper bound is preset to
	 * value + 1 and is overwritten as soon as a following interval is opened.
	 */
	private myData createInterval(double value, double lowerBound)
	{
		myData d = new myData();
		d.value = value;
		d.maxfreq = 0;
		d.label = 0;
		d.LBound = lowerBound;
		d.UBound = value + 1;
		d.count = 0;
		return d;
	}

	/**
	 * Returns the label bit of a class: bit (index - 1) of a long.
	 * The shift is done on a long literal; an int literal would wrap after 32 classes.
	 *
	 * @throws M4CompilerError if the class value is not contained in the index
	 */
	private long classBit(TreeMap classIndex, String className) throws M4CompilerError
	{
		Long index = (Long) classIndex.get(className);
		if (index == null)
			throw new M4CompilerError(this.getName() + ": class value '" + className + "' is not among the distinct values of the class attribute.");
		return 1L << (index.longValue() - 1);
	}

	/**
	 * Adds the right neighbour's statistics to <code>left</code> and extends its
	 * upper bound. The label of <code>left</code> is kept; removing
	 * <code>right</code> from the list is up to the caller.
	 */
	private void absorbRightNeighbour(myData left, myData right)
	{
		left.maxfreq = left.maxfreq + right.maxfreq;
		left.UBound = right.UBound;
		left.count = left.count + right.count;
	}

	/** Easy merge: merges two neighbouring intervals when they have the same label. */
	private void mergeEqualLabelNeighbours(ArrayList intervals)
	{
		int i = 0;				 
		while (i < intervals.size() - 1)
		{				
			myData x1 = (myData)intervals.get(i);
			myData x2 = (myData)intervals.get(i + 1);
			
			if (x1.label == x2.label)
			{
				absorbRightNeighbour(x1, x2);
				intervals.remove(i + 1);
				/* i stays: the enlarged interval is compared with its new neighbour */
			}
			else
				i++;
		}
	}

	/**
	 * Full merge, part I: merges three neighbouring intervals when the two outer
	 * ones have the same label and the middle one shares at least one class with them.
	 */
	private void fullMergeEnclosed(ArrayList intervals)
	{
		int i = 1;
		while (i < intervals.size() - 1)
		{
			myData x1 = (myData)intervals.get(i - 1);
			myData x2 = (myData)intervals.get(i);
			myData x3 = (myData)intervals.get(i + 1);

			/* != 0 rather than > 0: the highest class index occupies the sign bit */
			if ((x1.label == x3.label) && ((x1.label & x2.label) != 0))
			{
				absorbRightNeighbour(x1, x2);
				absorbRightNeighbour(x1, x3);
				/* remove the middle and the right interval; both end up at index i */
				intervals.remove(i);
				intervals.remove(i);
			}
			else
				i++;
		}
	}

	/**
	 * Full merge, part II: looks at triples (left, middle, right) and merges the
	 * middle interval with a neighbour whose label shares a class with it. If both
	 * neighbours qualify, the left one is chosen only when it holds fewer elements
	 * than the right one; otherwise middle and right are merged.
	 * NOTE(review): the index advances by one after a merge and by two otherwise,
	 * so some intervals are never inspected as the middle of a triple. The stepping
	 * is kept exactly as it was - confirm that this is intended.
	 */
	private void fullMergeOverlapping(ArrayList intervals)
	{
		int i = 1;
		while (i < intervals.size() - 1)
		{
			myData x1 = (myData)intervals.get(i - 1);
			myData x2 = (myData)intervals.get(i);
			myData x3 = (myData)intervals.get(i + 1);

			boolean sharesWithLeft = (x1.label & x2.label) != 0;
			boolean sharesWithRight = (x2.label & x3.label) != 0;

			if (sharesWithLeft || sharesWithRight)
			{
				boolean mergeLeft = sharesWithLeft && (!sharesWithRight || x1.count < x3.count);
				if (mergeLeft)
				{
					absorbRightNeighbour(x1, x2);
					intervals.remove(i);
				}
				else
				{
					absorbRightNeighbour(x2, x3);
					intervals.remove(i + 1);
				}
				i++;
			}
			else
			{
				i += 2;
			}
		}
	}
	
	/**
	 * One interval of the discretization together with the statistics needed
	 * for merging.
	 */
	public class myData {
		/** target value for which the interval was created */
		public double value;
		/** lower bound of the interval */
		public double LBound;
		/** upper bound of the interval */
		public double UBound;
		/** bit mask of class indices: bit (k - 1) stands for class index k */
		public long label;
		/** highest count of a single class; summed up when intervals are merged */
		public long maxfreq;
		/** number of elements in the interval */
		public long count;
		
		/**
		 * @return a short description of the form "(LBound, UBound) - count elements"
		 */
		public String Text()
		{
			return "(" + LBound + ", " + UBound + ") - " + count + " elements";
		}		
			
	}
}
/*
 * $Log: ImplicitErrorBasedDiscretization.java,v $
 * Revision 1.7  2006/09/27 15:00:04  euler
 * New version 1.1
 *
 * Revision 1.6  2006/04/11 14:10:18  euler
 * Updated license text.
 *
 * Revision 1.5  2006/04/06 16:31:18  euler
 * Prepended license remark.
 *
 * Revision 1.4  2006/03/30 16:07:14  scholz
 * fixed author tags for release
 *
 * Revision 1.3  2006/03/23 11:13:46  euler
 * Improved exception handling.
 *
 * Revision 1.2  2006/01/06 16:27:57  euler
 * Bugfixes
 *
 * Revision 1.1  2006/01/03 09:54:35  hakenjos
 * Initial version!
 *
 */
