/*
 * MiningMart Version 1.0
 * 
 * Copyright (C) 2006 Martin Scholz, Timm Euler, 
 *                    Daniel Hakenjos, Katharina Morik
 *
 * Contact: miningmart@ls8.cs.uni-dortmund.de
 *
 * A list of contributing developers (other than the copyright 
 * holders) can be found at
 * http://mmart.cs.uni-dortmund.de/downloads/download.html
 * 
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program, see the file MM_HOME/LICENSE; if not, write
 * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
 * Floor, Boston, MA 02110-1301, USA.
 */
package edu.udo.cs.miningmart.operator.uep;

import java.util.ArrayList;
import java.util.StringTokenizer;
import java.util.TreeMap;

import edu.udo.cs.miningmart.exception.M4CompilerError;
import edu.udo.cs.miningmart.exception.M4Exception;
import edu.udo.cs.miningmart.m4.BaseAttribute;
import edu.udo.cs.miningmart.m4.Column;
import edu.udo.cs.miningmart.m4.Value;
import edu.udo.cs.miningmart.m4.utils.Print;

/**
 * <p>M4 Compiler</p>
 * <p>Copyright: Copyright (c) 2002</p>
 * <p>Company: University Dortmund</p>
 *
 * <p>Grouping operator that builds groups of target attribute values from
 * the class attribute. Every distinct target value is labelled with the set
 * of classes that occur most often together with it. Values with the same
 * label are merged into one group; afterwards, if the smallest group has
 * fewer items than the "MinCardinality" parameter, groups are merged further
 * until the smallest one reaches that size or only one group is left.</p>
 *
 * @author Felix Koschin
 * @version $Id: ErrorBasedGroupingGivenMinCardinality.java,v 1.6 2006/04/11 14:10:18 euler Exp $
 */
public class ErrorBasedGroupingGivenMinCardinality extends Grouping
{
	/** Sort mode: order entries by their class label (bit mask). */
	private static final int SORT_BY_LABEL = 1;
	/** Sort mode: order entries by their total item count. */
	private static final int SORT_BY_COUNT = 2;

	/** Sample size used when the "SampleSize" parameter is not set. */
	private static final long DEFAULT_SAMPLE_SIZE = 10000;

	/* Parameters */

	/** @return the value of the "MinCardinality" parameter for the current loop */
	public Value getMinCardinality() throws M4CompilerError
	{
		return (Value) this.getSingleParameter("MinCardinality", this.getCurrentLoopNumber());
	}

	/** @return the value of the "MinCardinalityType" parameter for the current loop */
	public Value getMinCardinalityType() throws M4CompilerError
	{
		return (Value) this.getSingleParameter("MinCardinalityType", this.getCurrentLoopNumber());
	}

	/** @return the values of the "SampleSize" parameter for the current loop */
	public Value[] getSampleSize() throws M4CompilerError
	{
		return (Value[]) this.getParameter("SampleSize", this.getCurrentLoopNumber());
	}

	/**
	 * @return the first "SampleSize" value parsed as a long, or
	 *         {@link #DEFAULT_SAMPLE_SIZE} if the parameter has no value
	 */
	private long sampleSize() throws M4CompilerError
	{
		Value[] tmp = getSampleSize();
		// A missing parameter is treated like an empty one.
		if (tmp == null || tmp.length == 0)
			return DEFAULT_SAMPLE_SIZE;
		return Long.parseLong(tmp[0].getValue());
	}

	/** @return the "TheClassAttribute" parameter for the current loop */
	public BaseAttribute getTheClassAttribute() throws M4CompilerError
	{
		return (BaseAttribute) this.getSingleParameter("TheClassAttribute", this.getCurrentLoopNumber());
	}


	/* Basic methods */

	/**
	 * Computes the groups for the given target attribute column.
	 *
	 * @param theTargetAttributeColumn the column whose values are grouped
	 * @return one string per group; each string is a list of quoted values
	 *         separated by ", "
	 * @throws M4CompilerError if the class attribute column cannot be
	 *         accessed, if no counts are returned for the target column, or
	 *         if a count row refers to an unknown class value
	 */
	public String[] generateGroups(Column theTargetAttributeColumn) throws M4CompilerError
	{
		defaultValue = true;
		doPrint(Print.OPERATOR,"ErrorBasedGroupingGivenMinCardinality:" );

		Column theClassAttributeColumn;
		try {
			theClassAttributeColumn = getTheClassAttribute().getCurrentColumn();
		}
		catch (M4Exception m4e)
		{   throw new M4CompilerError("M4 interface error in " + this.getName() + ": " + m4e.getMessage());  }

		/* Exploring data: give every class value a 1-based index */
		TreeMap clss = new TreeMap();
		String[] classes = this.getM4Db().getDistinctElementsWithoutNull(theClassAttributeColumn);
		for (int i = 0; i < classes.length; i++)
		{
			clss.put(classes[i], new Long(i + 1));
		}

		/* SELECT TA, Count(TA) FROM ... */
		String[] elements = this.getM4Db().getCountOfElements(theClassAttributeColumn, theTargetAttributeColumn, sampleSize());
		if (elements == null || elements.length == 0)
		{
			// Without any row there is nothing to group; fail with a clear
			// message instead of a NullPointerException further down.
			throw new M4CompilerError("Operator " + this.getName()
				+ ": no value counts were returned for the target attribute, cannot generate groups.");
		}

		/* Easy merge */
		ArrayList cutPoint = mergeNeighboursWithEqualLabel(collectValueStatistics(elements, clss));

		long minCnt = ((myData) cutPoint.get(0)).count;
		for (int i = 1; i < cutPoint.size(); i++)
		{
			long cnt = ((myData) cutPoint.get(i)).count;
			if (minCnt > cnt) minCnt = cnt;
		}

		/* Full merge */
		long minCard = Long.parseLong(getMinCardinality().getValue());
		if (minCard > minCnt)
		{
			doPrint(Print.OPERATOR, "Starting FullMerge ...");
			cutPoint = mergeUpToMinCardinality(cutPoint, minCard);
		}

		/* report the resulting groups */
		doPrint(Print.OPERATOR, "New groups:");
		for (int i = 0; i < cutPoint.size(); i++)
		{
			doPrint(Print.OPERATOR, ((myData) cutPoint.get(i)).Text());
		}

		String[] gr = new String[cutPoint.size()];
		for (int i = 0; i < cutPoint.size(); i++)
			gr[i] = ((myData) cutPoint.get(i)).value;
		return gr;
	}

	/**
	 * Parses the count rows ("count,class,value") and builds one entry per
	 * distinct target value, holding its total count and the bit mask of the
	 * classes that reach the maximal count for this value.
	 * Rows belonging to the same value are expected to be adjacent
	 * (NOTE(review): presumably guaranteed by getCountOfElements - confirm).
	 *
	 * @param elements the rows to parse, must not be empty
	 * @param clss maps each class value to its 1-based index (as Long)
	 * @return the entries, sorted by label and then by value
	 */
	private TreeMap collectValueStatistics(String[] elements, TreeMap clss) throws M4CompilerError
	{
		TreeMap cutPointSort = new TreeMap();
		myData val = null;

		for (int i = 0; i < elements.length; i++)
		{
			StringTokenizer t = new StringTokenizer(elements[i], ",");
			long cnt = Long.parseLong(t.nextToken());
			String s = t.nextToken();
			String v = "'" + t.nextToken() + "'";

			if (val == null || !v.equals(val.value))
			{
				// A new target value starts: the previous entry is complete
				// and can be stored under its final label.
				if (val != null)
					cutPointSort.put(new cmpMyData(val), val);
				val = newValueEntry(v);
			}

			val.count = val.count + cnt;
			if (cnt == val.maxfreq)
			{
				// another class ties with the current maximum
				val.label = val.label | classBit(clss, s);
				val.maxgrps++;
			}
			else if (cnt > val.maxfreq)
			{
				// this class is the new single maximum
				val.maxfreq = cnt;
				val.label = classBit(clss, s);
				val.maxgrps = 1;
			}
		}

		cutPointSort.put(new cmpMyData(val), val);
		return cutPointSort;
	}

	/** Creates an empty entry for the given (already quoted) target value. */
	private myData newValueEntry(String quotedValue)
	{
		myData entry = new myData();
		entry.value = quotedValue;
		entry.maxfreq = 0;
		entry.label = 0;
		entry.count = 0;
		entry.sortBy = SORT_BY_LABEL;
		return entry;
	}

	/**
	 * Returns the label bit of a class value: bit (index - 1) of a long.
	 * The shift is done on a long so that indexes above 31 do not overflow;
	 * with more than 64 classes the bits wrap around and labels can collide.
	 *
	 * @throws M4CompilerError if the class value has no index in clss
	 */
	private long classBit(TreeMap clss, String classValue) throws M4CompilerError
	{
		Long index = (Long) clss.get(classValue);
		if (index == null)
		{
			throw new M4CompilerError("Operator " + this.getName()
				+ ": unknown class value '" + classValue + "' in the value counts.");
		}
		return 1L << (index.longValue() - 1);
	}

	/**
	 * Merges neighbouring entries that carry the same label.
	 *
	 * @param sortedByLabel the entries, sorted by label
	 * @return the list of merged entries in the same order
	 */
	private ArrayList mergeNeighboursWithEqualLabel(TreeMap sortedByLabel)
	{
		ArrayList cutPoint = new ArrayList(sortedByLabel.values());

		int i = 0;
		while (i < cutPoint.size() - 1)
		{
			myData x1 = (myData) cutPoint.get(i);
			myData x2 = (myData) cutPoint.get(i + 1);

			/* merge two neighbouring intervals, when they have the same label */
			if (x1.label == x2.label)
			{
				x1.maxfreq = x1.maxfreq + x2.maxfreq;
				x1.count = x1.count + x2.count;
				x1.value = x1.value + ", " + x2.value;
				// stay at index i: the merged entry is compared with its new neighbour
				cutPoint.remove(i + 1);
			}
			else
				i++;
		}
		return cutPoint;
	}

	/**
	 * Merges groups until the smallest one has at least minCard items.
	 * First, every too small group with several maximal classes is attached
	 * to the smallest single-class group sharing one of its classes (or
	 * treated as a single-class group if there is none). If the smallest
	 * single-class group is still too small, all groups are pooled and the
	 * two smallest ones are merged repeatedly.
	 *
	 * @param cutPoint the groups after merging equal labels
	 * @param minCard the minimal number of items a group should have
	 * @return the merged groups
	 */
	private ArrayList mergeUpToMinCardinality(ArrayList cutPoint, long minCard)
	{
		TreeMap singleMax = new TreeMap();
		TreeMap multiMax = new TreeMap();
		for (int i = 0; i < cutPoint.size(); i++)
		{
			myData tmp = (myData) cutPoint.get(i);
			tmp.sortBy = SORT_BY_COUNT;
			if (tmp.maxgrps == 1)
				singleMax.put(new cmpMyData(tmp), tmp);
			else
				multiMax.put(new cmpMyData(tmp), tmp);
		}

		// The emptiness check is required: firstKey() throws on an empty map.
		while (!multiMax.isEmpty() && smallestEntry(multiMax).count < minCard)
		{
			myData tmp = (myData) multiMax.remove(multiMax.firstKey());

			myData attach = null;
			ArrayList tmplst = new ArrayList(singleMax.values());
			for (int i = 0; i < tmplst.size(); i++)
			{
				myData candidate = (myData) tmplst.get(i);
				// "!= 0" instead of "> 0": the highest bit makes the mask negative
				if ((tmp.label & candidate.label) != 0)
				{
					attach = candidate;
					break;
				}
			}

			if (attach == null)
			{
				singleMax.put(new cmpMyData(tmp), tmp);
			}
			else
			{
				// remove before changing the count, it is part of the sort key
				singleMax.remove(new cmpMyData(attach));
				attach.label = attach.label & tmp.label;
				attach.maxfreq = attach.maxfreq + tmp.maxfreq;
				attach.count = attach.count + tmp.count;
				attach.value = attach.value + ", " + tmp.value;
				singleMax.put(new cmpMyData(attach), attach);
			}
		}

		if (!singleMax.isEmpty() && smallestEntry(singleMax).count < minCard)
		{
			while (!multiMax.isEmpty())
			{
				cmpMyData x = (cmpMyData) multiMax.firstKey();
				multiMax.remove(x);
				singleMax.put(x, x.value);
			}

			// Stop when a single group is left, even if it is still too
			// small: there is nothing more to merge it with.
			while (singleMax.size() > 1 && smallestEntry(singleMax).count < minCard)
			{
				myData x1 = (myData) singleMax.remove(singleMax.firstKey());
				myData x2 = (myData) singleMax.remove(singleMax.firstKey());
				x1.count = x1.count + x2.count;
				x1.value = x1.value + ", " + x2.value;
				singleMax.put(new cmpMyData(x1), x1);
			}
			return new ArrayList(singleMax.values());
		}

		ArrayList merged = new ArrayList(singleMax.values());
		merged.addAll(multiMax.values());
		return merged;
	}

	/** Returns the entry stored under the first key of a non-empty map. */
	private myData smallestEntry(TreeMap sorted)
	{
		return (myData) sorted.get(sorted.firstKey());
	}

	/** Compares two long values; returns -1, 0 or 1. */
	private static int compareLongs(long a, long b)
	{
		return (a < b) ? -1 : ((a > b) ? 1 : 0);
	}

	/**
	 * Statistics of one group of target attribute values. The ordering and
	 * equality of two entries depend on the current {@link #sortBy} mode.
	 */
	public class myData
	{
		public final int SORT_BY_LABEL = 1;
		public final int SORT_BY_COUNT = 2;

		/** The quoted value(s) of this group, separated by ", ". */
		public String value;
		/** Bit mask of the classes with maximal frequency in this group. */
		public long label;
		/** The maximal per-class frequency (summed up when groups are merged). */
		public long maxfreq;
		/** Total number of items in this group. */
		public long count;
		/** Either SORT_BY_LABEL or SORT_BY_COUNT. */
		public int sortBy;
		/** Number of classes that share the maximal frequency. */
		public int maxgrps;

		/** @return a short description: the values and the item count */
		public String Text()
		{
			return value + " - " + count + " items.";
		}

		/**
		 * Compares this entry with another myData by label or by count,
		 * depending on this entry's sort mode; ties are broken by value.
		 * With an unknown sort mode this entry is considered greater.
		 */
		public int compareTo(Object Obj)
		{
			myData other = (myData) Obj;
			int ret = 1;
			switch (sortBy)
			{
				case SORT_BY_LABEL:
					ret = compareLongs(label, other.label);
					break;
				case SORT_BY_COUNT:
					ret = compareLongs(count, other.count);
					break;
			}
			if (ret == 0)
				ret = value.compareTo(other.value);
			return ret;
		}

		/**
		 * Two entries are equal if they have the same value and the same
		 * label or count, depending on this entry's sort mode. Objects of
		 * other types (and null) are never equal to an entry.
		 */
		public boolean equals(Object Obj)
		{
			if (this == Obj)
				return true;
			if (!(Obj instanceof myData))
				return false;
			myData other = (myData) Obj;
			boolean ret = false;
			switch (sortBy)
			{
				case SORT_BY_LABEL:
					ret = (label == other.label);
					break;
				case SORT_BY_COUNT:
					ret = (count == other.count);
					break;
			}
			if (!ret)
				return false;
			return (value == null) ? (other.value == null) : value.equals(other.value);
		}

		/** Based on the value only, which is part of every equality check. */
		public int hashCode()
		{
			return (value == null) ? 0 : value.hashCode();
		}
	}

	/**
	 * Comparable wrapper that allows myData entries to be used as keys of a
	 * sorted map; comparison and equality are delegated to the wrapped entry.
	 */
	public class cmpMyData implements Comparable
	{
		public myData value;

		public cmpMyData(myData val){
			value = val;
		}

		public int compareTo(Object Obj)
		{
			return value.compareTo(((cmpMyData)Obj).value);
		}

		public boolean equals(Object Obj)
		{
			if (this == Obj)
				return true;
			if (!(Obj instanceof cmpMyData))
				return false;
			myData otherValue = ((cmpMyData) Obj).value;
			return (value == null) ? (otherValue == null) : value.equals(otherValue);
		}

		public int hashCode()
		{
			return (value == null) ? 0 : value.hashCode();
		}
	}
}
/*
 * $Log: ErrorBasedGroupingGivenMinCardinality.java,v $
 * Revision 1.6  2006/04/11 14:10:18  euler
 * Updated license text.
 *
 * Revision 1.5  2006/04/06 16:31:18  euler
 * Prepended license remark.
 *
 * Revision 1.4  2006/03/30 16:07:14  scholz
 * fixed author tags for release
 *
 * Revision 1.3  2006/03/23 11:13:46  euler
 * Improved exception handling.
 *
 * Revision 1.2  2006/01/06 16:27:56  euler
 * Bugfixes
 *
 * Revision 1.1  2006/01/03 09:54:35  hakenjos
 * Initial version!
 *
 */
