/*
 * MiningMart Version 1.0
 * 
 * Copyright (C) 2006 Martin Scholz, Timm Euler, 
 *                    Daniel Hakenjos, Katharina Morik
 *
 * Contact: miningmart@ls8.cs.uni-dortmund.de
 *
 * A list of contributing developers (other than the copyright 
 * holders) can be found at
 * http://mmart.cs.uni-dortmund.de/downloads/download.html
 * 
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program, see the file MM_HOME/LICENSE; if not, write
 * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
 * Floor, Boston, MA 02110-1301, USA.
 */
package edu.udo.cs.miningmart.schemamatching;

import java.util.Collection;
import java.util.Iterator;

import edu.udo.cs.miningmart.exception.M4Exception;
import edu.udo.cs.miningmart.m4.BaseAttribute;
import edu.udo.cs.miningmart.m4.Concept;

/**
 * Abstract superclass of schema matching methods in the
 * MiningMart environment.
 * 
 * Whenever a concrete subclass of this class is implemented,
 * you can add a name representing its matcher to the String array
 * edu.udo.cs.miningmart.m4.M4Interface.AVAILABLE_MATCHERS,
 * and add a call to an instantiation of that class to the method 
 * edu.udo.cs.miningmart.m4.core.M4InterfaceImpl.findConnection(...)
 * at the place indicated there. Then the matcher implemented by your
 * subclass is automatically available in the GUI.
 * 
 * @author Timm Euler
 * @version $Id: MmSchemaMatcher.java,v 1.9 2006/04/11 14:10:17 euler Exp $
 */
public abstract class MmSchemaMatcher {

	/** Minimum direct similarity at which two BaseAttributes are counted as matching. */
	private double thresholdForComparingAttribs = 0.5;

	/** Minimum direct similarity at which two Concepts are counted as matching. */
	private double thresholdForComparingConcepts = 0.5;

	/**
	 * @return the threshold that a direct BaseAttribute similarity must
	 *         reach (inclusive) to be counted as a match.
	 */
	public double getThresholdForComparingAttributes() {
		return this.thresholdForComparingAttribs;
	}

	/**
	 * @return the threshold that a direct Concept similarity must
	 *         reach (inclusive) to be counted as a match.
	 */
	public double getThresholdForComparingConcepts() {
		return this.thresholdForComparingConcepts;
	}
	
	/**
	 * Returns the double value that this matcher considers as
	 * the similarity between ba1 and ba2, based only on information
	 * related to these BaseAttributes.
	 *  
	 * @param ba1 One BaseAttribute
	 * @param ba2 Another BaseAttribute
	 * @return a similarity value
	 * @throws SchemaMatchException
	 */
	public abstract double getDirectSimilarity(BaseAttribute ba1, BaseAttribute ba2) throws SchemaMatchException;

	/**
	 * Returns the double value that this matcher considers as
	 * the similarity between con1 and con2, based only on the 
	 * information in these concepts but not on dependent attributes
	 * (like BaseAttributes).
	 *  
	 * @param con1 One Concept
	 * @param con2 Another Concept
	 * @return a similarity value
	 * @throws SchemaMatchException
	 */
	public abstract double getDirectSimilarity(Concept con1, Concept con2) throws SchemaMatchException;

	/**
	 * Returns the double value that this matcher considers as
	 * the similarity between ba1 and ba2, taking the similarity
	 * of their respective concepts into account as well.
	 *  
	 * @param ba1 One BaseAttribute
	 * @param ba2 Another BaseAttribute
	 * @return a similarity value
	 * @throws SchemaMatchException
	 */
	public abstract double getSimilarityInvolvingConcepts(BaseAttribute ba1, BaseAttribute ba2) throws SchemaMatchException;
	
	/**
	 * Returns the global (overall) similarity of the two Concepts,
	 * considering any indirect or direct attributes, for example their
	 * Features.
	 * <p>
	 * The value is derived from three ingredients: whether the direct
	 * concept similarity reaches the concept threshold, whether both
	 * concepts have the same number of features, and the ratio of
	 * BaseAttribute pairs whose direct similarity reaches the attribute
	 * threshold to the number of all possible pairs (n x m).
	 * <p>
	 * If at least one of the concepts has no BaseAttributes there are no
	 * pairs to compare; the ratio is then taken to be 0 (instead of the
	 * NaN that a 0/0 division would produce), so the result is 0.
	 * 
	 * @param con1 A concept
	 * @param con2 Another concept
	 * @return a similarity value
	 * @throws SchemaMatchException if a direct similarity cannot be computed
	 *         or an M4Exception occurs while reading the concepts
	 */
	public double getSimilarity(Concept con1, Concept con2) throws SchemaMatchException {
		try {
			boolean conceptNamesMatch =
				this.getDirectSimilarity(con1, con2) >= this.getThresholdForComparingConcepts();
			boolean numberOfFeaturesMatches =
				con1.getFeatures().size() == con2.getFeatures().size();

			// Fetch each attribute collection once, so that the sizes used
			// below and the elements iterated over come from the same object.
			Collection basOfCon1 = con1.getAllBaseAttributes();
			Collection basOfCon2 = con2.getAllBaseAttributes();

			// calculate the ratio of the number of features that match
			// to the number of features that can match:
			int maximumNumberOfMatches = basOfCon1.size() * basOfCon2.size();
			int countOfMatchingFeatures = this.countMatchingPairs(basOfCon1, basOfCon2);

			// paranoia check:
			if (countOfMatchingFeatures > maximumNumberOfMatches) {
				throw new M4Exception("MmSchemaMatcher.getSimilarity: found more matches than possible!");
			}

			// Guard against 0/0: without any attribute pair nothing matched.
			double ratio = 0d;
			if (maximumNumberOfMatches > 0) {
				ratio = ((double) countOfMatchingFeatures) / ((double) maximumNumberOfMatches);
			}
			
			// determine overall similarity by a simple heuristic:
			if (conceptNamesMatch && numberOfFeaturesMatches && ratio >= 0.99) {
				return 1d;
			}
			if (conceptNamesMatch || numberOfFeaturesMatches) {
				// boost the ratio by 50%, capped at 1
				return (1.5 * ratio >= 0.99 ? 1d : 1.5 * ratio);
			}
			return ratio;
		}
		catch (M4Exception m4e) {
			// Only the message is forwarded; the constructors of
			// SchemaMatchException are declared elsewhere.
			throw new SchemaMatchException("MmSchemaMatcher.getSimilarity(Concept, Concept): M4Exception caught: " + m4e.getMessage());
		}
	}

	/**
	 * Counts the pairs (a, b), with a taken from the first and b from the
	 * second collection, whose direct similarity reaches the threshold
	 * returned by {@link #getThresholdForComparingAttributes()}.
	 * 
	 * @param basOfCon1 BaseAttributes of the first concept
	 * @param basOfCon2 BaseAttributes of the second concept
	 * @return the number of matching pairs
	 * @throws SchemaMatchException if a direct similarity cannot be computed
	 */
	private int countMatchingPairs(Collection basOfCon1, Collection basOfCon2) throws SchemaMatchException {
		int count = 0;
		Iterator it1 = basOfCon1.iterator();
		while (it1.hasNext()) {
			BaseAttribute myBa1 = (BaseAttribute) it1.next();
			Iterator it2 = basOfCon2.iterator();
			while (it2.hasNext()) {
				BaseAttribute myBa2 = (BaseAttribute) it2.next();
				if (this.getDirectSimilarity(myBa1, myBa2) >= this.getThresholdForComparingAttributes()) {
					count++;
				}
			}
		}
		return count;
	}
	
	/**
	 * Returns a matrix (of instances of class
	 * edu.udo.cs.miningmart.schemamatching.MatchingResult) of n x m
	 * dimension, where n and m are the numbers of BaseAttributes of the
	 * first and the second given concept. An entry of the matrix holds the
	 * two compared BaseAttributes and the direct similarity that this
	 * matcher gives comparing them.
	 *   
	 * @param con1 A concept
	 * @param con2 Another concept
	 * @return A two-dimensional array of <code>MatchingResult</code>s (direct similarity matrix)
	 * @throws SchemaMatchException if a direct similarity cannot be computed
	 *         or an M4Exception occurs while reading the concepts
	 */
	public MatchingResult[][] getSimilarityMatrix(Concept con1, Concept con2) throws SchemaMatchException {
		try {
			// Fetch each collection once: the array dimensions and the
			// iteration below must refer to the very same elements.
			Collection basOfCon1 = con1.getAllBaseAttributes();
			Collection basOfCon2 = con2.getAllBaseAttributes();
			MatchingResult[][] result = new MatchingResult[basOfCon1.size()][basOfCon2.size()];

			int n_index = 0;
			Iterator it1 = basOfCon1.iterator();
			while (it1.hasNext()) {
				BaseAttribute myBa1 = (BaseAttribute) it1.next();
				int m_index = 0;
				Iterator it2 = basOfCon2.iterator();
				while (it2.hasNext()) {
					BaseAttribute myBa2 = (BaseAttribute) it2.next();
					MatchingResult entry = new MatchingResult();
					entry.setBaOfFirstSchema(myBa1);
					entry.setBaOfSecondSchema(myBa2);
					entry.setSimilarity(this.getDirectSimilarity(myBa1, myBa2));
					result[n_index][m_index] = entry;
					m_index++;
				}
				n_index++;
			}
			return result;
		}
		catch (M4Exception m4e) {
			// Only the message is forwarded; the constructors of
			// SchemaMatchException are declared elsewhere.
			throw new SchemaMatchException("MmSchemaMatcher.getSimilarityMatrix: M4Exception caught: " + m4e.getMessage());
		}
	}
}
/*
 * $Log: MmSchemaMatcher.java,v $
 * Revision 1.9  2006/04/11 14:10:17  euler
 * Updated license text.
 *
 * Revision 1.8  2006/04/06 16:31:16  euler
 * Prepended license remark.
 *
 * Revision 1.7  2006/03/02 16:49:59  euler
 * Many bugfixes
 *
 * Revision 1.6  2006/01/03 16:19:09  euler
 * Bugfixes
 *
 * Revision 1.5  2006/01/03 13:57:28  euler
 * Changed hierarchy of matchers, added Editdistance matcher.
 *
 * Revision 1.4  2006/01/03 13:10:49  euler
 * Added a comment.
 *
 * Revision 1.3  2006/01/03 10:55:53  euler
 * Fixed wrong imports.
 *
 * Revision 1.2  2006/01/03 10:44:20  euler
 * Bugfix
 *
 * Revision 1.1  2006/01/03 09:54:33  hakenjos
 * Initial version!
 *
 */
