/*
 * MiningMart Version 1.1
 * 
 * Copyright (C) 2006 Martin Scholz, Timm Euler, 
 *                    Daniel Hakenjos, Katharina Morik
 *
 * Contact: miningmart@ls8.cs.uni-dortmund.de
 *
 * A list of contributing developers (other than the copyright 
 * holders) can be found at
 * http://mmart.cs.uni-dortmund.de/downloads/download.html
 * 
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program, see the file MM_HOME/LICENSE; if not, write
 * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
 * Floor, Boston, MA 02110-1301, USA.
 */
package edu.udo.cs.miningmart.schemamatching;

/**
 *
 * A simple matcher based on the edit distance between two strings.
 * 
 * @author Timm Euler
 * @version $Id: EditDistanceMatcher.java,v 1.5 2006/09/27 15:00:03 euler Exp $
 */
public class EditDistanceMatcher extends NameAndTypeBasedSchemaMatcher {

	/**
	 * Computes the similarity of two names from their Levenshtein (edit)
	 * distance, normalised by the length of the longer name:
	 * <code>1 - distance / max(str1.length(), str2.length())</code>.
	 * Identical names yield 1.0; names that share no aligned character
	 * (distance equal to the longer length) yield 0.0.
	 * 
	 * @param str1 the first name, must not be <code>null</code>
	 * @param str2 the second name, must not be <code>null</code>
	 * @return a similarity value between 0.0 and 1.0 (both inclusive)
	 * @throws SchemaMatchException declared in the method signature; this
	 *         implementation itself does not throw it
	 * @see edu.udo.cs.miningmart.schemamatching.NameAndTypeBasedSchemaMatcher#getNameSimilarity(String, String)
	 */
	public double getNameSimilarity(String str1, String str2) throws SchemaMatchException {
		int levenshteinDistance = this.getLevenshteinDistance(str1, str2);
		if (levenshteinDistance == 0) {
			return 1d;
		}
		// A non-zero distance implies that at least one string is non-empty,
		// so the divisor below is always positive.
		int longestLength = Math.max(str1.length(), str2.length());
		// The division must be done in floating point: with int operands
		// 1 / length truncates to 0 for every length greater than 1, which
		// would make all differing names look perfectly similar.
		return 1d - ((double) levenshteinDistance / longestLength);
	}

	/**
	 * This method was implemented by Markus Wagner.
	 * It computes the Levenshtein distance between the two given
	 * Strings, i.e. the minimum number of single-character insertions,
	 * deletions and substitutions needed to turn <code>s</code> into
	 * <code>t</code>. Characters are compared as Java <code>char</code>
	 * values.
	 * 
	 * @param s the first string, must not be <code>null</code>
	 * @param t the second string, must not be <code>null</code>
	 * @return the edit distance; 0 if and only if both strings are equal
	 */
	public int getLevenshteinDistance(String s, String t) {
		int n = s.length();
		int m = t.length();

		// If one string is empty the distance is the length of the other.
		if (n == 0) {
			return m;
		}
		if (m == 0) {
			return n;
		}

		// d[i][j] holds the distance between the first i characters of s
		// and the first j characters of t.
		int[][] d = new int[n + 1][m + 1];

		// Turning a prefix into the empty string (or vice versa) costs one
		// deletion (or insertion) per character.
		for (int i = 0; i <= n; i++) {
			d[i][0] = i;
		}
		for (int j = 0; j <= m; j++) {
			d[0][j] = j;
		}

		for (int i = 1; i <= n; i++) {
			char s_i = s.charAt(i - 1);
			for (int j = 1; j <= m; j++) {
				// Substitution is free when the characters already match.
				int cost = (s_i == t.charAt(j - 1)) ? 0 : 1;

				int deletion = d[i - 1][j] + 1;
				int insertion = d[i][j - 1] + 1;
				int substitution = d[i - 1][j - 1] + cost;
				d[i][j] = Math.min(Math.min(deletion, insertion), substitution);
			}
		}

		return d[n][m];
	}
}
/*
 * $Log: EditDistanceMatcher.java,v $
 * Revision 1.5  2006/09/27 15:00:03  euler
 * New version 1.1
 *
 * Revision 1.4  2006/04/11 14:10:17  euler
 * Updated license text.
 *
 * Revision 1.3  2006/04/06 16:31:16  euler
 * Prepended license remark.
 *
 * Revision 1.2  2006/01/03 14:47:52  euler
 * New NgramMatcher, additional comments.
 *
 * Revision 1.1  2006/01/03 13:57:28  euler
 * Changed hierarchy of matchers, added Editdistance matcher.
 *
 */
