From 13243b7d59ccb89dbd12fdf50b6eb56e16b07f26 Mon Sep 17 00:00:00 2001
From: davidely <davidely@localhost>
Date: Wed, 17 Jan 2007 04:28:07 +0000
Subject: [PATCH] Fixes for several small SearchFilter and Attribute matching issues (730, 695, 688, 689, 693). This also includes tests for the SearchFilter class. I've also eliminated a race condition from the operation test cases.
---
opends/src/server/org/opends/server/schema/DoubleMetaphoneApproximateMatchingRule.java | 141 +++++++++++++++++++++++++---------------------
1 files changed, 76 insertions(+), 65 deletions(-)
diff --git a/opends/src/server/org/opends/server/schema/DoubleMetaphoneApproximateMatchingRule.java b/opends/src/server/org/opends/server/schema/DoubleMetaphoneApproximateMatchingRule.java
index 50acb91..bdc1240 100644
--- a/opends/src/server/org/opends/server/schema/DoubleMetaphoneApproximateMatchingRule.java
+++ b/opends/src/server/org/opends/server/schema/DoubleMetaphoneApproximateMatchingRule.java
@@ -276,7 +276,7 @@
// neither an 'E' nor an 'I' except in "BACHER" and "MACHER".
if ((pos > 1) &&
(! isVowel(posMinusTwo = valueString.charAt(pos-2))) &&
- hasSubstring(valueString, pos-1, pos+2, "ACH") &&
+ hasSubstring(valueString, pos-1, "ACH") &&
((posPlusTwo = valueString.charAt(pos+2)) != 'I') &&
((posPlusTwo != 'E') ||
((valueString.charAt(pos+3) == 'R') &&
@@ -289,7 +289,7 @@
// Check for a special case of "caesar", which will be maped to 'S'.
- if ((pos == 0) && hasSubstring(valueString, pos+1, pos+5, "AESAR"))
+ if ((pos == 0) && hasSubstring(valueString, pos+1, "AESAR"))
{
metaphone.append("S");
pos += 2;
@@ -301,7 +301,7 @@
if ((posPlusOne = valueString.charAt(pos+1)) == 'H')
{
// Check for "chia" as in "chianti" and map to 'K'.
- if (hasSubstring(valueString, pos+2, pos+4, "IA"))
+ if (hasSubstring(valueString, pos+2, "IA"))
{
metaphone.append("K");
pos += 2;
@@ -309,7 +309,7 @@
}
// Check for "chae" as in "michael" and map to 'K'.
- if (hasSubstring(valueString, pos+2, pos+4, "AE"))
+ if (hasSubstring(valueString, pos+2, "AE"))
{
metaphone.append("K");
pos += 2;
@@ -318,13 +318,13 @@
// Check for a Greek root at the beginning of the value like
// chemistry or chorus and map to 'K'.
- if ((pos == 0) && (! hasSubstring(valueString, 2, 5, "ORE")) &&
- (hasSubstring(valueString, 2, 6, "ARAC") ||
- hasSubstring(valueString, 2, 6, "ARIS") ||
- hasSubstring(valueString, 2, 4, "OR") ||
- hasSubstring(valueString, 2, 4, "YM") ||
- hasSubstring(valueString, 2, 4, "IA") ||
- hasSubstring(valueString, 2, 4, "EM")))
+ if ((pos == 0) && (! hasSubstring(valueString, 2, "ORE")) &&
+ (hasSubstring(valueString, 2, "ARAC") ||
+ hasSubstring(valueString, 2, "ARIS") ||
+ hasSubstring(valueString, 2, "OR") ||
+ hasSubstring(valueString, 2, "YM") ||
+ hasSubstring(valueString, 2, "IA") ||
+ hasSubstring(valueString, 2, "EM")))
{
metaphone.append("K");
pos += 2;
@@ -335,9 +335,9 @@
// Check for "CH" values that produce a "KH" sound that will be
// mapped to 'K'.
if (isGermanic(valueString) ||
- hasSubstring(valueString, pos-2, pos+4, "ORCHES") ||
- hasSubstring(valueString, pos-2, pos+4, "ARCHIT") ||
- hasSubstring(valueString, pos-2, pos+4, "ORCHID") ||
+ hasSubstring(valueString, pos-2, "ORCHES") ||
+ hasSubstring(valueString, pos-2, "ARCHIT") ||
+ hasSubstring(valueString, pos-2, "ORCHID") ||
((posPlusTwo = valueString.charAt(pos+2)) == 'T') ||
(posPlusTwo == 'S') ||
(((pos == 0) ||
@@ -359,7 +359,7 @@
// All other "CH" values.
if (pos > 0)
{
- if (hasSubstring(valueString, 0, 2, "MC"))
+ if (hasSubstring(valueString, 0, "MC"))
{
metaphone.append("K");
}
@@ -380,7 +380,7 @@
// Check for "CZ" as in "czerny" but not "wicz" and map to 'S'.
if ((posPlusOne == 'Z') &&
- (! hasSubstring(valueString, pos-2, pos, "WI")))
+ (! hasSubstring(valueString, pos-2, "WI")))
{
metaphone.append("S");
pos += 2;
@@ -406,8 +406,8 @@
(! ((posPlusTwo == 'H') && valueString.charAt(pos+3) == 'U')))
{
if (((pos == 1) && (valueString.charAt(pos-1) == 'A')) ||
- hasSubstring(valueString, pos-1, pos+3, "UCCEE") ||
- hasSubstring(valueString, pos-1, pos+3, "UCCES"))
+ hasSubstring(valueString, pos-1, "UCCEE") ||
+ hasSubstring(valueString, pos-1, "UCCES"))
{
// Values like "accident", "accede", and "succeed".
metaphone.append("K");
@@ -619,7 +619,7 @@
}
else
{
- if ((! hasSubstring(valueString, pos+2, pos+4, "EY")) &&
+ if ((! hasSubstring(valueString, pos+2, "EY")) &&
(! isSlavoGermanic(valueString)))
{
metaphone.append("N");
@@ -666,11 +666,11 @@
(posPlusOne == 'Y')) &&
((posMinusOne = valueString.charAt(pos-1)) != 'E') &&
(posMinusOne != 'I') &&
- (! hasSubstring(valueString, 0, 6, "DANGER")) &&
- (! hasSubstring(valueString, 0, 6, "RANGER")) &&
- (! hasSubstring(valueString, 0, 6, "MANGER")) &&
- (! hasSubstring(valueString, pos-1, pos+2, "RGY")) &&
- (! hasSubstring(valueString, pos-1, pos+2, "OGY")))
+ (! hasSubstring(valueString, 0, "DANGER")) &&
+ (! hasSubstring(valueString, 0, "RANGER")) &&
+ (! hasSubstring(valueString, 0, "MANGER")) &&
+ (! hasSubstring(valueString, pos-1, "RGY")) &&
+ (! hasSubstring(valueString, pos-1, "OGY")))
{
metaphone.append("K");
pos += 2;
@@ -681,12 +681,12 @@
// Check for Italian uses like 'biaggi" and map to 'J'.
if ((posPlusOne == 'E') || (posPlusOne == 'I') ||
(posPlusOne == 'Y') ||
- hasSubstring(valueString, pos-1, pos+3, "AGGI") ||
- hasSubstring(valueString, pos-1, pos+3, "OGGI"))
+ hasSubstring(valueString, pos-1, "AGGI") ||
+ hasSubstring(valueString, pos-1, "OGGI"))
{
// Germanic uses will be mapped to 'K'.
if (isGermanic(valueString) ||
- hasSubstring(valueString, pos+1, pos+3, "ET"))
+ hasSubstring(valueString, pos+1, "ET"))
{
metaphone.append("K");
}
@@ -732,14 +732,14 @@
case 'J':
// Take care of obvious Spanish uses that should map to 'H'.
- if (hasSubstring(valueString, 0, 4, "SAN "))
+ if (hasSubstring(valueString, 0, "SAN "))
{
metaphone.append("H");
pos++;
break;
}
- if (hasSubstring(valueString, pos, pos+4, "JOSE"))
+ if (hasSubstring(valueString, pos, "JOSE"))
{
if ((pos == 0) && (valueString.charAt(pos+4) == ' '))
{
@@ -803,10 +803,10 @@
{
pos++;
}
- else if (hasSubstring(valueString, pos-1, pos+2, "UMB"))
+ else if (hasSubstring(valueString, pos-1, "UMB"))
{
if (((pos+1) == last) ||
- hasSubstring(valueString, pos+2, pos+4, "ER"))
+ hasSubstring(valueString, pos+2, "ER"))
{
pos++;
}
@@ -868,9 +868,9 @@
case 'R':
// Ignore R at the end of French words.
if ((pos == last) && (! isSlavoGermanic(valueString)) &&
- hasSubstring(valueString, pos-2, pos, "IE") &&
- (! hasSubstring(valueString, pos-4, pos-2, "ME")) &&
- (! hasSubstring(valueString, pos-4, pos-2, "MA")))
+ hasSubstring(valueString, pos-2, "IE") &&
+ (! hasSubstring(valueString, pos-4, "ME")) &&
+ (! hasSubstring(valueString, pos-4, "MA")))
{
pos++;
break;
@@ -891,8 +891,8 @@
case 'S':
// Special cases like isle and carlysle will be silent.
- if (hasSubstring(valueString, pos-1, pos+2, "ISL") ||
- hasSubstring(valueString, pos-1, pos+2, "YSL"))
+ if (hasSubstring(valueString, pos-1, "ISL") ||
+ hasSubstring(valueString, pos-1, "YSL"))
{
pos++;
break;
@@ -900,7 +900,7 @@
// Special case of sugar mapped to 'X'.
- if (hasSubstring(valueString, pos+1, pos+5, "UGAR"))
+ if (hasSubstring(valueString, pos+1, "UGAR"))
{
metaphone.append("X");
pos++;
@@ -911,10 +911,10 @@
// SH is generally mapped to 'X', but not in Germanic cases.
if ((posPlusOne = valueString.charAt(pos+1)) == 'H')
{
- if (hasSubstring(valueString, pos+1, pos+5, "HEIM") ||
- hasSubstring(valueString, pos+1, pos+5, "HOEK") ||
- hasSubstring(valueString, pos+1, pos+5, "HOLM") ||
- hasSubstring(valueString, pos+1, pos+5, "HOLZ"))
+ if (hasSubstring(valueString, pos+1, "HEIM") ||
+ hasSubstring(valueString, pos+1, "HOEK") ||
+ hasSubstring(valueString, pos+1, "HOLM") ||
+ hasSubstring(valueString, pos+1, "HOLZ"))
{
metaphone.append("S");
}
@@ -929,8 +929,8 @@
// Italian and Armenian cases will map to "S".
- if (hasSubstring(valueString, pos+1, pos+3, "IO") ||
- hasSubstring(valueString, pos+1, pos+3, "IA"))
+ if (hasSubstring(valueString, pos+1, "IO") ||
+ hasSubstring(valueString, pos+1, "IA"))
{
metaphone.append("S");
pos += 3;
@@ -964,10 +964,10 @@
{
if ((posPlusTwo = valueString.charAt(pos+2)) == 'H')
{
- if (hasSubstring(valueString, pos+3, pos+5, "OO") ||
- hasSubstring(valueString, pos+3, pos+5, "UY") ||
- hasSubstring(valueString, pos+3, pos+5, "ED") ||
- hasSubstring(valueString, pos+3, pos+5, "EM"))
+ if (hasSubstring(valueString, pos+3, "OO") ||
+ hasSubstring(valueString, pos+3, "UY") ||
+ hasSubstring(valueString, pos+3, "ED") ||
+ hasSubstring(valueString, pos+3, "EM"))
{
metaphone.append("SK");
}
@@ -997,8 +997,8 @@
// Ignore a trailing S in French words. All others will be mapped to
// 'S'.
if (! ((pos == last) &&
- (hasSubstring(valueString, pos-2, pos, "AI") ||
- hasSubstring(valueString, pos-2, pos, "OI"))))
+ (hasSubstring(valueString, pos-2, "AI") ||
+ hasSubstring(valueString, pos-2, "OI"))))
{
metaphone.append("S");
}
@@ -1014,9 +1014,9 @@
case 'T':
// "TION", "TIA", and "TCH" will be mapped to 'X'.
- if (hasSubstring(valueString, pos, pos+4, "TION") ||
- hasSubstring(valueString, pos, pos+3, "TIA") ||
- hasSubstring(valueString, pos, pos+3, "TCH"))
+ if (hasSubstring(valueString, pos, "TION") ||
+ hasSubstring(valueString, pos, "TIA") ||
+ hasSubstring(valueString, pos, "TCH"))
{
metaphone.append("X");
pos += 3;
@@ -1030,8 +1030,8 @@
((posPlusOne == 'T') && (valueString.charAt(pos+2) == 'H')))
{
if (isGermanic(valueString) ||
- hasSubstring(valueString, pos+2, pos+4, "OM") ||
- hasSubstring(valueString, pos+2, pos+4, "AM"))
+ hasSubstring(valueString, pos+2, "OM") ||
+ hasSubstring(valueString, pos+2, "AM"))
{
metaphone.append("T");
}
@@ -1092,8 +1092,8 @@
// A Polish value like WICZ or WITZ should be mapped to TS.
- if (hasSubstring(valueString, pos+1, pos+4, "WICZ") ||
- hasSubstring(valueString, pos+1, pos+4, "WITZ"))
+ if (hasSubstring(valueString, pos+1, "WICZ") ||
+ hasSubstring(valueString, pos+1, "WITZ"))
{
metaphone.append("TS");
pos += 4;
@@ -1109,10 +1109,10 @@
case 'X':
// X maps to KS except at the end of French words.
if (! ((pos == last) &&
- (hasSubstring(valueString, pos-3, pos, "IAU") ||
- hasSubstring(valueString, pos-3, pos, "EAU") ||
- hasSubstring(valueString, pos-2, pos, "AU") ||
- hasSubstring(valueString, pos-2, pos, "OU"))))
+ (hasSubstring(valueString, pos-3, "IAU") ||
+ hasSubstring(valueString, pos-3, "EAU") ||
+ hasSubstring(valueString, pos-2, "AU") ||
+ hasSubstring(valueString, pos-2, "OU"))))
{
metaphone.append("KS");
}
@@ -1206,24 +1206,35 @@
* determination.
* @param start The position in the value at which to start the
* comparison.
- * @param end The position in the value at which to stop the
- * comparison. This character will not actually be
- * compared against the provided substring.
* @param substring The substring to compare against the specified value
* range.
*
* @return <CODE>true</CODE> if the specified portion of the value matches
* the given substring, or <CODE>false</CODE> if it does not.
*/
- private boolean hasSubstring(String value, int start, int end,
+ private boolean hasSubstring(String value, int start,
String substring)
{
assert debugEnter(CLASS_NAME, "hasSubstring", String.valueOf(value),
- String.valueOf(start), String.valueOf(end),
+ String.valueOf(start),
String.valueOf(substring));
try
{
+ // This can happen since a lot of the rules "look behind" and
+ // rightfully don't check if it's the first character
+ if (start < 0) {
+ return false;
+ }
+
+ int end = start + substring.length();
+
+ // value isn't big enough to do the comparison
+ if (end > value.length())
+ {
+ return false;
+ }
+
for (int i=0,pos=start; pos < end; i++,pos++)
{
if (value.charAt(pos) != substring.charAt(i))
--
Gitblit v1.10.0