更新:
这是我的解决方案。基于我之前的解释。
import com.google.common.collect.*;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.commons.math3.util.Combinations;
import java.util.*;
/**
* Created by carlos on 2/14/15.
*/
public class PatternMatcher {
public static boolean isMatch(char[] pattern, String searchString){
return isMatch(pattern, searchString, new TreeMap<Integer, Pair<Integer, Integer>>(), Sets.newHashSet());
}
private static boolean isMatch(char[] pattern, String searchString, Map<Integer, Pair<Integer, Integer>> candidateSolution, Set<String> mappedStrings) {
List<Integer> occurrencesOfCharacterInPattern = getNextUnmappedPatternOccurrences(candidateSolution, pattern);
if(occurrencesOfCharacterInPattern.size() == 0)
return isValidSolution(candidateSolution, searchString, pattern, mappedStrings);
List<Pair<Integer, Integer>> sectionsOfUnmappedStrings = sectionsOfUnmappedStrings(searchString, candidateSolution);
if(sectionsOfUnmappedStrings.size() == 0)
return false;
String firstUnmappedString = substring(searchString, sectionsOfUnmappedStrings.get(0));
for (int substringSize = 1; substringSize <= firstUnmappedString.length(); substringSize++) {
String candidateSubstring = firstUnmappedString.substring(0, substringSize);
if(mappedStrings.contains(candidateSubstring))
continue;
List<Pair<Integer, Integer>> listOfAllOccurrencesOfSubstringInString = Lists.newArrayList();
for (int currentIndex = 0; currentIndex < sectionsOfUnmappedStrings.size(); currentIndex++) {
Pair<Integer,Integer> currentUnmappedSection = sectionsOfUnmappedStrings.get(currentIndex);
List<Pair<Integer, Integer>> occurrencesOfSubstringInString =
findAllInstancesOfSubstringInString(searchString, candidateSubstring,
currentUnmappedSection);
for(Pair<Integer,Integer> possibleAddition:occurrencesOfSubstringInString) {
listOfAllOccurrencesOfSubstringInString.add(possibleAddition);
}
}
if(listOfAllOccurrencesOfSubstringInString.size() < occurrencesOfCharacterInPattern.size())
return false;
Iterator<int []> possibleSolutionIterator =
new Combinations(listOfAllOccurrencesOfSubstringInString.size(),
occurrencesOfCharacterInPattern.size()).iterator();
iteratorLoop:
while(possibleSolutionIterator.hasNext()) {
Set<String> newMappedSets = Sets.newHashSet(mappedStrings);
newMappedSets.add(candidateSubstring);
TreeMap<Integer,Pair<Integer,Integer>> newCandidateSolution = Maps.newTreeMap();
// why doesn't Maps.newTreeMap(candidateSolution) work?
newCandidateSolution.putAll(candidateSolution);
int [] possibleSolutionIndexSet = possibleSolutionIterator.next();
for(int i = 0; i < possibleSolutionIndexSet.length; i++) {
Pair<Integer, Integer> candidatePair = listOfAllOccurrencesOfSubstringInString.get(possibleSolutionIndexSet[i]);
//if(candidateSolution.containsValue(Pair.of(0,1)) && candidateSolution.containsValue(Pair.of(9,10)) && candidateSolution.containsValue(Pair.of(18,19)) && listOfAllOccurrencesOfSubstringInString.size() == 3 && candidateSolution.size() == 3 && possibleSolutionIndexSet[0]==0 && possibleSolutionIndexSet[1] == 2){
if (makesSenseToInsert(newCandidateSolution, occurrencesOfCharacterInPattern.get(i), candidatePair))
newCandidateSolution.put(occurrencesOfCharacterInPattern.get(i), candidatePair);
else
break iteratorLoop;
}
if (isMatch(pattern, searchString, newCandidateSolution,newMappedSets))
return true;
}
}
return false;
}
private static boolean makesSenseToInsert(TreeMap<Integer, Pair<Integer, Integer>> newCandidateSolution, Integer startIndex, Pair<Integer, Integer> candidatePair) {
if(newCandidateSolution.size() == 0)
return true;
if(newCandidateSolution.floorEntry(startIndex).getValue().getRight() > candidatePair.getLeft())
return false;
Map.Entry<Integer, Pair<Integer, Integer>> ceilingEntry = newCandidateSolution.ceilingEntry(startIndex);
if(ceilingEntry !=null)
if(ceilingEntry.getValue().getLeft() < candidatePair.getRight())
return false;
return true;
}
private static boolean isValidSolution( Map<Integer, Pair<Integer, Integer>> candidateSolution,String searchString, char [] pattern, Set<String> mappedStrings){
List<Pair<Integer,Integer>> values = Lists.newArrayList(candidateSolution.values());
return areIntegersConsecutive(Lists.newArrayList(candidateSolution.keySet())) &&
arePairsConsecutive(values) &&
values.get(values.size() - 1).getRight() == searchString.length() &&
patternsAreUnique(pattern,mappedStrings);
}
private static boolean patternsAreUnique(char[] pattern, Set<String> mappedStrings) {
Set<Character> uniquePatterns = Sets.newHashSet();
for(Character character:pattern)
uniquePatterns.add(character);
return uniquePatterns.size() == mappedStrings.size();
}
private static List<Integer> getNextUnmappedPatternOccurrences(Map<Integer, Pair<Integer, Integer>> candidateSolution, char[] searchArray){
List<Integer> allMappedIndexes = Lists.newLinkedList(candidateSolution.keySet());
if(allMappedIndexes.size() == 0){
return occurrencesOfCharacterInArray(searchArray,searchArray[0]);
}
if(allMappedIndexes.size() == searchArray.length){
return Lists.newArrayList();
}
for(int i = 0; i < allMappedIndexes.size()-1; i++){
if(!areIntegersConsecutive(allMappedIndexes.get(i),allMappedIndexes.get(i+1))){
return occurrencesOfCharacterInArray(searchArray,searchArray[i+1]);
}
}
List<Integer> listOfNextUnmappedPattern = Lists.newArrayList();
listOfNextUnmappedPattern.add(allMappedIndexes.size());
return listOfNextUnmappedPattern;
}
private static String substring(String string, Pair<Integer,Integer> bounds){
try{
string.substring(bounds.getLeft(),bounds.getRight());
}catch (StringIndexOutOfBoundsException e){
System.out.println();
}
return string.substring(bounds.getLeft(),bounds.getRight());
}
private static List<Pair<Integer, Integer>> sectionsOfUnmappedStrings(String searchString, Map<Integer, Pair<Integer, Integer>> candidateSolution) {
if(candidateSolution.size() == 0) {
return Lists.newArrayList(Pair.of(0, searchString.length()));
}
List<Pair<Integer, Integer>> sectionsOfUnmappedStrings = Lists.newArrayList();
List<Pair<Integer,Integer>> allMappedPairs = Lists.newLinkedList(candidateSolution.values());
// Dont have to worry about the first index being mapped because of the way the first candidate solution is made
for(int i = 0; i < allMappedPairs.size() - 1; i++){
if(!arePairsConsecutive(allMappedPairs.get(i), allMappedPairs.get(i + 1))){
Pair<Integer,Integer> candidatePair = Pair.of(allMappedPairs.get(i).getRight(), allMappedPairs.get(i + 1).getLeft());
sectionsOfUnmappedStrings.add(candidatePair);
}
}
Pair<Integer,Integer> lastMappedPair = allMappedPairs.get(allMappedPairs.size() - 1);
if(lastMappedPair.getRight() != searchString.length()){
sectionsOfUnmappedStrings.add(Pair.of(lastMappedPair.getRight(),searchString.length()));
}
return sectionsOfUnmappedStrings;
}
public static boolean areIntegersConsecutive(List<Integer> integers){
for(int i = 0; i < integers.size() - 1; i++)
if(!areIntegersConsecutive(integers.get(i),integers.get(i+1)))
return false;
return true;
}
public static boolean areIntegersConsecutive(int left, int right){
return left == (right - 1);
}
public static boolean arePairsConsecutive(List<Pair<Integer,Integer>> pairs){
for(int i = 0; i < pairs.size() - 1; i++)
if(!arePairsConsecutive(pairs.get(i), pairs.get(i + 1)))
return false;
return true;
}
public static boolean arePairsConsecutive(Pair<Integer, Integer> left, Pair<Integer, Integer> right){
return left.getRight() == right.getLeft();
}
public static List<Integer> occurrencesOfCharacterInArray(char[] searchArray, char searchCharacter){
assert(searchArray.length>0);
List<Integer> occurrences = Lists.newLinkedList();
for(int i = 0;i<searchArray.length;i++){
if(searchArray[i] == searchCharacter)
occurrences.add(i);
}
return occurrences;
}
public static List<Pair<Integer,Integer>> findAllInstancesOfSubstringInString(String searchString, String substring, Pair<Integer,Integer> bounds){
String string = substring(searchString,bounds);
assert(StringUtils.isNoneBlank(substring,string));
int lastIndex = 0;
List<Pair<Integer,Integer>> listOfOccurrences = Lists.newLinkedList();
while(lastIndex != -1){
lastIndex = string.indexOf(substring,lastIndex);
if(lastIndex != -1){
int newIndex = lastIndex + substring.length();
listOfOccurrences.add(Pair.of(lastIndex + bounds.getLeft(), newIndex + bounds.getLeft()));
lastIndex = newIndex;
}
}
return listOfOccurrences;
}
}
它适用于提供的案例,但未经彻底测试。如果有任何错误,请告诉我。
原始回复:
假设您正在搜索的字符串可以具有任意长度的标记(您的一些示例确实如此),那么:
您想开始尝试将字符串分解为与模式匹配的部分。沿途寻找矛盾以减少您的搜索树。
当您开始处理时,您将选择字符串开头的 N 个字符。现在,去看看你是否可以在字符串的其余部分找到那个子字符串。如果你不能,那么它不可能是一个解决方案。如果可以,那么您的字符串看起来像这样
(N 个字符)<...>[(N 个字符)<...>] 其中任一 <...> 包含 0+ 个字符并且不一定是相同的子字符串。 [] 里面的内容可以重复的次数等于字符串中出现的次数(N 个字符)。
现在,您已经匹配了模式的第一个字母,您不确定模式的其余部分是否匹配,但是您基本上可以重新使用此算法(经过修改)来询问 <...> 部分细绳。
当 N = 1,2,3,4...
有意义吗?
我将举一个例子(它不涵盖所有情况,但希望能说明)注意,当我指代模式中的子字符串时,我将使用单引号,而当我指的是字符串我将使用双引号。
isMatch("ababac", "bluegreenbluegreenbluewhite")
好的,'a' 是我的第一个模式。
对于 N = 1,我得到字符串“b”
搜索字符串中的“b”在哪里?
bluegreenbluegreenbluewhite。
好的,所以此时此字符串可能与“b”匹配,即模式“a”。让我们看看我们是否可以对模式“b”做同样的事情。从逻辑上讲,“b”必须是整个字符串“luegreen”(因为它被压缩在两个连续的“a”模式之间)然后我在第二个和第三个“a”之间检查。是的,它的“绿光”。
好的,到目前为止,我已经匹配了除“c”之外的所有模式。简单的情况,它的其余部分。匹配。
这基本上是在编写一个 Perl 正则表达式解析器。 ababc = (.+)(.+)(\1)(\2)(.+)。所以你只需要将它转换为 Perl 正则表达式