# KMP(Knuth-Morris-Pratt)算法
通常用来解决单个字符串快速匹配问题(时间复杂度为)
通过初始化 fail 数组使得匹配失败时按 fail 数组回退即可无需重复匹配
扩展:
- 扩展 kmp 算法(Extended KMP)
# AC 自动机 (Aho–Corasick Automata)
- 多个字符串快速匹配,是 trie(字典树)和 kmp 的结合
- 其主要流程如下:
- 选取待匹配字符串构建 trie
- 利用 BFS 构建 fail 指针(即匹配失败的回退点)
472. 连接词 - 力扣(LeetCode) (leetcode-cn.com)
class TrieNode{ | |
public: | |
TrieNode(int len,char value,int originIndex):fail(nullptr),value(value),len(len),originIndex(originIndex){ | |
} | |
unordered_map<char,TrieNode*> children; | |
int len=0; | |
char value; | |
int originIndex; | |
TrieNode * fail; | |
}; | |
class Trie{ | |
private: | |
TrieNode * root; | |
public: | |
Trie():root(new TrieNode(0,' ',-1)){ | |
} | |
void emplace(const string & s,int index){ | |
auto p = root; | |
auto ptr = 0; | |
while (ptr<s.size()){ | |
if (p->children.find(s[ptr])==p->children.end()){ | |
p->children[s[ptr]] = new TrieNode(-(ptr+1),s[ptr],-1); | |
} | |
p=p->children[s[ptr++]]; | |
} | |
p->len = s.size(); | |
p->originIndex = index; | |
} | |
void buildACAutomata(){ | |
queue<TrieNode *> q; | |
for (auto const &[key,value]:root->children){ | |
q.emplace(value); | |
value->fail = root; | |
} | |
while (!q.empty()){ | |
auto cur = q.front(); | |
q.pop(); | |
for (auto const &[key,value]:cur->children){ | |
auto failto = cur->fail; | |
while (true){ | |
if (failto==nullptr){ | |
value->fail = root; | |
break; | |
}else if (failto->children.find(key)!=failto->children.end() && value->value==key){ | |
value->fail = failto->children[key]; | |
break; | |
}else{ | |
failto=failto->fail; | |
} | |
}; | |
q.emplace(value); | |
} | |
} | |
} | |
bool match(const string &s,int index){ | |
if (s.size()==0) return false; | |
vector<int> matchSize(s.size()); | |
auto p = root; | |
for (auto i=0;i<s.size();++i){ | |
// 下一个节点不匹配,则需要回退 | |
if (p->children.find(s[i])==p->children.end() || p->children[s[i]]->originIndex==index){ | |
if (p->fail==nullptr){ // 无法退回,返回 false | |
return false; | |
}else{ | |
p=p->fail; // 回退一个节点 | |
while (p!=nullptr && (p->children.find(s[i])==p->children.end() || matchSize[i-abs(p->len)-1]==0)){ | |
// 判断回退后是否可匹配,否则继续回退 | |
p=p->fail; | |
} | |
if (p==nullptr) return false; | |
} | |
}; | |
p=p->children[s[i]]; | |
if (p==nullptr) return false; | |
if (p->len>0){ | |
matchSize[i] = p->len; | |
} | |
auto fa = p->fail; | |
while (fa!=root){ | |
if (fa->len>0 && matchSize[i-fa->len]>0){ | |
matchSize[i] = fa->len; | |
break; | |
} | |
fa=fa->fail; | |
} | |
} | |
auto last = matchSize.size()-1; | |
return matchSize[last]>0; | |
} | |
}; | |
class Solution { | |
public: | |
vector<string> findAllConcatenatedWordsInADict(vector<string>& words) { | |
Trie trie; | |
for (auto i=0;i<words.size();++i){ | |
trie.emplace(words[i],i); | |
} | |
trie.buildACAutomata(); | |
vector<string> ans; | |
for (auto i=0;i<words.size();++i){ | |
if (trie.match(words[i],i)){ | |
ans.emplace_back(words[i]); | |
} | |
} | |
return ans; | |
} | |
}; |