# KMP(Knuth-Morris-Pratt)算法
通常用来解决单个字符串快速匹配问题(时间复杂度为)
通过初始化 fail 数组使得匹配失败时按 fail 数组回退即可无需重复匹配
扩展:
- 扩展 kmp 算法(Extended KMP)
 
# AC 自动机 (Aho–Corasick Automata)
- 多个字符串快速匹配,是 trie(字典树)和 kmp 的结合
 - 其主要流程如下:
- 选取待匹配字符串构建 trie
 - 利用 BFS 构建 fail 指针(即匹配失败的回退点)
 
 
472. 连接词 - 力扣(LeetCode) (leetcode-cn.com)
class TrieNode{  | |
public:  | |
TrieNode(int len,char value,int originIndex):fail(nullptr),value(value),len(len),originIndex(originIndex){  | |
        } | |
unordered_map<char,TrieNode*> children;  | |
int len=0;  | |
char value;  | |
int originIndex;  | |
TrieNode * fail;  | |
};  | |
class Trie{  | |
private:  | |
TrieNode * root;  | |
public:  | |
Trie():root(new TrieNode(0,' ',-1)){  | |
        } | |
void emplace(const string & s,int index){  | |
auto p = root;  | |
auto ptr = 0;  | |
while (ptr<s.size()){  | |
if (p->children.find(s[ptr])==p->children.end()){  | |
p->children[s[ptr]] = new TrieNode(-(ptr+1),s[ptr],-1);  | |
                } | |
p=p->children[s[ptr++]];  | |
            } | |
p->len = s.size();  | |
p->originIndex = index;  | |
        } | |
void buildACAutomata(){  | |
queue<TrieNode *> q;  | |
for (auto const &[key,value]:root->children){  | |
q.emplace(value);  | |
value->fail = root;  | |
            } | |
while (!q.empty()){  | |
auto cur = q.front();  | |
q.pop();  | |
for (auto const &[key,value]:cur->children){  | |
auto failto = cur->fail;  | |
while (true){  | |
if (failto==nullptr){  | |
value->fail = root;  | |
break;  | |
}else if (failto->children.find(key)!=failto->children.end() && value->value==key){  | |
value->fail = failto->children[key];  | |
break;  | |
}else{  | |
failto=failto->fail;  | |
                        } | |
};  | |
q.emplace(value);  | |
                }    | |
            } | |
        } | |
bool match(const string &s,int index){  | |
if (s.size()==0) return false;  | |
vector<int> matchSize(s.size());  | |
auto p = root;  | |
for (auto i=0;i<s.size();++i){  | |
                // 下一个节点不匹配,则需要回退 | |
if (p->children.find(s[i])==p->children.end() || p->children[s[i]]->originIndex==index){  | |
if (p->fail==nullptr){ // 无法退回,返回 false  | |
return false;  | |
}else{  | |
p=p->fail; // 回退一个节点  | |
while (p!=nullptr && (p->children.find(s[i])==p->children.end() || matchSize[i-abs(p->len)-1]==0)){  | |
                            // 判断回退后是否可匹配,否则继续回退 | |
p=p->fail;  | |
                        } | |
if (p==nullptr) return false;  | |
                    } | |
};  | |
p=p->children[s[i]];  | |
if (p==nullptr) return false;  | |
if (p->len>0){  | |
matchSize[i] = p->len;  | |
                } | |
auto fa = p->fail;  | |
while (fa!=root){  | |
if (fa->len>0 && matchSize[i-fa->len]>0){  | |
matchSize[i] = fa->len;  | |
break;  | |
                    } | |
fa=fa->fail;  | |
                } | |
            } | |
auto last = matchSize.size()-1;  | |
return matchSize[last]>0;  | |
        } | |
};  | |
class Solution {  | |
public:  | |
vector<string> findAllConcatenatedWordsInADict(vector<string>& words) {  | |
        Trie trie; | |
for (auto i=0;i<words.size();++i){  | |
trie.emplace(words[i],i);  | |
        } | |
trie.buildACAutomata();  | |
vector<string> ans;  | |
for (auto i=0;i<words.size();++i){  | |
if (trie.match(words[i],i)){  | |
ans.emplace_back(words[i]);  | |
            } | |
        } | |
return ans;  | |
    } | |
};  |