前缀树

前缀树概念

前缀树实际上就是一种N叉树,也被称为字典树。从名字可以看出,字典树就是用来做类似于字典查找的一种数据结构,在搜索引擎中,可以用来做字符匹配,以及字符前缀相关的操作。

典型的前缀树结构如下所示:

数据结构定义如下所示:

1
2
3
4
5
class TreeNode {
public:
int val = -1; // 如果为-1,则表示非词根,如果是大于0的数,表明这个是词根,也就是key
vector<TreeNode*> children;
};

我们用children中的每个指针的索引来表示各个字符,如果限定为小写字母,则TreeNode[26], 如果限定为ASCII字符,则为TreeNode[128]。

有了前缀树的概念,我们看下前缀树可以有哪些操作。

前缀树的操作

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
// 下面的代码有些细节是不好的,比如public成员,以及在构造函数中new。这里我们先忽略这些问题
#include "string"
#include "vector"
class TreeNode {
public:
int val = -1; // 如果为-1,则表示非词根,如果是大于0的数,表明这个是词根,也就是key
vector<TreeNode*> children;
};

class Trie {
public:
Trie(int size) : childrenSize(size) {
root = new TreeNode();
root->children.resize(size);
}

// 获取匹配的key的位置
TreeNode* getNode(TreeNode* root, string key);

// 添加key以及相应的节点值
void put(string key, int val);
// 删除key
void remove(string key);
// 获取key对应的节点值
int get(string key);
// 判断前缀树中是否包含节点key
bool containKey(string key);

// 在前缀树中搜索query的最短前缀
string shortestPrefixOf(string query);

// 在前缀树中搜索query的最长前缀
string longestPrefixOf(string query);

// 在前缀树中搜索前缀为query的所有key
vector<string> keyWithPrefix(string prefix);

// 判断是否存在前缀为prefix的健
bool hasKeyWithPrefix(string prefix);

// 查找所有符合通配符.的key
vector<string> keyWithPattern(string pattern);

// 判断是否存在符合通配符.的key
bool hasKeyWithPattern(string pattern);

void dfs(TreeNode* node, string path, vector<string> ret);
void dfs(TreeNode* node, string path, string pattern, int i, vector<string> ret);
TreeNode* put(TreeNode* node, string key, int val, int i);
TreeNode* remove(TreeNode* node, string key, int i);
bool hasKeyWithPattern(TreeNode* node, string pattern, int i);

private:
TreeNode* root;
int childrenSize;
int trieSize;
};

前缀树操作的代码实现

这个函数用来获取指定key的末端的位置,如果不存在key,则返回nullptr

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
// 获取匹配的key的位置
TreeNode* Trie::getNode(TreeNode* root, string key) {
TreeNode* p = root;
for (int i = 0; i < key.length(); ++i) {
if (p == nullptr) {
return nullptr;
}
char c = key[i];
p = p->children[c - 'a']; // 这里我们假定只是小写字母
}
return p;
}

// 获取key对应的节点值
int Trie::get(string key) {
TreeNode* node = getNode(root, key);
if (node == nullptr || node->val == -1) {
return -1;
}
return node->val;
}

1
2
3
4
// 判断前缀树中是否包含节点key
bool Trie::containKey(string key) {
return get(key) != -1;
}
1
2
3
4
// 判断是否存在前缀为prefix的健
bool Trie::hasKeyWithPrefix(string prefix) {
return getNode(root, prefix) != nullptr;
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
// 在前缀树中搜索query的最短前缀
string Trie::shortestPrefixOf(string query) {
TreeNode* p = root;
for (int i = 0; i < query.length(); ++i) {
if (p == nullptr) {
return "";
}
if (p->val != -1) {
return query.substr(0, i);
}
p = p->children[query[i]];
}
if (p != nullptr && p->val != -1) {
return query;
}
return "";
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
// 在前缀树中搜索query的最长前缀
string Trie::longestPrefixOf(string query) {
TreeNode* p = root;
int maxLen = 0;
for (int i = 0; i < query.length(); ++i) {
if (p == nullptr) {
break;
}
if (p->val != -1) {
maxLen = i;
}
p = p->children[query[i]];
}

if (p != nullptr && p->val != -1) {
return query;
}

return query.substr(0, maxLen);
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
// 在前缀树中搜索前缀为query的所有key
vector<string> Trie::keyWithPrefix(string prefix) {
vector<string> ret;
TreeNode* curr = getNode(root, prefix);
if (curr == nullptr) {
return ret;
}
dfs(curr, prefix, ret);
return ret;
}

void Trie::dfs(TreeNode* node, string path, vector<string> ret) {
if (node == nullptr) {
return;
}
if (node->val != -1) {
ret.push_back(path);
}
for (int i = 0; i < childrenSize; ++i) {
path.push_back('a' + i);
dfs(node->children[i], path, ret);
path.pop_back();
}
}
1
2
3
4
// 判断是否存在前缀为prefix的key
bool Trie::hasKeyWithPrefix(string prefix) {
return getNode(root, prefix) != nullptr;
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
// 查找所有符合通配符.的key
vector<string> Trie::keyWithPattern(string pattern) {
vector<string> ret;
dfs(root, "", pattern, 0, ret);
}

void Trie::dfs(TreeNode* node, string path, string pattern, int i, vector<string> ret) {
if (node == nullptr) {
return;
}

if (i == pattern.length()) {
if (node->val != -1) {
ret.push_back(path);
}
}

char c = pattern[i];
if (c == '.') {
// 通配所有的节点
for (int j = 0; j < childrenSize; ++j) {
path.push_back('a' + j);
dfs(node->children[j], path, pattern, i + 1, ret);
path.pop_back();
}
}
else {
path.push_back(c);
dfs(node->children[c - 'a'], path, pattern, i + 1, ret);
path.pop_back();
}
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
// 判断是否存在符合通配符.的key
bool Trie::hasKeyWithPattern(string pattern) {
return hasKeyWithPattern(root, pattern, i);
}

bool Trie::hasKeyWithPattern(TreeNode* node, string pattern, int i) {
if (node == nullptr) {
return false;
}
if (i == pattern.length()) {
return node->val != -1; // 表明这是一个key
}

char c = pattern[i];

if (c != '.') {
return hasKeyWithPattern(node->children[c - 'a'], pattern, i + 1);
}

for (int j = 0; j < childrenSize; ++j) {
if (hasKeyWithPattern(node->children[j], pattern, i + 1)) {
return true;
}
}

return false;
}
1
2
3
4
5
6
void Trie::put(string key, int val) {
if (!containKey(key)) {
trieSize++;
}
root = put(root, key, val, 0);
}
1
2
3
4
5
6
7
8
9
10
11
12
13
TreeNode* Trie::put(TreeNode* node, string key, int val, int i) {
if (node == nullptr) {
node = new TreeNode();
}

if (i == key.length()) {
node->val = val;
return node;
}

char c = key[i];
node->children[c] = put(node->children[c], key, val, i + 1);
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
// 删除key
void Trie::remove(string key) {
if (!containKey(key)) {
return;
}
root = remove(root, key, 0);
trieSize--;
}

TreeNode* Trie::remove(TreeNode* node, string key, int i) {
if (node == nullptr) {
return nullptr;
}

if (i == key.length()) {
// 找到key对应的最后节点,设置为-1
node->val = -1;
}
else {
char c = key[i];
node->children[c] = remove(node->children[c], key, i + 1);
}

if (node->val != -1) {
// 不等于-1表示这有可能是其他的key的最后一个节点
return node;
}

for (int i = 0; i < childrenSize; ++i) {
if (node->children[i] != nullptr) {
// 表明还有后缀节点,那么就不应该被清理
return node;
}
}
// 表明节点要被清理,所以设置为nullptr
return nullptr;
}

leetcode例题

208. 实现 Trie (前缀树)

这个题目很简单,只是要求实现insert/search/startsWith三个函数,比较简单。甚至不需要用到上面的一整套逻辑

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
struct Node {
int val = -1;
vector<Node*> children;
};

class Trie {
public:
Trie() {
root = new Node();
root->children.resize(26);
}

void insert(string word) {
Node* p = root;
for (int i = 0; i < word.length(); ++i) {
if (p->children[word[i] - 'a'] == nullptr) {
p->children[word[i] - 'a'] = new Node();
p->children[word[i] - 'a']->children.resize(26);
}
p = p->children[word[i] - 'a'];
}
if (p != nullptr) {
p->val = 1;
}
}

bool search(string word) {
Node* p = root;
for (int i = 0; i < word.length(); ++i) {
if (p == nullptr || p->children[word[i] - 'a'] == nullptr) {
return false;
}
p = p->children[word[i] - 'a'];
}
if (p == nullptr) {
return false;
}
if (p->val == -1) {
return false;
}
return true;
}

bool startsWith(string prefix) {
Node* p = root;
for (int i = 0; i < prefix.length(); ++i) {
if (p == nullptr || p->children[prefix[i] - 'a'] == nullptr) {
return false;
}
p = p->children[prefix[i] - 'a'];
}
return true;
}
private:
Node* root;
};

211. 添加与搜索单词 - 数据结构设计

这个题目跟上面的内容很相似,只是多了一个通配符’.’的处理。但是leetcode这道题的官方的超时时间设置的真的是很无语。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
struct Node {
bool val = false;
vector<Node*> children;
Node() {
this->children = vector<Node *>(26,nullptr);
}
};

class WordDictionary {
public:
WordDictionary() {
root = new Node();
}

void addWord(string word) {
Node* p = root;
for (int i = 0; i < word.length(); ++i) {
if (p->children[word[i] - 'a'] == nullptr) {
p->children[word[i] - 'a'] = new Node();
}
p = p->children[word[i] - 'a'];
}
p->val = true;
}

bool search(string word) {
return search(root, word, 0);
}

bool search(Node* node, string word, int index) {

if (index == word.length()) {
return node->val;
}

char c = word[index];
if (c == '.') {
for (int j = 0; j < 26; ++j) {
Node* child = node->children[j];
if (child && search(child, word, index + 1)) {
return true;
}
}
return false;
} else {
Node* child = node->children[c - 'a'];
return child && search(child, word, index + 1);
}
}
private:
Node* root;
};

648. 单词替换

这个题目就稍微有一些复杂了,但是也不是很难,只是需要预先构造前缀树,然后分割字符串,然后从前缀树中拿到最短的前缀,然后再输出即可。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
class TrieNode {
public:
TrieNode () {
this->val = false;
this->children.resize(26, nullptr);
}
bool val;
vector<TrieNode*> children;
};

class Trie {
public:
Trie () {
root = new TrieNode();
}
void insert(string word) {
TrieNode* p = root;
for (int i = 0; i < word.length(); ++i) {
if (p->children[word[i] - 'a'] == nullptr) {
p->children[word[i] - 'a'] = new TrieNode();
}
p = p->children[word[i] - 'a'];
}
p->val = true;
}

string searchShortestPrefix(string word) {
TrieNode* p = root;
for (int i = 0; i < word.length(); ++i) {
if (p == nullptr) {
return "";
}
if (p->val) {
return word.substr(0, i);
}
p = p->children[word[i] - 'a'];
}
if (p != nullptr && p->val ) {
return word;
}
return "";
}
private:
TrieNode* root;
};

class Solution {
public:
string replaceWords(vector<string>& dictionary, string sentence) {
Trie* trie = new Trie();
for (int i = 0; i < dictionary.size(); ++i) {
trie->insert(dictionary[i]);
}

vector<string> words;
int start = 0;
for (int i = 0; i < sentence.length(); ++i) {
if (isspace(sentence[i])) {
words.push_back(sentence.substr(start, i - start));
start = i + 1;
}
}
words.push_back(sentence.substr(start, sentence.length() - start));

vector<string> wordsNew;
for (int i = 0; i < words.size(); ++i) {
string rootWord = trie->searchShortestPrefix(words[i]);
if (rootWord.empty()) {
wordsNew.push_back(words[i]);
} else {
wordsNew.push_back(rootWord);
}

}

string ret;
for (int i = 0; i < wordsNew.size(); ++i) {
ret += wordsNew[i];
ret += " ";
}
ret.pop_back();
return ret;
}
};

677. 键值映射

这个题目本质上就是构造前缀树,只不过需要再search的时候,添加一点点小变形而已。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
class TrieNode {
public:
TrieNode () {
this->val = 0;
this->children.resize(26, nullptr);
}
int val;
vector<TrieNode*> children;
};

class MapSum {
public:
MapSum() {
root = new TrieNode();
}

void insert(string key, int val) {
TrieNode* p = root;
for (int i = 0; i < key.length(); ++i) {
if (p->children[key[i] - 'a'] == nullptr) {
p->children[key[i] - 'a'] = new TrieNode();
}
p = p->children[key[i] - 'a'];
}
p->val = val;
}

TrieNode* getNode(string key) {
TrieNode* p = root;
for (int i = 0; i < key.length(); ++i) {
if (p == nullptr) {
return nullptr;
}
char c = key[i];
p = p->children[c - 'a'];
}
return p;
}

int sum(string prefix) {
TrieNode* node = getNode(prefix);
int ret = 0;
dfs(node, &ret);
return ret;
}

void dfs(TrieNode* node, int* ret) {
if (node == nullptr) {
return;
}
if (node->val != 0) {
*ret += node->val; // 这就是小变形,就这一行
}

for (int i = 0; i < 26; i++) {
if (node != nullptr && node->children[i] != nullptr) {
dfs(node->children[i], ret);
}
}
}
private:
TrieNode* root;
};
显示 Gitment 评论