【九章算法强化班】课程笔记2——Trie树

Trie树

leetcode相关题目

Trie字典树

源自单词:retrieve

Trie树,即字典树/前缀树,又称单词查找树或键树,是一种树形结构。典型应用是用于统计和排序大量的字符串(但不仅限于字符串),所以经常被搜索引擎系统用于文本词频统计。它的优点是最大限度地减少无谓的字符串比较,查询效率比较高。

假设有[b,abc,abd,bcd,abcd,efg,hii ]这6个单词 , 查找abc 在不在字典里面

将单词插入Trie树,只在跟之前的字符串出现分歧时分裂,对最后一个字母做标记,这样查找的时候,根据最后一个字母的标记,即可判断出该单词是否出现过。

这里有一个巧妙的操作,可以让插入和查询操作同时完成,所以查询的时间复杂度简化为所要查询的单词的长度,即

它有3个基本性质:

  1. 根节点不包含字符,除根节点外每一个节点都只包含一个字符。
  2. 从根节点到某一节点,路径上经过的字符连接起来,为该节点对应的字符串。
  3. 每个节点的所有子节点包含的字符都不相同。

hash和trie的比较

hash_table TIRE树
查找时间复杂度 O(1) O(1)
空间复杂度 优于hash_table

对于a,aa,aaa,aaaa的情况

hash trie
存储 10个a 5个a节点
可用操作 有/无/查询 有/无/前缀查询
1行 75~100行

所以选择hash原因是代码量小, 但是涉及到前缀查询的时候, 考虑trie树

什么时候更适合用trie树

一个一个字符串遍历的时候。

需要节约空间

查找前缀

Trie模板

有两种方式来实现Trie树,对于存储char类型的Trie树,因为只有26个字母,故可采用映射的方式将字母映射到长度为26的数组上,而下标就是字母。

而对于其他类型,比如int数目未知,可以考虑用hashmap的方式来实现。

1. hashmap实现Trie树

c++版:

#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <algorithm>
#include <vector>
#include <string>
#include <queue>
#include <math.h>
#include <unordered_map>
#include <queue>
#include <functional>
#include <unordered_set>
using namespace std;
class TrieNode {
public:
char ch;
bool istail;
unordered_map<char, TrieNode*>* childern;
TrieNode() {
childern = new unordered_map<char, TrieNode*>();
//childern = NULL;
istail = false;
}
TrieNode(char c) {
ch = c;
childern = new unordered_map<char, TrieNode*>();
//childern = NULL;
istail = false;
}
};
class Trie {
private:
TrieNode* root;
public:
/** Initialize your data structure here. */
Trie() {
root = new TrieNode();
}
/** Inserts a word into the trie. */
void insert(string word) {
TrieNode* node = this->root;
for (int i = 0; i < word.size(); i++) {
char chtemp = word[i];
unordered_map<char, TrieNode*>* childrenmap = node->childern;
if (!(*childrenmap).empty()) {
unordered_map<char, TrieNode*>::iterator iter;
iter = (*childrenmap).find(chtemp);
if (iter != (*childrenmap).end()) {//包含此字母
node = iter->second;
}
else {
TrieNode* newnode = new TrieNode(chtemp);
(*childrenmap).insert(make_pair(chtemp, newnode));
node = newnode;
}
}
else {
unordered_map<char, TrieNode*> newmap;
TrieNode *newnode = new TrieNode(chtemp);
(*childrenmap).insert(make_pair(chtemp, newnode));
node = newnode;
}
}
node->istail = true;
}
/** Returns if the word is in the trie. */
bool search(string word) {
if (word.size() == 0) {
return false;
}
TrieNode* node = root;
for (int i = 0; i < word.size(); i++) {
char chtemp = word[i];
unordered_map<char, TrieNode*>* childrenmap = node->childern;
if (!(*childrenmap).empty()) {
unordered_map<char, TrieNode*>::iterator iter;
iter = (*childrenmap).find(chtemp);
if (iter != (*childrenmap).end()) {//包含此字母
node = iter->second;
}
else {//不包含此字母
return false;
}
}
else {//children为空
return false;
}
}
if (node->ch == word[word.size() - 1] && node->istail) {
return true;
}
else return false;
}
/** Returns if there is any word in the trie that starts with the given prefix. */
bool startsWith(string prefix) {
TrieNode* node = root;
for (int i = 0; i < prefix.size(); i++) {
char chtemp = prefix[i];
unordered_map<char, TrieNode*>* childrenmap = node->childern;
if (!(*childrenmap).empty()) {
unordered_map<char, TrieNode*>::iterator iter;
iter = (*childrenmap).find(chtemp);
if (iter != (*childrenmap).end()) {//包含此字母
node = iter->second;
}
else {//不包含此字母
return false;
}
}
else {//children空
return false;
}
}
return true;
}
};

java版本:

class TrieNode{
HashMap<Character,TrieNode> children;
boolean istail;
TrieNode(){
children = new HashMap<>();
this.istail=false;
}
};
class Trie{
TrieNode root;
Trie(){
root = new TrieNode();
}
public void insert(String word){
TrieNode node = this.root;
for(int i=0;i <word.length();i++) {
char chtemp = word.charAt(i);
if (node.children.containsKey(chtemp)) {//已经包含此字母
} else {//不包含此字母
node.children.put(chtemp, new TrieNode());
}
node = node.children.get(chtemp);
}
node.istail=true;
}
public boolean search(String word){
if(word.isEmpty()){
return false;
}
TrieNode node = this.root;
for(int i = 0 ; i < word.length();i++){
char chtemp = word.charAt(i);
if(node.children.containsKey(chtemp)) {//包含此字母
node = node.children.get(chtemp);
}
else{
return false;
}
}
if(node.istail)
return true;
else
return false;
}
public boolean startsWith(String prefix){
if (prefix.isEmpty()){
return false;
}
TrieNode node = this.root;
for(int i = 0 ; i < prefix.length();i++){
if(node.children.containsKey(prefix.charAt(i))){
node = node.children.get(prefix.charAt(i));
}
else{
return false;
}
}
return true;
}
};

2. 数组实现Trie树

对于char类型的数据,只有26个字母,所以,可以用一个长度为26的数组存储后续的节点,数组index对应的就是字母的顺序:a~z

class Trie {
class TrieNode{
TrieNode[] children;
boolean istail;//记录是否是某个单词的末尾
public TrieNode(){
this.children = new TrieNode[26];
this.istail=false;
}
}
public TrieNode root;
public Trie(){
this.root = new TrieNode();
}
public void insert(String word){
TrieNode node = this.root;
for(int i = 0 ;i < word.length();i++){
if(node.children[word.charAt(i)-'a'] == null){//如果有这个字母
node.children[word.charAt(i)-'a'] = new TrieNode();
}
node = node.children[word.charAt(i)-'a'];
}
node.istail=true;
}
public boolean search(String word){
TrieNode node = this.root;
for(int i = 0 ;i < word.length();i++){
if(node.children[word.charAt(i)-'a']==null){//如果没有这个字母
return false;
}
else {
node = node.children[word.charAt(i)-'a'];
}
}
if(node.istail) return true;
else return false;
}
public boolean startsWith(String prefix){
TrieNode node = this.root;
for(int i = 0 ;i < prefix.length();i++){
if(node.children[prefix.charAt(i)-'a']==null){//如果没有这个字母
return false;
}
else {
node = node.children[prefix.charAt(i)-'a'];
}
}
return true;
}
}

leetcode相关题目

Add and Search Word - Data structure design

题目

Design a data structure that supports the following two operations:

> void addWord(word)
> bool search(word)
>
>

>

search(word) can search a literal word or a regular expression string containing only letters a-z or .. A . means it can represent any one letter.

For example:

> addWord("bad")
> addWord("dad")
> addWord("mad")
> search("pad") -> false
> search("bad") -> true
> search(".ad") -> true
> search("b..") -> true
>

思路

利用Tire树,搜索时遇到”.”对所有节点进行DFS

代码

class WordDictionary {
class TrieNode{
TrieNode[] children;
boolean istail;//记录是否是某个单词的末尾
public TrieNode(){
this.children = new TrieNode[26];
this.istail=false;
}
}
public TrieNode root;
/** Initialize your data structure here. */
public WordDictionary() {
this.root = new TrieNode();
}
/** Adds a word into the data structure. */
public void addWord(String word) {
TrieNode node = this.root;
for(int i = 0 ;i < word.length();i++){
if(node.children[word.charAt(i)-'a'] == null){//如果有这个字母
node.children[word.charAt(i)-'a'] = new TrieNode();
}
node = node.children[word.charAt(i)-'a'];
}
node.istail=true;
}
public boolean searchHelper(String word,int startIdx,TrieNode node){
for(int i = startIdx ;i < word.length();i++){
if(word.charAt(i) == '.'){
for(int j = 0; j < 26;j++) {
if (node.children[j] != null) {
//TrieNode nodetemp = node.children[j];
boolean has = searchHelper(word, i + 1, node.children[j]);
if (has) return true;
else continue;
}
}
return false;
}
else if(node.children[word.charAt(i)-'a']==null){//如果没有这个字母
return false;
}
else {
node = node.children[word.charAt(i)-'a'];
}
}
if(node.istail) return true;
else return false;
}
/** Returns if the word is in the data structure. A word could contain the dot character '.' to represent any one letter. */
public boolean search(String word) {
return searchHelper(word,0,this.root);
}
}
/**
* Your WordDictionary object will be instantiated and called as such:
* WordDictionary obj = new WordDictionary();
* obj.addWord(word);
* boolean param_2 = obj.search(word);
*/

Map Sum Pairs

题目

Implement a MapSum class with insert, and sum methods.

For the method insert, you’ll be given a pair of (string, integer). The string represents the key and the integer represents the value. If the key already existed, then the original key-value pair will be overridden to the new one.

For the method sum, you’ll be given a string representing the prefix, and you need to return the sum of all the pairs’ value whose key starts with the prefix.

Example 1:

> Input: insert("apple", 3), Output: Null
> Input: sum("ap"), Output: 3
> Input: insert("app", 2), Output: Null
> Input: sum("ap"), Output: 5
>

思路

建立Trie树,将原来的标记是否为单词结尾的bool型属性改为int型权重属性,单词结尾的字母值为该单词的权重值,其余字母权重设为0,找到前缀所在分支之后,DFS该前缀词下面的所有节点,累加权重值。

代码

class MapSum {
class TrieNode{
TrieNode[] children;
int weight;//记录是否是某个单词的末尾
public TrieNode(){
this.children = new TrieNode[26];
this.weight = 0;
}
}
public TrieNode root;
int res = 0;
/** Initialize your data structure here. */
public MapSum() {
this.root = new TrieNode();
}
public void insert(String key, int val) {
TrieNode node = this.root;
for(int i = 0 ;i < key.length();i++){
if(node.children[key.charAt(i)-'a'] == null){//如果有这个字母
node.children[key.charAt(i)-'a'] = new TrieNode();
}
node = node.children[key.charAt(i)-'a'];
}
node.weight=val;
}
public void dfs(TrieNode node){
res = res+node.weight;
for(int i = 0; i < 26;i++){
if(node.children[i]!=null){
dfs(node.children[i]);
}
}
}
public int sum(String prefix) {
TrieNode node = this.root;
for(int i = 0 ;i < prefix.length();i++) {
if (node.children[prefix.charAt(i) - 'a'] == null) {//如果没有这个字母
return 0;
} else {
node = node.children[prefix.charAt(i) - 'a'];
}
}
res=0;
dfs(node);
return res;
}
}
/**
* Your MapSum object will be instantiated and called as such:
* MapSum obj = new MapSum();
* obj.insert(key,val);
* int param_2 = obj.sum(prefix);
*/

Word Search II

题目

Given a 2D board and a list of words from the dictionary, find all words in the board.

Each word must be constructed from letters of sequentially adjacent cell, where “adjacent” cells are those horizontally or vertically neighboring. The same letter cell may not be used more than once in a word.

For example,
Given words = ["oath","pea","eat","rain"] and board =

> [
> ['o','a','a','n'],
> ['e','t','a','e'],
> ['i','h','k','r'],
> ['i','f','l','v']
> ]
>
>

>

Return

> ["eat","oath"]
>

分析

Word Search 1要求我们返回某一个单词是否在矩阵中,Word Search 2作为升级版。给出了一串单词,搜索是否在矩阵中,如果一个一个搜索效率会很低,尤其是在要搜索的单词序列中有大量相同的前缀时,所以,考虑将搜索的单词序列构建Trie树,然后再在字母矩阵中用DFS的方式搜索

然而想到了思路,要想完整地写出这道题,也十分艰难。

这里有几个需要注意的点:

  1. 要将能够搜索到的单词加入最终的结果表中,可以适当修改Trie的结构,在叶子节点存储该条路径对应的word,方便后续找到路径之后将该单词加入结果表

  2. 已经用过的字母不能用第二次,所以要对字母矩阵中遍历过的字母做标记,DFS搜索结束后要恢复标记,后面还可以继续使用。这里我一开始用的方法是额外建立一个boolean型矩阵进行存储,看了大神的代码发现可以直接在原字母矩阵中进行标记即可。

  3. 在矩阵中某点周围寻找下一个字母是否存在时,我一开始采用的方式是:

    在Trie树中遍历下一层node中的字母然后再去字母矩阵中某点周围四个点搜索,这样遍历下一层node判断是否还有字母,每次需要遍历26个字母,看了大神的代码有更好的方式:

    从矩阵当前点周围的四个点入手,获取周围四个点的字母(实际上最多是三个,因为至少已经有一个被访问了),然后再去node中直接获取下层节是否存在该字母即可。

代码

class Solution {
class TrieNode{
TrieNode[] children;
//boolean istail;//记录是否是某个单词的末尾
String word;
public TrieNode(){
this.children = new TrieNode[26];
//this.istail=false;
this.word = null;
}
}
public TrieNode root = new TrieNode();
public void insert(String word){
TrieNode node = this.root;
for(int i = 0 ;i < word.length();i++){
if(node.children[word.charAt(i)-'a'] == null){//如果有这个字母
node.children[word.charAt(i)-'a'] = new TrieNode();
}
node = node.children[word.charAt(i)-'a'];
}
node.word = word;
//node.istail=true;
}
public void searchBoard(char[][] board, TrieNode node,int x_idx,int y_idx,List<String> result){
char ch = board[x_idx][y_idx];
if(ch == '#' || node.children[ch-'a'] == null){
return;
}
node = node.children[ch-'a'];
if(node.word != null){
result.add(node.word);
node.word = null;
}
board[x_idx][y_idx] = '#';
int[] x_delta = {0,0,1,-1};
int[] y_delta = {1,-1,0,0};
for(int i = 0 ;i < 4;i++){
int x = x_idx+x_delta[i];
int y = y_idx+y_delta[i];
if(x>=0 && x< board.length && y >= 0 && y < board[0].length){
searchBoard(board,node,x,y,result);
}
}
board[x_idx][y_idx] = ch;
}
public List<String> findWords(char[][] board, String[] words) {
List<String> result = new ArrayList<String>();
for(int i = 0; i < words.length;i++){
insert(words[i]);
}
for(int j = 0;j < board.length;j++){
for(int k = 0; k < board[0].length;k++){
searchBoard(board,root,j,k,result);
}
}
return result;
}
}