Implement Trie (Prefix Tree)
class TrieNode {
// Initialize your data structure here.
char c;
HashMap<Character, TrieNode> children = new HashMap<Character, TrieNode>();
boolean isLeaf;
public TrieNode() {
}
public TrieNode(char c){
this.c = c;
}
}
public class Trie {
private TrieNode root;
public Trie() {
root = new TrieNode();
}
// Inserts a word into the trie.
public void insert(String word) {
HashMap<Character, TrieNode> children = root.children;
for(int i=0;i<word.length();i++){
TrieNode t;
char c = word.charAt(i);
if(children.containsKey(c)){
t = children.get(c);
}else{
t = new TrieNode(c);
children.put(c,t);
}
children = t.children;
if(i==word.length()-1){
t.isLeaf = true;
}
}
}
// Returns if the word is in the trie.
public boolean search(String word) {
TrieNode t = searchNode(word);
if(t != null && t.isLeaf)
return true;
else
return false;
}
// Returns if there is any word in the trie
// that starts with the given prefix.
public boolean startsWith(String prefix) {
if(searchNode(prefix) == null)
return false;
else
return true;
}
public TrieNode searchNode(String str){
Map<Character, TrieNode> children = root.children;
TrieNode t = null;
for(int i=0; i<str.length(); i++){
char c = str.charAt(i);
if(children.containsKey(c)){
t = children.get(c);
children = t.children;
}else{
return null;
}
}
return t;
}
}
// Your Trie object will be instantiated and called as such:
// Trie trie = new Trie();
// trie.insert("somestring");
// trie.search("key");
Add and Search Word - Data structure design
其实这题如果没有 wildcard,直接用 hashmap 就撸完了。。。因为没有 “前缀查询” 这个可以明显区分 trie 和 hashmap 查询性能的操作。
题目设计者不会这么善罢甘休的,于是他们引入了 wildcard.
第一版写法,自定义新的 search 函数,自带 parent node,默认为 root. 每次遇到 wildcard 就以所有当前节点的分叉为 parent node 去遍历后面的。
然而 TLE 了。。看了一圈论坛发现写法和我的基本一样,不同之处是我用了 HashMap 省空间,他们用了 Array 省时间,于是我 TLE 了,他们却没有 MLE....
改进了一下,避免在 dfs 时用 substring 花费太多时间空间,而只用一个 string copy + pos index.
六个月前写的代码, 处理的不好, 有以下几个问题
TrieNode中不用存char
dfs递归中不要传入children map, 传trie node
终止条件判断, start == word.length()-1 && child.getValue().isLeaf
boolean isLeaf;
char value;
HashMap<Character, TrieNode> children = new HashMap<Character, TrieNode>();
TrieNode(){
}
TrieNode(char c){
value = c;
}
}
public class WordDictionary {
TrieNode root = new TrieNode();
// Adds a word into the data structure.
public void addWord(String word) {
HashMap<Character, TrieNode> children = root.children;
for(int i=0;i<word.length();i++){
if(!children.containsKey(word.charAt(i))){
TrieNode tmp = new TrieNode(word.charAt(i));
children.put(word.charAt(i), tmp);
}
if(i==word.length()-1){
TrieNode tmp = children.get(word.charAt(i));
tmp.isLeaf = true;
}
children = children.get(word.charAt(i)).children;
}
}
// Returns if the word is in the data structure. A word could
// contain the dot character '.' to represent any one letter.
public boolean search(String word) {
return dfs(root.children, word, 0);
}
public boolean dfs(HashMap<Character, TrieNode> children, String word, int start){
if(start >= word.length()) return false;
char c = word.charAt(start);
if(c=='.'){
for(Map.Entry<Character, TrieNode> child: children.entrySet()){
if(start == word.length()-1 && child.getValue().isLeaf)
return true;
if(dfs(child.getValue().children, word, start+1))
return true;
}
return false;
}
if(children.containsKey(c)){
TrieNode t = children.get(c);
if(start == word.length()-1 && t.isLeaf)
return true;
else
return dfs(t.children, word, start+1);
}
else
return false;
}
}
// Your WordDictionary object will be instantiated and called as such:
// WordDictionary wordDictionary = new WordDictionary();
// wordDictionary.addWord("word");
// wordDictionary.search("pattern");
重新写了一个好看的版本, 一次AC
class TrieNode{
HashMap<Character,TrieNode> map = new HashMap<Character, TrieNode>();
boolean isLeaf;
public TrieNode(){}
}
public class WordDictionary {
TrieNode root;
/** Initialize your data structure here. */
public WordDictionary() {
root = new TrieNode();
}
/** Adds a word into the data structure. */
public void addWord(String word) {
TrieNode cur = root;
for(int i=0;i<word.length();i++){
char c = word.charAt(i);
if(!cur.map.containsKey(c)) cur.map.put(c, new TrieNode());
cur = cur.map.get(c);
if(i==word.length()-1) cur.isLeaf = true;
}
}
/** Returns if the word is in the data structure. A word could contain the dot character '.' to represent any one letter. */
public boolean search(String word) {
return dfs(word, 0, root);
}
public boolean dfs(String word, int start, TrieNode cur){
if(start==word.length()) return cur.isLeaf;
char c = word.charAt(start);
if(c=='.'){
for(Character key : cur.map.keySet()){
if(dfs(word, start+1, cur.map.get(key))) return true;
}
return false;
}
if(cur.map.containsKey(c)){
return dfs(word, start+1, cur.map.get(c));
}
return false;
}
}
/**
* Your WordDictionary object will be instantiated and called as such:
* WordDictionary obj = new WordDictionary();
* obj.addWord(word);
* boolean param_2 = obj.search(word);
*/
Word Search II
这题难度是 Hard,一是因为要用到数据结构 trie,另一方面又融合了 Word Search I 里面在矩阵里面搜索的 dfs + backtracking;
- 犯的错误:
- 矩阵里 DFS 搜索要记得把当前格子 mark 成特殊符号,不然会死循环;
- 同一个单词可能在矩阵中出现多次,不要重复添加。
- 因为是先添加新 char 然后直接 dfs 下个位置,并且在递归一开始添加单词,正确的检查位置是在边界检查之前,否则 ["a"] 和 ["a"] 的情况无法正确添加结果,因为 "a" 的四个方向都越界了。
查找多字符串首选就是Trie
public class Solution {
class TrieNode{
boolean isLeaf;
HashMap<Character, TrieNode> children = new HashMap<Character, TrieNode>();
public TrieNode(){
}
}
public TrieNode buildTrie(String[] words){
TrieNode root = new TrieNode();
for(String word : words){
TrieNode mover = root;
for(int i=0;i<word.length();i++){
char c= word.charAt(i);
if(!mover.children.containsKey(c)){
mover.children.put(c, new TrieNode());
}
mover = mover.children.get(c);
if(i==word.length()-1) mover.isLeaf = true; //after mover move, set leaf
}
}
return root;
}
public void dfs(int x, int y, String cur, char[][] board, TrieNode curTrie, List<String> ans){
if(x<0 || x>=board.length || y<0 || y>=board[0].length || board[x][y]=='#')
return;
char c = board[x][y];
if(!curTrie.children.containsKey(c)) return;
TrieNode mover = curTrie.children.get(c);
if(mover.isLeaf){
if(!ans.contains(cur+c))ans.add(cur+c); //key point don't stop here
}
board[x][y] = '#'; //set to be pound
dfs(x-1, y, cur+c, board, mover, ans);
dfs(x+1, y, cur+c, board, mover, ans);
dfs(x, y-1, cur+c, board, mover, ans);
dfs(x, y+1, cur+c, board, mover, ans);
board[x][y] = c;
}
public List<String> findWords(char[][] board, String[] words) {
TrieNode root = buildTrie(words);
List<String> ans = new ArrayList<String>();
for(int i=0;i<board.length;i++)
for(int j=0;j<board[0].length;j++){
dfs(i, j, "", board, root, ans);
}
return ans;
}
}
HashMap trie:
- 优点
- 支持所有 Character
- 相对更省空间
- 缺点
- 查询时间相对长
- 尤其是有 wildcard 做 DFS 时
- 优点
Array trie:
- 优点
- 查询速度快,尤其是有 wildcard 做 DFS 时
- 缺点
- 每个 node 空间占用大
- 比较依赖指定字符集,比如 'a-z' 这种
- 优点
LeetCode OJ 事实说明,这题更适合用 array trie 去解,也算是一个根据输入信息选择合适implementation 的好例子,同样也可以推广到 union-find 的两种 implementation.
Trie Serialization
九章答案, 我得承认这个写法比我自己之前写的序列化二叉树更巧妙。
这题的解法和我之前自己写的那个 binary tree serialization 其实挺像的,结构都是
root【子树1】【子树2】【子树3】
考虑到 Trie 都有个 dummy root,所以是
【root【子树1】【子树2】【子树3】】
追求这题的 AC 稍微有点太细节了,先放着,我先看看别的题去吧。
Concatenated Words
简单的trie变形, 在搜索trie的时候, 每搜索完一个word的时候, 如果当前TrieNode是leaf, 同时wordcount>=1 (此时wordcount加上当前的word为2), 则将当前遍历过的字符串加入.
为trie Node存入了当前所代表的string
通过cur.isLeaf来判断是不对的, 有可能一个点是leaf但不word.length没有走到尽头
字符串可能存在" ", 在加字符串和搜索时都要判断length
8/44 caeses TLE, 改成array trie后10/44 依然TLE
class TrieNode{
HashMap<Character,TrieNode> map = new HashMap<Character, TrieNode>();
boolean isLeaf;
String str;
public TrieNode(){}
}
public class Solution {
public void addWord(String word) {
TrieNode cur = root;
for(int i=0;i<word.length();i++){
char c = word.charAt(i);
if(!cur.map.containsKey(c)) cur.map.put(c, new TrieNode());
cur = cur.map.get(c);
if(i==word.length()-1) {cur.isLeaf = true;cur.str = word;}
}
}
TrieNode root = new TrieNode();
public List<String> findAllConcatenatedWordsInADict(String[] words) {
List<String> ans = new ArrayList<String>();
for(String word: words) {
if(word.length()!=0)
addWord(word);
}
for(String word: words){
if(word.length()!=0)
dfs(words, word, 0, root, 0, ans);
}
return ans;
}
public void dfs(String[] words, String word, int start, TrieNode cur, int count, List<String> ans){
if(start==word.length()){
if(cur.isLeaf && count>0 && !ans.contains(cur.str)){
ans.add(cur.str);
}
for(String newword: words){
if(newword.length()!=0)
dfs(words, newword, 0, cur, count+1, ans);
}
return;
}
char c = word.charAt(start);
if(cur.map.containsKey(c))
dfs(words, word, start+1, cur.map.get(c), count, ans);
}
}
论坛上的代码, 思路一样, 稍有不同的是每次搜索新词的时候, 又从root节点开始搜索, 这样不会TLE.
public List<String> findAllConcatenatedWordsInADict(String[] words) {
List<String> res = new ArrayList<String>();
if (words == null || words.length == 0) {
return res;
}
TrieNode root = new TrieNode();
for (String word : words) { // construct Trie tree
if (word.length() == 0) {
continue;
}
addWord(word, root);
}
for (String word : words) { // test word is a concatenated word or not
if (word.length() == 0) {
continue;
}
if (testWord(word.toCharArray(), 0, root, 0)) {
res.add(word);
}
}
return res;
}
public boolean testWord(char[] chars, int index, TrieNode root, int count) { // count means how many words during the search path
TrieNode cur = root;
int n = chars.length;
for (int i = index; i < n; i++) {
if (cur.sons[chars[i] - 'a'] == null) {
return false;
}
if (cur.sons[chars[i] - 'a'].isEnd) {
if (i == n - 1) { // no next word, so test count to get result.
return count >= 1;
}
if (testWord(chars, i + 1, root, count + 1)) {
return true;
}
}
cur = cur.sons[chars[i] - 'a'];
}
return false;
}
public void addWord(String str, TrieNode root) {
char[] chars = str.toCharArray();
TrieNode cur = root;
for (char c : chars) {
if (cur.sons[c - 'a'] == null) {
cur.sons[c - 'a'] = new TrieNode();
}
cur = cur.sons[c - 'a'];
}
cur.isEnd = true;
}
}
class TrieNode {
TrieNode[] sons;
boolean isEnd;
public TrieNode() {
sons = new TrieNode[26];
}
这道题DP也能解, 和word break如出一辙. DP速度显然不如Trie快, Trie 102ms, DP 443ms
words先按长度小到大排序,排在后面长度长的词一定是被前面长度短的词构造。方便剪枝
canform函数判断每个string
dp[i] = dp[j]==true&&preSet.contains(str(j,i));
dp[i]已经找到,设为true,不用继续再找j,break
dp[j]==false, continue判断下一个j
注意剪枝, 一旦dp[i] = dp[j] &&dict.contains(word.substring(j, i)), dp[j]==false, 马上返回. dp[i]一旦是true, break当前循环.
public class Solution {
public static List<String> findAllConcatenatedWordsInADict(String[] words) {
List<String> result = new ArrayList<>();
Set<String> preWords = new HashSet<>();
Arrays.sort(words, new Comparator<String>() {
public int compare (String s1, String s2) {
return s1.length() - s2.length();
}
});
for (int i = 0; i < words.length; i++) {
if (canForm(words[i], preWords)) {
result.add(words[i]);
}
preWords.add(words[i]);
}
return result;
}
private static boolean canForm(String word, Set<String> dict) {
if (dict.isEmpty()) return false;
boolean[] dp = new boolean[word.length() + 1];
dp[0] = true;
for (int i = 1; i <= word.length(); i++) {
for (int j = 0; j < i; j++) {
if (!dp[j]) continue;
if (dict.contains(word.substring(j, i))) {
dp[i] = true;
break;
}
}
}
return dp[word.length()];
}
}
Design Search Autocomplete System
自己的代码,42/43后TLE了,还是HashMap慢的问题。注意'#'的处理,insert(string, 1)清空当前string.实现getLeaves function
public class AutocompleteSystem {
class TrieNode{
String s;
HashMap<Character, TrieNode> children;
int times = 0;
boolean isLeaf;
public TrieNode(){
children = new HashMap<Character, TrieNode>();
}
}
TrieNode root;
public void insert(String s, int t){
char[] arr = s.toCharArray();
TrieNode cur = root;
for(int i=0;i<arr.length;i++){
if(!cur.children.containsKey(arr[i]))
cur.children.put(arr[i], new TrieNode());
cur = cur.children.get(arr[i]);
if(i==arr.length-1){
cur.s = s;
cur.times += t;
}
}
}
public AutocompleteSystem(String[] sentences, int[] times) {
root = new TrieNode();
for(int i=0;i<sentences.length;i++){
insert(sentences[i], times[i]);
}
}
String curS = "";
public List<TrieNode> search(char[] curArr, int i, TrieNode cur){
List<TrieNode> ans = new ArrayList<TrieNode>();
if(cur==null)
return ans;
if(i==curArr.length){
ans=getLeaves(cur);
return ans;
}
if(cur.children.containsKey(curArr[i]));
ans = search(curArr, i+1, cur.children.get(curArr[i]));
return ans;
}
public List<TrieNode> getLeaves(TrieNode cur){
List<TrieNode> ans = new ArrayList<TrieNode>();
Queue<TrieNode> queue = new LinkedList<TrieNode>();
if(cur!=null) queue.add(cur);
while(!queue.isEmpty()){
TrieNode tmp = queue.poll();
if(tmp.times>0)
ans.add(tmp);
for(Character c : tmp.children.keySet()){
queue.add(tmp.children.get(c));
}
}
return ans;
}
public List<String> input(char c) {
List<String> ans = new ArrayList<String>();
if(c=='#'){
insert(curS,1);
curS="";
return ans;
}
curS += c;
char[] curArr = curS.toCharArray();
List<TrieNode> tmp = search(curArr, 0, root);
PriorityQueue<TrieNode> pq = new PriorityQueue<TrieNode>((a,b)->(a.times==b.times)?a.s.compareTo(b.s):b.times-a.times);
pq.addAll(tmp);
for(int i=0;i<3&&!pq.isEmpty();i++)
ans.add(pq.poll().s);
return ans;
}
}
/**
* Your AutocompleteSystem object will be instantiated and called as such:
* AutocompleteSystem obj = new AutocompleteSystem(sentences, times);
* List<String> param_1 = obj.input(c);
*/