本文涉及知识点
字典树(前缀树)
LeetCode745. 前缀和后缀搜索
设计一个包含一些单词的特殊词典,并能够通过前缀和后缀来检索单词。
实现 WordFilter 类:
WordFilter(string[] words) 使用词典中的单词 words 初始化对象。
f(string pref, string suff) 返回词典中具有前缀 pref 和后缀 suff 的单词的下标。如果存在不止一个满足要求的下标,返回其中 最大的下标 。如果不存在这样的单词,返回 -1 。
示例:
输入
[“WordFilter”, “f”]
[[[“apple”]], [“a”, “e”]]
输出
[null, 0]
解释
WordFilter wordFilter = new WordFilter([“apple”]);
wordFilter.f(“a”, “e”); // 返回 0 ,因为下标为 0 的单词:前缀 prefix = “a” 且 后缀 suffix = “e” 。
提示:
1 <= words.length <= 104
1 <= words[i].length <= 7
1 <= pref.length, suff.length <= 7
words[i]、pref 和 suff 仅由小写英文字母组成
最多对函数 f 执行 104 次调用
字典树
∀ \forall ∀ s ∈ \in ∈ words
∀ \forall ∀ s1是s的前缀。
字典树tree1 ,s1对应的索引为i1
trees[i1] 记录所有s2的逆向后缀。
trees的各节点,要记录最后一个子孙的下标。
字典树代码
template<class TData = char, int iTypeNum = 26, TData cBegin = 'a'>
class CTrieNode
{
public:
~CTrieNode()
{
for (auto& [tmp, ptr] : m_dataToChilds) {
delete ptr;
}
}
CTrieNode* AddChar(TData ele, int& iMaxID)
{
#ifdef _DEBUG
if ((ele < cBegin) || (ele >= cBegin + iTypeNum))
{
return nullptr;
}
#endif
const int index = ele - cBegin;
auto ptr = m_dataToChilds[ele - cBegin];
if (!ptr)
{
m_dataToChilds[index] = new CTrieNode();
#ifdef _DEBUG
m_dataToChilds[index]->m_iID = ++iMaxID;
m_childForDebug[ele] = m_dataToChilds[index];
#endif
}
return m_dataToChilds[index];
}
CTrieNode* GetChild(TData ele)
{
#ifdef _DEBUG
if ((ele < cBegin) || (ele >= cBegin + iTypeNum))
{
return nullptr;
}
#endif
return m_dataToChilds[ele - cBegin];
}
protected:
#ifdef _DEBUG
int m_iID = -1;
std::unordered_map<TData, CTrieNode*> m_childForDebug;
#endif
public:
int m_iLeafIndex = -1;
protected:
//CTrieNode* m_dataToChilds[iTypeNum] = { nullptr };//空间换时间 大约216字节
//unordered_map<int, CTrieNode*> m_dataToChilds;//时间换空间 大约56字节
map<int, CTrieNode*> m_dataToChilds;//时间换空间,空间略优于哈希映射,数量小于256时,时间也优。大约48字节
};
template<class TData = char, int iTypeNum = 26, TData cBegin = 'a'>
class CTrie
{
public:
int GetLeadCount()
{
return m_iLeafCount;
}
CTrieNode<TData, iTypeNum, cBegin>* AddA(CTrieNode<TData, iTypeNum, cBegin>* par,TData curValue)
{
auto curNode =par->AddChar(curValue, m_iMaxID);
FreshLeafIndex(curNode);
return curNode;
}
template<class IT>
int Add(IT begin, IT end)
{
auto pNode = &m_root;
for (; begin != end; ++begin)
{
pNode = pNode->AddChar(*begin, m_iMaxID);
}
FreshLeafIndex(pNode);
return pNode->m_iLeafIndex;
}
template<class IT>
CTrieNode<TData, iTypeNum, cBegin>* Search(IT begin, IT end)
{
auto ptr = &m_root;
for (; begin != end; ++begin)
{
ptr = ptr->GetChild(*begin);
if (nullptr == ptr)
{
return nullptr;
}
}
return ptr;
}
CTrieNode<TData, iTypeNum, cBegin> m_root;
protected:
void FreshLeafIndex(CTrieNode<TData, iTypeNum, cBegin>* pNode)
{
if (-1 == pNode->m_iLeafIndex)
{
pNode->m_iLeafIndex = m_iLeafCount++;
}
}
int m_iMaxID = 0;
int m_iLeafCount = 0;
};
namespace NTmp {
template<class TData = char, int iTypeNum = 26, TData cBegin = 'a'>
class CTrieNode
{
public:
~CTrieNode()
{
for (auto& [tmp, ptr] : m_dataToChilds) {
delete ptr;
}
}
CTrieNode* AddChar(TData ele,int iDataIndex ,int& iMaxID)
{
#ifdef _DEBUG
if ((ele < cBegin) || (ele >= cBegin + iTypeNum))
{
return nullptr;
}
#endif
const int index = ele - cBegin;
auto ptr = m_dataToChilds[ele - cBegin];
if (!ptr)
{
m_dataToChilds[index] = new CTrieNode();
#ifdef _DEBUG
m_dataToChilds[index]->m_iID = ++iMaxID;
m_childForDebug[ele] = m_dataToChilds[index];
#endif
}
m_dataToChilds[index]->m_iChildChildIndex = iDataIndex;
return m_dataToChilds[index];
}
CTrieNode* GetChild(TData ele)
{
#ifdef _DEBUG
if ((ele < cBegin) || (ele >= cBegin + iTypeNum))
{
return nullptr;
}
#endif
return m_dataToChilds[ele - cBegin];
}
protected:
#ifdef _DEBUG
int m_iID = -1;
std::unordered_map<TData, CTrieNode*> m_childForDebug;
#endif
public:
int m_iChildChildIndex = -1;
int m_iLeafIndex = -1;
protected:
map<int, CTrieNode*> m_dataToChilds;//时间换空间,空间略优于哈希映射,数量小于256时,时间也优。大约48字节
};
template<class TData = char, int iTypeNum = 26, TData cBegin = 'a'>
class CTrie
{
public:
int GetLeadCount()
{
return m_iLeafCount;
}
CTrieNode<TData, iTypeNum, cBegin>* AddA(CTrieNode<TData, iTypeNum, cBegin>* par, TData curValue)
{
auto curNode = par->AddChar(curValue, m_iMaxID);
FreshLeafIndex(curNode);
return curNode;
}
template<class IT>
int Add(IT begin, IT end,int index)
{
auto pNode = &m_root;
for (; begin != end; ++begin)
{
pNode = pNode->AddChar(*begin,index, m_iMaxID);
}
FreshLeafIndex(pNode);
return pNode->m_iLeafIndex;
}
template<class IT>
CTrieNode<TData, iTypeNum, cBegin>* Search(IT begin, IT end)
{
auto ptr = &m_root;
for (; begin != end; ++begin)
{
ptr = ptr->GetChild(*begin);
if (nullptr == ptr)
{
return nullptr;
}
}
return ptr;
}
CTrieNode<TData, iTypeNum, cBegin> m_root;
protected:
void FreshLeafIndex(CTrieNode<TData, iTypeNum, cBegin>* pNode)
{
if (-1 == pNode->m_iLeafIndex)
{
pNode->m_iLeafIndex = m_iLeafCount++;
}
}
int m_iMaxID = 0;
int m_iLeafCount = 0;
};
}
class WordFilter {
public:
WordFilter(vector<string>& words) {
m_trees.resize(words.size() * 7);
for (int i = 0; i < words.size();i++) {
const auto& s = words[i];
auto ptr = &m_tree.m_root;
for (const auto& ch : s) {
ptr = m_tree.AddA(ptr, ch);
m_trees[ptr->m_iLeafIndex].Add(s.rbegin(), s.rend(), i);
}
}
}
int f(string pref, string suff) {
auto ptr = m_tree.Search(pref.begin(), pref.end());
if ((nullptr == ptr) ||(-1 == ptr->m_iLeafIndex)) { return -1; };
auto ptr2 = m_trees[ptr->m_iLeafIndex].Search(suff.rbegin(), suff.rend());
if (nullptr == ptr2) { return -1; }
return ptr2->m_iChildChildIndex;
}
CTrie<> m_tree;
vector<NTmp::CTrie<>> m_trees;
};
测试用例
template<class T>
void Assert(const T& t1, const T& t2)
{
assert(t1 == t2);
}
template<class T>
void Assert(const vector<T>& v1, const vector<T>& v2)
{
if (v1.size() != v2.size())
{
assert(false);
return;
}
for (int i = 0; i < v1.size(); i++)
{
Assert(v1[i], v2[i]);
}
}
int main()
{
vector<string> words;
{
vector<string> words = { "apple" };
WordFilter wf(words);
auto res = wf.f("a", "e");
Assert(res, 0);
res = wf.f("ap", "ple");
Assert(res, 0);
res = wf.f("app", "ple");
Assert(res, 0);
res = wf.f("d", "ple");
Assert(res, -1);
res = wf.f("app", "x");
Assert(res, -1);
res = wf.f("y", "x");
Assert(res, -1);
}
}
2023年7月
class WordFilter {
public:
WordFilter(vector& words) {
for (int i = 0 ; i < words.size();i++ )
{
const string& s = words[i];
m_strIndexs[s] = i;
const int len = s.length();
for (int begin = 1; begin <= len-2; begin++)
{
for (int end = begin; end <= len - 2; end++)
{
m_strIndexs[s.substr(0,begin)+ string(end-begin+1,‘') + s.substr(end+1)] = i;
}
}
}
}
int f(string pref, string suff) {
std::set indexs;
for (int len = 0; len + pref.length() + suff.length() <= 7; len++)
{
string strFind = pref + string(len, '’) + suff;
if (m_strIndexs.count(strFind))
{
indexs.emplace(m_strIndexs[strFind]);
}
}
for (int lenSame = 1; lenSame <= min(pref.length(), suff.length()); lenSame++)
{
if (pref.substr(pref.length() - lenSame) != suff.substr(0,lenSame))
{
continue;
}
string strFind = pref + suff.substr(lenSame);
if (m_strIndexs.count(strFind))
{
indexs.emplace(m_strIndexs[strFind]);
}
}
return indexs.empty() ? -1 : *indexs.rbegin();
}
std::unordered_map<string, int> m_strIndexs;
};