1 //===- llvm/ADT/Trie.h ---- Generic trie structure --------------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file was developed by Anton Korobeynikov and is distributed under
6 // the University of Illinois Open Source License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This class defines a generic trie structure. The trie structure
11 // is immutable after creation, but the payload contained within it is not.
13 //===----------------------------------------------------------------------===//
15 #ifndef LLVM_ADT_TRIE_H
16 #define LLVM_ADT_TRIE_H
24 // - Labels are usually small; maybe it's better to use SmallString
25 // - Should we use char* during construction?
26 // - Should we templatize Empty with a traits-like interface?
27 // - GraphTraits interface
29 template<class Payload>
// Convenience aliases: a vector of Node pointers and its mutable iterator.
// NOTE(review): presumably the type of the Children member, whose
// declaration is not visible here — confirm.
41 typedef std::vector<Node*> NodeVector;
42 typedef typename std::vector<Node*>::iterator NodeVectorIter;
45 bool operator() (Node* N1, Node* N2) {
46 return (N1->Label[0] < N2->Label[0]);
48 bool operator() (Node* N, char Id) {
49 return (N->Label[0] < Id);
// Constructs a node, initializing Label from `label` (the empty string by
// default) and Data from `data`. Declared explicit so that a Payload is
// never implicitly converted into a Node.
57 inline explicit Node(const Payload& data, const std::string& label = ""):
58 Label(label), Data(data) { }
60 inline Node(const Node& n) {
62 Children = n.Children;
65 inline Node& operator=(const Node& n) {
68 Children = n.Children;
// Returns true when this node has no entries in Children.
75 inline bool isLeaf() const { return Children.empty(); }
// Returns a const reference to the payload stored in this node.
77 inline const Payload& getData() const { return Data; }
// Replaces the payload stored in this node by assigning `data` to Data.
78 inline void setData(const Payload& data) { Data = data; }
// Replaces this node's label by assigning `label` to Label.
// NOTE(review): other code orders and looks up children by Label[0], so
// changing the label of a node already attached to a parent may need care —
// confirm against callers.
80 inline void setLabel(const std::string& label) { Label = label; }
// Returns a const reference to this node's label.
81 inline const std::string& getLabel() const { return Label; }
85 std::cerr << "Node: " << this << "\n"
86 << "Label: " << Label << "\n"
89 for (NodeVectorIter I = Children.begin(), E = Children.end(); I != E; ++I)
90 std::cerr << (*I)->Label << "\n";
94 inline void addEdge(Node* N) {
96 Children.push_back(N);
98 NodeVectorIter I = std::lower_bound(Children.begin(), Children.end(),
100 // FIXME: no dups are allowed
101 Children.insert(I, N);
105 inline Node* getEdge(char Id) {
107 NodeVectorIter I = std::lower_bound(Children.begin(), Children.end(),
109 if (I != Children.end() && (*I)->Label[0] == Id)
115 inline void setEdge(Node* N) {
116 char Id = N->Label[0];
117 NodeVectorIter I = std::lower_bound(Children.begin(), Children.end(),
119 assert(I != Children.end() && "Node does not exists!");
123 QueryResult query(const std::string& s) const {
125 unsigned l1 = s.length();
126 unsigned l2 = Label.length();
128 // Find the length of the common prefix of s and Label
129 l = std::min(l1, l2);
131 while ((i < l) && (s[i] == Label[i]))
134 if (i == l) { // One is prefix of another, find who is who
138 return StringIsPrefix;
140 return LabelIsPrefix;
141 } else // s and Label share a common (possibly empty) prefix; return its length
142 return (QueryResult)i;
146 std::vector<Node*> Nodes;
// Returns the first element of Nodes, which serves as the root of the trie.
// Uses unchecked indexing, so Nodes must be non-empty when this is called.
// NOTE(review): presumably the constructor creates the root node — confirm.
149 inline Node* getRoot() const { return Nodes[0]; }
151 inline Node* addNode(const Payload& data, const std::string label = "") {
152 Node* N = new Node(data, label);
157 inline Node* splitEdge(Node* N, char Id, size_t index) {
158 Node* eNode = N->getEdge(Id);
159 assert(eNode && "Node doesn't exist");
161 const std::string &l = eNode->Label;
162 assert(index > 0 && index < l.length() && "Trying to split too far!");
163 std::string l1 = l.substr(0, index);
164 std::string l2 = l.substr(index);
166 Node* nNode = addNode(Empty, l1);
170 nNode->addEdge(eNode);
176 inline explicit Trie(const Payload& empty):Empty(empty) {
180 for (unsigned i = 0, e = Nodes.size(); i != e; ++i)
184 bool addString(const std::string& s, const Payload& data) {
185 Node* cNode = getRoot();
189 while (tNode == NULL) {
191 if (Node* nNode = cNode->getEdge(Id)) {
192 typename Node::QueryResult r = nNode->query(s1);
196 case Node::StringIsPrefix:
197 // Currently we do not allow two strings in the trie where one is a
198 // prefix of the other. This should be fixed.
199 assert(0 && "FIXME!");
201 case Node::DontMatch:
202 assert(0 && "Impossible!");
204 case Node::LabelIsPrefix:
205 s1 = s1.substr(nNode->getLabel().length());
209 nNode = splitEdge(cNode, Id, r);
210 tNode = addNode(data, s1.substr(r));
211 nNode->addEdge(tNode);
214 tNode = addNode(data, s1);
215 cNode->addEdge(tNode);
222 const Payload& lookup(const std::string& s) const {
223 Node* cNode = getRoot();
227 while (tNode == NULL) {
229 if (Node* nNode = cNode->getEdge(Id)) {
230 typename Node::QueryResult r = nNode->query(s1);
236 case Node::StringIsPrefix:
238 case Node::DontMatch:
239 assert(0 && "Impossible!");
241 case Node::LabelIsPrefix:
242 s1 = s1.substr(nNode->getLabel().length());
252 return tNode->getData();
259 #endif // LLVM_ADT_TRIE_H