teditor  1.8.0@@fee5e94
Terminal based editor written in C++
nfa.h
Go to the documentation of this file.
1 #pragma once
2 
3 #include <stack>
4 #include <string>
5 #include <vector>
6 #include <unordered_set>
7 #include <core/double_buffer.hpp>
8 #include <core/pos2d.h>
9 
10 namespace teditor {
11 namespace parser {
12 
14 
15 
23 struct NFA {
28  NFA(const std::string& reg);
29 
37  size_t find(const std::string& str, size_t start = 0, size_t end = 0);
38 
48  size_t findAny(const std::string& str, size_t& matchStartPos,
49  size_t start = 0, size_t end = 0);
50 
51  ~NFA() { for (auto itr : states) delete itr; }
52 
64  bool isMatch(bool lastStateRemaining = false) const;
71  bool areActiveStatesEmpty() const { return acs.current().empty(); }
77  const Point& getMatchPos() const { return matchState->matchPos; }
84  bool step(char c, const Point& pos);
89  void reset();
93  static const size_t NoMatch;
94 
95 private:
97  enum Specials {
98  Split = 256, // splitter NFA state
99  Match, // terminal state
100  Digit, // \d
101  WhiteSpace, // \s
102  NonWhiteSpace, // \S
103  Any, // .
104  AnyFromList, // [...], [A-Z], [a-z], [0-9]
105  NoneFromList, // [^...]
106  Bracket, // Temporary state for recognizing groups
107  Alternation, // Temporary state for recgonizing alternations
108  }; // enum Specials
109 
111  struct State {
112  int c;
113  std::string s; // for matching with a set of possible chars (aka [...])
114  State* next;
115  State* other; // used only with Split state
116  Point matchPos; // used only for matches
117  State() : c(0), s(), next(nullptr), other(nullptr) {}
118  State(int _c): c(_c), s(), next(nullptr), other(nullptr) {}
119  bool isMatch(char in) const;
120  }; // struct State
121 
122  typedef std::unordered_set<State*> Actives;
123 
124  std::string regex;
125  std::vector<State*> states; // all states for this NFA
126  State *startState;
127  State *matchState;
128  // during matching these are the "active" states
129  // this does NOT own the underlying pointers
130  // uses double-buffering to avoid corruption
131  DoubleBuffer<Actives> acs;
132 
133  void stepThroughSplitStates();
134  void checkForSplitState(State* st, const Point& pos, Actives& ac);
135 
136  // used only while compiling the regex's
137  // this does NOT own any of the underlying pointers
138  struct Fragment {
139  State* entry;
140  std::vector<State*> tails;
141  Fragment(): entry(nullptr), tails() {}
142  Fragment(State* e);
143  void addState(State* s);
144  void appendState(State* s) { tails.push_back(s); }
145  }; // struct Fragment
146 
147  // stack of fragments used during the regex compilation
148  std::stack<Fragment> fragments;
149 
150  // used to store intermediate regex compiler state
151  struct CompilerState {
152  bool prevBackSlash;
153  bool prevSqBracketOpen;
154  bool isUnderRange;
155  bool isUnderSqBracket;
156  CompilerState();
157  void validate(const std::string& reg);
158  }; // struct CompilerState
159 
160  void parseChar(char c, CompilerState& cState);
161  void parseGeneral(char c, CompilerState& cState);
162  void parseInsideSqBracket(char c, CompilerState& cState);
163  State* createState(int c);
164  void addNewStateFor(int c);
165  void stitchFragments();
166 }; // struct NFA
167 
168 } // namespace parser
169 } // namespace teditor
teditor::DoubleBuffer::current
Type & current()
Definition: double_buffer.hpp:12
teditor::parser::NFA::reset
void reset()
Reset all the variables used during regex search. This needs to be called once before beginning of ev...
Definition: nfa.cpp:90
nfa.h
teditor::Any
struct Impl ///
Definition: any.hpp:54
teditor::parser::NFA::~NFA
~NFA()
Definition: nfa.h:51
teditor::parser::NFA
Ken-Thompson NFA as described here: https://swtch.com/~rsc/regexp/regexp1.html but adjusted to work w...
Definition: nfa.h:23
double_buffer.hpp
teditor::parser::NFA::step
bool step(char c, const Point &pos)
Step through the NFA state using the current char.
Definition: nfa.cpp:97
utils.h
teditor::parser::NFA::isMatch
bool isMatch(bool lastStateRemaining=false) const
checks if we have reached match state, indicating a regex match
Definition: nfa.cpp:123
teditor::parser::NFA::NoMatch
static const size_t NoMatch
Definition: nfa.h:93
teditor::parser::NFA::findAny
size_t findAny(const std::string &str, size_t &matchStartPos, size_t start=0, size_t end=0)
Tries for regex match starting from anywhere in the string.
Definition: nfa.cpp:76
ASSERT
#define ASSERT(check, fmt,...)
Macro to assert with runtime_error exception if the check fails.
Definition: utils.h:35
teditor::parser::NFA::getMatchPos
const Point & getMatchPos() const
After the search has finished, use this to know the latest position of the match of the regex in the ...
Definition: nfa.h:77
teditor::DoubleBuffer::next
Type & next()
Definition: double_buffer.hpp:14
teditor::Pos2d< int >
teditor::parser::NFA::areActiveStatesEmpty
bool areActiveStatesEmpty() const
Tells if after the current step() whether there are any active states still remaining....
Definition: nfa.h:71
teditor::parser::NFA::NFA
NFA(const std::string &reg)
ctor with adding a regex for the NFA
Definition: nfa.cpp:48
teditor::DoubleBuffer::update
void update()
Definition: double_buffer.hpp:16
pos2d.h
teditor::parser::NFA::find
size_t find(const std::string &str, size_t start=0, size_t end=0)
String match function.
Definition: nfa.cpp:64
teditor::Point
Pos2di Point
Definition: pos2d.h:112
teditor::Pos2d::x
T x
Definition: pos2d.h:16
teditor
Definition: any.hpp:10