正则表达式转NFA

正则表达式的基本运算

正则表达式有三种基本的运算:

  1. 连接(Concatenation), 例如 abc, 由a, b, c组成
  2. 联合(Union), 例如 a|b|c, 表示a或者b或者c
  3. Kleene闭包(Kleene *), 例如 (ab)*, 表示ab串不出现,或者出现1次或一次以上

其它的运算如+, {}等都可以用以上三种基本运算或者运算的组合来表示。

正则表达式转换成NFA

1)(a)R=AB     (b)R=A|B     (c)R=A*

r=s|t

r=st

r=s*

2)例题3.26:为正则表达式r=(a|b)*abb构造一个NFA

①对于(a|b)中的a构造NFA

②对于(a|b)中的b构造NFA

③而(a|b)构造NFA,将上面两个图合并

④(a|b)*构造NFA

⑤对于abb中的a构造NFA,对于abb中的两个b构造NFA同④类似

⑥将④⑤合并之后

⑦最终的NFA是

 

        NFA是捏着正则式去比文本,吃掉一个字符,就把它跟正则式比较,匹配就记下来:“某年某月某日在某处匹配上了!”,然后接着往下干。一旦不匹配,就把刚吃的这个字符吐出来,一个个的吐,直到回到上一次匹配的地方。

正则表达式转换成NFA代码

  1.正则表达式后缀化《逆波兰表达式&后缀表达式》,并将其中操作符进行修改


    public static enum Operator {
        DIVIDE('|', 1),
        ADD('&', 2),MULTIPLY('*', 2),
        LEFT_BRACKET('(', 3), RIGHT_BRACKET(')', 3);

        char value;
        int priority;
       
        //...
    }

 2.构造NFA源码

public class NfaAlgorithm {



    private static String prepareString(String _regex) {
        String regex = "";
        char[] regexs = _regex.replaceAll(" ", "").toCharArray();
        for (int i = 0; i < regexs.length; i++) {
            if (i == 0)
                regex += regexs[i];
            else {
                if (regexs[i] == '|' || regexs[i] == '*' || regexs[i] == ')') {
                    regex += regexs[i];
                } else {
                    if (regexs[i - 1] == '(' || regexs[i - 1] == '|')
                        regex += regexs[i];
                    else
                        regex += ("&" + regexs[i]);
                }
            }
        }
        return regex;
    }

    private static String E = "epsllon";

    public static  Graph transformNFA(String suffixRegex){
        char[] rs = suffixRegex.toCharArray();
        Stack<Object> operandStack = new Stack();
        for (int i =0; i<rs.length; i++){
            char o = rs[i];
            if(!Operator.isOperator(o)){
                operandStack.push(String.valueOf(o));
            }else{

                Operator operator = Operator.findOperator(o);
                Graph graph = null;
                switch (operator){
                  case ADD://'&'
                      Object av1 = operandStack.pop();
                      Object av2 = operandStack.pop();
                      graph = Graph.parse(av2);
                      graph.add(av1);
                      break;
                  case MULTIPLY://'*'
                      Object mv = operandStack.pop();
                      graph = Graph.parse(mv);
                      graph.multiply();
                      break;
                  case DIVIDE://'|'
                      Object dv1 = operandStack.pop();
                      Object dv2 = operandStack.pop();
                      graph = Graph.parse(dv2);
                      graph.divide(dv1);
                      break;
                }
                operandStack.push(graph);
            }
        }
        return Graph.parse(operandStack.pop());
    }

    public static void main(String[] args) {
        String exp = prepareString("(A*B|AC)D");
        //String exp = prepareString("AB");

        System.out.println(exp);
        String regex =  SuffixAlgorithm.rpn(exp);
        System.out.println(regex);
        Graph graph = transformNFA(regex);
        System.out.println(graph);
    }

    static class Graph {
        private List<Edge> edges;
        private Node start;
        private Node end;

        private Graph(String label){
            this.start = new Node();
            this.end = new Node();
            this.edges = new ArrayList<>();
            Edge edge = new Edge(start,end,label);
            this.edges.add(edge);

        }

        public static Graph  parse(Object obj){
            if(obj instanceof  Graph){
                return (Graph) obj;
            }
            if(obj instanceof  String)
                return new Graph((String)obj);

            throw new IllegalArgumentException("not support class["+obj.getClass()+"]");
        }

        public void add(Object obj){
            if(obj instanceof  String){
                add((String)obj);
                return;
            }
            if(obj instanceof  Graph) {
                add((Graph) obj);
                return;
            }
            throw new IllegalArgumentException("not support class["+obj.getClass()+"]");
        }

        public void divide(Object obj){
            if(obj instanceof  String){
                divide(parse(obj));
                return;
            }
            if(obj instanceof  Graph) {
                divide((Graph) obj);
                return;
            }
            throw new IllegalArgumentException("not support class["+obj.getClass()+"]");
        }

        public void multiply(){
            Node nodeStart = this.start;
            Node nodeEnd = this.end;
            this.start = new Node();
            this.end = new Node();
            Edge edge1 = new Edge(nodeEnd,nodeStart,E);
            Edge edge2 = new Edge(this.start,nodeStart,E);
            Edge edge3 = new Edge(nodeEnd,this.end,E);
            Edge edge4 = new Edge(this.start,this.end,E);
            this.edges.add(edge1);
            this.edges.add(edge2);
            this.edges.add(edge3);
            this.edges.add(edge4);
        }

        private void divide(Graph graph){
            Node nodeStart = this.start;
            Node nodeEnd = this.end;
            this.start = new Node();
            this.end = new Node();
            Edge edge1 = new Edge(this.start,graph.start,E);
            Edge edge2 = new Edge(this.start,nodeStart,E);
            Edge edge3 = new Edge(nodeEnd,this.end,E);
            Edge edge4 = new Edge(graph.end,this.end,E);
            this.edges.add(edge1);
            this.edges.add(edge2);
            this.edges.add(edge3);
            this.edges.add(edge4);
            this.edges.addAll(graph.edges);
        }

        private void add(String label){
            Node mid = end;
            this.end = new Node();
            Edge edge = new Edge(mid,end,label);
            this.edges.add(edge);
        }

        private void add(Graph graph){
            Node node = this.end;
            this.end = graph.end;
            edges.stream().filter(edge -> edge.end == node)
                    .forEach(edge -> {
                        edge.end = graph.start;
                     });
            this.edges.addAll(graph.edges);
        }

        @Override
        public String toString() {
            String printString = "Start=" + this.start + "  End=" + this.end + "\n";
            for (int i = 0; i < edges.size(); i++) {
                printString += edges.get(i) + "\n";
            }
            return printString;
        }
    }

    //边
    static class Edge {
        private Node begin;
        private Node end;
        private String label;

        public Edge(Node begin, Node end, String label) {
            this.begin = begin;
            this.end = end;
            this.label = label;
        }

        @Override
        public String toString() {
            return "Edge [begin=" + begin + ", end=" + end + ", label=" + label+ "]";
        }
    }

    //节点
    static class Node {
        private int id;
        private static int ID=0;

        public Node(){
            this.id=ID++;
        }

        public int getId() {
            return id;
        }

        public static void reset(){
            ID=0;
        }

        @Override
        public String toString() {
            return id+"";
        }

    }
}

 

参考文章:

正则表达式转换成NFA


版权声明:本文为y3over原创文章,遵循CC 4.0 BY-SA版权协议,转载请附上原文出处链接和本声明。