Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

后缀树 #15

Open
RubyLouvre opened this issue Jul 31, 2019 · 4 comments
Open

后缀树 #15

RubyLouvre opened this issue Jul 31, 2019 · 4 comments

Comments

@RubyLouvre
Copy link
Owner

RubyLouvre commented Jul 31, 2019

 var count = 0
    function STrie(suffix){
       this.children = {}
       this.suffix =  suffix
       this.count = count++
    }
    class SuffixTire{
      constructor(str){
        var t = null;
        for(var i  = 0; i < str.length; i++){
          t = this.insert(t, str[i])
        }
        return this.getRoot(t)
      }
      insert(top, c){
        if(top == null){
          top = new STrie()
        }
        var node = top
        var newnode = new STrie()
        while( node && !(c in node.children)){
          newnode.suffix =  node.children[c] = new STrie(node)
          newnode = node.children[c]
          node = node.suffix
        }
        if(node){
          newnode.suffix =  node.children[c] 
        }
        return top.children[c]
      }
      getRoot(node){
        while(node.suffix){
          node = node.suffix
        }
        return node
      }
    }
    var a = new SuffixTire('cacao')
     console.log(a)
@RubyLouvre
Copy link
Owner Author

//由后向前构建
         class SuffixNode {
                constructor(s) {
                    this.children = [];
                    this.value = s;
                }
            }

            class SuffixTree {
                constructor() {
                    this.root = new SuffixNode("");
                }
                insert(word, symbol) {
                    var nodes = this.root.children;
                    while (word) {
                        this.addNodeByPrefix(nodes, word);
                        word = word.slice(1);
                    }
                    this.addEndSymbol(nodes, symbol || '')
                }
                addEndSymbol(nodes, symbol){
                  for (var i = 0; i < nodes.length; i++) {
                    var node = nodes[i];
                    if(node.isEnd){
                      node.value += symbol
                    }else if(node.children.length){
                      this.addEndSymbol(node.children, symbol)
                    }
                  }
                }
                addNodeByPrefix(nodes, word) {
                    var add = true;
                    for (var i = 0; i < nodes.length; i++) {
                        var node = nodes[i];
                        if (node.value[0] == word[0] ) {
                            //如果存在共同前缀
                            add = false;
                            var prefix = "";
                            for (var j = 0; j < word.length; j++) {
                                if (node.value[j] != word[j]) {
                                    break;
                                }
                            }
                            prefix = word.slice(0, j);
                            var valueLen = node.value.length  
                            if (prefix === word) {
                                //情况一
                                //word完全是node.value的一个前缀,如ana与anana
                                var cur = new SuffixNode(prefix );
                                console.log("创建节点的value", prefix);
                                console.log(
                                    "修改节点的value",
                                    node.value,
                                    "为",
                                    node.value.slice(j)
                                );
                                nodes[i] = cur; //反客为主
                                node.value = node.value.slice(j);
                                cur.children.push(node);
                             } else if (j === valueLen && word.length > valueLen) {
                                //情况三
                                //node.value比word短,全部相等,  如ab与abcd
                                console.log('修改节点的value', node.value, '为', prefix )
                                node.value = word.slice(0, j) 
                                this.addNodeByPrefix(node.children, word.slice(j))
                            } else {
                                //情况二
                                //node.value比word长, 局部相等, 如abcabxabcd与abxabcd
                                var cur = new SuffixNode(prefix );
                                nodes[i] = cur; //反客为主
                                console.log("创建节点的value", prefix );
                                console.log(
                                    "修改节点的value",
                                    node.value,
                                    "为",
                                    node.value.slice(j)
                                );
                                node.value = node.value.slice(j);
                                cur.children.push(node);
                                console.log(
                                    "添加另一个子节点",
                                    word.slice(j) 
                                );
                                cur.children.push(
                                    new SuffixNode(word.slice(j) )
                                );
                            }
                        }
                    }
                    if (add) {
                        console.log("没有前缀,添加新节点", word);
                        var node = new SuffixNode(word)
                        node.isEnd = true
                        nodes.push(node);
                    }
                }
            }
            var t = new SuffixTree()
            t.insert("abcabxabcd", '#');
            console.log(t)

@RubyLouvre
Copy link
Owner Author

RubyLouvre commented Aug 1, 2019

 class Node{
         constructor(node){
             this.suffix = node
             this.children = {}
         }
     }
     class STree{
     
         insert(str){
            this.str = str
            this.len = str.length +1000
            this.root = new Node(null)
            var node = t.root, n = 0;
           for(var i = 0; i < str.length; i++){
            [node, n] = this.update(node, n, i);
            [node, n] = this.canonize(node, n, i) 
           }
           
         }
         update(node, left, right){
             var c = this.str[right] //当前字符
             var prev = new Node;
             while(true){
                 var [finish, p] = this.branch(node, left, right-1, c);
                 if(finish){
                     break
                 }
                 p.children[c] =  [right, t.len, new Node]
                 prev.suffix = p;
                 prev = p;
                 [node, left] = this.canonize(node.suffix, left, right-1 );
             }
             prev.suffix = node;
             return [node, left]
          }
          branch(node, left, right, ch){
              if(right-left+1 <= 0){
                  if(!node){
                      return [true, this.root]
                  }else {
                      return [ch in node.children, node]
                  }
              }else{
                 // console.log(this.str, left, 'xxx')
                  var [ll, rr, child] = node.children[this.str[left]];
                  var pos = ll + right - left + 1;
                  if(this.str[pos] == ch){
                      return [true, node]
                  }else{
                      var branchNode = new Node;
                      node.children[this.str[ll]] = [ll, pos -1, branchNode];
                      branchNode.children[ this.str[pos] ] = [pos, rr, child];
                      return [false, branchNode]
                  }
              }
          }
          canonize(node, left, right){
           
            if(!node){
                if(right-left+1 <= 0){
                    return [null, left]
                }else{
                   // console.log(this.root, left+1, right, 'end')
                    return this.canonize(this.root, left+1, right)
                }
            }
            while(left <= right){
               // console.log(this.str, left, 'xxx')
                var [ll, rr, child] = node.children[this.str[left]]
                if(right-left >=  rr - ll){
                    left += rr - ll+1;
                    node = child
                }else{
                    break
                }
            }
            return [node, left]
          }
          findPatten(s){
              var node = this.root;
              while(1){
                 var match = false;
                 for(var i in node.children){
                     var [ll, rr, child] =  node.children[i];
                     var edge = this.str.slice(ll, rr+1);
                     console.log(edge, child)
                     /*
                     if(edge.indexOf(s) === 0){
                     return Math.max( Object.keys(child.children), 1  )
                     }else if(s.indexOf(edge) === 0){
                         match = true
                         node = child;
                         s = s.slice(edge.length)
                     }*/
                 }
                 if(!match){
                     return 0
                 }
              }
              return 0
          }
     }
     var t = new STree;
     t.insert('javaxjava')
     console.log(t)
     console.log(t.findPatten('java'))

@RubyLouvre
Copy link
Owner Author

解决复杂的分叉

 var INACTIVE = 'INACTIVE'
      var ACTIVE = 'ACTIVE'
      var FORK = 'FORK'
      class SuffixNode {
        constructor(value) {
          this.value = value
          this.splitLength = 0;
          this.status = INACTIVE;
          this.children = []
        }
      }
      class SuffixTree {
        constructor() {
          this.root = new SuffixNode('');
        }
        insert(word, sign) {
          sign = sign || '#'
          var nodes = this.root.children;
          word += sign;
          for (var i = 0; i < word.length; i++) {
            console.log("即将插入", word[i], '==============')
            this.addNodeByChar(nodes, word[i], 0)
          }
        }

        addNodeByChar(nodes, ch, level) {
          var add = true;
          if (level > 0) {
            add = false
          }
          for (var i = 0; i < nodes.length; i++) {
            var node = nodes[i]
            var isMatch = node.value[node.splitLength] === ch
            //如果没有分叉,可以修改value

            if (node.status !== FORK) {
              console.log(node.value, '后跟新字符', node.value + ch, node.status)
              node.value += ch;

            }
            if (!isMatch) {
              if (node.status == FORK) {
                if (node.splitLength == node.value) {
                  node.splitLength = 0
                } else {
                  if (node.splitLength) {//第二次分叉
                    var value = node.value;
                    var parent = new SuffixNode(value.slice(0, node.splitLength))//变成父节点
                    nodes[i] = parent;
                    parent.status = FORK
                    parent.children.push(node, new SuffixNode(''))

                    node.value = value.slice(node.splitLength)

                    node.splitLength = 0;
                    node = parent;
                  }
                }
              }

              if (node.status == ACTIVE) {//第一次分叉
                var value = node.value;
                var parent = new SuffixNode(value.slice(0, node.splitLength))//变成父节点
                nodes[i] = parent;
                parent.status = FORK
                parent.children.push(node, new SuffixNode(''))

                node.value = value.slice(node.splitLength, -1)
                node.status = INACTIVE;

                node.splitLength = 0;
                node = parent;
              }
            } else { //命中了,就不用在根节点中添加新节点
              add = false;
              node.splitLength++
              if (node.status === INACTIVE) {
                node.status = ACTIVE; //准备分叉
              }
            }
            //如果有孩子,可以继续传递ch
            if (node.children.length) {
              this.addNodeByChar(node.children, ch, level + 1)
            }
          }
          if (add) {
            if (ch == '#') {
              if (level > 0) {
                return
              }
            }
            console.log(`第${level}层添加新节点 ${ch}`)
            nodes.push(new SuffixNode(ch))
          }
        }
        toString() {
          return getChildren(this.root.children)
        }
      }

      function getChildren(childfren) {
        var array = []
        for (var i = 0; i < children; i++) {
          array.push(node.value + getChildren(node.childfren))
        }
        if (array.length) {
          return '[' + array.join('\n') + ']'
        }
        return ''

      }
      var tree = new SuffixTree();
      //tree.insert("banana");
      tree.insert("abcabxa");
      console.log(tree)

@RubyLouvre
Copy link
Owner Author

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant