123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396 |
- <?php
- namespace AbelZhou\Tree;
- class TrieTree {
- protected $nodeTree = [];
-
- public function __construct() {
- }
-
- public function delete($index_str, $deltree = false, $is_py = false, $chinese = "") {
- $str = trim($index_str);
- $chinese = trim($chinese);
- if ($is_py && empty($chinese)) {
- return false;
- }
- $delstr_arr = $this->convertStrToH($str);
- $len = count($delstr_arr);
-
- $childTree = &$this->nodeTree;
- $del_index = array();
-
- for ($i = 0; $i < $len; $i++) {
- $code = $delstr_arr[$i];
-
- if (isset($childTree[$code])) {
-
- $del_index[$i] = [
- 'code' => $code,
- 'index' => &$childTree[$code]
- ];
-
- if ($i == ($len - 1) && !$childTree[$code]['end']) {
- return false;
- }
- $childTree = &$childTree[$code]['child'];
- } else {
-
- return false;
- }
- }
- $idx = $len - 1;
-
- if ($deltree) {
-
- $del_index[$idx]['index']['child'] = array();
- }
-
- if ($idx == 0) {
-
- if ($is_py) {
-
- if (isset($this->nodeTree[$del_index[$idx]['code']]['chinese_list'])) {
- $is_del = false;
- foreach ($this->nodeTree[$del_index[$idx]['code']]['chinese_list'] as $key=>$node) {
- if ($node['word'] == $chinese){
- unset($this->nodeTree[$del_index[$idx]['code']]['chinese_list'][$key]);
- $is_del = true;
- break;
- }
- }
- if($is_del && 0 != count($this->nodeTree[$del_index[$idx]['code']]['chinese_list'])){
- return true;
- }
- if(!$is_del){
- return false;
- }
-
- }
- }else{
- if (count($del_index[$idx]['index']['child']) == 0) {
- unset($this->nodeTree[$del_index[$idx]['code']]);
- return true;
- }
- }
- }
-
- if (count($del_index[$idx]['index']['child']) > 0) {
- $del_index[$idx]['index']['end'] = false;
- $del_index[$idx]['index']['data'] = array();
- unset($del_index[$idx]['index']['full']);
- return true;
- }
-
-
- for (; $idx >= 0; $idx--) {
-
- if (count($del_index[$idx]['index']['child']) > 0) {
-
- if ($del_index[$idx]['index']['end'] == true || $del_index[$idx]['index']['child'] > 1) {
-
- $child_code = $del_index[$idx + 1]['code'];
- unset($del_index[$idx]['index']['child'][$child_code]);
- return true;
- }
- }
- }
- return false;
- }
-
- public function append($index_str, $data = array(), $is_py = false, $chinese = '') {
- $str = trim($index_str);
- $chinese = trim($chinese);
- if ($is_py && empty($chinese)) {
- return false;
- }
- $childTree = &$this->nodeTree;
- $len = strlen($str);
- for ($i = 0; $i < $len; $i++) {
- $ascii_code = ord($str[$i]);
- $code = NULL;
- $word = NULL;
- $is_end = false;
- if (($ascii_code >> 7) == 0) {
- $code = dechex(ord($str[$i]));
- $word = $str[$i];
- } else if (($ascii_code >> 4) == 15) {
- if ($i < $len - 3) {
- $code = dechex(ord($str[$i])) . dechex(ord($str[$i + 1])) . dechex(ord($str[$i + 2])) . dechex(ord($str[$i + 3]));
- $word = $str[$i] . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];
- $i += 3;
- }
- } else if (($ascii_code >> 5) == 7) {
- if ($i < $len - 2) {
- $code = dechex(ord($str[$i])) . dechex(ord($str[$i + 1])) . dechex(ord($str[$i + 2]));
- $word = $str[$i] . $str[$i + 1] . $str[$i + 2];
- $i += 2;
- }
- } else if (($ascii_code >> 6) == 3) {
- if ($i < $len - 1) {
- $code = dechex(ord($str[$i])) . dechex(ord($str[$i + 1]));
- $word = $str[$i] . $str[$i + 1];
- $i++;
- }
- }
- if ($i == ($len - 1)) {
- $is_end = true;
- if ($is_py) {
- $str = $chinese;
- }
- }
- $childTree = &$this->_appendWordToTree($childTree, $code, $word, $is_end, $data, $str, $is_py);
- }
- unset($childTree);
- return $this;
- }
-
- private function &_appendWordToTree(&$tree, $code, $word, $end = false, $data = array(), $full_str = '', $is_py) {
- if (!isset($tree[$code])) {
- $tree[$code] = array(
- 'end' => $end,
- 'child' => array(),
- 'value' => $word,
- );
- }
- if ($end) {
- $tree[$code]['end'] = true;
- $tree[$code]['is_py'] = $is_py;
-
- if ($is_py) {
- $is_change = false;
- if(isset($tree[$code]["chinese_list"]) && count($tree[$code]["chinese_list"])>0) {
- foreach ($tree[$code]["chinese_list"] as $key => &$node) {
- if ($node['word'] == $full_str) {
- $node['data'] = $data;
- $is_change = true;
- break;
- }
- }
- }
- if(!$is_change){
- $tree[$code]['chinese_list'][] = ["word" => $full_str, "data" => $data];
- }
- } else {
- $tree[$code]['full'] = $full_str;
- $tree[$code]['data'] = $data;
- }
- }
- return $tree[$code]['child'];
- }
-
- public function getTree() {
- return $this->nodeTree;
- }
-
- public function getTreeWord($word, $deep = 0) {
- $search = trim($word);
- if (empty($search)) {
- return false;
- }
- if ($deep === 0) {
- $deep = 999;
- }
- $word_keys = $this->convertStrToH($search);
- $tree = &$this->nodeTree;
- $key_count = count($word_keys);
- $words = [];
- foreach ($word_keys as $key => $val) {
- if (isset($tree[$val])) {
-
- if ($key == $key_count - 1 && $tree[$val]['end'] == true) {
- if (isset($tree[$val]['chinese_list'])) {
- $words = array_merge($words, $tree[$val]['chinese_list']);
- } else {
- $words[] = ["word" => $tree[$val]['full'], "data" => $tree[$val]['data']];
- }
- }
- $tree = &$tree[$val]["child"];
- } else {
-
- return [];
- }
- }
- $this->_getTreeWord($tree, $deep, $words);
- return $words;
- }
- private function _getTreeWord(&$child, $deep, &$words = array()) {
- foreach ($child as $node) {
- if ($node['end'] == true) {
- if (isset($node['chinese_list'])) {
- $words = array_merge($words, $node['chinese_list']);
- } else {
- $words[] = ["word" => $node['full'], "data" => $node['data']];
- }
- }
- if (!empty($node['child']) && $deep >= count($words)) {
- $this->_getTreeWord($node['child'], $deep, $words);
- }
- }
- }
-
- public function __toString() {
-
- return json_encode($this->nodeTree);
- }
-
- public function search($search) {
- $search = trim($search);
- if (empty($search)) {
- return false;
- }
- $search_keys = $this->convertStrToH($search);
-
- $hit_arr = array();
- $tree = &$this->nodeTree;
- $arr_len = count($search_keys);
- $current_index = 0;
- for ($i = 0; $i < $arr_len; $i++) {
-
- if (isset($tree[$search_keys[$i]])) {
- $node = $tree[$search_keys[$i]];
- if ($node['end']) {
-
- $key = md5($node['full']);
- if (isset($hit_arr[$key])) {
- $hit_arr[$key]['count'] += 1;
- } else {
- $hit_arr[$key] = array(
- 'word' => $node['full'],
- 'data' => $node['data'],
- 'count' => 1
- );
- }
- if (empty($node['child'])) {
-
- $i = $current_index;
-
- $tree = &$this->nodeTree;
-
- $current_index++;
- } else {
-
- $tree = &$tree[$search_keys[$i]]['child'];
- }
- continue;
- } else {
-
- $tree = &$tree[$search_keys[$i]]['child'];
- continue;
- }
- } else {
-
- $i = $current_index;
-
- $tree = &$this->nodeTree;
-
- $current_index++;
- continue;
- }
- }
- unset($tree, $search_keys);
- return $hit_arr;
- }
-
- public function convertStrToH($str) {
- $len = strlen($str);
- $chars = [];
- for ($i = 0; $i < $len; $i++) {
- $ascii_code = ord($str[$i]);
- if (($ascii_code >> 7) == 0) {
- $chars[] = dechex(ord($str[$i]));
- } else if (($ascii_code >> 4) == 15) {
- if ($i < $len - 3) {
- $chars[] = dechex(ord($str[$i])) . dechex(ord($str[$i + 1])) . dechex(ord($str[$i + 2])) . dechex(ord($str[$i + 3]));
- $i += 3;
- }
- } else if (($ascii_code >> 5) == 7) {
- if ($i < $len - 2) {
- $chars[] = dechex(ord($str[$i])) . dechex(ord($str[$i + 1])) . dechex(ord($str[$i + 2]));
- $i += 2;
- }
- } else if (($ascii_code >> 6) == 3) {
- if ($i < $len - 1) {
- $chars[] = dechex(ord($str[$i])) . dechex(ord($str[$i + 1]));
- $i++;
- }
- }
- }
- return $chars;
- }
- }
|