| @@ -0,0 +1,113 @@ | |||
| #!/usr/bin/env python3 | |||
| # -*- coding: utf-8 -*- | |||
| """ | |||
| Created on Wed Jan 30 10:48:49 2019 | |||
| Trie (prefix tree) | |||
| @author: ljia | |||
| @references: | |||
| `NLP: Build a Trie Data structure from scratch with python <https://viblo.asia/p/nlp-build-a-trie-data-structure-from-scratch-with-python-3P0lPzroKox>`__, 2019.1 | |||
| """ | |||
| import pickle | |||
| import json | |||
| """ Trie class | |||
| """ | |||
| class Trie: | |||
| """ | |||
| """ | |||
| # init Trie class | |||
| def __init__(self): | |||
| self.root = self.getNode() | |||
| def getNode(self): | |||
| return {"isEndOfWord": False, "children": {}} | |||
| def insertWord(self, word): | |||
| current = self.root | |||
| for ch in word: | |||
| if ch in current["children"]: | |||
| node = current["children"][ch] | |||
| else: | |||
| node = self.getNode() | |||
| current["children"][ch] = node | |||
| current = node | |||
| current["isEndOfWord"] = True | |||
| if 'count' in current: | |||
| current['count'] += 1 | |||
| else: | |||
| current['count'] = 1 | |||
| def searchWord(self, word): | |||
| current = self.root | |||
| for ch in word: | |||
| if ch not in current["children"]: | |||
| return 0 | |||
| node = current["children"][ch] | |||
| current = node | |||
| if 'count' in current: | |||
| return current["count"] | |||
| else: | |||
| return 0 | |||
| def searchWordPrefix(self, word): | |||
| current = self.root | |||
| for ch in word: | |||
| if not current["children"].has_key(ch): | |||
| return False | |||
| node = current["children"][ch] | |||
| current = node | |||
| # return True if children contain keys and values | |||
| return bool(current["children"]) | |||
| def deleteWord(self, word): | |||
| self._delete(self.root, word, 0) | |||
| def _delete(self, current, word, index): | |||
| if(index == len(word)): | |||
| if not current["isEndOfWord"]: | |||
| return False | |||
| current["isEndOfWord"] = False | |||
| return len(current["children"].keys()) == 0 | |||
| ch = word[index] | |||
| if not current["children"].has_key(ch): | |||
| return False | |||
| node = current["children"][ch] | |||
| should_delete_current_node = self._delete(node, word, index + 1) | |||
| if should_delete_current_node: | |||
| current["children"].pop(ch) | |||
| return len(current["children"].keys()) == 0 | |||
| return False | |||
| def save_to_pickle(self, file_name): | |||
| f = open(file_name + ".pkl", "wb") | |||
| pickle.dump(self.root, f) | |||
| f.close() | |||
| def load_from_pickle(self, file_name): | |||
| f = open(file_name + ".pkl", "rb") | |||
| self.root = pickle.load(f) | |||
| f.close() | |||
| def to_json(self): | |||
| return json.dump(self.root) | |||
| def save_to_json(self, file_name): | |||
| json_data = json.dumps(self.root) | |||
| f = open(file_name + ".json", "w") | |||
| f.write(json_data) | |||
| f.close() | |||
| def load_from_json(self, file_name): | |||
| json_file = open(file_name + ".json", "r") | |||
| self.root = json.load(json_file) | |||
| json_file.close() | |||