@@ -53,7 +53,7 @@ print(words)
 pos = jiagu.pos(words) # part-of-speech tagging
 print(pos)

-ner = jiagu.ner(text) # named entity recognition
+ner = jiagu.ner(words) # named entity recognition
 print(ner)
 ```
@@ -61,7 +61,7 @@ print(ner)
 ```python3
 import jiagu

-text = '汉服和服装、知识图谱机器人'
+text = '汉服和服装、维基图谱'

 words = jiagu.cut(text) # deep-learning word segmentation
 print(words)
@@ -70,7 +70,7 @@ words = jiagu.seg(text) # 字典分词
 print(words)

 # jiagu.load_userdict('dict/user.dict') # load a user dictionary; a dictionary file path or a list of words is accepted.
-jiagu.load_userdict(['知识图谱'])
+jiagu.load_userdict(['汉服和服装'])

 words = jiagu.seg(text) # segmentation with the user dictionary; only effective in dictionary mode
 print(words)
@@ -3,25 +3,25 @@ import jiagu

 # jiagu.init() # manual initialization is optional; models are otherwise loaded lazily

-text = '厦门明天会不会下雨'
+text = '在苏州冻成狗'

-words = jiagu.cut(text) # word segmentation
+words = jiagu.seg(text) # word segmentation
 print(words)

 pos = jiagu.pos(words) # part-of-speech tagging
 print(pos)

-ner = jiagu.ner(text) # named entity recognition
+ner = jiagu.ner(words) # named entity recognition
 print(ner)

 # dictionary-mode segmentation
-text = '知识图谱机器人'
+text = '思知机器人挺好用的'
 words = jiagu.seg(text)
 print(words)

 # jiagu.load_userdict('dict/user.dict') # load a user dictionary; a dictionary file path or a list of words is accepted.
-jiagu.load_userdict(['知识图谱'])
+jiagu.load_userdict(['思知机器人'])
 words = jiagu.seg(text)
 print(words)
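Note on the API change above: `ner()` now takes the word list produced by `cut()`/`seg()`, as `pos()` already did, and returns one label per word. A minimal sketch of the resulting pipeline, using only calls shown in the README and demo (output values depend on the shipped models, so none are hard-coded here):

```python3
import jiagu

words = jiagu.seg('在苏州冻成狗')   # word segmentation
print(words)
print(jiagu.pos(words))             # one part-of-speech tag per word
print(jiagu.ner(words))             # one entity label per word
```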
@@ -1,13 +1,5 @@
 #!/usr/bin/env python3
 # -*-coding:utf-8-*-
-"""
- * Copyright (C) 2018 OwnThink.
- *
- * Name : __init__.py
- * Author : Yener <yener@ownthink.com>
- * Version : 0.01
- * Description :
-"""
 from jiagu import analyze

 any = analyze.Analyze()
@@ -1,11 +1,2 @@
 #!/usr/bin/env python3
 # -*-coding:utf-8-*-
-"""
- * Copyright (C) 2018 OwnThink.
- *
- * Name : __main__.py
- * Author : Yener <yener@ownthink.com>
- * Version : 0.01
- * Description :
-"""
-
@@ -1,17 +1,9 @@
 #!/usr/bin/env python3
 # -*-coding:utf-8-*-
-"""
- * Copyright (C) 2018 OwnThink.
- *
- * Name : analyze.py - 解析模块
- * Author : Yener <yener@ownthink.com>
- * Version : 0.01
- * Description :
-"""
 import os
 from jiagu import mmseg
 from jiagu import findword
-from jiagu import bilstm_crf
+from jiagu import perceptron
 from jiagu.textrank import Keywords
 from jiagu.textrank import Summarize
 from jiagu.segment.nroute import Segment
@@ -50,18 +42,18 @@ class Analyze(object):
     def init_cws(self):
         if self.seg_model is None:
-            self.seg_model = bilstm_crf.Predict(add_curr_dir('model/cws.model'))
+            self.seg_model = perceptron.Perceptron(add_curr_dir('model/cws.model'))

     def load_model(self, model_path):
-        self.seg_model = bilstm_crf.Predict(model_path)
+        self.seg_model = perceptron.Perceptron(model_path)

     def init_pos(self):
         if self.pos_model is None:
-            self.pos_model = bilstm_crf.Predict(add_curr_dir('model/pos.model'))
+            self.pos_model = perceptron.Perceptron(add_curr_dir('model/pos.model'))

     def init_ner(self):
         if self.ner_model is None:
-            self.ner_model = bilstm_crf.Predict(add_curr_dir('model/ner.model'))
+            self.ner_model = perceptron.Perceptron(add_curr_dir('model/ner.model'))

     def init_mmseg(self):
         if self.seg_mmseg is None:
@@ -69,7 +61,7 @@ class Analyze(object):
     def init_kg(self):
         if self.kg_model is None:
-            self.kg_model = bilstm_crf.Predict(add_curr_dir('model/kg.model'))
+            self.kg_model = perceptron.Perceptron(add_curr_dir('model/kg.model'))

     @staticmethod
     def __lab2word(sentence, labels):
@@ -97,22 +89,13 @@ class Analyze(object):
     def cws_text(self, sentence):
         if sentence == '':
             return ['']
-        labels = self.seg_model.predict([sentence])[0]
+        labels = self.seg_model.predict(list(sentence))
         return self.__lab2word(sentence, labels)

-    def cws_list(self, sentences):
-        text_list = sentences
-        all_labels = self.seg_model.predict(text_list)
-        sent_words = []
-        for ti, text in enumerate(text_list):
-            seg_labels = all_labels[ti]
-            sent_words.append(self.__lab2word(text, seg_labels))
-        return sent_words
-
     def seg(self, sentence):
         return self.seg_nroute.seg(sentence, mode="default")

-    def cws(self, sentence, input='text', model='default'):
+    def cws(self, sentence, model='default'):
         """Chinese word segmentation

         :param sentence: str or list
@@ -125,54 +108,31 @@ class Analyze(object):
| """ | |||
| if model == 'default': | |||
| self.init_cws() | |||
| if input == 'batch': | |||
| words_list = self.cws_list(sentence) | |||
| return words_list | |||
| else: | |||
| words = self.cws_text(sentence) | |||
| return words | |||
| words = self.cws_text(sentence) | |||
| return words | |||
| elif model == 'mmseg': | |||
| self.init_mmseg() | |||
| words = self.seg_mmseg.cws(sentence) | |||
| return words | |||
| else: | |||
| pass | |||
| return [] | |||
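For reference, a minimal sketch of choosing between the two `cws` modes above. It assumes the `Analyze` instance exported as `jiagu.any` in `__init__.py` (shown earlier) and the model files bundled with the package:

```python
import jiagu

# default mode: perceptron sequence labelling over the characters of the input
print(jiagu.any.cws('汉服和服装'))
# mmseg mode: dictionary-based mmseg segmentation, loaded lazily by init_mmseg()
print(jiagu.any.cws('汉服和服装', model='mmseg'))
```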
-    def pos(self, sentence, input='words'):  # takes a list of words
+    def pos(self, words):  # takes a list of words
         self.init_pos()
+        labels = self.pos_model.predict(words)
+        return labels
-        if input == 'batch':
-            all_labels = self.pos_model.predict(sentence)
-            return all_labels
-        else:
-            labels = self.pos_model.predict([sentence])[0]
-            return labels

-    def ner(self, sentence, input='text'):  # takes raw text
+    def ner(self, words):  # takes a list of words
         self.init_ner()
+        labels = self.ner_model.predict(words)
+        return labels
-        if input == 'batch':
-            all_labels = self.ner_model.predict(sentence)
-            return all_labels
-        else:
-            labels = self.ner_model.predict([sentence])[0]
-            return labels
-    def knowledge(self, sentence, input='text'):
+    def knowledge(self, text):  # takes raw text
         self.init_kg()
-        if input == 'batch':
-            all_labels = self.kg_model.predict(sentence)
-            result = []
-            for sent, labels in zip(sentence, all_labels):
-                result.append(self.lab2spo(sent, labels))
-            return result
-        else:
-            labels = self.kg_model.predict([sentence])[0]
-            return self.lab2spo(sentence, labels)
+        words = self.seg(text)
+        labels = self.kg_model.predict(words)
+        return self.lab2spo(words, labels)
     def keywords(self, text, topkey=5):
         if self.keywords_model == None:
@@ -195,11 +155,11 @@ class Analyze(object):
     def text_cluster(self, docs, features_method='tfidf', method="k-means", k=3, max_iter=100, eps=0.5, min_pts=2):
         return cluster(docs, features_method, method, k, max_iter, eps, min_pts, self.seg)

-    def lab2spo(self, text, epp_labels):
+    def lab2spo(self, words, epp_labels):
         subject_list = []  # list holding the candidate subject entities
         object_list = []
         index = 0
-        for word, ep in zip(list(text), epp_labels):
+        for word, ep in zip(words, epp_labels):
             if ep[0] == 'B' and ep[2:] == '实体':
                 subject_list.append([word, ep[2:], index])
             elif (ep[0] == 'I' or ep[0] == 'E') and ep[2:] == '实体':
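`lab2spo` (truncated above) receives one label per word from the kg model and groups `B-`/`I-`/`E-` prefixed labels into entity spans before building triples. A small illustrative sketch of just that grouping step; the label strings are assumed examples, and the triple-assembly part is not shown in this diff:

```python
# Illustrative only: collect words whose labels carry B-/I-/E- prefixes (as tested
# in lab2spo) into [entity_text, entity_type, start_index] spans.
def collect_spans(words, labels, target='实体'):
    spans, current = [], None
    for idx, (word, lab) in enumerate(zip(words, labels)):
        if lab[0] == 'B' and lab[2:] == target:
            current = [word, target, idx]
            spans.append(current)
        elif lab[0] in ('I', 'E') and lab[2:] == target and current is not None:
            current[0] += word
        else:
            current = None
    return spans

print(collect_spans(['汉服', '和', '服装'], ['B-实体', 'I-实体', 'E-实体']))
# -> [['汉服和服装', '实体', 0]]
```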
@@ -1,77 +0,0 @@
-#!/usr/bin/env python3
-# -*-coding:utf-8-*-
-"""
- * Copyright (C) 2018 OwnThink.
- *
- * Name : bilstm_crf.py - 预测
- * Author : Yener <yener@ownthink.com>
- * Version : 0.01
- * Description :
-"""
-import os
-
-os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
-
-import pickle
-import numpy as np
-import tensorflow as tf
-from tensorflow.contrib.crf import viterbi_decode
-
-
-class Predict(object):
-    def __init__(self, model_file):
-        with open(model_file, 'rb') as f:
-            model, char_to_id, id_to_tag = pickle.load(f)
-
-        self.char_to_id = char_to_id
-        self.id_to_tag = {int(k): v for k, v in id_to_tag.items()}
-        self.num_class = len(self.id_to_tag)
-
-        graph_def = tf.GraphDef()
-        graph_def.ParseFromString(model)
-        with tf.Graph().as_default() as graph:
-            tf.import_graph_def(graph_def, name="prefix")
-
-        self.input_x = graph.get_tensor_by_name("prefix/char_inputs:0")
-        self.lengths = graph.get_tensor_by_name("prefix/lengths:0")
-        self.dropout = graph.get_tensor_by_name("prefix/dropout:0")
-        self.logits = graph.get_tensor_by_name("prefix/project/logits:0")
-        self.trans = graph.get_tensor_by_name("prefix/crf_loss/transitions:0")
-
-        self.sess = tf.Session(graph=graph)
-        self.sess.as_default()
-
-    def decode(self, logits, trans, sequence_lengths, tag_num):
-        small = -1000.0
-        viterbi_sequences = []
-        start = np.asarray([[small] * tag_num + [0]])
-        for logit, length in zip(logits, sequence_lengths):
-            score = logit[:length]
-            pad = small * np.ones([length, 1])
-            score = np.concatenate([score, pad], axis=1)
-            score = np.concatenate([start, score], axis=0)
-            viterbi_seq, viterbi_score = viterbi_decode(score, trans)
-            viterbi_sequences.append(viterbi_seq[1:])
-        return viterbi_sequences
-
-    def predict(self, sents):
-        inputs = []
-        lengths = [len(text) for text in sents]
-        max_len = max(lengths)
-        for sent in sents:
-            sent_ids = [self.char_to_id.get(w) if w in self.char_to_id else self.char_to_id.get("<OOV>") for w in sent]
-            padding = [0] * (max_len - len(sent_ids))
-            sent_ids += padding
-            inputs.append(sent_ids)
-        inputs = np.array(inputs, dtype=np.int32)
-
-        feed_dict = {
-            self.input_x: inputs,
-            self.lengths: lengths,
-            self.dropout: 1.0
-        }
-        logits, trans = self.sess.run([self.logits, self.trans], feed_dict=feed_dict)
-        path = self.decode(logits, trans, lengths, self.num_class)
-        labels = [[self.id_to_tag.get(l) for l in p] for p in path]
-        return labels
@@ -1,15 +1,4 @@
 # -*- encoding:utf-8 -*-
-"""
- * Copyright (C) 2017 OwnThink.
- *
- * Name : findword.py - 新词发现
- * Author : Yener <yener@ownthink.com>
- * Version : 0.01
- * Description : 新词发现算法实现
-    special thanks to
-        http://www.matrix67.com/blog/archives/5044
-        https://github.com/zoulala/New_words_find
-"""
 import re
 from math import log
 from collections import Counter
@@ -1,13 +1,5 @@
 #!/usr/bin/env python
 # encoding: utf-8
-"""
- * Copyright (C) 2018 OwnThink.
- *
- * Name : mmseg.py
- * Author : Leo <1162441289@qq.com>
- * Version : 0.01
- * Description : mmseg分词方法,目前算法比较耗时,仍在优化中
-"""
 import os
 import pickle
 from math import log
@@ -0,0 +1,227 @@
+# -*- coding:utf-8 -*-
+import os
+import gzip
+import pickle
+import random
+from collections import defaultdict
+
+
+class AveragedPerceptron(object):
+    def __init__(self):
+        # Each feature gets its own weight vector, so weights is a dict-of-dicts
+        self.weights = {}
+        self.classes = set()
+        # The accumulated values, for the averaging. These will be keyed by
+        # feature/clas tuples
+        self._totals = defaultdict(int)
+        # The last time the feature was changed, for the averaging. Also
+        # keyed by feature/clas tuples
+        # (tstamps is short for timestamps)
+        self._tstamps = defaultdict(int)
+        # Number of instances seen
+        self.i = 0
+
+    def predict(self, features):
+        '''Dot-product the features and current weights and return the best label.'''
+        scores = defaultdict(float)
+        for feat, value in features.items():
+            if feat not in self.weights or value == 0:
+                continue
+            weights = self.weights[feat]
+            for label, weight in weights.items():
+                scores[label] += value * weight
+        # Do a secondary alphabetic sort, for stability
+        return max(self.classes, key=lambda label: (scores[label], label))
+
+    def update(self, truth, guess, features):
+        '''Update the feature weights.'''
+        def upd_feat(c, f, w, v):
+            param = (f, c)
+            self._totals[param] += (self.i - self._tstamps[param]) * w
+            self._tstamps[param] = self.i
+            self.weights[f][c] = w + v
+
+        self.i += 1
+        if truth == guess:
+            return None
+        for f in features:
+            weights = self.weights.setdefault(f, {})
+            upd_feat(truth, f, weights.get(truth, 0.0), 1.0)
+            upd_feat(guess, f, weights.get(guess, 0.0), -1.0)
+        return None
+
+    def average_weights(self):
+        '''Average weights from all iterations.'''
+        for feat, weights in self.weights.items():
+            new_feat_weights = {}
+            for clas, weight in weights.items():
+                param = (feat, clas)
+                total = self._totals[param]
+                total += (self.i - self._tstamps[param]) * weight
+                averaged = round(total / float(self.i), 3)
+                if averaged:
+                    new_feat_weights[clas] = averaged
+            self.weights[feat] = new_feat_weights
+        return None
+
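The `_totals`/`_tstamps` bookkeeping above is lazy averaging: instead of adding every weight to a running sum after each training instance, the accumulated contribution `(i - tstamp) * w` is added only when a weight actually changes, plus one final settlement in `average_weights()`. A standalone check with a made-up update sequence for a single feature/class pair, showing it matches the naive per-instance sum:

```python
# Hypothetical update history for one feature/class pair: instance index -> delta.
deltas = {3: 1.0, 7: -1.0, 8: 1.0}
n = 10  # total training instances seen

# Naive averaging: add the current weight once per instance, then apply updates.
w, naive_total = 0.0, 0.0
for i in range(1, n + 1):
    naive_total += w
    w += deltas.get(i, 0.0)

# Lazy averaging, as in AveragedPerceptron: touch the total only when w changes.
w, total, tstamp = 0.0, 0.0, 0
for i in sorted(deltas):
    total += (i - tstamp) * w
    w += deltas[i]
    tstamp = i
total += (n - tstamp) * w   # final settlement, as in average_weights()

assert naive_total == total
print(total / n)  # the averaged weight both computations agree on
```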
+
+class Perceptron:
+    def __init__(self, loc=None):
+        self.START = ['-START-', '-START2-']
+        self.END = ['-END-', '-END2-']
+        self.model = AveragedPerceptron()
+        if loc != None:
+            self.load(loc)
+
+    def predict(self, words):
+        prev, prev2 = self.START
+        labels = []
+        context = self.START + words + self.END
+        for i, word in enumerate(words):
+            features = self._get_features(i, word, context, prev, prev2)
+            tag = self.model.predict(features)
+            labels.append(tag)
+            prev2 = prev
+            prev = tag
+        return labels
+
+    def train(self, sentences, save_loc=None, nr_iter=5, shuf=False):
+        self._make_tagdict(sentences)
+        for iter_ in range(nr_iter):
+            c = 0
+            n = 0
+            for words, tags in sentences:
+                prev, prev2 = self.START
+                context = self.START + words + self.END
+                for i, word in enumerate(words):
+                    feats = self._get_features(i, word, context, prev, prev2)
+                    guess = self.model.predict(feats)
+                    self.model.update(tags[i], guess, feats)
+                    prev2 = prev
+                    prev = guess
+                    c += guess == tags[i]
+                    n += 1
+            if shuf == True:
+                random.shuffle(sentences)
+            print("Iter {0}: {1}/{2}={3}".format(iter_, c, n, (float(c) / n) * 100))
+            self.save(save_loc)
+        self.model.average_weights()
+        self.save(save_loc)
+
+    def save(self, loc='model/ap.model', zip=True):
+        if zip == False:
+            pickle.dump((self.model.weights, self.model.classes), open(loc, 'wb'))
+        else:
+            pickle.dump((self.model.weights, self.model.classes), gzip.open(loc, 'wb'))
+
+    def load(self, loc='model/ap.model', zip=True):
+        if zip == False:
+            self.model.weights, self.model.classes = pickle.load(open(loc, 'rb'))
+        else:
+            self.model.weights, self.model.classes = pickle.load(gzip.open(loc, 'rb'))
+
+    def _get_features(self, i, word, context, prev, prev2):
+        '''Map tokens into a feature representation, implemented as a
+        {hashable: float} dict. If the features change, a new model must be
+        trained.
+        '''
+        def add(name, *args):
+            features[' '.join((name,) + tuple(args))] += 1
+
+        i += len(self.START)
+        features = defaultdict(int)
+        # It's useful to have a constant feature, which acts sort of like a prior
+        add('bias')
+        add('i suffix', word[-3:])
+        add('i pref1', word[0])
+        add('i-1 tag', prev)
+        add('i-2 tag', prev2)
+        add('i tag+i-2 tag', prev, prev2)
+        add('i word', context[i])
+        add('i-1 tag+i word', prev, context[i])
+        add('i-1 word', context[i - 1])
+        add('i-1 suffix', context[i - 1][-3:])
+        add('i-2 word', context[i - 2])
+        add('i+1 word', context[i + 1])
+        add('i+1 suffix', context[i + 1][-3:])
+        add('i+2 word', context[i + 2])
+        return features
+
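As a concrete illustration of `_get_features`: tagging the first token of `list('苏州')` through `predict()` above means `context = ['-START-', '-START2-', '苏', '州', '-END-', '-END2-']`, `prev = '-START-'` and `prev2 = '-START2-'`. A quick inspection sketch (it calls the leading-underscore method directly, which is fine for poking around but not a public API):

```python
from jiagu import perceptron   # import path as used in analyze.py above

p = perceptron.Perceptron()    # no model file needed just to inspect features
words = list('苏州')
context = p.START + words + p.END
feats = p._get_features(0, words[0], context, '-START-', '-START2-')
print(sorted(feats))
# Keys include, for example:
#   'bias', 'i word 苏', 'i suffix 苏', 'i pref1 苏',
#   'i-1 word -START2-', 'i-1 tag -START-', 'i+1 word 州', 'i+2 word -END-'
```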
+    def _make_tagdict(self, sentences):
+        '''Collect the set of output classes seen in the training data.'''
+        for words, tags in sentences:
+            for word, tag in zip(words, tags):
+                self.model.classes.add(tag)
+
+
+def train(filepath='data/train.txt', model='model/ap.model', nr_iter=1):
+    tagger = Perceptron()
+    print('Reading corpus...')
+    training_data = []
+    sentence = ([], [])
+    fin = open(filepath, 'r', encoding='utf8')
+    for index, line in enumerate(fin):
+        line = line.strip()
+        if line == '':
+            training_data.append(sentence)
+            sentence = ([], [])
+        else:
+            params = line.split()
+            if len(params) != 2: continue
+            sentence[0].append(params[0])
+            sentence[1].append(params[1])
+    fin.close()
+
+    print('training corpus size : %d' % len(training_data))
+    print('Start training...')
+    tagger.train(training_data, save_loc=model, nr_iter=nr_iter)
+
+
+def eval(filepath='data/test.txt', model='model/ap.model'):
+    tagger = Perceptron(model)
+    print('Start testing...')
+    right = 0.0
+    total = 0.0
+    sentence = ([], [])
+    fin = open(filepath, 'r', encoding='utf8')
+    for index, line in enumerate(fin):
+        line = line.strip()
+        if line == '':
+            words = sentence[0]
+            tags = sentence[1]
+
+            outputs = tagger.predict(words)
+            assert len(tags) == len(outputs)
+            total += len(tags)
+            for o, t in zip(outputs, tags):
+                if o == t: right += 1
+
+            sentence = ([], [])
+        else:
+            params = line.split()
+            if len(params) != 2: continue
+            sentence[0].append(params[0])
+            sentence[1].append(params[1])
+    fin.close()
+    print("Precision : %f" % (right / total))
+
+
+def predict(model='model/ap.model'):
+    tagger = Perceptron(model)
+    while True:
+        text = input('>')
+        words = list(text)
+        labels = tagger.predict(words)
+        for word, label in zip(words, labels):
+            print(word, label)
+
+
+if __name__ == '__main__':
+    train()
+    eval()
+    # predict()
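A minimal usage sketch for the new module. The training-data format is exactly what `train()` above reads (one `token label` pair per line, blank line between sentences); the file paths and the B/M/E/S label scheme are illustrative assumptions, since the real model paths are resolved through `add_curr_dir` in `analyze.py`:

```python
from jiagu import perceptron

# data/train.txt might look like this (labels assumed for illustration):
#   深 B
#   度 E
#   学 B
#   习 E
#
#   分 B
#   词 E
# perceptron.train('data/train.txt', model='model/cws.model', nr_iter=5)

tagger = perceptron.Perceptron('model/cws.model')   # path is illustrative
print(tagger.predict(list('深度学习')))              # one label per character
```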
@@ -1,13 +1,4 @@
 # -*- encoding:utf-8 -*-
-"""
- * Copyright (C) 2017 OwnThink.
- *
- * Name : textrank.py - 解析
- * Author : zengbin93 <zeng_bin8888@163.com>
- * Version : 0.01
- * Description : TextRank算法实现
-    special thanks to https://github.com/ArtistScript/FastTextRank
-"""
 import sys
 import numpy as np
 from jiagu import utils
@@ -1,16 +1,7 @@
 # -*- encoding:utf-8 -*-
-"""
- * Copyright (C) 2017 OwnThink.
- *
- * Name : utils.py - 解析
- * Author : zengbin93 <zeng_bin8888@163.com>
- * Version : 0.01
- * Description : 常用工具函数
-"""
 import os
 import jiagu
 import math
 import numpy as np


 def default_stopwords_file():
@@ -138,22 +129,6 @@ def different(scores, old_scores, tol=0.0001):
     return flag


-def cosine_similarity(vec1, vec2):
-    """计算两个向量的余弦相似度
-
-    :param vec1: list or np.array
-    :param vec2: list or np.array
-    :return: float
-    """
-    tx = np.array(vec1)
-    ty = np.array(vec2)
-    cos1 = np.sum(tx * ty)
-    cos21 = np.sqrt(sum(tx ** 2))
-    cos22 = np.sqrt(sum(ty ** 2))
-    cosine_value = cos1 / float(cos21 * cos22)
-    return cosine_value
-
-
 def combine(word_list, window=2):
     if window < 2:
         window = 2
@@ -3,16 +3,15 @@
 from setuptools import setup

 setup(name='jiagu',
-      version='0.1.8',
+      version='0.1.9',
       description='Jiagu Natural Language Processing',
       author='Yener(Zheng Wenyu)',
       author_email='help@ownthink.com',
       url='https://github.com/ownthink/Jiagu',
       license='MIT',
-      install_requires=['tensorflow==1.6.0', 'numpy>=1.12.1'],
       packages=['jiagu'],
       package_dir={'jiagu': 'jiagu'},
       package_data={'jiagu': ['*.*', 'cluster/*', 'data/*', 'model/*',
-                              'normal/*', 'segment/*', 'segment/dict/*',
-                              'sentiment/*', 'sentiment/model/*', 'topic/*']}
+                              'normal/*', 'segment/*', 'segment/dict/*',
+                              'sentiment/*', 'sentiment/model/*', 'topic/*']}
       )
@@ -170,6 +170,7 @@ def train(filepath='data/train.txt', model='model/ap.model', nr_iter=1):
             sentence = ([], [])
         else:
             params = line.split()
+            if len(params) != 2: continue
             sentence[0].append(params[0])
             sentence[1].append(params[1])
     fin.close()