diff --git a/fastNLP/core/action.py b/fastNLP/core/action.py
index 358db499..ef595cbb 100644
--- a/fastNLP/core/action.py
+++ b/fastNLP/core/action.py
@@ -1,7 +1,3 @@
-"""
-    This file defines Action(s) and sample methods.
-
-"""
 from collections import Counter
 
 import numpy as np
@@ -9,13 +5,12 @@
 import torch
 
 
 class Action(object):
-    """
-        Operations shared by Trainer, Tester, or Inference.
+    """Operations shared by Trainer, Tester, or Inference.
+        This is designed to reduce duplicated code.
         - make_batch: produce a min-batch of data. @staticmethod
         - pad: padding method used in sequence modeling. @staticmethod
         - mode: change network mode for either train or test. (for PyTorch) @staticmethod
-        The base Action shall define operations shared by as much task-specific Actions as possible.
     """
 
     def __init__(self):
@@ -24,18 +19,20 @@ class Action(object):
     @staticmethod
     def make_batch(iterator, use_cuda, output_length=True, max_len=None):
         """Batch and Pad data.
+
         :param iterator: an iterator, (object that implements __next__ method) which returns the next sample.
         :param use_cuda: bool, whether to use GPU
         :param output_length: bool, whether to output the original length of the sequence before padding. (default: True)
         :param max_len: int, maximum sequence length. Longer sequences will be clipped. (default: None)
-        :return
-            if output_length is True:
+        :return:
+
+            if output_length is True, (batch_x, seq_len): tuple of two elements
                 batch_x: list. Each entry is a list of features of a sample. [batch_size, max_len]
                 seq_len: list. The length of the pre-padded sequence, if output_length is True.
                 batch_y: list. Each entry is a list of labels of a sample. [batch_size, num_labels]
-            if output_length is False:
+            if output_length is False,
                 batch_x: list. Each entry is a list of features of a sample. [batch_size, max_len]
                 batch_y: list. Each entry is a list of labels of a sample. [batch_size, num_labels]
         """
@@ -77,21 +74,21 @@
         return batch
 
     @staticmethod
-    def mode(model, test=False):
-        """
-        Train mode or Test mode. This is for PyTorch currently.
-        :param model:
-        :param test:
+    def mode(model, is_test=False):
+        """Train mode or Test mode. This is for PyTorch currently.
+
+        :param model: a PyTorch model
+        :param is_test: bool, whether in test mode or not.
         """
-        if test:
+        if is_test:
             model.eval()
         else:
             model.train()
 
 
 def convert_to_torch_tensor(data_list, use_cuda):
-    """
-    convert lists into (cuda) Tensors.
+    """Convert lists into (cuda) Tensors.
+
     :param data_list: 2-level lists
     :param use_cuda: bool, whether to use GPU or not
     :return data_list: PyTorch Tensor of shape [batch_size, max_seq_len]
@@ -103,8 +100,8 @@
 
 
 def k_means_1d(x, k, max_iter=100):
-    """
-    Perform k-means on 1-D data.
+    """Perform k-means on 1-D data.
+
     :param x: list of int, representing points in 1-D.
     :param k: the number of clusters required.
     :param max_iter: maximum iteration
@@ -132,21 +129,28 @@
 
 
 def k_means_bucketing(all_inst, buckets):
-    """
+    """Assign all instances into possible buckets using k-means, such that instances in the same bucket have similar lengths.
+
     :param all_inst: 3-level list
+        E.g. ::
+
         [
             [[word_11, word_12, word_13], [label_11. label_12]],  # sample 1
             [[word_21, word_22, word_23], [label_21. label_22]],  # sample 2
             ...
         ]
+
     :param buckets: list of int. The length of the list is the number of buckets. Each integer is the maximum length threshold for each bucket (This is usually None.).
     :return data: 2-level list
+        ::
+
         [
             [index_11, index_12, ...],  # bucket 1
             [index_21, index_22, ...],  # bucket 2
             ...
         ]
+
     """
     bucket_data = [[] for _ in buckets]
     num_buckets = len(buckets)
@@ -160,11 +164,16 @@ def k_means_bucketing(all_inst, buckets):
 
 
 class BaseSampler(object):
-    """
-        Base class for all samplers.
+    """The base class of all samplers.
+
     """
 
     def __init__(self, data_set):
+        """
+
+        :param data_set: multi-level list, of shape [num_example, *]
+
+        """
         self.data_set_length = len(data_set)
         self.data = data_set
@@ -176,11 +185,16 @@ class BaseSampler(object):
 
 
 class SequentialSampler(BaseSampler):
-    """
-        Sample data in the original order.
+    """Sample data in the original order.
+
     """
 
     def __init__(self, data_set):
+        """
+
+        :param data_set: multi-level list
+
+        """
         super(SequentialSampler, self).__init__(data_set)
 
     def __iter__(self):
@@ -188,11 +202,16 @@ class SequentialSampler(BaseSampler):
 
 
 class RandomSampler(BaseSampler):
-    """
-        Sample data in random permutation order.
+    """Sample data in random permutation order.
+
     """
 
    def __init__(self, data_set):
+        """
+
+        :param data_set: multi-level list
+
+        """
         super(RandomSampler, self).__init__(data_set)
         self.order = np.random.permutation(self.data_set_length)
@@ -201,11 +220,18 @@ class RandomSampler(BaseSampler):
 
 
 class Batchifier(object):
-    """
-        Wrap random or sequential sampler to generate a mini-batch.
+    """Wrap random or sequential sampler to generate a mini-batch.
+
     """
 
     def __init__(self, sampler, batch_size, drop_last=True):
+        """
+
+        :param sampler: a Sampler object
+        :param batch_size: int, the size of the mini-batch
+        :param drop_last: bool, whether to drop the last examples that are not enough to make a mini-batch.
+
+        """
         super(Batchifier, self).__init__()
         self.sampler = sampler
         self.batch_size = batch_size
@@ -223,8 +249,7 @@ class Batchifier(object):
 
 
 class BucketBatchifier(Batchifier):
-    """
-        Partition all samples into multiple buckets, each of which contains sentences of approximately the same length.
+    """Partition all samples into multiple buckets, each of which contains sentences of approximately the same length.
     In sampling, first random choose a bucket. Then sample data from it.
     The number of buckets is decided dynamically by the variance of sentence lengths.
     """
@@ -237,6 +262,7 @@
         :param num_buckets: int, number of buckets for grouping these sequences.
         :param drop_last: bool, useless currently.
         :param sampler: Sampler, useless currently.
+
         """
         super(BucketBatchifier, self).__init__(sampler, batch_size, drop_last)
         buckets = ([None] * num_buckets)
diff --git a/fastNLP/core/loss.py b/fastNLP/core/loss.py
index 6a5cb349..8d866bbf 100644
--- a/fastNLP/core/loss.py
+++ b/fastNLP/core/loss.py
@@ -8,6 +8,11 @@ class Loss(object):
     """
 
     def __init__(self, args):
+        """
+
+        :param args: None or str, the name of a loss function.
+
+        """
         if args is None:
             # this is useful when Trainer.__init__ performs type check
             self._loss = None
@@ -17,10 +22,19 @@
             raise NotImplementedError
 
     def get(self):
+        """
+
+        :return self._loss: the loss function
+        """
         return self._loss
 
     @staticmethod
     def _borrow_from_pytorch(loss_name):
+        """Given a name of a loss function, return it from PyTorch.
+
+        :param loss_name: str, the name of a loss function
+        :return loss: a PyTorch loss
+        """
         if loss_name == "cross_entropy":
             return torch.nn.CrossEntropyLoss()
         else:
diff --git a/fastNLP/core/metrics.py b/fastNLP/core/metrics.py
index c8d7fe52..7bf4b034 100644
--- a/fastNLP/core/metrics.py
+++ b/fastNLP/core/metrics.py
@@ -1,11 +1,12 @@
 import warnings
+
 import numpy as np
 import torch
 
 
 def _conver_numpy(x):
-    """
-    convert input data to numpy array
+    """Convert input data to a numpy array.
+
     """
     if isinstance(x, np.ndarray):
         return x
@@ -17,21 +18,20 @@
 
 
 def _check_same_len(*arrays, axis=0):
-    """
-    check if input array list has same length for one dimension
+    """Check if the input arrays have the same length along one dimension.
+
     """
     lens = set([x.shape[axis] for x in arrays if x is not None])
     return len(lens) == 1
 
 
 def _label_types(y):
-    """
-    determine the type
-    "binary"
-    "multiclass"
-    "multiclass-multioutput"
-    "multilabel"
-    "unknown"
+    """Determine the label type, one of
+        - "binary"
+        - "multiclass"
+        - "multiclass-multioutput"
+        - "multilabel"
+        - "unknown"
     """
     # never squeeze the first dimension
     y = y.squeeze() if y.shape[0] > 1 else y.resize(1, -1)
@@ -46,8 +46,8 @@ def _label_types(y):
 
 
 def _check_data(y_true, y_pred):
-    """
-    check if y_true and y_pred is same type of data e.g both binary or multiclass
+    """Check if y_true and y_pred are the same type of data, e.g. both binary or multiclass.
+
     """
     y_true, y_pred = _conver_numpy(y_true), _conver_numpy(y_pred)
     if not _check_same_len(y_true, y_pred):
@@ -174,16 +174,13 @@ def classification_report(y_true, y_pred, labels=None, target_names=None, digits
 
 
 def accuracy_topk(y_true, y_prob, k=1):
-    """
-    Compute accuracy of y_true matching top-k probable
+    """Compute accuracy of y_true matching top-k probable
     labels in y_prob.
 
-    Paras:
-        y_ture - ndarray, true label, [n_samples]
-        y_prob - ndarray, label probabilities, [n_samples, n_classes]
-        k - int, k in top-k
-    Returns:
-        accuracy of top-k
+    :param y_true: ndarray, true label, [n_samples]
+    :param y_prob: ndarray, label probabilities, [n_samples, n_classes]
+    :param k: int, k in top-k
+    :return: accuracy of top-k
     """
     y_pred_topk = np.argsort(y_prob, axis=-1)[:, -1:-k - 1:-1]
@@ -195,16 +192,14 @@ def accuracy_topk(y_true, y_prob, k=1):
 
 
 def pred_topk(y_prob, k=1):
-    """
-    Return top-k predicted labels and corresponding probabilities.
-
-    Args:
-        y_prob - ndarray, size [n_samples, n_classes], probabilities on labels
-        k - int, k of top-k
-    Returns:
-        y_pred_topk - ndarray, size [n_samples, k], predicted top-k labels
-        y_prob_topk - ndarray, size [n_samples, k], probabilities for
-        top-k labels
+    """Return top-k predicted labels and corresponding probabilities.
+
+    :param y_prob: ndarray, size [n_samples, n_classes], probabilities on labels
+    :param k: int, k of top-k
+    :return:
+        y_pred_topk: ndarray, size [n_samples, k], predicted top-k labels
+        y_prob_topk: ndarray, size [n_samples, k], probabilities for top-k labels
     """
     y_pred_topk = np.argsort(y_prob, axis=-1)[:, -1:-k - 1:-1]
diff --git a/fastNLP/core/optimizer.py b/fastNLP/core/optimizer.py
index e106fde0..ff2ee40e 100644
--- a/fastNLP/core/optimizer.py
+++ b/fastNLP/core/optimizer.py
@@ -4,7 +4,6 @@ import torch
 class Optimizer(object):
     """Wrapper of optimizer from framework
-
         names: arguments (type)
         1. Adam: lr (float), weight_decay (float)
         2. AdaGrad
         3. RMSProp
@@ -16,20 +15,29 @@ class Optimizer(object):
         """
         :param optimizer_name: str, the name of the optimizer
         :param kwargs: the arguments
+
         """
         self.optim_name = optimizer_name
         self.kwargs = kwargs
 
     @property
     def name(self):
+        """The name of the optimizer.
+
+        :return: str
+        """
         return self.optim_name
 
     @property
     def params(self):
+        """The arguments used to create the optimizer.
+
+        :return: dict of (str, *)
+        """
         return self.kwargs
 
     def construct_from_pytorch(self, model_params):
-        """construct a optimizer from framework over given model parameters"""
+        """Construct an optimizer from framework over given model parameters."""
         if self.optim_name in ["SGD", "sgd"]:
             if "lr" in self.kwargs:
diff --git a/fastNLP/core/preprocess.py b/fastNLP/core/preprocess.py
index 5a95e39b..f8142c36 100644
--- a/fastNLP/core/preprocess.py
+++ b/fastNLP/core/preprocess.py
@@ -17,12 +17,24 @@ DEFAULT_WORD_TO_INDEX = {DEFAULT_PADDING_LABEL: 0, DEFAULT_UNKNOWN_LABEL: 1,
 # the first vocab in dict with the index = 5
 
 def save_pickle(obj, pickle_path, file_name):
+    """Save an object into a pickle file.
+
+    :param obj: an object
+    :param pickle_path: str, the directory where the pickle file is to be saved
+    :param file_name: str, the name of the pickle file. In general, it should end with ".pkl".
+    """
     with open(os.path.join(pickle_path, file_name), "wb") as f:
         _pickle.dump(obj, f)
     print("{} saved in {}".format(file_name, pickle_path))
 
 
 def load_pickle(pickle_path, file_name):
+    """Load an object from a given pickle file.
+
+    :param pickle_path: str, the directory where the pickle file is.
+    :param file_name: str, the name of the pickle file.
+    :return obj: the object stored in the pickle file
+    """
     with open(os.path.join(pickle_path, file_name), "rb") as f:
         obj = _pickle.load(f)
     print("{} loaded from {}".format(file_name, pickle_path))
@@ -30,7 +42,8 @@ def load_pickle(pickle_path, file_name):
 
 
 def pickle_exist(pickle_path, pickle_name):
-    """
+    """Check if a given pickle file exists in the directory.
+
     :param pickle_path: the directory of target pickle file
     :param pickle_name: the filename of target pickle file
     :return: True if file exists else False
@@ -45,6 +58,19 @@
 
 
 class BasePreprocess(object):
+    """Base class of all preprocessors.
+    Preprocessors are responsible for converting data of strings into data of indices.
+    During the pre-processing, the following pickle files will be built:
+
+        - "word2id.pkl", a mapping from words(tokens) to indices
+        - "id2word.pkl", a reversed dictionary
+        - "label2id.pkl", a dictionary on labels
+        - "id2label.pkl", a reversed dictionary on labels
+
+    These four pickle files are expected to be saved in the given pickle directory once they are constructed.
+    Preprocessors will check if those files are already in the directory and will reuse them in future calls.
+    """
+
     def __init__(self):
         self.word2index = None
         self.label2index = None
@@ -59,6 +85,7 @@ class BasePreprocess(object):
     def run(self, train_dev_data, test_data=None, pickle_path="./", train_dev_split=0, cross_val=False, n_fold=10):
         """Main preprocessing pipeline.
+
         :param train_dev_data: three-level list, with either single label or multiple labels in a sample.
         :param test_data: three-level list, with either single label or multiple labels in a sample. (optional)
         :param pickle_path: str, the path to save the pickle files.
         :param train_dev_split: float, between [0, 1]. The ratio of training data used as development set.
         :param cross_val: bool, whether to do cross validation.
         :param n_fold: int, the number of folds of cross validation. Only useful when cross_val is True.
         :return results: a tuple of datasets after preprocessing.
         """
+
         if pickle_exist(pickle_path, "word2id.pkl") and pickle_exist(pickle_path, "class2id.pkl"):
             self.word2index = load_pickle(pickle_path, "word2id.pkl")
             self.label2index = load_pickle(pickle_path, "class2id.pkl")
@@ -182,25 +210,31 @@ class SeqLabelPreprocess(BasePreprocess):
     """Preprocess pipeline, including building mapping from words to index, from index to words,
     from labels/classes to index, from index to labels/classes.
     data of three-level list which have multiple labels in each sample.
+    ::
+
         [
             [ [word_11, word_12, ...], [label_1, label_1, ...] ],
             [ [word_21, word_22, ...], [label_2, label_1, ...] ],
             ...
         ]
+
     """
 
     def __init__(self):
         super(SeqLabelPreprocess, self).__init__()
 
     def build_dict(self, data):
-        """
-        Add new words with indices into self.word_dict, new labels with indices into self.label_dict.
+        """Add new words with indices into self.word_dict, new labels with indices into self.label_dict.
+
         :param data: three-level list
+        ::
+
            [
                 [ [word_11, word_12, ...], [label_1, label_1, ...] ],
                 [ [word_21, word_22, ...], [label_2, label_1, ...] ],
                 ...
             ]
+
         :return word2index: dict of {str, int}
                 label2index: dict of {str, int}
         """
@@ -216,14 +250,17 @@ class SeqLabelPreprocess(BasePreprocess):
         return word2index, label2index
 
     def to_index(self, data):
-        """
-        Convert word strings and label strings into indices.
+        """Convert word strings and label strings into indices.
+
         :param data: three-level list
+        ::
+
             [
                 [ [word_11, word_12, ...], [label_1, label_1, ...] ],
                 [ [word_21, word_22, ...], [label_2, label_1, ...] ],
                 ...
             ]
+
         :return data_index: the same shape as data, but each string is replaced by its corresponding index
         """
         data_index = []
@@ -242,11 +279,14 @@ class ClassPreprocess(BasePreprocess):
     Preprocess pipeline, including building mapping from words to index, from index to words,
     from labels/classes to index, from index to labels/classes.
     design for data of three-level list which has a single label in each sample.
+    ::
+
         [
             [ [word_11, word_12, ...], label_1 ],
             [ [word_21, word_22, ...], label_2 ],
             ...
         ]
+
     """
 
     def __init__(self):
@@ -273,14 +313,17 @@ class ClassPreprocess(BasePreprocess):
         return word2index, label2index
 
     def to_index(self, data):
-        """
-        Convert word strings and label strings into indices.
+        """Convert word strings and label strings into indices.
+
         :param data: three-level list
+        ::
+
             [
                 [ [word_11, word_12, ...], label_1 ],
                 [ [word_21, word_22, ...], label_2 ],
                 ...
             ]
+
         :return data_index: the same shape as data, but each string is replaced by its corresponding index
         """
         data_index = []
@@ -295,14 +338,15 @@
 
 
 def infer_preprocess(pickle_path, data):
-    """
-    Preprocess over inference data.
-    Transform three-level list of strings into that of index.
+    """Preprocess over inference data. Transform three-level list of strings into that of index.
+    ::
+
         [
             [word_11, word_12, ...],
             [word_21, word_22, ...],
             ...
         ]
+
     """
     word2index = load_pickle(pickle_path, "word2id.pkl")
     data_index = []
diff --git a/fastNLP/core/tester.py b/fastNLP/core/tester.py
index c085f7a4..bcb6ba8c 100644
--- a/fastNLP/core/tester.py
+++ b/fastNLP/core/tester.py
@@ -155,8 +155,8 @@
         raise NotImplementedError
 
 class SeqLabelTester(BaseTester):
-    """
-        Tester for sequence labeling.
+    """Tester for sequence labeling.
+
     """
 
     def __init__(self, **test_args):
@@ -215,8 +215,8 @@ class SeqLabelTester(BaseTester):
         return batch_loss, batch_accuracy
 
     def show_metrics(self):
-        """
-        This is called by Trainer to print evaluation on dev set.
+        """This is called by Trainer to print evaluation on dev set.
+
         :return print_str: str
         """
         loss, accuracy = self.metrics()
diff --git a/fastNLP/core/trainer.py b/fastNLP/core/trainer.py
index 5fb5b0dc..4714131e 100644
--- a/fastNLP/core/trainer.py
+++ b/fastNLP/core/trainer.py
@@ -1,4 +1,5 @@
 import copy
+import os
 import time
 from datetime import timedelta
@@ -26,10 +27,10 @@ class BaseTrainer(object):
         :param kwargs: dict of (key, value), or dict-like object. key is str. The base trainer requires the following keys:
-                - epochs: int, the number of epochs in training
-                - validate: bool, whether or not to validate on dev set
-                - batch_size: int
-                - pickle_path: str, the path to pickle files for pre-processing
+            - epochs: int, the number of epochs in training
+            - validate: bool, whether or not to validate on dev set
+            - batch_size: int
+            - pickle_path: str, the path to pickle files for pre-processing
         """
         super(BaseTrainer, self).__init__()
@@ -88,6 +89,7 @@
     def train(self, network, train_data, dev_data=None):
         """General Training Procedure
+
         :param network: a model
         :param train_data: three-level list, the training set.
         :param dev_data: three-level list, the validation data (optional)
@@ -144,6 +146,7 @@
     def _train_step(self, data_iterator, network, **kwargs):
         """Training process in one epoch.
+
         kwargs should contain:
             - n_print: int, print training information every n steps.
            - start: time.time(), the starting time of this step.
@@ -199,14 +202,13 @@
         Action.mode(network, test)
 
     def define_optimizer(self):
-        """
-        Define framework-specific optimizer specified by the models.
+        """Define framework-specific optimizer specified by the models.
+
         """
         self._optimizer = self._optimizer_proto.construct_from_pytorch(self._model.parameters())
 
     def update(self):
-        """
-        Perform weight update on a model.
+        """Perform weight update on a model.
 
         For PyTorch, just call optimizer to update.
         """
@@ -216,8 +218,8 @@
         raise NotImplementedError
 
     def grad_backward(self, loss):
-        """
-        Compute gradient with link rules.
+        """Compute gradient with link rules.
+
         :param loss: a scalar where back-prop starts
 
         For PyTorch, just do "loss.backward()"
@@ -226,8 +228,8 @@
         loss.backward()
 
     def get_loss(self, predict, truth):
-        """
-        Compute loss given prediction and ground truth.
+        """Compute loss given prediction and ground truth.
+
         :param predict: prediction label vector
         :param truth: ground truth label vector
         :return: a scalar
@@ -235,8 +237,9 @@
         return self._loss_func(predict, truth)
 
     def define_loss(self):
-        """
-        if the model defines a loss, use model's loss.
+        """Define a loss for the trainer.
+
+        If the model defines a loss, use model's loss.
         Otherwise, Trainer must has a loss argument, use it as loss.
         These two losses cannot be defined at the same time.
         Trainer does not handle loss definition or choose default losses.
@@ -253,7 +256,8 @@
             logger.info("The model didn't define loss, use Trainer's loss.")
 
     def best_eval_result(self, validator):
-        """
+        """Check if the current epoch yields better validation results.
+
         :param validator: a Tester instance
         :return: bool, True means current results on dev set is the best.
""" @@ -268,15 +272,14 @@ class BaseTrainer(object): """ if model_name[-4:] != ".pkl": model_name += ".pkl" - ModelSaver(self.pickle_path + model_name).save_pytorch(network) + ModelSaver(os.path.join(self.pickle_path, model_name)).save_pytorch(network) def _create_validator(self, valid_args): raise NotImplementedError class SeqLabelTrainer(BaseTrainer): - """ - Trainer for Sequence Labeling + """Trainer for Sequence Labeling """ @@ -306,11 +309,11 @@ class SeqLabelTrainer(BaseTrainer): return y def get_loss(self, predict, truth): - """ - Compute loss given prediction and ground truth. + """Compute loss given prediction and ground truth. + :param predict: prediction label vector, [batch_size, max_len, tag_size] :param truth: ground truth label vector, [batch_size, max_len] - :return: a scalar + :return loss: a scalar """ batch_size, max_len = predict.size(0), predict.size(1) assert truth.shape == (batch_size, max_len) diff --git a/fastNLP/fastnlp.py b/fastNLP/fastnlp.py index 4a697e9a..c76e6681 100644 --- a/fastNLP/fastnlp.py +++ b/fastNLP/fastnlp.py @@ -1,3 +1,5 @@ +import os + from fastNLP.core.predictor import SeqLabelInfer, ClassificationInfer from fastNLP.core.preprocess import load_pickle from fastNLP.loader.config_loader import ConfigLoader, ConfigSection @@ -39,8 +41,15 @@ FastNLP_MODEL_COLLECTION = { "type": "seq_label", "config_file_name": "pos_tag.config", "config_section_name": "pos_tag_model" + }, + "text_classify_model": { + "url": "", + "class": "cnn_text_classification.CNNText", + "pickle": "text_class_model_v0.pkl", + "type": "text_class", + "config_file_name": "text_classify.cfg", + "config_section_name": "model" } - } @@ -86,7 +95,7 @@ class FastNLP(object): print("Restore model class {}".format(str(model_class))) model_args = ConfigSection() - ConfigLoader.load_config(self.model_dir + config_file, {section_name: model_args}) + ConfigLoader.load_config(os.path.join(self.model_dir, config_file), {section_name: model_args}) print("Restore model hyper-parameters {}".format(str(model_args.data))) # fetch dictionary size and number of labels from pickle files @@ -100,7 +109,7 @@ class FastNLP(object): print("Model constructed.") # To do: framework independent - ModelLoader.load_pytorch(model, self.model_dir + FastNLP_MODEL_COLLECTION[model_name]["pickle"]) + ModelLoader.load_pytorch(model, os.path.join(self.model_dir, FastNLP_MODEL_COLLECTION[model_name]["pickle"])) print("Model weights loaded.") self.model = model diff --git a/setup.py b/setup.py index 64e72c15..25a645c5 100644 --- a/setup.py +++ b/setup.py @@ -13,8 +13,8 @@ with open('requirements.txt') as f: setup( name='fastNLP', - version='1.0', - description=('fudan fastNLP '), + version='0.0.1', + description='fastNLP: Deep Learning Toolkit for NLP, developed by Fudan FastNLP Team', long_description=readme, license=license, author='fudanNLP', diff --git a/test/core/test_action.py b/test/core/test_action.py index 6ad1bd29..8d0f628b 100644 --- a/test/core/test_action.py +++ b/test/core/test_action.py @@ -1,9 +1,8 @@ -import os - import unittest from fastNLP.core.action import Action, Batchifier, SequentialSampler + class TestAction(unittest.TestCase): def test_case_1(self): x = [1, 2, 3, 4, 5, 6, 7, 8] diff --git a/test/test_fastNLP.py b/test/test_fastNLP.py index b858ba46..92bc894f 100644 --- a/test/test_fastNLP.py +++ b/test/test_fastNLP.py @@ -6,7 +6,7 @@ from fastNLP.fastnlp import interpret_word_seg_results, interpret_cws_pos_result PATH_TO_CWS_PICKLE_FILES = 
"/home/zyfeng/fastNLP/reproduction/chinese_word_segment/save/" PATH_TO_POS_TAG_PICKLE_FILES = "/home/zyfeng/data/crf_seg/" - +PATH_TO_TEXT_CLASSIFICATION_PICKLE_FILES = "/home/zyfeng/data/text_classify/" def word_seg(): nlp = FastNLP(model_dir=PATH_TO_CWS_PICKLE_FILES) @@ -68,7 +68,18 @@ def pos_tag(): print(interpret_cws_pos_results(words, labels)) - +def text_classify(): + nlp = FastNLP(model_dir=PATH_TO_TEXT_CLASSIFICATION_PICKLE_FILES) + nlp.load("text_classify_model", config_file="text_classify.cfg", section_name="model") + text = [ + "世界物联网大会明日在京召开龙头股启动在即", + "乌鲁木齐市新增一处城市中心旅游目的地", + "朱元璋的大明朝真的源于明教吗?——告诉你一个真实的“明教”"] + results = nlp.run(text) + print(results) + """ + ['finance', 'travel', 'history'] + """ if __name__ == "__main__": - pos_tag() + text_classify()