- fix issue #58: use os.path.join instead of "+" for path concatenation
- modify description and version in setup.py
- docstrings in core/ follow the reStructuredText format
@@ -1,7 +1,3 @@
-"""
-This file defines Action(s) and sample methods.
-"""
 from collections import Counter

 import numpy as np
@@ -9,13 +5,12 @@ import torch

 class Action(object):
-    """
-    Operations shared by Trainer, Tester, or Inference.
+    """Operations shared by Trainer, Tester, or Inference.
     This is designed for reducing duplicated code.
         - make_batch: produce a mini-batch of data. @staticmethod
         - pad: padding method used in sequence modeling. @staticmethod
         - mode: change network mode for either train or test. (for PyTorch) @staticmethod
     The base Action shall define operations shared by as many task-specific Actions as possible.
     """

     def __init__(self):
@@ -24,18 +19,20 @@ class Action(object):
     @staticmethod
     def make_batch(iterator, use_cuda, output_length=True, max_len=None):
         """Batch and Pad data.

         :param iterator: an iterator (object that implements the __next__ method) which returns the next sample.
         :param use_cuda: bool, whether to use GPU
         :param output_length: bool, whether to output the original length of the sequence before padding. (default: True)
         :param max_len: int, maximum sequence length. Longer sequences will be clipped. (default: None)
-        :return
-            if output_length is True:
+        :return:
+            if output_length is True,
                 (batch_x, seq_len): tuple of two elements
                     batch_x: list. Each entry is a list of features of a sample. [batch_size, max_len]
                     seq_len: list. The length of the pre-padded sequence, if output_length is True.
                 batch_y: list. Each entry is a list of labels of a sample. [batch_size, num_labels]
-            if output_length is False:
+            if output_length is False,
                 batch_x: list. Each entry is a list of features of a sample. [batch_size, max_len]
                 batch_y: list. Each entry is a list of labels of a sample. [batch_size, num_labels]
         """
@@ -77,21 +74,21 @@ class Action(object):
         return batch

     @staticmethod
-    def mode(model, test=False):
-        """
-        Train mode or Test mode. This is for PyTorch currently.
-        :param model:
-        :param test:
+    def mode(model, is_test=False):
+        """Train mode or Test mode. This is for PyTorch currently.

+        :param model: a PyTorch model
+        :param is_test: bool, whether in test mode or not.
         """
-        if test:
+        if is_test:
             model.eval()
         else:
             model.train()
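
A quick usage sketch of this switch::

    model = torch.nn.Linear(4, 2)
    Action.mode(model, is_test=True)    # calls model.eval()
    assert model.training is False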

 def convert_to_torch_tensor(data_list, use_cuda):
-    """
-    convert lists into (cuda) Tensors.
+    """Convert lists into (cuda) Tensors.
+
     :param data_list: 2-level lists
     :param use_cuda: bool, whether to use GPU or not
     :return data_list: PyTorch Tensor of shape [batch_size, max_seq_len]
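
The conversion is essentially the following (a sketch, assuming the inner lists are already padded to equal length)::

    data_tensor = torch.LongTensor(data_list)   # [batch_size, max_seq_len]
    if use_cuda:
        data_tensor = data_tensor.cuda()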
@@ -103,8 +100,8 @@ def convert_to_torch_tensor(data_list, use_cuda):

 def k_means_1d(x, k, max_iter=100):
-    """
-    Perform k-means on 1-D data.
+    """Perform k-means on 1-D data.
+
     :param x: list of int, representing points in 1-D.
     :param k: the number of clusters required.
     :param max_iter: maximum iteration
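
For readers unfamiliar with 1-D k-means, a compact reference implementation of the idea (illustrative only; the library's version may differ in details such as initialization)::

    import numpy as np

    def k_means_1d_sketch(x, k, max_iter=100):
        x = np.asarray(x, dtype=float)
        centroids = np.random.choice(x, size=k, replace=False)  # pick k starting points
        for _ in range(max_iter):
            # assign every point to its nearest centroid
            assign = np.abs(x[:, None] - centroids[None, :]).argmin(axis=1)
            new_centroids = np.array([x[assign == i].mean() if (assign == i).any()
                                      else centroids[i] for i in range(k)])
            if np.allclose(new_centroids, centroids):
                break
            centroids = new_centroids
        return centroids, assign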
@@ -132,21 +129,28 @@ def k_means_1d(x, k, max_iter=100):

 def k_means_bucketing(all_inst, buckets):
-    """
+    """Assign all instances into possible buckets using k-means, such that instances in the same bucket have similar lengths.
+
     :param all_inst: 3-level list
+        E.g. ::
+
+            [
+                [[word_11, word_12, word_13], [label_11, label_12]],  # sample 1
+                [[word_21, word_22, word_23], [label_21, label_22]],  # sample 2
+                ...
+            ]
+
     :param buckets: list of int. The length of the list is the number of buckets. Each integer is the maximum length
         threshold for each bucket (usually None).
     :return data: 2-level list
+        ::
+
+            [
+                [index_11, index_12, ...],  # bucket 1
+                [index_21, index_22, ...],  # bucket 2
+                ...
+            ]
     """
     bucket_data = [[] for _ in buckets]
     num_buckets = len(buckets)
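
An illustrative call with toy data (hypothetical; the exact grouping depends on the k-means initialization)::

    data = [
        [["the", "cat"], ["D", "N"]],
        [["a", "dog", "runs", "fast"], ["D", "N", "V", "ADV"]],
        [["hi"], ["INTJ"]],
    ]
    bucket_data = k_means_bucketing(data, [None, None])
    # e.g. [[0, 2], [1]] -- sample indices, grouped by similar sequence length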
@@ -160,11 +164,16 @@ def k_means_bucketing(all_inst, buckets):

 class BaseSampler(object):
-    """
-    Base class for all samplers.
+    """The base class of all samplers.
     """

     def __init__(self, data_set):
+        """
+        :param data_set: multi-level list, of shape [num_example, *]
+        """
         self.data_set_length = len(data_set)
         self.data = data_set
@@ -176,11 +185,16 @@ class BaseSampler(object):

 class SequentialSampler(BaseSampler):
-    """
-    Sample data in the original order.
+    """Sample data in the original order.
     """

     def __init__(self, data_set):
+        """
+        :param data_set: multi-level list
+        """
         super(SequentialSampler, self).__init__(data_set)

     def __iter__(self):
@@ -188,11 +202,16 @@ class SequentialSampler(BaseSampler):

 class RandomSampler(BaseSampler):
-    """
-    Sample data in random permutation order.
+    """Sample data in random permutation order.
     """

     def __init__(self, data_set):
+        """
+        :param data_set: multi-level list
+        """
         super(RandomSampler, self).__init__(data_set)
         self.order = np.random.permutation(self.data_set_length)
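
Both samplers are iterables over single samples: SequentialSampler yields them in the original order, RandomSampler in a fixed random permutation. A sketch::

    data = [[1, 2], [3, 4], [5, 6]]
    list(SequentialSampler(data))   # [[1, 2], [3, 4], [5, 6]]
    list(RandomSampler(data))       # the same samples, permuted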
@@ -201,11 +220,18 @@ class RandomSampler(BaseSampler):

 class Batchifier(object):
-    """
-    Wrap random or sequential sampler to generate a mini-batch.
+    """Wrap a random or sequential sampler to generate mini-batches.
     """

     def __init__(self, sampler, batch_size, drop_last=True):
+        """
+        :param sampler: a Sampler object
+        :param batch_size: int, the size of the mini-batch
+        :param drop_last: bool, whether to drop the last examples that are not enough to make a mini-batch.
+        """
         super(Batchifier, self).__init__()
         self.sampler = sampler
         self.batch_size = batch_size
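
Combining a sampler with a Batchifier (a sketch; the Batchifier yields lists of samples)::

    data = [[i] for i in range(10)]
    batches = Batchifier(RandomSampler(data), batch_size=4, drop_last=True)
    for batch in batches:
        print(len(batch))   # 4, 4 -- the incomplete final batch is dropped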
@@ -223,8 +249,7 @@ class Batchifier(object):

 class BucketBatchifier(Batchifier):
-    """
-    Partition all samples into multiple buckets, each of which contains sentences of approximately the same length.
+    """Partition all samples into multiple buckets, each of which contains sentences of approximately the same length.
     In sampling, first randomly choose a bucket, then sample data from it.
     The number of buckets is decided dynamically by the variance of sentence lengths.
     """

@@ -237,6 +262,7 @@ class BucketBatchifier(Batchifier):
         :param num_buckets: int, number of buckets for grouping these sequences.
         :param drop_last: bool, useless currently.
         :param sampler: Sampler, useless currently.
+        """
         super(BucketBatchifier, self).__init__(sampler, batch_size, drop_last)
         buckets = ([None] * num_buckets)
@@ -8,6 +8,11 @@ class Loss(object):
     """

     def __init__(self, args):
+        """
+        :param args: None or str, the name of a loss function.
+        """
         if args is None:
             # this is useful when Trainer.__init__ performs type check
             self._loss = None

@@ -17,10 +22,19 @@ class Loss(object):
             raise NotImplementedError

     def get(self):
+        """
+        :return self._loss: the loss function
+        """
         return self._loss

     @staticmethod
     def _borrow_from_pytorch(loss_name):
+        """Given the name of a loss function, return it from PyTorch.
+
+        :param loss_name: str, the name of a loss function
+        :return loss: a PyTorch loss
+        """
         if loss_name == "cross_entropy":
             return torch.nn.CrossEntropyLoss()
         else:
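
A usage sketch, assuming the constructor resolves a string argument via _borrow_from_pytorch as the branches above suggest::

    loss = Loss("cross_entropy")
    loss_fn = loss.get()    # a torch.nn.CrossEntropyLoss instance
    # loss_fn(logits, targets) then returns a scalar tensor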
@@ -1,11 +1,12 @@
 import warnings

 import numpy as np
 import torch


 def _conver_numpy(x):
-    """
-    convert input data to numpy array
+    """Convert input data to a numpy array.
     """
     if isinstance(x, np.ndarray):
         return x

@@ -17,21 +18,20 @@ def _conver_numpy(x):
 def _check_same_len(*arrays, axis=0):
-    """
-    check if input array list has same length for one dimension
+    """Check if the input arrays have the same length along one dimension.
     """
     lens = set([x.shape[axis] for x in arrays if x is not None])
     return len(lens) == 1


 def _label_types(y):
-    """
-    determine the type
-    "binary"
-    "multiclass"
-    "multiclass-multioutput"
-    "multilabel"
-    "unknown"
+    """Determine the label type, one of
+        - "binary"
+        - "multiclass"
+        - "multiclass-multioutput"
+        - "multilabel"
+        - "unknown"
     """
     # never squeeze the first dimension
     y = y.squeeze() if y.shape[0] > 1 else y.resize(1, -1)

@@ -46,8 +46,8 @@ def _label_types(y):
 def _check_data(y_true, y_pred):
-    """
-    check if y_true and y_pred is same type of data e.g both binary or multiclass
+    """Check if y_true and y_pred are the same type of data, e.g. both binary or multiclass.
     """
     y_true, y_pred = _conver_numpy(y_true), _conver_numpy(y_pred)
     if not _check_same_len(y_true, y_pred):
@@ -174,16 +174,13 @@ def classification_report(y_true, y_pred, labels=None, target_names=None, digits

 def accuracy_topk(y_true, y_prob, k=1):
-    """
-    Compute accuracy of y_true matching top-k probable
-    labels in y_prob.
-    Paras:
-        y_ture - ndarray, true label, [n_samples]
-        y_prob - ndarray, label probabilities, [n_samples, n_classes]
-        k - int, k in top-k
-    Returns:
-        accuracy of top-k
+    """Compute the accuracy of y_true matching the top-k probable labels in y_prob.
+
+    :param y_true: ndarray, true label, [n_samples]
+    :param y_prob: ndarray, label probabilities, [n_samples, n_classes]
+    :param k: int, k in top-k
+    :return: accuracy of top-k
     """
     y_pred_topk = np.argsort(y_prob, axis=-1)[:, -1:-k - 1:-1]
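
The slice [:, -1:-k - 1:-1] picks the k highest-probability class indices per row, in descending order. A tiny worked example with k=2::

    y_prob = np.array([[0.1, 0.3, 0.6],
                       [0.5, 0.4, 0.1]])
    np.argsort(y_prob, axis=-1)[:, -1:-2 - 1:-1]
    # -> array([[2, 1],
    #           [0, 1]])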
@@ -195,16 +192,14 @@ def accuracy_topk(y_true, y_prob, k=1):

 def pred_topk(y_prob, k=1):
-    """
-    Return top-k predicted labels and corresponding probabilities.
-    Args:
-        y_prob - ndarray, size [n_samples, n_classes], probabilities on labels
-        k - int, k of top-k
-    Returns:
-        y_pred_topk - ndarray, size [n_samples, k], predicted top-k labels
-        y_prob_topk - ndarray, size [n_samples, k], probabilities for
-                      top-k labels
+    """Return top-k predicted labels and corresponding probabilities.
+
+    :param y_prob: ndarray, size [n_samples, n_classes], probabilities on labels
+    :param k: int, k of top-k
+    :return: a tuple of
+        y_pred_topk: ndarray, size [n_samples, k], predicted top-k labels
+        y_prob_topk: ndarray, size [n_samples, k], probabilities for top-k labels
     """
     y_pred_topk = np.argsort(y_prob, axis=-1)[:, -1:-k - 1:-1]
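
Continuing the example above, one way to gather the matching probabilities is fancy indexing::

    rows = np.arange(y_prob.shape[0])[:, None]
    y_prob_topk = y_prob[rows, y_pred_topk]
    # -> array([[0.6, 0.3],
    #           [0.5, 0.4]])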
@@ -4,7 +4,6 @@ import torch

 class Optimizer(object):
     """Wrapper of optimizer from framework

     names: arguments (type)
     1. Adam: lr (float), weight_decay (float)
     2. AdaGrad
     3. RMSProp

@@ -16,20 +15,29 @@ class Optimizer(object):
         """
         :param optimizer_name: str, the name of the optimizer
         :param kwargs: the arguments
         """
         self.optim_name = optimizer_name
         self.kwargs = kwargs

     @property
     def name(self):
+        """The name of the optimizer.
+
+        :return: str
+        """
         return self.optim_name

     @property
     def params(self):
+        """The arguments used to create the optimizer.
+
+        :return: dict of (str, *)
+        """
         return self.kwargs

     def construct_from_pytorch(self, model_params):
-        """construct a optimizer from framework over given model parameters"""
+        """Construct an optimizer from the framework over given model parameters."""
         if self.optim_name in ["SGD", "sgd"]:
             if "lr" in self.kwargs:
@@ -17,12 +17,24 @@ DEFAULT_WORD_TO_INDEX = {DEFAULT_PADDING_LABEL: 0, DEFAULT_UNKNOWN_LABEL: 1,
 # the first vocab in dict with the index = 5

 def save_pickle(obj, pickle_path, file_name):
+    """Save an object into a pickle file.
+
+    :param obj: an object
+    :param pickle_path: str, the directory where the pickle file is to be saved
+    :param file_name: str, the name of the pickle file. In general, it should end with ".pkl".
+    """
     with open(os.path.join(pickle_path, file_name), "wb") as f:
         _pickle.dump(obj, f)
     print("{} saved in {}".format(file_name, pickle_path))


 def load_pickle(pickle_path, file_name):
+    """Load an object from a given pickle file.
+
+    :param pickle_path: str, the directory where the pickle file is.
+    :param file_name: str, the name of the pickle file.
+    :return obj: an object stored in the pickle
+    """
     with open(os.path.join(pickle_path, file_name), "rb") as f:
         obj = _pickle.load(f)
     print("{} loaded from {}".format(file_name, pickle_path))
@@ -30,7 +42,8 @@ def load_pickle(pickle_path, file_name):

 def pickle_exist(pickle_path, pickle_name):
-    """
+    """Check if a given pickle file exists in the directory.
+
     :param pickle_path: the directory of target pickle file
     :param pickle_name: the filename of target pickle file
     :return: True if file exists else False

@@ -45,6 +58,19 @@ def pickle_exist(pickle_path, pickle_name):

 class BasePreprocess(object):
     """Base class of all preprocessors.
+
+    Preprocessors are responsible for converting data of strings into data of indices.
+    During the pre-processing, the following pickle files will be built:
+
+        - "word2id.pkl", a mapping from words (tokens) to indices
+        - "id2word.pkl", a reversed dictionary
+        - "label2id.pkl", a dictionary on labels
+        - "id2label.pkl", a reversed dictionary on labels
+
+    These four pickle files are expected to be saved in the given pickle directory once they are constructed.
+    Preprocessors will check if those files are already in the directory and will reuse them in future calls.
+    """

     def __init__(self):
         self.word2index = None
         self.label2index = None
@@ -59,6 +85,7 @@ class BasePreprocess(object):
     def run(self, train_dev_data, test_data=None, pickle_path="./", train_dev_split=0, cross_val=False, n_fold=10):
         """Main preprocessing pipeline.
+
         :param train_dev_data: three-level list, with either a single label or multiple labels in a sample.
         :param test_data: three-level list, with either a single label or multiple labels in a sample. (optional)
         :param pickle_path: str, the path to save the pickle files.

@@ -67,6 +94,7 @@ class BasePreprocess(object):
         :param n_fold: int, the number of folds of cross validation. Only useful when cross_val is True.
         :return results: a tuple of datasets after preprocessing.
         """
+
         if pickle_exist(pickle_path, "word2id.pkl") and pickle_exist(pickle_path, "class2id.pkl"):
             self.word2index = load_pickle(pickle_path, "word2id.pkl")
             self.label2index = load_pickle(pickle_path, "class2id.pkl")
@@ -182,25 +210,31 @@ class SeqLabelPreprocess(BasePreprocess):
     """Preprocess pipeline, including building mapping from words to index, from index to words,
     from labels/classes to index, from index to labels/classes.

+    Designed for data of three-level lists which have multiple labels in each sample.
+    ::
+
+        [
+            [ [word_11, word_12, ...], [label_1, label_1, ...] ],
+            [ [word_21, word_22, ...], [label_2, label_1, ...] ],
+            ...
+        ]
+
     """

     def __init__(self):
         super(SeqLabelPreprocess, self).__init__()

     def build_dict(self, data):
-        """
-        Add new words with indices into self.word_dict, new labels with indices into self.label_dict.
+        """Add new words with indices into self.word_dict, new labels with indices into self.label_dict.
+
         :param data: three-level list
+        ::
+
+            [
+                [ [word_11, word_12, ...], [label_1, label_1, ...] ],
+                [ [word_21, word_22, ...], [label_2, label_1, ...] ],
+                ...
+            ]
+
         :return word2index: dict of {str, int}
                 label2index: dict of {str, int}
         """

@@ -216,14 +250,17 @@ class SeqLabelPreprocess(BasePreprocess):
         return word2index, label2index
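
An illustrative call with toy data (hypothetical words and labels; the special padding/unknown entries come from DEFAULT_WORD_TO_INDEX)::

    p = SeqLabelPreprocess()
    data = [
        [["I", "am"], ["PN", "V"]],
        [["you", "are"], ["PN", "V"]],
    ]
    word2index, label2index = p.build_dict(data)
    # word2index now maps "I", "am", "you", "are" to indices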

     def to_index(self, data):
-        """
-        Convert word strings and label strings into indices.
+        """Convert word strings and label strings into indices.
+
         :param data: three-level list
+        ::
+
+            [
+                [ [word_11, word_12, ...], [label_1, label_1, ...] ],
+                [ [word_21, word_22, ...], [label_2, label_1, ...] ],
+                ...
+            ]
+
         :return data_index: the same shape as data, but each string is replaced by its corresponding index
         """
         data_index = []
@@ -242,11 +279,14 @@ class ClassPreprocess(BasePreprocess):
     Preprocess pipeline, including building mapping from words to index, from index to words,
     from labels/classes to index, from index to labels/classes.

+    Designed for data of three-level lists which have a single label in each sample.
+    ::
+
+        [
+            [ [word_11, word_12, ...], label_1 ],
+            [ [word_21, word_22, ...], label_2 ],
+            ...
+        ]
     """

     def __init__(self):

@@ -273,14 +313,17 @@ class ClassPreprocess(BasePreprocess):
         return word2index, label2index

     def to_index(self, data):
-        """
-        Convert word strings and label strings into indices.
+        """Convert word strings and label strings into indices.
+
         :param data: three-level list
+        ::
+
+            [
+                [ [word_11, word_12, ...], label_1 ],
+                [ [word_21, word_22, ...], label_2 ],
+                ...
+            ]
+
         :return data_index: the same shape as data, but each string is replaced by its corresponding index
         """
         data_index = []
@@ -295,14 +338,15 @@ class ClassPreprocess(BasePreprocess):

 def infer_preprocess(pickle_path, data):
-    """
-    Preprocess over inference data.
-    Transform three-level list of strings into that of index.
+    """Preprocess over inference data. Transform a list of strings into one of indices.
+    ::
+
+        [
+            [word_11, word_12, ...],
+            [word_21, word_22, ...],
+            ...
+        ]
+
     """
     word2index = load_pickle(pickle_path, "word2id.pkl")
     data_index = []
@@ -155,8 +155,8 @@ class BaseTester(object):
         raise NotImplementedError


 class SeqLabelTester(BaseTester):
-    """
-    Tester for sequence labeling.
+    """Tester for sequence labeling.
     """

     def __init__(self, **test_args):

@@ -215,8 +215,8 @@ class SeqLabelTester(BaseTester):
         return batch_loss, batch_accuracy

     def show_metrics(self):
-        """
-        This is called by Trainer to print evaluation on dev set.
+        """This is called by Trainer to print evaluation results on the dev set.
+
         :return print_str: str
         """
         loss, accuracy = self.metrics()
@@ -1,4 +1,5 @@
 import copy
+import os
 import time
 from datetime import timedelta

@@ -26,10 +27,10 @@ class BaseTrainer(object):
         :param kwargs: dict of (key, value), or dict-like object. key is str.

         The base trainer requires the following keys:
-        - epochs: int, the number of epochs in training
-        - validate: bool, whether or not to validate on dev set
-        - batch_size: int
-        - pickle_path: str, the path to pickle files for pre-processing
+            - epochs: int, the number of epochs in training
+            - validate: bool, whether or not to validate on dev set
+            - batch_size: int
+            - pickle_path: str, the path to pickle files for pre-processing
         """
         super(BaseTrainer, self).__init__()

@@ -88,6 +89,7 @@ class BaseTrainer(object):
     def train(self, network, train_data, dev_data=None):
         """General Training Procedure.
+
         :param network: a model
         :param train_data: three-level list, the training set.
         :param dev_data: three-level list, the validation data (optional)
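
A bird's-eye usage sketch, using the required kwargs listed above (names follow the classes in this diff; values are illustrative)::

    trainer = SeqLabelTrainer(epochs=10, batch_size=32, validate=True,
                              pickle_path="./save/")
    trainer.train(network=model, train_data=train_set, dev_data=dev_set)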
@@ -144,6 +146,7 @@ class BaseTrainer(object):
     def _train_step(self, data_iterator, network, **kwargs):
         """Training process in one epoch.
+
         kwargs should contain:
             - n_print: int, print training information every n steps.
             - start: time.time(), the starting time of this step.

@@ -199,14 +202,13 @@ class BaseTrainer(object):
         Action.mode(network, test)

     def define_optimizer(self):
-        """
-        Define framework-specific optimizer specified by the models.
+        """Define the framework-specific optimizer specified by the model.
         """
         self._optimizer = self._optimizer_proto.construct_from_pytorch(self._model.parameters())

     def update(self):
-        """
-        Perform weight update on a model.
+        """Perform weight update on a model.

         For PyTorch, just call optimizer to update.
         """

@@ -216,8 +218,8 @@ class BaseTrainer(object):
         raise NotImplementedError

     def grad_backward(self, loss):
-        """
-        Compute gradient with link rules.
+        """Compute gradient with the chain rule.
+
         :param loss: a scalar where back-prop starts

         For PyTorch, just do "loss.backward()"

@@ -226,8 +228,8 @@ class BaseTrainer(object):
         loss.backward()

     def get_loss(self, predict, truth):
-        """
-        Compute loss given prediction and ground truth.
+        """Compute loss given prediction and ground truth.
+
         :param predict: prediction label vector
         :param truth: ground truth label vector
         :return: a scalar

@@ -235,8 +237,9 @@ class BaseTrainer(object):
         return self._loss_func(predict, truth)

     def define_loss(self):
-        """
-        if the model defines a loss, use model's loss.
+        """Define a loss for the trainer.
+
+        If the model defines a loss, use the model's loss.
         Otherwise, Trainer must have a loss argument; use it as the loss.
         These two losses cannot be defined at the same time.
         Trainer does not handle loss definition or choose default losses.

@@ -253,7 +256,8 @@ class BaseTrainer(object):
         logger.info("The model didn't define loss, use Trainer's loss.")

     def best_eval_result(self, validator):
-        """
+        """Check if the current epoch yields better validation results.
+
         :param validator: a Tester instance
         :return: bool, True means the current results on the dev set are the best.

@@ -268,15 +272,14 @@ class BaseTrainer(object):
         """
         if model_name[-4:] != ".pkl":
             model_name += ".pkl"
-        ModelSaver(self.pickle_path + model_name).save_pytorch(network)
+        ModelSaver(os.path.join(self.pickle_path, model_name)).save_pytorch(network)
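
This is the issue #58 fix in action: "+" silently yields a broken path whenever pickle_path lacks a trailing separator, while os.path.join inserts one as needed::

    "save" + "model.pkl"                  # 'savemodel.pkl' -- the bug
    os.path.join("save", "model.pkl")     # 'save/model.pkl' on POSIX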

     def _create_validator(self, valid_args):
         raise NotImplementedError


 class SeqLabelTrainer(BaseTrainer):
-    """
-    Trainer for Sequence Labeling
+    """Trainer for Sequence Labeling.
     """

@@ -306,11 +309,11 @@ class SeqLabelTrainer(BaseTrainer):
         return y

     def get_loss(self, predict, truth):
-        """
-        Compute loss given prediction and ground truth.
+        """Compute loss given prediction and ground truth.
+
         :param predict: prediction label vector, [batch_size, max_len, tag_size]
         :param truth: ground truth label vector, [batch_size, max_len]
-        :return: a scalar
+        :return loss: a scalar
         """
         batch_size, max_len = predict.size(0), predict.size(1)
         assert truth.shape == (batch_size, max_len)
@@ -1,3 +1,5 @@
+import os
+
 from fastNLP.core.predictor import SeqLabelInfer, ClassificationInfer
 from fastNLP.core.preprocess import load_pickle
 from fastNLP.loader.config_loader import ConfigLoader, ConfigSection

@@ -39,8 +41,15 @@ FastNLP_MODEL_COLLECTION = {
         "type": "seq_label",
         "config_file_name": "pos_tag.config",
         "config_section_name": "pos_tag_model"
-    }
+    },
+    "text_classify_model": {
+        "url": "",
+        "class": "cnn_text_classification.CNNText",
+        "pickle": "text_class_model_v0.pkl",
+        "type": "text_class",
+        "config_file_name": "text_classify.cfg",
+        "config_section_name": "model"
+    }
 }

@@ -86,7 +95,7 @@ class FastNLP(object):
         print("Restore model class {}".format(str(model_class)))

         model_args = ConfigSection()
-        ConfigLoader.load_config(self.model_dir + config_file, {section_name: model_args})
+        ConfigLoader.load_config(os.path.join(self.model_dir, config_file), {section_name: model_args})
         print("Restore model hyper-parameters {}".format(str(model_args.data)))

         # fetch dictionary size and number of labels from pickle files

@@ -100,7 +109,7 @@ class FastNLP(object):
         print("Model constructed.")

         # To do: framework independent
-        ModelLoader.load_pytorch(model, self.model_dir + FastNLP_MODEL_COLLECTION[model_name]["pickle"])
+        ModelLoader.load_pytorch(model, os.path.join(self.model_dir, FastNLP_MODEL_COLLECTION[model_name]["pickle"]))
         print("Model weights loaded.")

         self.model = model
@@ -13,8 +13,8 @@ with open('requirements.txt') as f:
 setup(
     name='fastNLP',
-    version='1.0',
-    description=('fudan fastNLP '),
+    version='0.0.1',
+    description='fastNLP: Deep Learning Toolkit for NLP, developed by Fudan FastNLP Team',
     long_description=readme,
     license=license,
     author='fudanNLP',
@@ -1,9 +1,8 @@
-import os
 import unittest

 from fastNLP.core.action import Action, Batchifier, SequentialSampler


 class TestAction(unittest.TestCase):
     def test_case_1(self):
         x = [1, 2, 3, 4, 5, 6, 7, 8]
@@ -6,7 +6,7 @@ from fastNLP.fastnlp import interpret_word_seg_results, interpret_cws_pos_result
 PATH_TO_CWS_PICKLE_FILES = "/home/zyfeng/fastNLP/reproduction/chinese_word_segment/save/"
 PATH_TO_POS_TAG_PICKLE_FILES = "/home/zyfeng/data/crf_seg/"
+PATH_TO_TEXT_CLASSIFICATION_PICKLE_FILES = "/home/zyfeng/data/text_classify/"

 def word_seg():
     nlp = FastNLP(model_dir=PATH_TO_CWS_PICKLE_FILES)

@@ -68,7 +68,18 @@ def pos_tag():
     print(interpret_cws_pos_results(words, labels))


+def text_classify():
+    nlp = FastNLP(model_dir=PATH_TO_TEXT_CLASSIFICATION_PICKLE_FILES)
+    nlp.load("text_classify_model", config_file="text_classify.cfg", section_name="model")
+    # three Chinese news headlines: finance ("World IoT Conference opens in Beijing tomorrow,
+    # leading stocks about to move"), travel ("Urumqi adds a new downtown tourist destination"),
+    # and history ("Did Zhu Yuanzhang's Ming dynasty really originate from Manichaeism?")
+    text = [
+        "世界物联网大会明日在京召开龙头股启动在即",
+        "乌鲁木齐市新增一处城市中心旅游目的地",
+        "朱元璋的大明朝真的源于明教吗?——告诉你一个真实的“明教”"]
+    results = nlp.run(text)
+    print(results)
+    """
+    ['finance', 'travel', 'history']
+    """
+
 if __name__ == "__main__":
-    pos_tag()
+    text_classify()