diff --git a/fastNLP/core/__init__.py b/fastNLP/core/__init__.py index 0bb6a2dd..dbe86953 100644 --- a/fastNLP/core/__init__.py +++ b/fastNLP/core/__init__.py @@ -1,5 +1,5 @@ from .batch import Batch -# from .dataset import DataSet +from .dataset import DataSet from .fieldarray import FieldArray from .instance import Instance from .losses import LossFunc, CrossEntropyLoss, L1Loss, BCELoss, NLLLoss, LossInForward @@ -9,5 +9,5 @@ from .sampler import SequentialSampler, BucketSampler, RandomSampler, BaseSample from .tester import Tester from .trainer import Trainer from .vocabulary import Vocabulary -from ..io.dataset_loader import DataSet from .callback import Callback +from .utils import cache_results \ No newline at end of file diff --git a/fastNLP/core/callback.py b/fastNLP/core/callback.py index 01f6ce68..2ee5b3a6 100644 --- a/fastNLP/core/callback.py +++ b/fastNLP/core/callback.py @@ -61,6 +61,10 @@ class Callback(object): """If use_tqdm, return trainer's tqdm print bar, else return None.""" return self._trainer.pbar + @property + def update_every(self): + """The model in trainer will update parameters every `update_every` batches.""" + return self._trainer.update_every def on_train_begin(self): # before the main training loop pass diff --git a/fastNLP/core/dataset.py b/fastNLP/core/dataset.py index 068afb38..7b0e3b9a 100644 --- a/fastNLP/core/dataset.py +++ b/fastNLP/core/dataset.py @@ -6,7 +6,6 @@ from fastNLP.core.fieldarray import AutoPadder from fastNLP.core.fieldarray import FieldArray from fastNLP.core.instance import Instance from fastNLP.core.utils import get_func_signature -from fastNLP.io.base_loader import DataLoaderRegister class DataSet(object): @@ -105,11 +104,6 @@ class DataSet(object): raise AttributeError if isinstance(item, str) and item in self.field_arrays: return self.field_arrays[item] - try: - reader = DataLoaderRegister.get_reader(item) - return reader - except AttributeError: - raise def __setstate__(self, state): self.__dict__ = state 
@@ -369,7 +363,7 @@ class DataSet(object): :return dataset: the read data set """ - with open(csv_path, "r") as f: + with open(csv_path, "r", encoding='utf-8') as f: start_idx = 0 if headers is None: headers = f.readline().rstrip('\r\n') diff --git a/fastNLP/core/utils.py b/fastNLP/core/utils.py index 695efdfc..d9141412 100644 --- a/fastNLP/core/utils.py +++ b/fastNLP/core/utils.py @@ -11,6 +11,66 @@ import torch CheckRes = namedtuple('CheckRes', ['missing', 'unused', 'duplicated', 'required', 'all_needed', 'varargs']) +def _prepare_cache_filepath(filepath): + """ + 检查filepath是否可以作为合理的cache文件. 如果可以的话,会自动创造路径 + :param filepath: str. + :return: None, if not, this function will raise error + """ + _cache_filepath = os.path.abspath(filepath) + if os.path.isdir(_cache_filepath): + raise RuntimeError("The cache_file_path must be a file, not a directory.") + cache_dir = os.path.dirname(_cache_filepath) + if not os.path.exists(cache_dir): + os.makedirs(cache_dir) + + +def cache_results(cache_filepath, refresh=False, verbose=1): + def wrapper_(func): + signature = inspect.signature(func) + for key, _ in signature.parameters.items(): + if key in ('cache_filepath', 'refresh', 'verbose'): + raise RuntimeError("The function decorated by cache_results cannot have keyword `{}`.".format(key)) + def wrapper(*args, **kwargs): + if 'cache_filepath' in kwargs: + _cache_filepath = kwargs.pop('cache_filepath') + assert isinstance(_cache_filepath, str), "cache_filepath can only be str." + else: + _cache_filepath = cache_filepath + if 'refresh' in kwargs: + _refresh = kwargs.pop('refresh') + assert isinstance(_refresh, bool), "refresh can only be bool." + else: + _refresh = refresh + if 'verbose' in kwargs: + _verbose = kwargs.pop('verbose') + assert isinstance(_verbose, int), "verbose can only be integer." + else: + _verbose = verbose
+ refresh_flag = True + + if _cache_filepath is not None and _refresh is False: + # load data + if os.path.exists(_cache_filepath): + with open(_cache_filepath, 'rb') as f: + results = _pickle.load(f) + if _verbose==1: + print("Read cache from {}.".format(_cache_filepath)) + refresh_flag = False + + if refresh_flag: + results = func(*args, **kwargs) + if _cache_filepath is not None: + if results is None: + raise RuntimeError("The return value is None. Delete the decorator.") + _prepare_cache_filepath(_cache_filepath) + with open(_cache_filepath, 'wb') as f: + _pickle.dump(results, f) + print("Save cache to {}.".format(_cache_filepath)) + + return results + return wrapper + return wrapper_ def save_pickle(obj, pickle_path, file_name): """Save an object into a pickle file. diff --git a/fastNLP/core/vocabulary.py b/fastNLP/core/vocabulary.py index a1c8e678..a73ce2c7 100644 --- a/fastNLP/core/vocabulary.py +++ b/fastNLP/core/vocabulary.py @@ -1,5 +1,5 @@ from collections import Counter - +from fastNLP.core.dataset import DataSet def check_build_vocab(func): """A decorator to make sure the indexing is built before used. @@ -151,6 +151,68 @@ class Vocabulary(object): else: raise ValueError("word {} not in vocabulary".format(w)) + @check_build_vocab + def index_dataset(self, *datasets, field_name, new_field_name=None): + """ + example: + # remember to use `field_name` + vocab.index_dataset(tr_data, dev_data, te_data, field_name='words') + + :param datasets: fastNLP Dataset type. you can pass multiple datasets + :param field_name: str, what field to index. Only support 0,1,2 dimension. + :param new_field_name: str.
What the indexed field should be named, default is to overwrite field_name + :return: + """ + def index_instance(ins): + """ + 有几种情况, str, 1d-list, 2d-list + :param ins: + :return: + """ + field = ins[field_name] + if isinstance(field, str): + return self.to_index(field) + elif isinstance(field, list): + if not isinstance(field[0], list): + return [self.to_index(w) for w in field] + else: + if isinstance(field[0][0], list): + raise RuntimeError("Only support field with 2 dimensions.") + return[[self.to_index(c) for c in w] for w in field] + + if new_field_name is None: + new_field_name = field_name + for dataset in datasets: + if isinstance(dataset, DataSet): + dataset.apply(index_instance, new_field_name=new_field_name) + else: + raise RuntimeError("Only DataSet type is allowed.") + + def from_dataset(self, *datasets, field_name): + """ + Construct vocab from dataset. + + :param datasets: DataSet. + :param field_name: str, what field is used to construct dataset. + :return: + """ + def construct_vocab(ins): + field = ins[field_name] + if isinstance(field, str): + self.add_word(field) + elif isinstance(field, list): + if not isinstance(field[0], list): + self.add_word_lst(field) + else: + if isinstance(field[0][0], list): + raise RuntimeError("Only support field with 2 dimensions.") + [self.add_word_lst(w) for w in field] + for dataset in datasets: + if isinstance(dataset, DataSet): + dataset.apply(construct_vocab) + else: + raise RuntimeError("Only DataSet type is allowed.") + def to_index(self, w): """ Turn a word to an index. If w is not in Vocabulary, return the unknown label. 
diff --git a/fastNLP/io/__init__.py b/fastNLP/io/__init__.py index e69de29b..a3b18aa5 100644 --- a/fastNLP/io/__init__.py +++ b/fastNLP/io/__init__.py @@ -0,0 +1 @@ +from .embed_loader import EmbedLoader diff --git a/fastNLP/io/embed_loader.py b/fastNLP/io/embed_loader.py index 1615fb7f..08a55aa6 100644 --- a/fastNLP/io/embed_loader.py +++ b/fastNLP/io/embed_loader.py @@ -1,3 +1,5 @@ +import os + import numpy as np import torch @@ -124,3 +126,97 @@ class EmbedLoader(BaseLoader): size=(len(vocab) - np.sum(hit_flags), emb_dim)) embedding_matrix[np.where(1 - hit_flags)] = sampled_vectors return embedding_matrix + + @staticmethod + def load_with_vocab(embed_filepath, vocab, dtype=np.float32, normalize=True): + """ + load pretraining embedding in {embed_file} based on words in vocab. Words in vocab but not in the pretraining + embedding are initialized from a normal distribution which has the mean and std of the found words vectors. + The embedding type is determined automatically, support glove and word2vec(the first line only has two elements). + + :param embed_filepath: str, where to read pretrain embedding + :param vocab: Vocabulary. + :param dtype: the dtype of the embedding matrix + :param normalize: bool, whether to normalize each word vector so that every vector has norm 1. + :return: np.ndarray() will have the same [len(vocab), dimension], dimension is determined by the pretrain + embedding + """ + assert isinstance(vocab, Vocabulary), "Only fastNLP.Vocabulary is supported." 
+ if not os.path.exists(embed_filepath): + raise FileNotFoundError("`{}` does not exist.".format(embed_filepath)) + with open(embed_filepath, 'r', encoding='utf-8') as f: + hit_flags = np.zeros(len(vocab), dtype=bool) + line = f.readline().strip() + parts = line.split() + if len(parts)==2: + dim = int(parts[1]) + else: + dim = len(parts)-1 + f.seek(0) + matrix = np.random.randn(len(vocab), dim).astype(dtype) + for line in f: + parts = line.strip().split() + if parts[0] in vocab: + index = vocab.to_index(parts[0]) + matrix[index] = np.fromstring(' '.join(parts[1:]), sep=' ', dtype=dtype, count=dim) + hit_flags[index] = True + total_hits = sum(hit_flags) + print("Found {} out of {} words in the pre-training embedding.".format(total_hits, len(vocab))) + found_vectors = matrix[hit_flags] + if len(found_vectors)!=0: + mean = np.mean(found_vectors, axis=0, keepdims=True) + std = np.std(found_vectors, axis=0, keepdims=True) + unfound_vec_num = len(vocab) - total_hits + r_vecs = np.random.randn(unfound_vec_num, dim).astype(dtype)*std + mean + matrix[hit_flags==False] = r_vecs + + if normalize: + matrix /= np.linalg.norm(matrix, axis=1, keepdims=True) + + return matrix + + @staticmethod + def load_without_vocab(embed_filepath, dtype=np.float32, padding='<pad>', unknown='<unk>', normalize=True): + """ + load pretraining embedding in {embed_file}. And construct a Vocabulary based on the pretraining embedding. + The embedding type is determined automatically, support glove and word2vec(the first line only has two elements). + + :param embed_filepath: str, where to read pretrain embedding + :param dtype: the dtype of the embedding matrix + :param padding: the padding tag for vocabulary. + :param unknown: the unknown tag for vocabulary. + :param normalize: bool, whether to normalize each word vector so that every vector has norm 1.
+ :return: np.ndarray() is determined by the pretraining embeddings + Vocabulary: contain all pretraining words and two special tags [<pad>, <unk>] + + """ + vocab = Vocabulary(padding=padding, unknown=unknown) + vec_dict = {} + + with open(embed_filepath, 'r', encoding='utf-8') as f: + line = f.readline() + start = 1 + dim = -1 + if len(line.strip().split())!=2: + f.seek(0) + start = 0 + for idx, line in enumerate(f, start=start): + parts = line.strip().split() + word = parts[0] + if dim==-1: + dim = len(parts)-1 + vec = np.fromstring(' '.join(parts[1:]), sep=' ', dtype=dtype, count=dim) + vec_dict[word] = vec + vocab.add_word(word) + if dim==-1: + raise RuntimeError("{} is an empty file.".format(embed_filepath)) + matrix = np.random.randn(len(vocab), dim).astype(dtype) + + for key, vec in vec_dict.items(): + index = vocab.to_index(key) + matrix[index] = vec + + if normalize: + matrix /= np.linalg.norm(matrix, axis=1, keepdims=True) + + return matrix, vocab diff --git a/fastNLP/io/logger.py b/fastNLP/io/logger.py deleted file mode 100644 index 9e9730db..00000000 --- a/fastNLP/io/logger.py +++ /dev/null @@ -1,35 +0,0 @@ -import logging -import os - - -def create_logger(logger_name, log_path, log_format=None, log_level=logging.INFO): - """Create a logger.
- - :param str logger_name: - :param str log_path: - :param log_format: - :param log_level: - :return: logger - - To use a logger:: - - logger.debug("this is a debug message") - logger.info("this is a info message") - logger.warning("this is a warning message") - logger.error("this is an error message") - """ - logger = logging.getLogger(logger_name) - logger.setLevel(log_level) - if log_path is None: - handler = logging.StreamHandler() - else: - os.stat(os.path.dirname(os.path.abspath(log_path))) - handler = logging.FileHandler(log_path) - handler.setLevel(log_level) - if log_format is None: - log_format = "[%(asctime)s %(name)-13s %(levelname)s %(process)d %(thread)d " \ - "%(filename)s:%(lineno)-5d] %(message)s" - formatter = logging.Formatter(log_format) - handler.setFormatter(formatter) - logger.addHandler(handler) - return logger diff --git a/test/core/test_utils.py b/test/core/test_utils.py new file mode 100644 index 00000000..5c325127 --- /dev/null +++ b/test/core/test_utils.py @@ -0,0 +1,115 @@ + +import unittest +import _pickle +from fastNLP import cache_results +from fastNLP.io.embed_loader import EmbedLoader +from fastNLP import DataSet +from fastNLP import Instance +import time +import os + +@cache_results('test/demo1.pkl') +def process_data_1(embed_file, cws_train): + embed, vocab = EmbedLoader.load_without_vocab(embed_file) + time.sleep(1) # 测试是否通过读取cache获得结果 + with open(cws_train, 'r', encoding='utf-8') as f: + d = DataSet() + for line in f: + line = line.strip() + if len(line)>0: + d.append(Instance(raw=line)) + return embed, vocab, d + + +class TestCache(unittest.TestCase): + def test_cache_save(self): + try: + start_time = time.time() + embed, vocab, d = process_data_1('test/data_for_tests/word2vec_test.txt', 'test/data_for_tests/cws_train') + end_time = time.time() + pre_time = end_time - start_time + with open('test/demo1.pkl', 'rb') as f: + _embed, _vocab, _d = _pickle.load(f) + self.assertEqual(embed.shape, _embed.shape) + for i in 
range(embed.shape[0]): + self.assertListEqual(embed[i].tolist(), _embed[i].tolist()) + start_time = time.time() + embed, vocab, d = process_data_1('test/data_for_tests/word2vec_test.txt', 'test/data_for_tests/cws_train') + end_time = time.time() + read_time = end_time - start_time + print("Read using {:.3f}, while prepare using:{:.3f}".format(read_time, pre_time)) + self.assertGreater(pre_time-0.5, read_time) + finally: + os.remove('test/demo1.pkl') + + def test_cache_save_overwrite_path(self): + try: + start_time = time.time() + embed, vocab, d = process_data_1('test/data_for_tests/word2vec_test.txt', 'test/data_for_tests/cws_train', + cache_filepath='test/demo_overwrite.pkl') + end_time = time.time() + pre_time = end_time - start_time + with open('test/demo_overwrite.pkl', 'rb') as f: + _embed, _vocab, _d = _pickle.load(f) + self.assertEqual(embed.shape, _embed.shape) + for i in range(embed.shape[0]): + self.assertListEqual(embed[i].tolist(), _embed[i].tolist()) + start_time = time.time() + embed, vocab, d = process_data_1('test/data_for_tests/word2vec_test.txt', 'test/data_for_tests/cws_train', + cache_filepath='test/demo_overwrite.pkl') + end_time = time.time() + read_time = end_time - start_time + print("Read using {:.3f}, while prepare using:{:.3f}".format(read_time, pre_time)) + self.assertGreater(pre_time-0.5, read_time) + finally: + os.remove('test/demo_overwrite.pkl') + + def test_cache_refresh(self): + try: + start_time = time.time() + embed, vocab, d = process_data_1('test/data_for_tests/word2vec_test.txt', 'test/data_for_tests/cws_train', + refresh=True) + end_time = time.time() + pre_time = end_time - start_time + with open('test/demo1.pkl', 'rb') as f: + _embed, _vocab, _d = _pickle.load(f) + self.assertEqual(embed.shape, _embed.shape) + for i in range(embed.shape[0]): + self.assertListEqual(embed[i].tolist(), _embed[i].tolist()) + start_time = time.time() + embed, vocab, d = process_data_1('test/data_for_tests/word2vec_test.txt', 
'test/data_for_tests/cws_train', + refresh=True) + end_time = time.time() + read_time = end_time - start_time + print("Read using {:.3f}, while prepare using:{:.3f}".format(read_time, pre_time)) + self.assertGreater(0.1, pre_time-read_time) + finally: + os.remove('test/demo1.pkl') + + def test_duplicate_keyword(self): + with self.assertRaises(RuntimeError): + @cache_results(None) + def func_verbose(a, verbose): + pass + func_verbose(0, 1) + with self.assertRaises(RuntimeError): + @cache_results(None) + def func_cache(a, cache_filepath): + pass + func_cache(1, 2) + with self.assertRaises(RuntimeError): + @cache_results(None) + def func_refresh(a, refresh): + pass + func_refresh(1, 2) + + def test_create_cache_dir(self): + @cache_results('test/demo1/demo.pkl') + def cache(): + return 1, 2 + try: + results = cache() + print(results) + finally: + os.remove('test/demo1/demo.pkl') + os.rmdir('test/demo1') \ No newline at end of file diff --git a/test/core/test_vocabulary.py b/test/core/test_vocabulary.py index 2f9cd3b1..0f13b935 100644 --- a/test/core/test_vocabulary.py +++ b/test/core/test_vocabulary.py @@ -2,6 +2,8 @@ import unittest from collections import Counter from fastNLP.core.vocabulary import Vocabulary +from fastNLP.core.dataset import DataSet +from fastNLP.core.instance import Instance text = ["FastNLP", "works", "well", "in", "most", "cases", "and", "scales", "well", "in", "works", "well", "in", "most", "cases", "scales", "well"] @@ -31,6 +33,42 @@ class TestAdd(unittest.TestCase): vocab.update(text) self.assertEqual(vocab.word_count, counter) + def test_from_dataset(self): + start_char = 65 + num_samples = 10 + + # 0 dim + dataset = DataSet() + for i in range(num_samples): + ins = Instance(char=chr(start_char+i)) + dataset.append(ins) + vocab = Vocabulary() + vocab.from_dataset(dataset, field_name='char') + for i in range(num_samples): + self.assertEqual(vocab.to_index(chr(start_char+i)), i+2) + vocab.index_dataset(dataset, field_name='char') + + # 1 dim + 
dataset = DataSet() + for i in range(num_samples): + ins = Instance(char=[chr(start_char+i)]*6) + dataset.append(ins) + vocab = Vocabulary() + vocab.from_dataset(dataset, field_name='char') + for i in range(num_samples): + self.assertEqual(vocab.to_index(chr(start_char+i)), i+2) + vocab.index_dataset(dataset, field_name='char') + + # 2 dim + dataset = DataSet() + for i in range(num_samples): + ins = Instance(char=[[chr(start_char+i) for _ in range(6)] for _ in range(6)]) + dataset.append(ins) + vocab = Vocabulary() + vocab.from_dataset(dataset, field_name='char') + for i in range(num_samples): + self.assertEqual(vocab.to_index(chr(start_char+i)), i+2) + vocab.index_dataset(dataset, field_name='char') class TestIndexing(unittest.TestCase): def test_len(self): diff --git a/test/io/test_embed_loader.py b/test/io/test_embed_loader.py index 60e3710e..3f1fb5e7 100644 --- a/test/io/test_embed_loader.py +++ b/test/io/test_embed_loader.py @@ -1,4 +1,5 @@ import unittest +import numpy as np from fastNLP.core.vocabulary import Vocabulary from fastNLP.io.embed_loader import EmbedLoader @@ -10,3 +11,34 @@ class TestEmbedLoader(unittest.TestCase): vocab.update(["the", "in", "I", "to", "of", "hahaha"]) embedding = EmbedLoader().fast_load_embedding(50, "test/data_for_tests/glove.6B.50d_test.txt", vocab) self.assertEqual(tuple(embedding.shape), (len(vocab), 50)) + + def test_load_with_vocab(self): + vocab = Vocabulary() + glove = "test/data_for_tests/glove.6B.50d_test.txt" + word2vec = "test/data_for_tests/word2vec_test.txt" + vocab.add_word('the') + g_m = EmbedLoader.load_with_vocab(glove, vocab) + self.assertEqual(g_m.shape, (3, 50)) + w_m = EmbedLoader.load_with_vocab(word2vec, vocab, normalize=True) + self.assertEqual(w_m.shape, (3, 50)) + self.assertAlmostEqual(np.linalg.norm(w_m, axis=1).sum(), 3) + + def test_load_without_vocab(self): + words = ['the', 'of', 'in', 'a', 'to', 'and'] + glove = "test/data_for_tests/glove.6B.50d_test.txt" + word2vec = 
"test/data_for_tests/word2vec_test.txt" + g_m, vocab = EmbedLoader.load_without_vocab(glove) + self.assertEqual(g_m.shape, (8, 50)) + for word in words: + self.assertIn(word, vocab) + w_m, vocab = EmbedLoader.load_without_vocab(word2vec, normalize=True) + self.assertEqual(w_m.shape, (8, 50)) + self.assertAlmostEqual(np.linalg.norm(w_m, axis=1).sum(), 8) + for word in words: + self.assertIn(word, vocab) + # no unk + w_m, vocab = EmbedLoader.load_without_vocab(word2vec, normalize=True, unknown=None) + self.assertEqual(w_m.shape, (7, 50)) + self.assertAlmostEqual(np.linalg.norm(w_m, axis=1).sum(), 7) + for word in words: + self.assertIn(word, vocab) \ No newline at end of file