
Merge pull request #5 from lyhuang18/lyhuang-reproduction

Results for Yelp_f
tags/v0.4.10
SrWYG committed via GitHub · 5 years ago
commit e38777b014
37 changed files with 1079 additions and 3060 deletions
  1. fastNLP/core/vocabulary.py (+5 -0)
  2. fastNLP/modules/encoder/_elmo.py (+288 -170)
  3. fastNLP/modules/encoder/embedding.py (+93 -26)
  4. reproduction/CNN-sentence_classification/.gitignore (+0 -110)
  5. reproduction/CNN-sentence_classification/README.md (+0 -77)
  6. reproduction/CNN-sentence_classification/__init__.py (+0 -0)
  7. reproduction/CNN-sentence_classification/dataset.py (+0 -136)
  8. reproduction/CNN-sentence_classification/model.py (+0 -42)
  9. reproduction/CNN-sentence_classification/train.py (+0 -92)
  10. reproduction/Char-aware_NLM/LICENSE (+0 -21)
  11. reproduction/Char-aware_NLM/README.md (+0 -40)
  12. reproduction/Char-aware_NLM/__init__.py (+0 -0)
  13. reproduction/Char-aware_NLM/main.py (+0 -9)
  14. reproduction/Char-aware_NLM/model.py (+0 -145)
  15. reproduction/Char-aware_NLM/test.py (+0 -117)
  16. reproduction/Char-aware_NLM/test.txt (+0 -320)
  17. reproduction/Char-aware_NLM/train.py (+0 -263)
  18. reproduction/Char-aware_NLM/train.txt (+0 -360)
  19. reproduction/Char-aware_NLM/utilities.py (+0 -82)
  20. reproduction/Char-aware_NLM/valid.txt (+0 -336)
  21. reproduction/HAN-document_classification/README.md (+0 -36)
  22. reproduction/HAN-document_classification/__init__.py (+0 -0)
  23. reproduction/HAN-document_classification/evaluate.py (+0 -45)
  24. reproduction/HAN-document_classification/model.py (+0 -113)
  25. reproduction/HAN-document_classification/preprocess.py (+0 -50)
  26. reproduction/HAN-document_classification/train.py (+0 -171)
  27. reproduction/Summmarization/BertSum/callback.py (+129 -0)
  28. reproduction/Summmarization/BertSum/dataloader.py (+157 -0)
  29. reproduction/Summmarization/BertSum/metrics.py (+178 -0)
  30. reproduction/Summmarization/BertSum/model.py (+51 -0)
  31. reproduction/Summmarization/BertSum/train_BertSum.py (+147 -0)
  32. reproduction/Summmarization/BertSum/utils.py (+24 -0)
  33. reproduction/seqence_labelling/ner/data/Conll2003Loader.py (+0 -93)
  34. reproduction/seqence_labelling/ner/data/OntoNoteLoader.py (+0 -152)
  35. reproduction/seqence_labelling/ner/data/utils.py (+0 -49)
  36. reproduction/text_classification/README.md (+3 -3)
  37. reproduction/utils.py (+4 -2)

fastNLP/core/vocabulary.py (+5 -0)

@@ -117,6 +117,8 @@ class Vocabulary(object):


:param str word: 新词
"""
if word in self._no_create_word:
self._no_create_word.pop(word)
self.add(word)
@_check_build_status
@@ -126,6 +128,9 @@ class Vocabulary(object):


:param list[str] word_lst: 词的序列
"""
for word in word_lst:
if word in self._no_create_word:
self._no_create_word.pop(word)
self.update(word_lst)
def build_vocab(self):
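For context, a minimal usage sketch of the changed behavior. It only relies on the fastNLP `Vocabulary` API visible in this diff (`add_word`, `add_word_lst`, `build_vocab`, `to_index`, and the internal `_no_create_word` counter); the example words are made up.

```
from fastNLP.core.vocabulary import Vocabulary

vocab = Vocabulary()
vocab.add_word_lst(["this", "is", "a", "sentence"])
# After this change, add_word / add_word_lst also drop the word from the internal
# _no_create_word counter, so a word previously registered as a "no-create" entry
# (e.g. one only seen in dev/test data) is promoted to a real entry when it is
# added explicitly.
vocab.add_word("sentence")
vocab.build_vocab()
print(len(vocab), vocab.to_index("sentence"))
```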


fastNLP/modules/encoder/_elmo.py (+288 -170)

@@ -1,12 +1,13 @@

""" """
这个页面的代码大量参考了https://github.com/HIT-SCIR/ELMoForManyLangs/tree/master/elmoformanylangs
这个页面的代码大量参考了 allenNLP
""" """



from typing import Optional, Tuple, List, Callable from typing import Optional, Tuple, List, Callable


import os import os

import h5py
import numpy
import torch import torch
import torch.nn as nn import torch.nn as nn
import torch.nn.functional as F import torch.nn.functional as F
@@ -16,7 +17,6 @@ import json


from ..utils import get_dropout_mask from ..utils import get_dropout_mask
import codecs import codecs
from torch import autograd


class LstmCellWithProjection(torch.nn.Module):
"""
@@ -58,6 +58,7 @@ class LstmCellWithProjection(torch.nn.Module):
respectively. The first dimension is 1 in order to match the Pytorch
API for returning stacked LSTM states.
"""

def __init__(self,
input_size: int,
hidden_size: int,
@@ -129,13 +130,13 @@ class LstmCellWithProjection(torch.nn.Module):
# We have to use this '.data.new().fill_' pattern to create tensors with the correct
# type - forward has no knowledge of whether these are torch.Tensors or torch.cuda.Tensors.
output_accumulator = inputs.data.new(batch_size,
total_timesteps,
self.hidden_size).fill_(0)
if initial_state is None:
full_batch_previous_memory = inputs.data.new(batch_size,
self.cell_size).fill_(0)
full_batch_previous_state = inputs.data.new(batch_size,
self.hidden_size).fill_(0)
else:
full_batch_previous_state = initial_state[0].squeeze(0)
full_batch_previous_memory = initial_state[1].squeeze(0)
@@ -169,7 +170,7 @@ class LstmCellWithProjection(torch.nn.Module):
# Second conditional: Does the next shortest sequence beyond the current batch
# index require computation use this timestep?
while current_length_index < (len(batch_lengths) - 1) and \
batch_lengths[current_length_index + 1] > index:
current_length_index += 1


# Actually get the slices of the batch which we
@@ -256,7 +257,7 @@ class LstmbiLm(nn.Module):
inputs = inputs[sort_idx]
inputs = nn.utils.rnn.pack_padded_sequence(inputs, sort_lens, batch_first=self.batch_first)
output, hx = self.encoder(inputs, None)  # -> [N,L,C]
output, _ = nn.util.rnn.pad_packed_sequence(output, batch_first=self.batch_first)
output, _ = nn.utils.rnn.pad_packed_sequence(output, batch_first=self.batch_first)
_, unsort_idx = torch.sort(sort_idx, dim=0, descending=False)
output = output[unsort_idx]
forward, backward = output.split(self.config['encoder']['dim'], 2)
@@ -316,13 +317,13 @@ class ElmobiLm(torch.nn.Module):
:param seq_len: batch_size
:return: torch.FloatTensor. num_layers x batch_size x max_len x hidden_size
"""
max_len = inputs.size(1)
sort_lens, sort_idx = torch.sort(seq_len, dim=0, descending=True)
inputs = inputs[sort_idx]
inputs = nn.utils.rnn.pack_padded_sequence(inputs, sort_lens, batch_first=True)
output, _ = self._lstm_forward(inputs, None)
_, unsort_idx = torch.sort(sort_idx, dim=0, descending=False)
output = output[:, unsort_idx]

return output

def _lstm_forward(self,
@@ -399,7 +400,7 @@ class ElmobiLm(torch.nn.Module):
torch.cat([forward_state[1], backward_state[1]], -1)))

stacked_sequence_outputs: torch.FloatTensor = torch.stack(sequence_outputs)
# Stack the hidden state and memory for each layer into 2 tensors of shape
# (num_layers, batch_size, hidden_size) and (num_layers, batch_size, cell_size)
# respectively.
final_hidden_states, final_memory_states = zip(*final_states)
@@ -408,6 +409,66 @@ class ElmobiLm(torch.nn.Module):
torch.cat(final_memory_states, 0))
return stacked_sequence_outputs, final_state_tuple


def load_weights(self, weight_file: str) -> None:
"""
Load the pre-trained weights from the file.
"""
requires_grad = False

with h5py.File(weight_file, 'r') as fin:
for i_layer, lstms in enumerate(
zip(self.forward_layers, self.backward_layers)
):
for j_direction, lstm in enumerate(lstms):
# lstm is an instance of LSTMCellWithProjection
cell_size = lstm.cell_size

dataset = fin['RNN_%s' % j_direction]['RNN']['MultiRNNCell']['Cell%s' % i_layer]['LSTMCell']

# tensorflow packs together both W and U matrices into one matrix,
# but pytorch maintains individual matrices. In addition, tensorflow
# packs the gates as input, memory, forget, output but pytorch
# uses input, forget, memory, output. So we need to modify the weights.
tf_weights = numpy.transpose(dataset['W_0'][...])
torch_weights = tf_weights.copy()

# split the W from U matrices
input_size = lstm.input_size
input_weights = torch_weights[:, :input_size]
recurrent_weights = torch_weights[:, input_size:]
tf_input_weights = tf_weights[:, :input_size]
tf_recurrent_weights = tf_weights[:, input_size:]

# handle the different gate order convention
for torch_w, tf_w in [[input_weights, tf_input_weights],
[recurrent_weights, tf_recurrent_weights]]:
torch_w[(1 * cell_size):(2 * cell_size), :] = tf_w[(2 * cell_size):(3 * cell_size), :]
torch_w[(2 * cell_size):(3 * cell_size), :] = tf_w[(1 * cell_size):(2 * cell_size), :]

lstm.input_linearity.weight.data.copy_(torch.FloatTensor(input_weights))
lstm.state_linearity.weight.data.copy_(torch.FloatTensor(recurrent_weights))
lstm.input_linearity.weight.requires_grad = requires_grad
lstm.state_linearity.weight.requires_grad = requires_grad

# the bias weights
tf_bias = dataset['B'][...]
# tensorflow adds 1.0 to forget gate bias instead of modifying the
# parameters...
tf_bias[(2 * cell_size):(3 * cell_size)] += 1
torch_bias = tf_bias.copy()
torch_bias[(1 * cell_size):(2 * cell_size)] = tf_bias[(2 * cell_size):(3 * cell_size)]
torch_bias[(2 * cell_size):(3 * cell_size)] = tf_bias[(1 * cell_size):(2 * cell_size)]
lstm.state_linearity.bias.data.copy_(torch.FloatTensor(torch_bias))
lstm.state_linearity.bias.requires_grad = requires_grad

# the projection weights
proj_weights = numpy.transpose(dataset['W_P_0'][...])
lstm.state_projection.weight.data.copy_(torch.FloatTensor(proj_weights))
lstm.state_projection.weight.requires_grad = requires_grad
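As a side note, here is a tiny self-contained sketch of the gate reordering described in the comments above (illustrative only; the array contents are made up): TensorFlow stores the stacked gate blocks as (input, memory, forget, output) while the PyTorch cells here expect (input, forget, memory, output), so the second and third row blocks are swapped.

```
import numpy

cell_size = 2
# hypothetical TF-ordered weight matrix: rows 0-1 input, 2-3 memory, 4-5 forget, 6-7 output
tf_w = numpy.arange(8 * 3, dtype=float).reshape(8, 3)

torch_w = tf_w.copy()
# same block swap as in load_weights above
torch_w[(1 * cell_size):(2 * cell_size), :] = tf_w[(2 * cell_size):(3 * cell_size), :]
torch_w[(2 * cell_size):(3 * cell_size), :] = tf_w[(1 * cell_size):(2 * cell_size), :]
# torch_w rows are now ordered: input, forget, memory, output
```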



class LstmTokenEmbedder(nn.Module):
def __init__(self, config, word_emb_layer, char_emb_layer):
@@ -441,7 +502,7 @@ class LstmTokenEmbedder(nn.Module):
chars_emb = self.char_emb_layer(chars)
# TODO 这里应该要考虑seq_len的问题
_, (chars_outputs, __) = self.char_lstm(chars_emb)
chars_outputs = chars_outputs.contiguous().view(-1, self.config['token_embedder']['char_dim'] * 2)
chars_outputs = chars_outputs.contiguous().view(-1, self.config['token_embedder']['embedding']['dim'] * 2)
embs.append(chars_outputs)

token_embedding = torch.cat(embs, dim=2)
@@ -450,79 +511,143 @@ class LstmTokenEmbedder(nn.Module):




class ConvTokenEmbedder(nn.Module):
def __init__(self, config, word_emb_layer, char_emb_layer):
def __init__(self, config, weight_file, word_emb_layer, char_emb_layer, char_vocab):
super(ConvTokenEmbedder, self).__init__()
self.config = config
self.weight_file = weight_file
self.word_emb_layer = word_emb_layer
self.char_emb_layer = char_emb_layer

self.output_dim = config['encoder']['projection_dim']
self.emb_dim = 0
if word_emb_layer is not None:
self.emb_dim += word_emb_layer.weight.size(1)

if char_emb_layer is not None:
self.convolutions = []
cnn_config = config['token_embedder']
filters = cnn_config['filters']
char_embed_dim = cnn_config['char_dim']

for i, (width, num) in enumerate(filters):
conv = torch.nn.Conv1d(
in_channels=char_embed_dim,
out_channels=num,
kernel_size=width,
bias=True
)
self.convolutions.append(conv)

self.convolutions = nn.ModuleList(self.convolutions)

self.n_filters = sum(f[1] for f in filters)
self.n_highway = cnn_config['n_highway']

self.highways = Highway(self.n_filters, self.n_highway, activation=torch.nn.functional.relu)
self.emb_dim += self.n_filters

self.projection = nn.Linear(self.emb_dim, self.output_dim, bias=True)
self._options = config
self.requires_grad = False
self._load_weights()
self._char_embedding_weights = char_emb_layer.weight.data

def _load_weights(self):
self._load_cnn_weights()
self._load_highway()
self._load_projection()

def _load_cnn_weights(self):
cnn_options = self._options['token_embedder']
filters = cnn_options['filters']
char_embed_dim = cnn_options['embedding']['dim']

convolutions = []
for i, (width, num) in enumerate(filters):
conv = torch.nn.Conv1d(
in_channels=char_embed_dim,
out_channels=num,
kernel_size=width,
bias=True
)
# load the weights
with h5py.File(self.weight_file, 'r') as fin:
weight = fin['CNN']['W_cnn_{}'.format(i)][...]
bias = fin['CNN']['b_cnn_{}'.format(i)][...]

w_reshaped = numpy.transpose(weight.squeeze(axis=0), axes=(2, 1, 0))
if w_reshaped.shape != tuple(conv.weight.data.shape):
raise ValueError("Invalid weight file")
conv.weight.data.copy_(torch.FloatTensor(w_reshaped))
conv.bias.data.copy_(torch.FloatTensor(bias))

conv.weight.requires_grad = self.requires_grad
conv.bias.requires_grad = self.requires_grad

convolutions.append(conv)
self.add_module('char_conv_{}'.format(i), conv)

self._convolutions = convolutions

def _load_highway(self):
# the highway layers have same dimensionality as the number of cnn filters
cnn_options = self._options['token_embedder']
filters = cnn_options['filters']
n_filters = sum(f[1] for f in filters)
n_highway = cnn_options['n_highway']

# create the layers, and load the weights
self._highways = Highway(n_filters, n_highway, activation=torch.nn.functional.relu)
for k in range(n_highway):
# The AllenNLP highway is one matrix multplication with concatenation of
# transform and carry weights.
with h5py.File(self.weight_file, 'r') as fin:
# The weights are transposed due to multiplication order assumptions in tf
# vs pytorch (tf.matmul(X, W) vs pytorch.matmul(W, X))
w_transform = numpy.transpose(fin['CNN_high_{}'.format(k)]['W_transform'][...])
# -1.0 since AllenNLP is g * x + (1 - g) * f(x) but tf is (1 - g) * x + g * f(x)
w_carry = -1.0 * numpy.transpose(fin['CNN_high_{}'.format(k)]['W_carry'][...])
weight = numpy.concatenate([w_transform, w_carry], axis=0)
self._highways._layers[k].weight.data.copy_(torch.FloatTensor(weight))
self._highways._layers[k].weight.requires_grad = self.requires_grad

b_transform = fin['CNN_high_{}'.format(k)]['b_transform'][...]
b_carry = -1.0 * fin['CNN_high_{}'.format(k)]['b_carry'][...]
bias = numpy.concatenate([b_transform, b_carry], axis=0)
self._highways._layers[k].bias.data.copy_(torch.FloatTensor(bias))
self._highways._layers[k].bias.requires_grad = self.requires_grad

def _load_projection(self):
cnn_options = self._options['token_embedder']
filters = cnn_options['filters']
n_filters = sum(f[1] for f in filters)

self._projection = torch.nn.Linear(n_filters, self.output_dim, bias=True)
with h5py.File(self.weight_file, 'r') as fin:
weight = fin['CNN_proj']['W_proj'][...]
bias = fin['CNN_proj']['b_proj'][...]
self._projection.weight.data.copy_(torch.FloatTensor(numpy.transpose(weight)))
self._projection.bias.data.copy_(torch.FloatTensor(bias))

self._projection.weight.requires_grad = self.requires_grad
self._projection.bias.requires_grad = self.requires_grad


def forward(self, words, chars):
embs = []
if self.word_emb_layer is not None:
if hasattr(self, 'words_to_words'):
words = self.words_to_words[words]
word_emb = self.word_emb_layer(words)
embs.append(word_emb)
"""
:param words:
:param chars: Tensor Shape ``(batch_size, sequence_length, 50)``:
:return Tensor Shape ``(batch_size, sequence_length + 2, embedding_dim)`` :
"""
# the character id embedding
# (batch_size * sequence_length, max_chars_per_token, embed_dim)
# character_embedding = torch.nn.functional.embedding(
# chars.view(-1, max_chars_per_token),
# self._char_embedding_weights
# )
batch_size, sequence_length, max_char_len = chars.size()
character_embedding = self.char_emb_layer(chars).reshape(batch_size*sequence_length, max_char_len, -1)
# run convolutions
cnn_options = self._options['token_embedder']
if cnn_options['activation'] == 'tanh':
activation = torch.tanh
elif cnn_options['activation'] == 'relu':
activation = torch.nn.functional.relu
else:
raise Exception("Unknown activation")


if self.char_emb_layer is not None:
batch_size, seq_len, _ = chars.size()
chars = chars.view(batch_size * seq_len, -1)
character_embedding = self.char_emb_layer(chars)
character_embedding = torch.transpose(character_embedding, 1, 2)

cnn_config = self.config['token_embedder']
if cnn_config['activation'] == 'tanh':
activation = torch.nn.functional.tanh
elif cnn_config['activation'] == 'relu':
activation = torch.nn.functional.relu
else:
raise Exception("Unknown activation")
# (batch_size * sequence_length, embed_dim, max_chars_per_token)
character_embedding = torch.transpose(character_embedding, 1, 2)
convs = []
for i in range(len(self._convolutions)):
conv = getattr(self, 'char_conv_{}'.format(i))
convolved = conv(character_embedding)
# (batch_size * sequence_length, n_filters for this width)
convolved, _ = torch.max(convolved, dim=-1)
convolved = activation(convolved)
convs.append(convolved)


convs = []
for i in range(len(self.convolutions)):
convolved = self.convolutions[i](character_embedding)
# (batch_size * sequence_length, n_filters for this width)
convolved, _ = torch.max(convolved, dim=-1)
convolved = activation(convolved)
convs.append(convolved)
char_emb = torch.cat(convs, dim=-1)
char_emb = self.highways(char_emb)
# (batch_size * sequence_length, n_filters)
token_embedding = torch.cat(convs, dim=-1)


embs.append(char_emb.view(batch_size, -1, self.n_filters))
# apply the highway layers (batch_size * sequence_length, n_filters)
token_embedding = self._highways(token_embedding)


token_embedding = torch.cat(embs, dim=2)
# final projection (batch_size * sequence_length, embedding_dim)
token_embedding = self._projection(token_embedding)


return self.projection(token_embedding)
# reshape to (batch_size, sequence_length+2, embedding_dim)
return token_embedding.view(batch_size, sequence_length, -1)




class Highway(torch.nn.Module):
@@ -543,6 +668,7 @@ class Highway(torch.nn.Module):
activation : ``Callable[[torch.Tensor], torch.Tensor]``, optional (default=``torch.nn.functional.relu``)
The non-linearity to use in the highway layers.
"""

def __init__(self,
input_dim: int,
num_layers: int = 1,
@@ -573,6 +699,7 @@ class Highway(torch.nn.Module):
current_input = gate * linear_part + (1 - gate) * nonlinear_part
return current_input


class _ElmoModel(nn.Module):
"""
该Module是ElmoEmbedding中进行所有的heavy lifting的地方。做的工作,包括
@@ -582,11 +709,32 @@ class _ElmoModel(nn.Module):
(4) 设计一个保存token的embedding,允许缓存word的表示。

"""
def __init__(self, model_dir:str, vocab:Vocabulary=None, cache_word_reprs:bool=False):
def __init__(self, model_dir: str, vocab: Vocabulary = None, cache_word_reprs: bool = False):
super(_ElmoModel, self).__init__()
config = json.load(open(os.path.join(model_dir, 'structure_config.json'), 'r'))


dir = os.walk(model_dir)
config_file = None
weight_file = None
config_count = 0
weight_count = 0
for path, dir_list, file_list in dir:
for file_name in file_list:
if file_name.__contains__(".json"):
config_file = file_name
config_count += 1
elif file_name.__contains__(".hdf5"):
weight_file = file_name
weight_count += 1
if config_count > 1 or weight_count > 1:
raise Exception(f"Multiple config files(*.json) or weight files(*.hdf5) detected in {model_dir}.")
elif config_count == 0 or weight_count == 0:
raise Exception(f"No config file or weight file found in {model_dir}")

config = json.load(open(os.path.join(model_dir, config_file), 'r'))
self.weight_file = os.path.join(model_dir, weight_file)
self.config = config
self.requires_grad = False

OOV_TAG = '<oov>'
PAD_TAG = '<pad>'
@@ -595,48 +743,8 @@ class _ElmoModel(nn.Module):
BOW_TAG = '<bow>'
EOW_TAG = '<eow>'


# 将加载embedding放到这里
token_embedder_states = torch.load(os.path.join(model_dir, 'token_embedder.pkl'), map_location='cpu')

# For the model trained with word form word encoder.
if config['token_embedder']['word_dim'] > 0:
word_lexicon = {}
with codecs.open(os.path.join(model_dir, 'word.dic'), 'r', encoding='utf-8') as fpi:
for line in fpi:
tokens = line.strip().split('\t')
if len(tokens) == 1:
tokens.insert(0, '\u3000')
token, i = tokens
word_lexicon[token] = int(i)
# 做一些sanity check
for special_word in [PAD_TAG, OOV_TAG, BOS_TAG, EOS_TAG]:
assert special_word in word_lexicon, f"{special_word} not found in word.dic."
# 根据vocab调整word_embedding
pre_word_embedding = token_embedder_states.pop('word_emb_layer.embedding.weight')
word_emb_layer = nn.Embedding(len(vocab)+2, config['token_embedder']['word_dim']) #多增加两个是为了<bos>与<eos>
found_word_count = 0
for word, index in vocab:
if index == vocab.unknown_idx: # 因为fastNLP的unknow是<unk> 而在这里是<oov>所以ugly强制适配一下
index_in_pre = word_lexicon[OOV_TAG]
found_word_count += 1
elif index == vocab.padding_idx: # 需要pad对齐
index_in_pre = word_lexicon[PAD_TAG]
found_word_count += 1
elif word in word_lexicon:
index_in_pre = word_lexicon[word]
found_word_count += 1
else:
index_in_pre = word_lexicon[OOV_TAG]
word_emb_layer.weight.data[index] = pre_word_embedding[index_in_pre]
print(f"{found_word_count} out of {len(vocab)} words were found in pretrained elmo embedding.")
word_emb_layer.weight.data[-1] = pre_word_embedding[word_lexicon[EOS_TAG]]
word_emb_layer.weight.data[-2] = pre_word_embedding[word_lexicon[BOS_TAG]]
self.word_vocab = vocab
else:
word_emb_layer = None

# For the model trained with character-based word encoder.
if config['token_embedder']['char_dim'] > 0:
if config['token_embedder']['embedding']['dim'] > 0:
char_lexicon = {}
with codecs.open(os.path.join(model_dir, 'char.dic'), 'r', encoding='utf-8') as fpi:
for line in fpi:
@@ -645,22 +753,26 @@ class _ElmoModel(nn.Module):
tokens.insert(0, '\u3000')
token, i = tokens
char_lexicon[token] = int(i)

# 做一些sanity check
for special_word in [PAD_TAG, OOV_TAG, BOW_TAG, EOW_TAG]:
assert special_word in char_lexicon, f"{special_word} not found in char.dic."

# 从vocab中构建char_vocab
char_vocab = Vocabulary(unknown=OOV_TAG, padding=PAD_TAG)
# 需要保证<bow>与<eow>在里面
char_vocab.add_word(BOW_TAG)
char_vocab.add_word(EOW_TAG)
char_vocab.add_word_lst([BOW_TAG, EOW_TAG, BOS_TAG, EOS_TAG])
for word, index in vocab:
char_vocab.add_word_lst(list(word))
# 保证<eos>, <bos>也在
char_vocab.add_word_lst(list(BOS_TAG))
char_vocab.add_word_lst(list(EOS_TAG))
# 根据char_lexicon调整
char_emb_layer = nn.Embedding(len(char_vocab), int(config['token_embedder']['char_dim']))
pre_char_embedding = token_embedder_states.pop('char_emb_layer.embedding.weight')

self.bos_index, self.eos_index, self._pad_index = len(vocab), len(vocab)+1, vocab.padding_idx
# 根据char_lexicon调整, 多设置一位,是预留给word padding的(该位置的char表示为全0表示)
char_emb_layer = nn.Embedding(len(char_vocab)+1, int(config['token_embedder']['embedding']['dim']),
padding_idx=len(char_vocab))
with h5py.File(self.weight_file, 'r') as fin:
char_embed_weights = fin['char_embed'][...]
char_embed_weights = torch.from_numpy(char_embed_weights)
found_char_count = 0
for char, index in char_vocab:  # 调整character embedding
if char in char_lexicon:
@@ -668,79 +780,84 @@ class _ElmoModel(nn.Module):
found_char_count += 1
else:
index_in_pre = char_lexicon[OOV_TAG]
char_emb_layer.weight.data[index] = pre_char_embedding[index_in_pre]
char_emb_layer.weight.data[index] = char_embed_weights[index_in_pre]

print(f"{found_char_count} out of {len(char_vocab)} characters were found in pretrained elmo embedding.")
# 生成words到chars的映射
if config['token_embedder']['name'].lower() == 'cnn':
max_chars = config['token_embedder']['max_characters_per_token']
elif config['token_embedder']['name'].lower() == 'lstm':
max_chars = max(map(lambda x: len(x[0]), vocab)) + 2  # 需要补充两个<bow>与<eow>
else:
raise ValueError('Unknown token_embedder: {0}'.format(config['token_embedder']['name']))
# 增加<bos>, <eos>所以加2.
self.words_to_chars_embedding = nn.Parameter(torch.full((len(vocab)+2, max_chars),
fill_value=char_vocab.to_index(PAD_TAG), dtype=torch.long),
fill_value=len(char_vocab),
dtype=torch.long),
requires_grad=False)
for word, index in vocab:
if len(word)+2>max_chars:
word = word[:max_chars-2]
if index==vocab.padding_idx: # 如果是pad的话,需要和给定的对齐
word = PAD_TAG
elif index==vocab.unknown_idx:
word = OOV_TAG
char_ids = [char_vocab.to_index(BOW_TAG)] + [char_vocab.to_index(c) for c in word] + [char_vocab.to_index(EOW_TAG)]
char_ids += [char_vocab.to_index(PAD_TAG)]*(max_chars-len(char_ids))
for word, index in list(iter(vocab)) + [(BOS_TAG, len(vocab)), (EOS_TAG, len(vocab)+1)]:
if len(word) + 2 > max_chars:
word = word[:max_chars - 2]
if index == self._pad_index:
continue
elif word == BOS_TAG or word == EOS_TAG:
char_ids = [char_vocab.to_index(BOW_TAG)] + [char_vocab.to_index(word)] + [
char_vocab.to_index(EOW_TAG)]
char_ids += [char_vocab.to_index(PAD_TAG)] * (max_chars - len(char_ids))
else:
char_ids = [char_vocab.to_index(BOW_TAG)] + [char_vocab.to_index(c) for c in word] + [
char_vocab.to_index(EOW_TAG)]
char_ids += [char_vocab.to_index(PAD_TAG)] * (max_chars - len(char_ids))
self.words_to_chars_embedding[index] = torch.LongTensor(char_ids)
for index, word in enumerate([BOS_TAG, EOS_TAG]): # 加上<eos>, <bos>
if len(word)+2>max_chars:
word = word[:max_chars-2]
char_ids = [char_vocab.to_index(BOW_TAG)] + [char_vocab.to_index(c) for c in word] + [char_vocab.to_index(EOW_TAG)]
char_ids += [char_vocab.to_index(PAD_TAG)]*(max_chars-len(char_ids))
self.words_to_chars_embedding[index+len(vocab)] = torch.LongTensor(char_ids)

self.char_vocab = char_vocab
else:
char_emb_layer = None

if config['token_embedder']['name'].lower() == 'cnn':
self.token_embedder = ConvTokenEmbedder(
config, word_emb_layer, char_emb_layer)
config, self.weight_file, None, char_emb_layer, self.char_vocab)
elif config['token_embedder']['name'].lower() == 'lstm':
self.token_embedder = LstmTokenEmbedder(
config, word_emb_layer, char_emb_layer)
self.token_embedder.load_state_dict(token_embedder_states, strict=False)
if config['token_embedder']['word_dim'] > 0 and vocab._no_create_word_length > 0: # 需要映射,使得来自于dev, test的idx指向unk
words_to_words = nn.Parameter(torch.arange(len(vocab)+2).long(), requires_grad=False)
config, None, char_emb_layer)

if config['token_embedder']['word_dim'] > 0 \
and vocab._no_create_word_length > 0: # 需要映射,使得来自于dev, test的idx指向unk
words_to_words = nn.Parameter(torch.arange(len(vocab) + 2).long(), requires_grad=False)
for word, idx in vocab:
if vocab._is_word_no_create_entry(word):
words_to_words[idx] = vocab.unknown_idx
setattr(self.token_embedder, 'words_to_words', words_to_words)
self.output_dim = config['encoder']['projection_dim']


# 暂时只考虑 elmo
if config['encoder']['name'].lower() == 'elmo':
self.encoder = ElmobiLm(config)
elif config['encoder']['name'].lower() == 'lstm':
self.encoder = LstmbiLm(config)
self.encoder.load_state_dict(torch.load(os.path.join(model_dir, 'encoder.pkl'),
map_location='cpu'))


self.bos_index = len(vocab)
self.eos_index = len(vocab) + 1
self._pad_index = vocab.padding_idx
self.encoder.load_weights(self.weight_file)


if cache_word_reprs:
if config['token_embedder']['char_dim']>0:  # 只有在使用了chars的情况下有用
if config['token_embedder']['embedding']['dim'] > 0:  # 只有在使用了chars的情况下有用
print("Start to generate cache word representations.")
batch_size = 320
num_batches = self.words_to_chars_embedding.size(0)//batch_size + \
int(self.words_to_chars_embedding.size(0)%batch_size!=0)
self.cached_word_embedding = nn.Embedding(self.words_to_chars_embedding.size(0),
# bos eos
word_size = self.words_to_chars_embedding.size(0)
num_batches = word_size // batch_size + \
int(word_size % batch_size != 0)

self.cached_word_embedding = nn.Embedding(word_size,
config['encoder']['projection_dim'])
with torch.no_grad():
for i in range(num_batches):
words = torch.arange(i*batch_size, min((i+1)*batch_size, self.words_to_chars_embedding.size(0))).long()
words = torch.arange(i * batch_size, min((i + 1) * batch_size, word_size)).long()
chars = self.words_to_chars_embedding[words].unsqueeze(1)  # batch_size x 1 x max_chars
word_reprs = self.token_embedder(words.unsqueeze(1), chars).detach()  # batch_size x 1 x config['encoder']['projection_dim']
self.cached_word_embedding.weight.data[words] = word_reprs.squeeze(1)

print("Finish generating cached word representations. Going to delete the character encoder.")
del self.token_embedder, self.words_to_chars_embedding
else:
@@ -758,7 +875,7 @@ class _ElmoModel(nn.Module):
seq_len = words.ne(self._pad_index).sum(dim=-1)
expanded_words[:, 1:-1] = words
expanded_words[:, 0].fill_(self.bos_index)
expanded_words[torch.arange(batch_size).to(words), seq_len + 1] = self.eos_index
seq_len = seq_len + 2
if hasattr(self, 'cached_word_embedding'):
token_embedding = self.cached_word_embedding(expanded_words)
@@ -767,16 +884,18 @@ class _ElmoModel(nn.Module):
chars = self.words_to_chars_embedding[expanded_words]
else:
chars = None
token_embedding = self.token_embedder(expanded_words, chars)  # batch_size x max_len x embed_dim

if self.config['encoder']['name'] == 'elmo':
encoder_output = self.encoder(token_embedding, seq_len)
if encoder_output.size(2) < max_len+2:
dummy_tensor = encoder_output.new_zeros(encoder_output.size(0), batch_size,
max_len + 2 - encoder_output.size(2), encoder_output.size(-1))
encoder_output = torch.cat([encoder_output, dummy_tensor], 2)
sz = encoder_output.size() # 2, batch_size, max_len, hidden_size
token_embedding = torch.cat([token_embedding, token_embedding], dim=2).view(1, sz[1], sz[2], sz[3])
encoder_output = torch.cat([token_embedding, encoder_output], dim=0)
if encoder_output.size(2) < max_len + 2:
num_layers, _, output_len, hidden_size = encoder_output.size()
dummy_tensor = encoder_output.new_zeros(num_layers, batch_size,
max_len + 2 - output_len, hidden_size)
encoder_output = torch.cat((encoder_output, dummy_tensor), 2)
sz = encoder_output.size() # 2, batch_size, max_len, hidden_size
token_embedding = torch.cat((token_embedding, token_embedding), dim=2).view(1, sz[1], sz[2], sz[3])
encoder_output = torch.cat((token_embedding, encoder_output), dim=0)
elif self.config['encoder']['name'] == 'lstm':
encoder_output = self.encoder(token_embedding, seq_len)
else:
@@ -784,5 +903,4 @@ class _ElmoModel(nn.Module):


# 删除<eos>, <bos>. 这里没有精确地删除,但应该也不会影响最后的结果了。
encoder_output = encoder_output[:, :, 1:-1]

return encoder_output

fastNLP/modules/encoder/embedding.py (+93 -26)

@@ -179,16 +179,16 @@ class StaticEmbedding(TokenEmbedding):
:param model_dir_or_name: 可以有两种方式调用预训练好的static embedding:第一种是传入embedding的文件名,第二种是传入embedding
的名称。目前支持的embedding包括{`en` 或者 `en-glove-840b-300` : glove.840B.300d, `en-glove-6b-50` : glove.6B.50d,
`en-word2vec-300` : GoogleNews-vectors-negative300}。第二种情况将自动查看缓存中是否存在该模型,没有的话将自动下载。
:param requires_grad: 是否需要gradient. 默认为True
:param init_method: 如何初始化没有找到的值。可以使用torch.nn.init.*中各种方法。调用该方法时传入一个tensor对象。
:param normailize: 是否对vector进行normalize,使得每个vector的norm为1。
:param bool requires_grad: 是否需要gradient. 默认为True
:param callable init_method: 如何初始化没有找到的值。可以使用torch.nn.init.*中各种方法。调用该方法时传入一个tensor对象。
:param bool normailize: 是否对vector进行normalize,使得每个vector的norm为1。
:param bool lower: 是否将vocab中的词语小写后再和预训练的词表进行匹配。如果你的词表中包含大写的词语,或者就是需要单独
为大写的词语开辟一个vector表示,则将lower设置为False。
"""
def __init__(self, vocab: Vocabulary, model_dir_or_name: str='en', requires_grad: bool=True, init_method=None,
normalize=False):
normalize=False, lower=False):
super(StaticEmbedding, self).__init__(vocab)


# 优先定义需要下载的static embedding有哪些。这里估计需要自己搞一个server,

# 得到cache_path
if model_dir_or_name.lower() in PRETRAIN_STATIC_FILES:
PRETRAIN_URL = _get_base_url('static')
@@ -202,8 +202,40 @@ class StaticEmbedding(TokenEmbedding):
raise ValueError(f"Cannot recognize {model_dir_or_name}.")

# 读取embedding
embedding = self._load_with_vocab(model_path, vocab=vocab, init_method=init_method,
normalize=normalize)
if lower:
lowered_vocab = Vocabulary(padding=vocab.padding, unknown=vocab.unknown)
for word, index in vocab:
if not vocab._is_word_no_create_entry(word):
lowered_vocab.add_word(word.lower()) # 先加入需要创建entry的
for word in vocab._no_create_word.keys(): # 不需要创建entry的
if word in vocab:
lowered_word = word.lower()
if lowered_word not in lowered_vocab.word_count:
lowered_vocab.add_word(lowered_word)
lowered_vocab._no_create_word[lowered_word] += 1
print(f"All word in vocab have been lowered. There are {len(vocab)} words, {len(lowered_vocab)} unique lowered "
f"words.")
embedding = self._load_with_vocab(model_path, vocab=lowered_vocab, init_method=init_method,
normalize=normalize)
# 需要适配一下
if not hasattr(self, 'words_to_words'):
self.words_to_words = torch.arange(len(lowered_vocab, )).long()
if lowered_vocab.unknown:
unknown_idx = lowered_vocab.unknown_idx
else:
unknown_idx = embedding.size(0) - 1 # 否则是最后一个为unknow
words_to_words = nn.Parameter(torch.full((len(vocab),), fill_value=unknown_idx).long(),
requires_grad=False)
for word, index in vocab:
if word not in lowered_vocab:
word = word.lower()
if lowered_vocab._is_word_no_create_entry(word): # 如果不需要创建entry,已经默认unknown了
continue
words_to_words[index] = self.words_to_words[lowered_vocab.to_index(word)]
self.words_to_words = words_to_words
else:
embedding = self._load_with_vocab(model_path, vocab=vocab, init_method=init_method,
normalize=normalize)
self.embedding = nn.Embedding(num_embeddings=embedding.shape[0], embedding_dim=embedding.shape[1],
padding_idx=vocab.padding_idx,
max_norm=None, norm_type=2, scale_grad_by_freq=False,
@@ -301,7 +333,7 @@ class StaticEmbedding(TokenEmbedding):
if vocab._no_create_word_length>0:
if vocab.unknown is None:  # 创建一个专门的unknown
unknown_idx = len(matrix)
vectors = torch.cat([vectors, torch.zeros(1, dim)], dim=0).contiguous()
vectors = torch.cat((vectors, torch.zeros(1, dim)), dim=0).contiguous()
else:
unknown_idx = vocab.unknown_idx
words_to_words = nn.Parameter(torch.full((len(vocab),), fill_value=unknown_idx).long(),
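A minimal usage sketch of the new `lower` option, assuming the import paths shown in this diff and the `en-glove-6b-50` name from the docstring above (downloading the pretrained vectors is left to fastNLP's cache logic):

```
from fastNLP.core.vocabulary import Vocabulary
from fastNLP.modules.encoder.embedding import StaticEmbedding

vocab = Vocabulary()
vocab.add_word_lst(["The", "the", "THE"])
vocab.build_vocab()
# With lower=True the three surface forms are matched against the single
# pretrained vector for "the"; their indices are remapped via words_to_words.
embed = StaticEmbedding(vocab, model_dir_or_name='en-glove-6b-50', lower=True)
```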
@@ -438,19 +470,15 @@ class ElmoEmbedding(ContextualEmbedding):
:param model_dir_or_name: 可以有两种方式调用预训练好的ELMo embedding:第一种是传入ELMo权重的文件名,第二种是传入ELMo版本的名称,
目前支持的ELMo包括{`en` : 英文版本的ELMo, `cn` : 中文版本的ELMo,}。第二种情况将自动查看缓存中是否存在该模型,没有的话将自动下载
:param layers: str, 指定返回的层数, 以,隔开不同的层。如果要返回第二层的结果'2', 返回后两层的结果'1,2'。不同的层的结果
按照这个顺序concat起来。默认为'2'。
:param requires_grad: bool, 该层是否需要gradient. 默认为False
按照这个顺序concat起来。默认为'2'。'mix'会使用可学习的权重结合不同层的表示(权重是否可训练与requires_grad保持一致,
初始化权重对三层结果进行mean-pooling, 可以通过ElmoEmbedding.set_mix_weights_requires_grad()方法只将mix weights设置为可学习。)
:param requires_grad: bool, 该层是否需要gradient, 默认为False.
:param cache_word_reprs: 可以选择对word的表示进行cache; 设置为True的话,将在初始化的时候为每个word生成对应的embedding,
并删除character encoder,之后将直接使用cache的embedding。默认为False。
"""
def __init__(self, vocab: Vocabulary, model_dir_or_name: str='en',
layers: str='2', requires_grad: bool=False, cache_word_reprs: bool=False):
super(ElmoEmbedding, self).__init__(vocab)
layers = list(map(int, layers.split(',')))
assert len(layers) > 0, "Must choose one output"
for layer in layers:
assert 0 <= layer <= 2, "Layer index should be in range [0, 2]."
self.layers = layers


# 根据model_dir_or_name检查是否存在并下载
if model_dir_or_name.lower() in PRETRAINED_ELMO_MODEL_DIR:
@@ -464,8 +492,49 @@ class ElmoEmbedding(ContextualEmbedding):
else:
raise ValueError(f"Cannot recognize {model_dir_or_name}.")
self.model = _ElmoModel(model_dir, vocab, cache_word_reprs=cache_word_reprs)

if layers=='mix':
self.layer_weights = nn.Parameter(torch.zeros(self.model.config['encoder']['n_layers']+1),
requires_grad=requires_grad)
self.gamma = nn.Parameter(torch.ones(1), requires_grad=requires_grad)
self._get_outputs = self._get_mixed_outputs
self._embed_size = self.model.config['encoder']['projection_dim'] * 2
else:
layers = list(map(int, layers.split(',')))
assert len(layers) > 0, "Must choose one output"
for layer in layers:
assert 0 <= layer <= 2, "Layer index should be in range [0, 2]."
self.layers = layers
self._get_outputs = self._get_layer_outputs
self._embed_size = len(self.layers) * self.model.config['encoder']['projection_dim'] * 2

self.requires_grad = requires_grad
self._embed_size = len(self.layers) * self.model.config['encoder']['projection_dim'] * 2

def _get_mixed_outputs(self, outputs):
# outputs: num_layers x batch_size x max_len x hidden_size
# return: batch_size x max_len x hidden_size
weights = F.softmax(self.layer_weights+1/len(outputs), dim=0).to(outputs)
outputs = torch.einsum('l,lbij->bij', weights, outputs)
return self.gamma.to(outputs)*outputs

def set_mix_weights_requires_grad(self, flag=True):
"""
当初始化ElmoEmbedding时layers被设置为mix时,可以通过调用该方法设置mix weights是否可训练。如果layers不是mix,调用
该方法没有用。
:param bool flag: 混合不同层表示的结果是否可以训练。
:return:
"""
if hasattr(self, 'layer_weights'):
self.layer_weights.requires_grad = flag
self.gamma.requires_grad = flag

def _get_layer_outputs(self, outputs):
if len(self.layers) == 1:
outputs = outputs[self.layers[0]]
else:
outputs = torch.cat(tuple([*outputs[self.layers]]), dim=-1)

return outputs


def forward(self, words: torch.LongTensor):
"""
@@ -480,15 +549,12 @@ class ElmoEmbedding(ContextualEmbedding):
if outputs is not None:
return outputs
outputs = self.model(words)
if len(self.layers) == 1:
outputs = outputs[self.layers[0]]
else:
outputs = torch.cat([*outputs[self.layers]], dim=-1)

return outputs
return self._get_outputs(outputs)


def _delete_model_weights(self):
del self.layers, self.model
for name in ['layers', 'model', 'layer_weights', 'gamma']:
if hasattr(self, name):
delattr(self, name)


@property
def requires_grad(self):
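A minimal usage sketch of the new `layers='mix'` mode (class and method names taken from the code above; the `en` ELMo weights are assumed to be downloadable through fastNLP's cache):

```
from fastNLP.core.vocabulary import Vocabulary
from fastNLP.modules.encoder.embedding import ElmoEmbedding

vocab = Vocabulary()
vocab.add_word_lst("the quick brown fox".split())
vocab.build_vocab()

# scalar-mixed representation over all ELMo layers, backbone frozen
embed = ElmoEmbedding(vocab, model_dir_or_name='en', layers='mix', requires_grad=False)
# make only the mixing weights (and gamma) trainable
embed.set_mix_weights_requires_grad(True)
```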
@@ -892,10 +958,11 @@ class StackEmbedding(TokenEmbedding):
def __init__(self, embeds: List[TokenEmbedding]):
vocabs = []
for embed in embeds:
vocabs.append(embed.get_word_vocab())
if hasattr(embed, 'get_word_vocab'):
vocabs.append(embed.get_word_vocab())
_vocab = vocabs[0]
for vocab in vocabs[1:]:
assert vocab == _vocab, "All embeddings should use the same word vocabulary."
assert vocab == _vocab, "All embeddings in StackEmbedding should use the same word vocabulary."

super(StackEmbedding, self).__init__(_vocab)
assert isinstance(embeds, list)
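And a sketch of stacking embeddings over one shared vocabulary, using only classes defined in this file (the vocabulary contents and model names are illustrative):

```
from fastNLP.core.vocabulary import Vocabulary
from fastNLP.modules.encoder.embedding import StaticEmbedding, ElmoEmbedding, StackEmbedding

vocab = Vocabulary()
vocab.add_word_lst("a shared word vocabulary".split())
vocab.build_vocab()

static = StaticEmbedding(vocab, model_dir_or_name='en-glove-6b-50')
elmo = ElmoEmbedding(vocab, model_dir_or_name='en', layers='2')
# both embeddings were built from the same vocab, so the assertion above passes
stack = StackEmbedding([static, elmo])
```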


reproduction/CNN-sentence_classification/.gitignore (+0 -110)

@@ -1,110 +0,0 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache

#custom
GoogleNews-vectors-negative300.bin/
GoogleNews-vectors-negative300.bin.gz
models/
*.swp

reproduction/CNN-sentence_classification/README.md (+0 -77)

@@ -1,77 +0,0 @@
## Introduction
This is the implementation of [Convolutional Neural Networks for Sentence Classification](https://arxiv.org/abs/1408.5882) paper in PyTorch.
* MRDataset, non-static model (word2vec trained by Mikolov et al. (2013) on 100 billion words of Google News)
* It can be run in both CPU and GPU
* The best accuracy is 82.61%, which is better than 81.5% in the paper
(by Jingyuan Liu @Fudan University; Email:(fdjingyuan@outlook.com) Welcome to discussion!)

## Requirement
* python 3.6
* pytorch > 0.1
* numpy
* gensim

## Run
STEP 1
Install packages like gensim (the other required packages are installed the same way)
```
pip install gensim
```

STEP 2
install MRdataset and word2vec resources
* MRdataset: you can download the dataset in (https://www.cs.cornell.edu/people/pabo/movie-review-data/rt-polaritydata.tar.gz)
* word2vec: you can download the file in (https://drive.google.com/file/d/0B7XkCwpI5KDYNlNUTTlSS21pQmM/edit)

Since this file is more than 1.5 GB, it is not included in the repository. If you download the file, please remember to modify the path in the function word_embeddings(path = './GoogleNews-vectors-negative300.bin/').


STEP 3
train the model
```
python train.py
```
you will get the information printed in the screen, like
```
Epoch [1/20], Iter [100/192] Loss: 0.7008
Test Accuracy: 71.869159 %
Epoch [2/20], Iter [100/192] Loss: 0.5957
Test Accuracy: 75.700935 %
Epoch [3/20], Iter [100/192] Loss: 0.4934
Test Accuracy: 78.130841 %

......
Epoch [20/20], Iter [100/192] Loss: 0.0364
Test Accuracy: 81.495327 %
Best Accuracy: 82.616822 %
Best Model: models/cnn.pkl
```

## Hyperparameters
According to the paper and experiment, I set:

|Epoch|Kernel Size|dropout|learning rate|batch size|
|---|---|---|---|---|
|20|\(h,300,100\)|0.5|0.0001|50|

h = [3,4,5]
If the accuracy does not improve, the learning rate is multiplied by 0.8.

## Result
I just tried one dataset : MR. (Other 6 dataset in paper SST-1, SST-2, TREC, CR, MPQA)
There are four models in paper: CNN-rand, CNN-static, CNN-non-static, CNN-multichannel.
I have tried CNN-non-static:A model with pre-trained vectors from word2vec.
All words—including the unknown ones that are randomly initialized and the pretrained vectors are fine-tuned for each task
(which has almost the best performance and is the most difficult to implement among the four models)

|Dataset|Class Size|Best Result|Kim's Paper Result|
|---|---|---|---|
|MR|2|82.617%(CNN-non-static)|81.5%(CNN-nonstatic)|



## Reference
* [Convolutional Neural Networks for Sentence Classification](https://arxiv.org/abs/1408.5882)
* https://github.com/Shawn1993/cnn-text-classification-pytorch
* https://github.com/junwang4/CNN-sentence-classification-pytorch-2017/blob/master/utils.py


reproduction/CNN-sentence_classification/__init__.py (+0 -0)


reproduction/CNN-sentence_classification/dataset.py (+0 -136)

@@ -1,136 +0,0 @@
import codecs
import random
import re

import gensim
import numpy as np
from gensim import corpora
from torch.utils.data import Dataset


def clean_str(string):
"""
Tokenization/string cleaning for all datasets except for SST.
Original taken from https://github.com/yoonkim/CNN_sentence/blob/master/process_data.py
"""
string = re.sub(r"[^A-Za-z0-9(),!?\'\`]", " ", string)
string = re.sub(r"\'s", " \'s", string)
string = re.sub(r"\'ve", " \'ve", string)
string = re.sub(r"n\'t", " n\'t", string)
string = re.sub(r"\'re", " \'re", string)
string = re.sub(r"\'d", " \'d", string)
string = re.sub(r"\'ll", " \'ll", string)
string = re.sub(r",", " , ", string)
string = re.sub(r"!", " ! ", string)
string = re.sub(r"\(", " \( ", string)
string = re.sub(r"\)", " \) ", string)
string = re.sub(r"\?", " \? ", string)
string = re.sub(r"\s{2,}", " ", string)
return string.strip()


def pad_sentences(sentence, padding_word=" <PAD/>"):
sequence_length = 64
sent = sentence.split()
padded_sentence = sentence + padding_word * (sequence_length - len(sent))
return padded_sentence


# data loader
class MRDataset(Dataset):
def __init__(self):

# load positive and negative sentenses from files
with codecs.open("./rt-polaritydata/rt-polarity.pos", encoding='ISO-8859-1') as f:
positive_examples = list(f.readlines())
with codecs.open("./rt-polaritydata/rt-polarity.neg", encoding='ISO-8859-1') as f:
negative_examples = list(f.readlines())
# s.strip: clear "\n"; clear_str; pad
positive_examples = [pad_sentences(clean_str(s.strip())) for s in positive_examples]
negative_examples = [pad_sentences(clean_str(s.strip())) for s in negative_examples]
self.examples = positive_examples + negative_examples
self.sentences_texts = [sample.split() for sample in self.examples]

# word dictionary
dictionary = corpora.Dictionary(self.sentences_texts)
self.word2id_dict = dictionary.token2id # transform to dict, like {"human":0, "a":1,...}

# set lables: postive is 1; negative is 0
positive_labels = [1 for _ in positive_examples]
negative_labels = [0 for _ in negative_examples]
self.lables = positive_labels + negative_labels
examples_lables = list(zip(self.examples, self.lables))
random.shuffle(examples_lables)
self.MRDataset_frame = examples_lables

# transform word to id
self.MRDataset_wordid = \
[(
np.array([self.word2id_dict[word] for word in sent[0].split()], dtype=np.int64),
sent[1]
) for sent in self.MRDataset_frame]

def word_embeddings(self, path="./GoogleNews-vectors-negative300.bin/GoogleNews-vectors-negative300.bin"):
# establish from google
model = gensim.models.KeyedVectors.load_word2vec_format(path, binary=True)

print('Please wait ... (it could take a while to load the file : {})'.format(path))
word_dict = self.word2id_dict
embedding_weights = np.random.uniform(-0.25, 0.25, (len(self.word2id_dict), 300))

for word in word_dict:
word_id = word_dict[word]
if word in model.wv.vocab:
embedding_weights[word_id, :] = model[word]
return embedding_weights

def __len__(self):
return len(self.MRDataset_frame)

def __getitem__(self, idx):

sample = self.MRDataset_wordid[idx]
return sample

def getsent(self, idx):

sample = self.MRDataset_wordid[idx][0]
return sample

def getlabel(self, idx):

label = self.MRDataset_wordid[idx][1]
return label

def word2id(self):

return self.word2id_dict

def id2word(self):

id2word_dict = dict([val, key] for key, val in self.word2id_dict.items())
return id2word_dict


class train_set(Dataset):

def __init__(self, samples):
self.train_frame = samples

def __len__(self):
return len(self.train_frame)

def __getitem__(self, idx):
return self.train_frame[idx]


class test_set(Dataset):

def __init__(self, samples):
self.test_frame = samples

def __len__(self):
return len(self.test_frame)

def __getitem__(self, idx):
return self.test_frame[idx]

reproduction/CNN-sentence_classification/model.py (+0 -42)

@@ -1,42 +0,0 @@
import torch
import torch.nn as nn
import torch.nn.functional as F


class CNN_text(nn.Module):
def __init__(self, kernel_h=[3, 4, 5], kernel_num=100, embed_num=1000, embed_dim=300, num_classes=2, dropout=0.5,
L2_constrain=3,
pretrained_embeddings=None):
super(CNN_text, self).__init__()

self.embedding = nn.Embedding(embed_num, embed_dim)
self.dropout = nn.Dropout(dropout)
if pretrained_embeddings is not None:
self.embedding.weight.data.copy_(torch.from_numpy(pretrained_embeddings))

# the network structure
# Conv2d: input- N,C,H,W output- (50,100,62,1)
self.conv1 = nn.ModuleList([nn.Conv2d(1, kernel_num, (K, embed_dim)) for K in kernel_h])
self.fc1 = nn.Linear(len(kernel_h) * kernel_num, num_classes)

def max_pooling(self, x):
x = F.relu(self.conv1(x)).squeeze(3) # N,C,L - (50,100,62)
x = F.max_pool1d(x, x.size(2)).squeeze(2)
# x.size(2)=62 squeeze: (50,100,1) -> (50,100)
return x

def forward(self, x):
x = self.embedding(x) # output: (N,H,W) = (50,64,300)
x = x.unsqueeze(1) # (N,C,H,W)
x = [F.relu(conv(x)).squeeze(3) for conv in self.conv1] # [N, C, H(50,100,62),(50,100,61),(50,100,60)]
x = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in x] # [N,C(50,100),(50,100),(50,100)]
x = torch.cat(x, 1)
x = self.dropout(x)
x = self.fc1(x)
return x


if __name__ == '__main__':
model = CNN_text(kernel_h=[1, 2, 3, 4], embed_num=3, embed_dim=2)
x = torch.LongTensor([[1, 2, 1, 2, 0]])
print(model(x))

reproduction/CNN-sentence_classification/train.py (+0 -92)

@@ -1,92 +0,0 @@
import os

import torch
import torch.nn as nn
from torch.autograd import Variable

from . import dataset as dst
from .model import CNN_text

# Hyper Parameters
batch_size = 50
learning_rate = 0.0001
num_epochs = 20
cuda = True

# split Dataset
dataset = dst.MRDataset()
length = len(dataset)

train_dataset = dataset[:int(0.9 * length)]
test_dataset = dataset[int(0.9 * length):]

train_dataset = dst.train_set(train_dataset)
test_dataset = dst.test_set(test_dataset)

# Data Loader
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
batch_size=batch_size,
shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
batch_size=batch_size,
shuffle=False)

# cnn

cnn = CNN_text(embed_num=len(dataset.word2id()), pretrained_embeddings=dataset.word_embeddings())
if cuda:
cnn.cuda()

# Loss and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(cnn.parameters(), lr=learning_rate)

# train and test
best_acc = None

for epoch in range(num_epochs):
# Train the Model
cnn.train()
for i, (sents, labels) in enumerate(train_loader):
sents = Variable(sents)
labels = Variable(labels)
if cuda:
sents = sents.cuda()
labels = labels.cuda()
optimizer.zero_grad()
outputs = cnn(sents)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()

if (i + 1) % 100 == 0:
print('Epoch [%d/%d], Iter [%d/%d] Loss: %.4f'
% (epoch + 1, num_epochs, i + 1, len(train_dataset) // batch_size, loss.data[0]))

# Test the Model
cnn.eval()
correct = 0
total = 0
for sents, labels in test_loader:
sents = Variable(sents)
if cuda:
sents = sents.cuda()
labels = labels.cuda()
outputs = cnn(sents)
_, predicted = torch.max(outputs.data, 1)
total += labels.size(0)
correct += (predicted == labels).sum()
acc = 100. * correct / total
print('Test Accuracy: %f %%' % (acc))

if best_acc is None or acc > best_acc:
best_acc = acc
if os.path.exists("models") is False:
os.makedirs("models")
torch.save(cnn.state_dict(), 'models/cnn.pkl')
else:
# decay the learning rate and push the new value into the optimizer
learning_rate = learning_rate * 0.8
for param_group in optimizer.param_groups:
param_group['lr'] = learning_rate

print("Best Accuracy: %f %%" % best_acc)
print("Best Model: models/cnn.pkl")

+ 0
- 21
reproduction/Char-aware_NLM/LICENSE View File

@@ -1,21 +0,0 @@
MIT License

Copyright (c) 2017

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

+ 0
- 40
reproduction/Char-aware_NLM/README.md View File

@@ -1,40 +0,0 @@

# PyTorch-Character-Aware-Neural-Language-Model

This is a PyTorch implementation of the character-aware neural language model proposed by Kim et al. in this [paper](https://arxiv.org/abs/1508.06615).

## Requirements
The code was run and tested with **Python 3.5.2** and **PyTorch 0.3.1**.

## HyperParameters
| HyperParam | value |
| ------ | :-------|
| LSTM batch size | 20 |
| LSTM sequence length | 35 |
| LSTM hidden units | 300 |
| epochs | 35 |
| initial learning rate | 1.0 |
| character embedding dimension | 15 |
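
These values map straight onto the `Options` namedtuple that `train.py` builds in its `__main__` block. A minimal sketch (field names taken from that script; `max_word_len` is a placeholder, since it is actually derived from the corpus in `preprocess()`):

```python
from collections import namedtuple

# field names follow the Options namedtuple defined in train.py
Options = namedtuple("Options", [
    "cnn_batch_size", "init_lr", "lstm_seq_len",
    "max_word_len", "lstm_batch_size", "epochs",
    "word_embed_dim"])

opt = Options(cnn_batch_size=20 * 35,  # lstm_batch_size * lstm_seq_len
              init_lr=1.0,             # initial learning rate
              lstm_seq_len=35,         # LSTM sequence length
              max_word_len=21,         # placeholder: computed from the data at preprocessing time
              lstm_batch_size=20,      # LSTM batch size
              epochs=35,
              word_embed_dim=300)      # LSTM hidden units
```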

## Demo
Train the model with split train/valid/test data.

`python train.py`

The trained model will be saved in `cache/net.pkl`.
Test the model:

`python test.py`

Best result on the test set:
PPL = 127.2163
cross-entropy loss = 4.8459

## Acknowledgement
This implementation borrowed ideas from

https://github.com/jarfo/kchar

https://github.com/cronos123/Character-Aware-Neural-Language-Models



+ 0
- 0
reproduction/Char-aware_NLM/__init__.py View File


+ 0
- 9
reproduction/Char-aware_NLM/main.py View File

@@ -1,9 +0,0 @@
PICKLE = "./save/"


def train():
pass


if __name__ == "__main__":
train()

+ 0
- 145
reproduction/Char-aware_NLM/model.py View File

@@ -1,145 +0,0 @@
import torch
import torch.nn as nn
import torch.nn.functional as F


class Highway(nn.Module):
"""Highway network"""

def __init__(self, input_size):
super(Highway, self).__init__()
self.fc1 = nn.Linear(input_size, input_size, bias=True)
self.fc2 = nn.Linear(input_size, input_size, bias=True)

def forward(self, x):
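# y = t * relu(fc2(x)) + (1 - t) * x, with transform gate t = sigmoid(fc1(x))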
t = F.sigmoid(self.fc1(x))
return torch.mul(t, F.relu(self.fc2(x))) + torch.mul(1 - t, x)


class charLM(nn.Module):
"""CNN + highway network + LSTM
# Input:
3D tensor with shape [num_seq, seq_len, max_word_len+2]
# Output:
2D tensor with shape [num_seq*seq_len, vocab_size]
# Arguments:
char_emb_dim: the size of each character embedding
word_emb_dim: the size of each word embedding
vocab_size: num of unique words
num_char: num of characters
use_gpu: True or False
"""

def __init__(self, char_emb_dim, word_emb_dim,
vocab_size, num_char, use_gpu):
super(charLM, self).__init__()
self.char_emb_dim = char_emb_dim
self.word_emb_dim = word_emb_dim
self.vocab_size = vocab_size

# character embedding layer
self.char_embed = nn.Embedding(num_char, char_emb_dim)

# convolutions of filters with different sizes
# use a ModuleList so the conv parameters are registered with the module (and seen by the optimizer)
self.convolutions = nn.ModuleList()

# list of tuples: (the number of filter, width)
self.filter_num_width = [(25, 1), (50, 2), (75, 3), (100, 4), (125, 5), (150, 6)]

for out_channel, filter_width in self.filter_num_width:
self.convolutions.append(
nn.Conv2d(
1, # in_channel
out_channel, # out_channel
kernel_size=(char_emb_dim, filter_width), # (height, width)
bias=True
)
)

self.highway_input_dim = sum([x for x, y in self.filter_num_width])
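# total number of feature maps fed to the highway layers: 25 + 50 + 75 + 100 + 125 + 150 = 525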

self.batch_norm = nn.BatchNorm1d(self.highway_input_dim, affine=False)

# highway net
self.highway1 = Highway(self.highway_input_dim)
self.highway2 = Highway(self.highway_input_dim)

# LSTM
self.lstm_num_layers = 2

self.lstm = nn.LSTM(input_size=self.highway_input_dim,
hidden_size=self.word_emb_dim,
num_layers=self.lstm_num_layers,
bias=True,
dropout=0.5,
batch_first=True)

# output layer
self.dropout = nn.Dropout(p=0.5)
self.linear = nn.Linear(self.word_emb_dim, self.vocab_size)

if use_gpu is True:
for x in range(len(self.convolutions)):
self.convolutions[x] = self.convolutions[x].cuda()
self.highway1 = self.highway1.cuda()
self.highway2 = self.highway2.cuda()
self.lstm = self.lstm.cuda()
self.dropout = self.dropout.cuda()
self.char_embed = self.char_embed.cuda()
self.linear = self.linear.cuda()
self.batch_norm = self.batch_norm.cuda()

def forward(self, x, hidden):
# Input: Variable of Tensor with shape [num_seq, seq_len, max_word_len+2]
# Return: Variable of Tensor with shape [num_words, len(word_dict)]
lstm_batch_size = x.size()[0]
lstm_seq_len = x.size()[1]

x = x.contiguous().view(-1, x.size()[2])
# [num_seq*seq_len, max_word_len+2]

x = self.char_embed(x)
# [num_seq*seq_len, max_word_len+2, char_emb_dim]

x = torch.transpose(x.view(x.size()[0], 1, x.size()[1], -1), 2, 3)
# [num_seq*seq_len, 1, max_word_len+2, char_emb_dim]

x = self.conv_layers(x)
# [num_seq*seq_len, total_num_filters]

x = self.batch_norm(x)
# [num_seq*seq_len, total_num_filters]

x = self.highway1(x)
x = self.highway2(x)
# [num_seq*seq_len, total_num_filters]

x = x.contiguous().view(lstm_batch_size, lstm_seq_len, -1)
# [num_seq, seq_len, total_num_filters]

x, hidden = self.lstm(x, hidden)
# [seq_len, num_seq, hidden_size]

x = self.dropout(x)
# [seq_len, num_seq, hidden_size]

x = x.contiguous().view(lstm_batch_size * lstm_seq_len, -1)
# [num_seq*seq_len, hidden_size]

x = self.linear(x)
# [num_seq*seq_len, vocab_size]
return x, hidden

def conv_layers(self, x):
chosen_list = list()
for conv in self.convolutions:
feature_map = F.tanh(conv(x))
# (batch_size, out_channel, 1, max_word_len-width+1)
chosen = torch.max(feature_map, 3)[0]
# (batch_size, out_channel, 1)
chosen = chosen.squeeze()
# (batch_size, out_channel)
chosen_list.append(chosen)

# (batch_size, total_num_filters)
return torch.cat(chosen_list, 1)

+ 0
- 117
reproduction/Char-aware_NLM/test.py View File

@@ -1,117 +0,0 @@
import os
from collections import namedtuple

import numpy as np
import torch
import torch.nn as nn
from torch.autograd import Variable
from utilities import *


def to_var(x):
if torch.cuda.is_available():
x = x.cuda()
return Variable(x)


def test(net, data, opt):
net.eval()

test_input = torch.from_numpy(data.test_input)
test_label = torch.from_numpy(data.test_label)

num_seq = test_input.size()[0] // opt.lstm_seq_len
test_input = test_input[:num_seq * opt.lstm_seq_len, :]
# [num_seq, seq_len, max_word_len+2]
test_input = test_input.view(-1, opt.lstm_seq_len, opt.max_word_len + 2)

criterion = nn.CrossEntropyLoss()

loss_list = []
num_hits = 0
total = 0
iterations = test_input.size()[0] // opt.lstm_batch_size
test_generator = batch_generator(test_input, opt.lstm_batch_size)
label_generator = batch_generator(test_label, opt.lstm_batch_size * opt.lstm_seq_len)

hidden = (to_var(torch.zeros(2, opt.lstm_batch_size, opt.word_embed_dim)),
to_var(torch.zeros(2, opt.lstm_batch_size, opt.word_embed_dim)))

add_loss = 0.0
for t in range(iterations):
batch_input = test_generator.__next__()
batch_label = label_generator.__next__()

net.zero_grad()
hidden = [state.detach() for state in hidden]
test_output, hidden = net(to_var(batch_input), hidden)

test_loss = criterion(test_output, to_var(batch_label)).data
loss_list.append(test_loss)
add_loss += test_loss

print("Test Loss={0:.4f}".format(float(add_loss) / iterations))
print("Test PPL={0:.4f}".format(float(np.exp(add_loss / iterations))))


#############################################################

if __name__ == "__main__":

word_embed_dim = 300
char_embedding_dim = 15

if os.path.exists("cache/prep.pt") is False:
print("Cannot find prep.pt")

objects = torch.load("cache/prep.pt")

word_dict = objects["word_dict"]
char_dict = objects["char_dict"]
reverse_word_dict = objects["reverse_word_dict"]
max_word_len = objects["max_word_len"]
num_words = len(word_dict)

print("word/char dictionary built. Start making inputs.")

if os.path.exists("cache/data_sets.pt") is False:

test_text = read_data("./test.txt")
test_set = np.array(text2vec(test_text, char_dict, max_word_len))

# Labels are next-word index in word_dict with the same length as inputs
test_label = np.array([word_dict[w] for w in test_text[1:]] + [word_dict[test_text[-1]]])

category = {"test": test_set, "tlabel": test_label}
torch.save(category, "cache/data_sets.pt")
else:
data_sets = torch.load("cache/data_sets.pt")
test_set = data_sets["test"]
test_label = data_sets["tlabel"]
train_set = data_sets["tdata"]
train_label = data_sets["trlabel"]

DataTuple = namedtuple("DataTuple", "test_input test_label train_input train_label ")
data = DataTuple(test_input=test_set,
test_label=test_label, train_label=train_label, train_input=train_set)

print("Loaded data sets. Start building network.")

USE_GPU = True
cnn_batch_size = 700
lstm_seq_len = 35
lstm_batch_size = 20

net = torch.load("cache/net.pkl")

Options = namedtuple("Options", ["cnn_batch_size", "lstm_seq_len",
"max_word_len", "lstm_batch_size", "word_embed_dim"])
opt = Options(cnn_batch_size=lstm_seq_len * lstm_batch_size,
lstm_seq_len=lstm_seq_len,
max_word_len=max_word_len,
lstm_batch_size=lstm_batch_size,
word_embed_dim=word_embed_dim)

print("Network built. Start testing.")

test(net, data, opt)

+ 0
- 320
reproduction/Char-aware_NLM/test.txt View File

@@ -1,320 +0,0 @@
no it was n't black monday
but while the new york stock exchange did n't fall apart friday as the dow jones industrial average plunged N points most of it in the final hour it barely managed to stay this side of chaos
some circuit breakers installed after the october N crash failed their first test traders say unable to cool the selling panic in both stocks and futures
the N stock specialist firms on the big board floor the buyers and sellers of last resort who were criticized after the N crash once again could n't handle the selling pressure
big investment banks refused to step up to the plate to support the beleaguered floor traders by buying big blocks of stock traders say
heavy selling of standard & poor 's 500-stock index futures in chicago <unk> beat stocks downward
seven big board stocks ual amr bankamerica walt disney capital cities\/abc philip morris and pacific telesis group stopped trading and never resumed
the <unk> has already begun
the equity market was <unk>
once again the specialists were not able to handle the imbalances on the floor of the new york stock exchange said christopher <unk> senior vice president at <unk> securities corp
<unk> james <unk> chairman of specialists henderson brothers inc. it is easy to say the specialist is n't doing his job
when the dollar is in a <unk> even central banks ca n't stop it
speculators are calling for a degree of liquidity that is not there in the market
many money managers and some traders had already left their offices early friday afternoon on a warm autumn day because the stock market was so quiet
then in a <unk> plunge the dow jones industrials in barely an hour surrendered about a third of their gains this year <unk> up a 190.58-point or N N loss on the day in <unk> trading volume
<unk> trading accelerated to N million shares a record for the big board
at the end of the day N million shares were traded
the dow jones industrials closed at N
the dow 's decline was second in point terms only to the <unk> black monday crash that occurred oct. N N
in percentage terms however the dow 's dive was the <unk> ever and the sharpest since the market fell N or N N a week after black monday
the dow fell N N on black monday
shares of ual the parent of united airlines were extremely active all day friday reacting to news and rumors about the proposed $ N billion buy-out of the airline by an <unk> group
wall street 's takeover-stock speculators or risk arbitragers had placed unusually large bets that a takeover would succeed and ual stock would rise
at N p.m. edt came the <unk> news the big board was <unk> trading in ual pending news
on the exchange floor as soon as ual stopped trading we <unk> for a panic said one top floor trader
several traders could be seen shaking their heads when the news <unk>
for weeks the market had been nervous about takeovers after campeau corp. 's cash crunch spurred concern about the prospects for future highly leveraged takeovers
and N minutes after the ual trading halt came news that the ual group could n't get financing for its bid
at this point the dow was down about N points
the market <unk>
arbitragers could n't dump their ual stock but they rid themselves of nearly every rumor stock they had
for example their selling caused trading halts to be declared in usair group which closed down N N to N N delta air lines which fell N N to N N and <unk> industries which sank N to N N
these stocks eventually reopened
but as panic spread speculators began to sell blue-chip stocks such as philip morris and international business machines to offset their losses
when trading was halted in philip morris the stock was trading at N down N N while ibm closed N N lower at N
selling <unk> because of waves of automatic stop-loss orders which are triggered by computer when prices fall to certain levels
most of the stock selling pressure came from wall street professionals including computer-guided program traders
traders said most of their major institutional investors on the other hand sat tight
now at N one of the market 's post-crash reforms took hold as the s&p N futures contract had plunged N points equivalent to around a <unk> drop in the dow industrials
under an agreement signed by the big board and the chicago mercantile exchange trading was temporarily halted in chicago
after the trading halt in the s&p N pit in chicago waves of selling continued to hit stocks themselves on the big board and specialists continued to <unk> prices down
as a result the link between the futures and stock markets <unk> apart
without the <unk> of stock-index futures the barometer of where traders think the overall stock market is headed many traders were afraid to trust stock prices quoted on the big board
the futures halt was even <unk> by big board floor traders
it <unk> things up said one major specialist
this confusion effectively halted one form of program trading stock index arbitrage that closely links the futures and stock markets and has been blamed by some for the market 's big swings
in a stock-index arbitrage sell program traders buy or sell big baskets of stocks and offset the trade in futures to lock in a price difference
when the airline information came through it <unk> every model we had for the marketplace said a managing director at one of the largest program-trading firms
we did n't even get a chance to do the programs we wanted to do
but stocks kept falling
the dow industrials were down N points at N p.m. before the <unk> halt
at N p.m. at the end of the cooling off period the average was down N points
meanwhile during the the s&p trading halt s&p futures sell orders began <unk> up while stocks in new york kept falling sharply
big board chairman john j. phelan said yesterday the circuit breaker worked well <unk>
i just think it 's <unk> at this point to get into a debate if index arbitrage would have helped or hurt things
under another post-crash system big board president richard <unk> mr. phelan was flying to <unk> as the market was falling was talking on an <unk> hot line to the other exchanges the securities and exchange commission and the federal reserve board
he <unk> out at a high-tech <unk> center on the floor of the big board where he could watch <unk> on prices and pending stock orders
at about N p.m. edt s&p futures resumed trading and for a brief time the futures and stock markets started to come back in line
buyers stepped in to the futures pit
but the <unk> of s&p futures sell orders weighed on the market and the link with stocks began to fray again
at about N the s&p market <unk> to still another limit of N points down and trading was locked again
futures traders say the s&p was <unk> that the dow could fall as much as N points
during this time small investors began ringing their brokers wondering whether another crash had begun
at prudential-bache securities inc. which is trying to cater to small investors some <unk> brokers thought this would be the final <unk>
that 's when george l. ball chairman of the prudential insurance co. of america unit took to the internal <unk> system to declare that the plunge was only mechanical
i have a <unk> that this particular decline today is something more <unk> about less
it would be my <unk> to advise clients not to sell to look for an opportunity to buy mr. ball told the brokers
at merrill lynch & co. the nation 's biggest brokerage firm a news release was prepared <unk> merrill lynch comments on market drop
the release cautioned that there are significant differences between the current environment and that of october N and that there are still attractive investment opportunities in the stock market
however jeffrey b. lane president of shearson lehman hutton inc. said that friday 's plunge is going to set back relations with customers because it <unk> the concern of volatility
and i think a lot of people will <unk> on program trading
it 's going to bring the debate right back to the <unk>
as the dow average ground to its final N loss friday the s&p pit stayed locked at its <unk> trading limit
jeffrey <unk> of program trader <unk> investment group said N s&p contracts were for sale on the close the equivalent of $ N million in stock
but there were no buyers
while friday 's debacle involved mainly professional traders rather than investors it left the market vulnerable to continued selling this morning traders said
stock-index futures contracts settled at much lower prices than indexes of the stock market itself
at those levels stocks are set up to be <unk> by index arbitragers who lock in profits by buying futures when futures prices fall and simultaneously sell off stocks
but nobody knows at what level the futures and stocks will open today
the <unk> between the stock and futures markets friday will undoubtedly cause renewed debate about whether wall street is properly prepared for another crash situation
the big board 's mr. <unk> said our <unk> performance was good
but the exchange will look at the performance of all specialists in all stocks
obviously we 'll take a close look at any situation in which we think the <unk> obligations were n't met he said
see related story fed ready to <unk> big funds wsj oct. N N
but specialists complain privately that just as in the N crash the <unk> firms big investment banks that support the market by trading big blocks of stock stayed on the sidelines during friday 's <unk>
mr. phelan said it will take another day or two to analyze who was buying and selling friday
concerning your sept. N page-one article on prince charles and the <unk> it 's a few hundred years since england has been a kingdom
it 's now the united kingdom of great britain and northern ireland <unk> <unk> northern ireland scotland and oh yes england too
just thought you 'd like to know
george <unk>
ports of call inc. reached agreements to sell its remaining seven aircraft to buyers that were n't disclosed
the agreements bring to a total of nine the number of planes the travel company has sold this year as part of a restructuring
the company said a portion of the $ N million realized from the sales will be used to repay its bank debt and other obligations resulting from the currently suspended <unk> operations
earlier the company announced it would sell its aging fleet of boeing co. <unk> because of increasing maintenance costs
a consortium of private investors operating as <unk> funding co. said it has made a $ N million cash bid for most of l.j. hooker corp. 's real-estate and <unk> holdings
the $ N million bid includes the assumption of an estimated $ N million in secured liabilities on those properties according to those making the bid
the group is led by jay <unk> chief executive officer of <unk> investment corp. in <unk> and a. boyd simpson chief executive of the atlanta-based simpson organization inc
mr. <unk> 's company specializes in commercial real-estate investment and claims to have $ N billion in assets mr. simpson is a developer and a former senior executive of l.j. hooker
the assets are good but they require more money and management than can be provided in l.j. hooker 's current situation said mr. simpson in an interview
hooker 's philosophy was to build and sell
we want to build and hold
l.j. hooker based in atlanta is operating with protection from its creditors under chapter N of the u.s. bankruptcy code
its parent company hooker corp. of sydney australia is currently being managed by a court-appointed provisional <unk>
sanford <unk> chief executive of l.j. hooker said yesterday in a statement that he has not yet seen the bid but that he would review it and bring it to the attention of the creditors committee
the $ N million bid is estimated by mr. simpson as representing N N of the value of all hooker real-estate holdings in the u.s.
not included in the bid are <unk> teller or b. altman & co. l.j. hooker 's department-store chains
the offer covers the massive N <unk> forest fair mall in cincinnati the N <unk> <unk> fashion mall in columbia s.c. and the N <unk> <unk> town center mall in <unk> <unk>
the <unk> mall opened sept. N with a <unk> 's <unk> as its <unk> the columbia mall is expected to open nov. N
other hooker properties included are a <unk> office tower in <unk> atlanta expected to be completed next february vacant land sites in florida and ohio l.j. hooker international the commercial real-estate brokerage company that once did business as merrill lynch commercial real estate plus other shopping centers
the consortium was put together by <unk> <unk> the london-based investment banking company that is a subsidiary of security pacific corp
we do n't anticipate any problems in raising the funding for the bid said <unk> campbell the head of mergers and acquisitions at <unk> <unk> in an interview
<unk> <unk> is acting as the consortium 's investment bankers
according to people familiar with the consortium the bid was <unk> project <unk> a reference to the film <unk> in which a <unk> played by actress <unk> <unk> is saved from a <unk> businessman by a police officer named john <unk>
l.j. hooker was a small <unk> company based in atlanta in N when mr. simpson was hired to push it into commercial development
the company grew modestly until N when a majority position in hooker corp. was acquired by australian developer george <unk> currently hooker 's chairman
mr. <unk> <unk> to launch an ambitious but <unk> $ N billion acquisition binge that included <unk> teller and b. altman & co. as well as majority positions in merksamer jewelers a sacramento chain <unk> inc. the <unk> retailer and <unk> inc. the southeast department-store chain
eventually mr. simpson and mr. <unk> had a falling out over the direction of the company and mr. simpson said he resigned in N
since then hooker corp. has sold its interest in the <unk> chain back to <unk> 's management and is currently attempting to sell the b. altman & co. chain
in addition robert <unk> chief executive of the <unk> chain is seeking funds to buy out the hooker interest in his company
the merksamer chain is currently being offered for sale by first boston corp
reached in <unk> mr. <unk> said that he believes the various hooker <unk> can become profitable with new management
these are n't mature assets but they have the potential to be so said mr. <unk>
managed properly and with a long-term outlook these can become investment-grade quality properties
canadian <unk> production totaled N metric tons in the week ended oct. N up N N from the preceding week 's total of N tons statistics canada a federal agency said
the week 's total was up N N from N tons a year earlier
the <unk> total was N tons up N N from N tons a year earlier
the treasury plans to raise $ N million in new cash thursday by selling about $ N billion of 52-week bills and <unk> $ N billion of maturing bills
the bills will be dated oct. N and will mature oct. N N
they will be available in minimum denominations of $ N
bids must be received by N p.m. edt thursday at the treasury or at federal reserve banks or branches
as small investors <unk> their mutual funds with phone calls over the weekend big fund managers said they have a strong defense against any wave of withdrawals cash
unlike the weekend before black monday the funds were n't <unk> with heavy withdrawal requests
and many fund managers have built up cash levels and say they will be buying stock this week
at fidelity investments the nation 's largest fund company telephone volume was up sharply but it was still at just half the level of the weekend preceding black monday in N
the boston firm said <unk> redemptions were running at less than one-third the level two years ago
as of yesterday afternoon the redemptions represented less than N N of the total cash position of about $ N billion of fidelity 's stock funds
two years ago there were massive redemption levels over the weekend and a lot of fear around said c. bruce <unk> who runs fidelity investments ' $ N billion <unk> fund
this feels more like a <unk> deal
people are n't <unk>
the test may come today
friday 's stock market sell-off came too late for many investors to act
some shareholders have held off until today because any fund exchanges made after friday 's close would take place at today 's closing prices
stock fund redemptions during the N debacle did n't begin to <unk> until after the market opened on black monday
but fund managers say they 're ready
many have raised cash levels which act as a buffer against steep market declines
mario <unk> for instance holds cash positions well above N N in several of his funds
windsor fund 's john <unk> and mutual series ' michael price said they had raised their cash levels to more than N N and N N respectively this year
even peter lynch manager of fidelity 's $ N billion <unk> fund the nation 's largest stock fund built up cash to N N or $ N million
one reason is that after two years of monthly net redemptions the fund posted net inflows of money from investors in august and september
i 've let the money build up mr. lynch said who added that he has had trouble finding stocks he likes
not all funds have raised cash levels of course
as a group stock funds held N N of assets in cash as of august the latest figures available from the investment company institute
that was modestly higher than the N N and N N levels in august and september of N
also persistent redemptions would force some fund managers to dump stocks to raise cash
but a strong level of investor withdrawals is much more unlikely this time around fund managers said
a major reason is that investors already have sharply scaled back their purchases of stock funds since black monday
<unk> sales have rebounded in recent months but monthly net purchases are still running at less than half N levels
there 's not nearly as much <unk> said john <unk> chairman of vanguard group inc. a big valley forge pa. fund company
many fund managers argue that now 's the time to buy
vincent <unk> manager of the $ N billion wellington fund added to his positions in bristol-myers squibb woolworth and dun & bradstreet friday
and today he 'll be looking to buy drug stocks like eli lilly pfizer and american home products whose dividend yields have been bolstered by stock declines
fidelity 's mr. lynch for his part snapped up southern co. shares friday after the stock got <unk>
if the market drops further today he said he 'll be buying blue chips such as bristol-myers and kellogg
if they <unk> stocks like that he said it presents an opportunity that is the kind of thing you dream about
major mutual-fund groups said phone calls were <unk> at twice the normal weekend pace yesterday
but most investors were seeking share prices and other information
trading volume was only modestly higher than normal
still fund groups are n't taking any chances
they hope to avoid the <unk> phone lines and other <unk> that <unk> some fund investors in october N
fidelity on saturday opened its N <unk> investor centers across the country
the centers normally are closed through the weekend
in addition east coast centers will open at N edt this morning instead of the normal N
t. rowe price associates inc. increased its staff of phone representatives to handle investor requests
the <unk> group noted that some investors moved money from stock funds to money-market funds
but most investors seemed to be in an information mode rather than in a transaction mode said steven <unk> a vice president
and vanguard among other groups said it was adding more phone representatives today to help investors get through
in an unusual move several funds moved to calm investors with <unk> on their <unk> phone lines
we view friday 's market decline as offering us a buying opportunity as long-term investors a recording at <unk> & co. funds said over the weekend
the <unk> group had a similar recording for investors
several fund managers expect a rough market this morning before prices stabilize
some early selling is likely to stem from investors and portfolio managers who want to lock in this year 's fat profits
stock funds have averaged a staggering gain of N N through september according to lipper analytical services inc
<unk> <unk> who runs shearson lehman hutton inc. 's $ N million sector analysis portfolio predicts the market will open down at least N points on technical factors and some panic selling
but she expects prices to rebound soon and is telling investors she expects the stock market wo n't decline more than N N to N N from recent highs
this is not a major crash she said
nevertheless ms. <unk> said she was <unk> with phone calls over the weekend from nervous shareholders
half of them are really scared and want to sell she said but i 'm trying to talk them out of it
she added if they all were bullish i 'd really be upset
the backdrop to friday 's slide was <unk> different from that of the october N crash fund managers argue
two years ago unlike today the dollar was weak interest rates were rising and the market was very <unk> they say
from the investors ' standpoint institutions and individuals learned a painful lesson by selling at the lows on black monday said stephen boesel manager of the $ N million t. rowe price growth and income fund
this time i do n't think we 'll get a panic reaction
newport corp. said it expects to report <unk> earnings of between N cents and N cents a share somewhat below analysts ' estimates of N cents to N cents
the maker of scientific instruments and laser parts said orders fell below expectations in recent months
a spokesman added that sales in the current quarter will about equal the <unk> quarter 's figure when newport reported net income of $ N million or N cents a share on $ N million in sales
<unk> from the strike by N machinists union members against boeing co. reached air carriers friday as america west airlines announced it will postpone its new service out of houston because of delays in receiving aircraft from the seattle jet maker
peter <unk> vice president for planning at the phoenix ariz. carrier said in an interview that the work <unk> at boeing now entering its 13th day has caused some turmoil in our scheduling and that more than N passengers who were booked to fly out of houston on america west would now be put on other airlines
mr. <unk> said boeing told america west that the N it was supposed to get this thursday would n't be delivered until nov. N the day after the airline had been planning to <unk> service at houston with four daily flights including three <unk> to phoenix and one <unk> to las vegas
now those routes are n't expected to begin until jan
boeing is also supposed to send to america west another N <unk> aircraft as well as a N by year 's end
those too are almost certain to arrive late
at this point no other america west flights including its new service at san antonio texas newark n.j. and <unk> calif. have been affected by the delays in boeing deliveries
nevertheless the company 's reaction <unk> the <unk> effect that a huge manufacturer such as boeing can have on other parts of the economy
it also is sure to help the machinists put added pressure on the company
i just do n't feel that the company can really stand or would want a prolonged <unk> tom baker president of machinists ' district N said in an interview yesterday
i do n't think their customers would like it very much
america west though is a smaller airline and therefore more affected by the delayed delivery of a single plane than many of its competitors would be
i figure that american and united probably have such a hard time counting all the planes in their fleets they might not miss one at all mr. <unk> said
indeed a random check friday did n't seem to indicate that the strike was having much of an effect on other airline operations
southwest airlines has a boeing N set for delivery at the end of this month and expects to have the plane on time
it 's so close to completion boeing 's told us there wo n't be a problem said a southwest spokesman
a spokesman for amr corp. said boeing has assured american airlines it will deliver a N on time later this month
american is preparing to take delivery of another N in early december and N more next year and is n't anticipating any changes in that timetable
in seattle a boeing spokesman explained that the company has been in constant communication with all of its customers and that it was impossible to predict what further disruptions might be triggered by the strike
meanwhile supervisors and <unk> employees have been trying to finish some N aircraft mostly N and N jumbo jets at the company 's <unk> wash. plant that were all but completed before the <unk>
as of friday four had been delivered and a fifth plane a N was supposed to be <unk> out over the weekend to air china
no date has yet been set to get back to the bargaining table
we want to make sure they know what they want before they come back said doug hammond the federal mediator who has been in contact with both sides since the strike began
the investment community for one has been anticipating a <unk> resolution
though boeing 's stock price was battered along with the rest of the market friday it actually has risen over the last two weeks on the strength of new orders
the market has taken two views that the labor situation will get settled in the short term and that things look very <unk> for boeing in the long term said howard <unk> an analyst at <unk> j. lawrence inc
boeing 's shares fell $ N friday to close at $ N in composite trading on the new york stock exchange
but mr. baker said he thinks the earliest a pact could be struck would be the end of this month <unk> that the company and union may resume negotiations as early as this week
still he said it 's possible that the strike could last considerably longer
i would n't expect an immediate resolution to anything
last week boeing chairman frank <unk> sent striking workers a letter saying that to my knowledge boeing 's offer represents the best overall three-year contract of any major u.s. industrial firm in recent history
but mr. baker called the letter and the company 's offer of a N N wage increase over the life of the pact plus bonuses very weak
he added that the company <unk> the union 's resolve and the workers ' <unk> with being forced to work many hours overtime
in separate developments talks have broken off between machinists representatives at lockheed corp. and the <unk> calif. aerospace company
the union is continuing to work through its expired contract however
it had planned a strike vote for next sunday but that has been pushed back indefinitely
united auto workers local N which represents N workers at boeing 's helicopter unit in delaware county pa. said it agreed to extend its contract on a <unk> basis with a <unk> notification to cancel while it continues bargaining
the accord expired yesterday
and boeing on friday said it received an order from <unk> <unk> for four model N <unk> <unk> valued at a total of about $ N million
the planes long range versions of the <unk> <unk> will be delivered with <unk> & <unk> <unk> engines
<unk> & <unk> is a unit of united technologies inc
<unk> <unk> is based in amsterdam
a boeing spokeswoman said a delivery date for the planes is still being worked out for a variety of reasons but not because of the strike
<unk> <unk> contributed to this article
<unk> ltd. said its utilities arm is considering building new electric power plants some valued at more than one billion canadian dollars us$ N million in great britain and elsewhere
<unk> <unk> <unk> 's senior vice president finance said its <unk> canadian utilities ltd. unit is reviewing <unk> projects in eastern canada and conventional electric power generating plants elsewhere including britain where the british government plans to allow limited competition in electrical generation from private-sector suppliers as part of its privatization program
the projects are big
they can be c$ N billion plus mr. <unk> said
but we would n't go into them alone and canadian utilities ' equity stake would be small he said
<unk> we 'd like to be the operator of the project and a modest equity investor
our long suit is our proven ability to operate power plants he said
mr. <unk> would n't offer <unk> regarding <unk> 's proposed british project but he said it would compete for customers with two huge british power generating companies that would be formed under the country 's plan to <unk> its massive water and electric utilities
britain 's government plans to raise about # N billion $ N billion from the sale of most of its giant water and electric utilities beginning next month
the planned electric utility sale scheduled for next year is alone expected to raise # N billion making it the world 's largest public offering
under terms of the plan independent <unk> would be able to compete for N N of customers until N and for another N N between N and N
canadian utilities had N revenue of c$ N billion mainly from its natural gas and electric utility businesses in alberta where the company serves about N customers
there seems to be a move around the world to <unk> the generation of electricity mr. <unk> said and canadian utilities hopes to capitalize on it
this is a real thrust on our utility side he said adding that canadian utilities is also <unk> projects in <unk> countries though he would be specific
canadian utilities is n't alone in exploring power generation opportunities in britain in anticipation of the privatization program
we 're certainly looking at some power generating projects in england said bruce <unk> vice president corporate strategy and corporate planning with enron corp. houston a big natural gas producer and pipeline operator
mr. <unk> said enron is considering building <unk> power plants in the u.k. capable of producing about N <unk> of power at a cost of about $ N million to $ N million
pse inc. said it expects to report third earnings of $ N million to $ N million or N cents to N cents a share
in the year-ago quarter the designer and operator of <unk> and waste heat recovery plants had net income of $ N or four cents a share on revenue of about $ N million
the company said the improvement is related to additional <unk> facilities that have been put into operation
<unk> <unk> flights are $ N to paris and $ N to london
in a centennial journal article oct. N the fares were reversed
diamond <unk> offshore partners said it had discovered gas offshore louisiana
the well <unk> at a rate of N million cubic feet of gas a day through a N <unk> opening at <unk> between N and N feet
diamond <unk> is the operator with a N N interest in the well
diamond <unk> offshore 's stock rose N cents friday to close at $ N in new york stock exchange composite trading
<unk> & broad home corp. said it formed a $ N million limited partnership subsidiary to buy land in california suitable for residential development
the partnership <unk> & broad land development venture limited partnership is a N joint venture with a trust created by institutional clients of <unk> advisory corp. a unit of <unk> financial corp. a real estate advisory management and development company with offices in chicago and beverly hills calif
<unk> & broad a home building company declined to identify the institutional investors
the land to be purchased by the joint venture has n't yet received <unk> and other approvals required for development and part of <unk> & broad 's job will be to obtain such approvals
the partnership runs the risk that it may not get the approvals for development but in return it can buy land at wholesale rather than retail prices which can result in sizable savings said bruce <unk> president and chief executive officer of <unk> & broad
there are really very few companies that have adequate capital to buy properties in a raw state for cash
typically developers option property and then once they get the administrative approvals they buy it said mr. <unk> adding that he believes the joint venture is the first of its kind
we usually operate in that conservative manner
by setting up the joint venture <unk> & broad can take the more aggressive approach of buying raw land while avoiding the negative <unk> to its own balance sheet mr. <unk> said
the company is putting up only N N of the capital although it is responsible for providing management planning and processing services to the joint venture
this is one of the best ways to assure a pipeline of land to fuel our growth at a minimum risk to our company mr. <unk> said
when the price of plastics took off in N quantum chemical corp. went along for the ride
the timing of quantum 's chief executive officer john <unk> <unk> appeared to be nothing less than inspired because he had just increased quantum 's reliance on plastics
the company <unk> much of the chemical industry as annual profit grew <unk> in two years
mr. <unk> said of the boom it 's going to last a whole lot longer than anybody thinks
but now prices have <unk> and quantum 's profit is <unk>
some securities analysts are looking for no better than break-even results from the company for the third quarter compared with year-earlier profit of $ N million or $ N a share on sales of $ N million
the stock having lost nearly a quarter of its value since sept. N closed at $ N share down $ N in new york stock exchange composite trading friday
to a degree quantum represents the new times that have arrived for producers of the so-called commodity plastics that <unk> modern life
having just passed through one of the most profitable periods in their history these producers now see their prices eroding
pricing cycles to be sure are nothing new for plastics producers
and the financial decline of some looks steep only in comparison with the <unk> period that is just behind them
we were all wonderful heroes last year says an executive at one of quantum 's competitors
now we 're at the bottom of the <unk>
at quantum which is based in new york the trouble is magnified by the company 's heavy <unk> on plastics
once known as national <unk> & chemical corp. the company <unk> the wine and spirits business and <unk> more of its resources into plastics after mr. <unk> took the chief executive 's job in N
mr. <unk> N years old declined to be interviewed for this article but he has consistently argued that over the long haul across both the <unk> and the <unk> of the plastics market quantum will <unk> through its new direction
quantum 's lot is mostly tied to polyethylene <unk> used to make garbage bags milk <unk> <unk> toys and meat packaging among other items
in the u.s. polyethylene market quantum has claimed the largest share about N N
but its competitors including dow chemical co. union carbide corp. and several oil giants have much broader business interests and so are better <unk> against price swings
when the price of polyethylene moves a mere penny a pound quantum 's annual profit <unk> by about N cents a share provided no other <unk> are changing
in recent months the price of polyethylene even more than that of other commodity plastics has taken a dive
benchmark grades which still sold for as much as N cents a pound last spring have skidded to between N cents and N cents
meanwhile the price of <unk> the chemical building block of polyethylene has n't dropped nearly so fast
that <unk> <unk> quantum badly because its own plants cover only about half of its <unk> needs
by many accounts an early hint of a price rout in the making came at the start of this year
china which had been putting in huge orders for polyethylene abruptly halted them
<unk> that excess polyethylene would soon be <unk> around the world other buyers then bet that prices had peaked and so began to draw down inventories rather than order new product
kenneth mitchell director of dow 's polyethylene business says producers were surprised to learn how much inventories had swelled throughout the distribution chain as prices <unk> up
people were even <unk> bags he says
now producers hope prices have hit bottom
they recently announced increases of a few cents a pound to take effect in the next several weeks
no one knows however whether the new posted prices will stick once producers and customers start to <unk>
one <unk> is george <unk> a <unk> analyst at oppenheimer & co. and a bear on plastics stocks
noting others ' estimates of when price increases can be sustained he remarks some say october
some say november
i say N
he argues that efforts to firm up prices will be undermined by producers ' plans to expand production capacity
a quick turnaround is crucial to quantum because its cash requirements remain heavy
the company is trying to carry out a three-year $ N billion <unk> program started this year
at the same time its annual payments on long-term debt will more than double from a year ago to about $ N million largely because of debt taken on to pay a $ <unk> special dividend earlier this year
quantum described the payout at the time as a way for it to share the <unk> with its holders because its stock price was n't reflecting the huge profit increases
some analysts saw the payment as an effort also to <unk> takeover speculation
whether a cash crunch might eventually force the company to cut its quarterly dividend raised N N to N cents a share only a year ago has become a topic of intense speculation on wall street since mr. <unk> <unk> dividend questions in a sept. N meeting with analysts
some viewed his response that company directors review the dividend regularly as nothing more than the standard line from executives

+ 0
- 263
reproduction/Char-aware_NLM/train.py View File

@@ -1,263 +0,0 @@
import os
from collections import namedtuple

import numpy as np
import torch.optim as optim

from .model import charLM
from .test import test
from .utilities import *


def preprocess():
word_dict, char_dict = create_word_char_dict("charlm.txt", "train.txt", "test.txt")
num_words = len(word_dict)
num_char = len(char_dict)
char_dict["BOW"] = num_char + 1
char_dict["EOW"] = num_char + 2
char_dict["PAD"] = 0

# dict of (int, string)
reverse_word_dict = {value: key for key, value in word_dict.items()}
max_word_len = max([len(word) for word in word_dict])

objects = {
"word_dict": word_dict,
"char_dict": char_dict,
"reverse_word_dict": reverse_word_dict,
"max_word_len": max_word_len
}

torch.save(objects, "cache/prep.pt")
print("Preprocess done.")


def to_var(x):
if torch.cuda.is_available():
x = x.cuda()
return Variable(x)


def train(net, data, opt):
"""
:param net: the PyTorch model
:param data: numpy array
:param opt: named tuple
1. random seed
2. define local input
3. training settings: learning rate, loss, etc.
4. main loop epoch
5. batchify
6. validation
7. save models
"""
torch.manual_seed(1024)

train_input = torch.from_numpy(data.train_input)
train_label = torch.from_numpy(data.train_label)
valid_input = torch.from_numpy(data.valid_input)
valid_label = torch.from_numpy(data.valid_label)

# [num_seq, seq_len, max_word_len+2]
num_seq = train_input.size()[0] // opt.lstm_seq_len
train_input = train_input[:num_seq * opt.lstm_seq_len, :]
train_input = train_input.view(-1, opt.lstm_seq_len, opt.max_word_len + 2)

num_seq = valid_input.size()[0] // opt.lstm_seq_len
valid_input = valid_input[:num_seq * opt.lstm_seq_len, :]
valid_input = valid_input.view(-1, opt.lstm_seq_len, opt.max_word_len + 2)

num_epoch = opt.epochs
num_iter_per_epoch = train_input.size()[0] // opt.lstm_batch_size

learning_rate = opt.init_lr
old_PPL = 100000
best_PPL = 100000

# CrossEntropyLoss combines LogSoftmax and NLLLoss
criterion = nn.CrossEntropyLoss()

# word_emb_dim == hidden_size / num of hidden units
hidden = (to_var(torch.zeros(2, opt.lstm_batch_size, opt.word_embed_dim)),
to_var(torch.zeros(2, opt.lstm_batch_size, opt.word_embed_dim)))
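# h0 and c0 each have shape (lstm_num_layers=2, lstm_batch_size, word_embed_dim)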

for epoch in range(num_epoch):

################ Validation ####################
net.eval()
loss_batch = []
PPL_batch = []
iterations = valid_input.size()[0] // opt.lstm_batch_size

valid_generator = batch_generator(valid_input, opt.lstm_batch_size)
vlabel_generator = batch_generator(valid_label, opt.lstm_batch_size * opt.lstm_seq_len)

for t in range(iterations):
batch_input = valid_generator.__next__()
batch_label = vlabel_generator.__next__()

hidden = [state.detach() for state in hidden]
valid_output, hidden = net(to_var(batch_input), hidden)

length = valid_output.size()[0]

# [num_sample-1, len(word_dict)] vs [num_sample-1]
valid_loss = criterion(valid_output, to_var(batch_label))

PPL = torch.exp(valid_loss.data)

loss_batch.append(float(valid_loss))
PPL_batch.append(float(PPL))

PPL = np.mean(PPL_batch)
print("[epoch {}] valid PPL={}".format(epoch, PPL))
print("valid loss={}".format(np.mean(loss_batch)))
print("PPL decrease={}".format(float(old_PPL - PPL)))

# Preserve the best model so far
if best_PPL > PPL:
best_PPL = PPL
torch.save(net.state_dict(), "cache/models.pt")
torch.save(net, "cache/net.pkl")

# Adjust the learning rate
if float(old_PPL - PPL) <= 1.0:
learning_rate /= 2
print("halved lr:{}".format(learning_rate))

old_PPL = PPL

##################################################
#################### Training ####################
net.train()
optimizer = optim.SGD(net.parameters(),
lr=learning_rate,
momentum=0.85)

# split the first dim
input_generator = batch_generator(train_input, opt.lstm_batch_size)
label_generator = batch_generator(train_label, opt.lstm_batch_size * opt.lstm_seq_len)

for t in range(num_iter_per_epoch):
batch_input = input_generator.__next__()
batch_label = label_generator.__next__()

# detach hidden state of LSTM from last batch
hidden = [state.detach() for state in hidden]

output, hidden = net(to_var(batch_input), hidden)
# [num_word, vocab_size]

loss = criterion(output, to_var(batch_label))

net.zero_grad()
loss.backward()
torch.nn.utils.clip_grad_norm(net.parameters(), 5, norm_type=2)
optimizer.step()

if (t + 1) % 100 == 0:
print("[epoch {} step {}] train loss={}, Perplexity={}".format(epoch + 1,
t + 1, float(loss.data),
float(np.exp(loss.data))))

torch.save(net.state_dict(), "cache/models.pt")
print("Training finished.")


################################################################

if __name__ == "__main__":

word_embed_dim = 300
char_embedding_dim = 15

if os.path.exists("cache/prep.pt") is False:
preprocess()

objects = torch.load("cache/prep.pt")

word_dict = objects["word_dict"]
char_dict = objects["char_dict"]
reverse_word_dict = objects["reverse_word_dict"]
max_word_len = objects["max_word_len"]
num_words = len(word_dict)

print("word/char dictionary built. Start making inputs.")

if os.path.exists("cache/data_sets.pt") is False:
train_text = read_data("./train.txt")
valid_text = read_data("./charlm.txt")
test_text = read_data("./test.txt")

train_set = np.array(text2vec(train_text, char_dict, max_word_len))
valid_set = np.array(text2vec(valid_text, char_dict, max_word_len))
test_set = np.array(text2vec(test_text, char_dict, max_word_len))

# Labels are next-word index in word_dict with the same length as inputs
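# e.g. for tokens [w0, w1, w2] the labels are [id(w1), id(w2), id(w2)]: each word predicts the next, and the final word is paired with itself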
train_label = np.array([word_dict[w] for w in train_text[1:]] + [word_dict[train_text[-1]]])
valid_label = np.array([word_dict[w] for w in valid_text[1:]] + [word_dict[valid_text[-1]]])
test_label = np.array([word_dict[w] for w in test_text[1:]] + [word_dict[test_text[-1]]])

category = {"tdata": train_set, "vdata": valid_set, "test": test_set,
"trlabel": train_label, "vlabel": valid_label, "tlabel": test_label}
torch.save(category, "cache/data_sets.pt")
else:
data_sets = torch.load("cache/data_sets.pt")
train_set = data_sets["tdata"]
valid_set = data_sets["vdata"]
test_set = data_sets["test"]
train_label = data_sets["trlabel"]
valid_label = data_sets["vlabel"]
test_label = data_sets["tlabel"]

DataTuple = namedtuple("DataTuple",
"train_input train_label valid_input valid_label test_input test_label")
data = DataTuple(train_input=train_set,
train_label=train_label,
valid_input=valid_set,
valid_label=valid_label,
test_input=test_set,
test_label=test_label)

print("Loaded data sets. Start building network.")

USE_GPU = True
cnn_batch_size = 700
lstm_seq_len = 35
lstm_batch_size = 20
# cnn_batch_size == lstm_seq_len * lstm_batch_size

net = charLM(char_embedding_dim,
word_embed_dim,
num_words,
len(char_dict),
use_gpu=USE_GPU)

for param in net.parameters():
nn.init.uniform(param.data, -0.05, 0.05)

Options = namedtuple("Options", [
"cnn_batch_size", "init_lr", "lstm_seq_len",
"max_word_len", "lstm_batch_size", "epochs",
"word_embed_dim"])
opt = Options(cnn_batch_size=lstm_seq_len * lstm_batch_size,
init_lr=1.0,
lstm_seq_len=lstm_seq_len,
max_word_len=max_word_len,
lstm_batch_size=lstm_batch_size,
epochs=35,
word_embed_dim=word_embed_dim)

print("Network built. Start training.")

# You can stop training anytime by "ctrl+C"
try:
train(net, data, opt)
except KeyboardInterrupt:
print('-' * 89)
print('Exiting from training early')

torch.save(net, "cache/net.pkl")
print("save net")

test(net, data, opt)

+ 0
- 360
reproduction/Char-aware_NLM/train.txt View File

@@ -1,360 +0,0 @@
aer banknote berlitz calloway centrust cluett fromstein gitano guterman hydro-quebec ipo kia memotec mlx nahb punts rake regatta rubens sim snack-food ssangyong swapo wachter
pierre <unk> N years old will join the board as a nonexecutive director nov. N
mr. <unk> is chairman of <unk> n.v. the dutch publishing group
rudolph <unk> N years old and former chairman of consolidated gold fields plc was named a nonexecutive director of this british industrial conglomerate
a form of asbestos once used to make kent cigarette filters has caused a high percentage of cancer deaths among a group of workers exposed to it more than N years ago researchers reported
the asbestos fiber <unk> is unusually <unk> once it enters the <unk> with even brief exposures to it causing symptoms that show up decades later researchers said
<unk> inc. the unit of new york-based <unk> corp. that makes kent cigarettes stopped using <unk> in its <unk> cigarette filters in N
although preliminary findings were reported more than a year ago the latest results appear in today 's new england journal of medicine a forum likely to bring new attention to the problem
a <unk> <unk> said this is an old story
we 're talking about years ago before anyone heard of asbestos having any questionable properties
there is no asbestos in our products now
neither <unk> nor the researchers who studied the workers were aware of any research on smokers of the kent cigarettes
we have no useful information on whether users are at risk said james a. <unk> of boston 's <unk> cancer institute
dr. <unk> led a team of researchers from the national cancer institute and the medical schools of harvard university and boston university
the <unk> spokeswoman said asbestos was used in very modest amounts in making paper for the filters in the early 1950s and replaced with a different type of <unk> in N
from N to N N billion kent cigarettes with the filters were sold the company said
among N men who worked closely with the substance N have died more than three times the expected number
four of the five surviving workers have <unk> diseases including three with recently <unk> cancer
the total of N deaths from malignant <unk> lung cancer and <unk> was far higher than expected the researchers said
the <unk> rate is a striking finding among those of us who study <unk> diseases said dr. <unk>
the percentage of lung cancer deaths among the workers at the west <unk> mass. paper factory appears to be the highest for any asbestos workers studied in western industrialized countries he said
the plant which is owned by <unk> & <unk> co. was under contract with <unk> to make the cigarette filters
the finding probably will support those who argue that the u.s. should regulate the class of asbestos including <unk> more <unk> than the common kind of asbestos <unk> found in most schools and other buildings dr. <unk> said
the u.s. is one of the few industrialized nations that does n't have a higher standard of regulation for the smooth <unk> fibers such as <unk> that are classified as <unk> according to <unk> t. <unk> a professor of <unk> at the university of vermont college of medicine
more common <unk> fibers are <unk> and are more easily rejected by the body dr. <unk> explained
in july the environmental protection agency imposed a gradual ban on virtually all uses of asbestos
by N almost all remaining uses of <unk> asbestos will be outlawed
about N workers at a factory that made paper for the kent filters were exposed to asbestos in the 1950s
areas of the factory were particularly dusty where the <unk> was used
workers dumped large <unk> <unk> of the imported material into a huge <unk> poured in cotton and <unk> fibers and <unk> mixed the dry fibers in a process used to make filters
workers described clouds of blue dust that hung over parts of the factory even though <unk> fans <unk> the area
there 's no question that some of those workers and managers contracted <unk> diseases said <unk> phillips vice president of human resources for <unk> & <unk>
but you have to recognize that these events took place N years ago
it has no bearing on our work force today
yields on money-market mutual funds continued to slide amid signs that portfolio managers expect further declines in interest rates
the average seven-day compound yield of the N taxable funds tracked by <unk> 's money fund report eased a fraction of a percentage point to N N from N N for the week ended tuesday
compound yields assume reinvestment of dividends and that the current yield continues for a year
average maturity of the funds ' investments <unk> by a day to N days the longest since early august according to donoghue 's
longer maturities are thought to indicate declining interest rates because they permit portfolio managers to retain relatively higher rates for a longer period
shorter maturities are considered a sign of rising rates because portfolio managers can capture higher rates sooner
the average maturity for funds open only to institutions considered by some to be a stronger indicator because those managers watch the market closely reached a high point for the year N days
nevertheless said <unk> <unk> <unk> editor of money fund report yields may <unk> up again before they <unk> down because of recent rises in short-term interest rates
the yield on six-month treasury bills sold at monday 's auction for example rose to N N from N N
despite recent declines in yields investors continue to pour cash into money funds
assets of the N taxable funds grew by $ N billion during the latest week to $ N billion
typically money-fund yields beat comparable short-term investments because portfolio managers can vary maturities and go after the highest rates
the top money funds are currently yielding well over N N
dreyfus world-wide dollar the <unk> fund had a seven-day compound yield of N N during the latest week down from N N a week earlier
it invests heavily in dollar-denominated securities overseas and is currently <unk> management fees which boosts its yield
the average seven-day simple yield of the N funds was N N down from N N
the 30-day simple yield fell to an average N N from N N the 30-day compound yield slid to an average N N from N N
j.p. <unk> vice chairman of <unk> grace & co. which holds a N N interest in this <unk> company was elected a director
he succeeds <unk> d. <unk> formerly a <unk> grace vice chairman who resigned
<unk> grace holds three of grace energy 's seven board seats
pacific first financial corp. said shareholders approved its acquisition by royal <unk> ltd. of toronto for $ N a share or $ N million
the thrift holding company said it expects to obtain regulatory approval and complete the transaction by year-end
<unk> international inc. said its <unk> & <unk> unit completed the sale of its <unk> controls operations to <unk> s.p a. for $ N million
<unk> is an italian state-owned holding company with interests in the mechanical engineering industry
<unk> controls based in <unk> ohio makes computerized industrial controls systems
it employs N people and has annual revenue of about $ N million
the federal government suspended sales of u.s. savings bonds because congress has n't lifted the ceiling on government debt
until congress acts the government has n't any authority to issue new debt obligations of any kind the treasury said
the government 's borrowing authority dropped at midnight tuesday to $ N trillion from $ N trillion
legislation to lift the debt ceiling is <unk> in the fight over cutting capital-gains taxes
the house has voted to raise the ceiling to $ N trillion but the senate is n't expected to act until next week at the earliest
the treasury said the u.s. will default on nov. N if congress does n't act by then
clark j. <unk> was named senior vice president and general manager of this u.s. sales and marketing arm of japanese auto maker mazda motor corp
in the new position he will oversee mazda 's u.s. sales service parts and marketing operations
previously mr. <unk> N years old was general marketing manager of chrysler corp. 's chrysler division
he had been a sales and marketing executive with chrysler for N years
when it 's time for their <unk> <unk> the nation 's manufacturing <unk> typically jet off to the <unk> <unk> of resort towns like <unk> <unk> and hot springs
not this year
the national association of manufacturers settled on the <unk> capital of indianapolis for its fall board meeting
and the city decided to treat its guests more like royalty or rock stars than factory owners
the idea of course to prove to N corporate decision makers that the buckle on the <unk> belt is n't so <unk> after all that it 's a good place for a company to expand
on the receiving end of the message were officials from giants like du pont and <unk> along with lesser <unk> like <unk> steel and the valley queen <unk> factory
for <unk> the executives joined mayor william h. <unk> iii for an evening of the indianapolis <unk> <unk> and a guest <unk> victor <unk>
champagne and <unk> followed
the next morning with a police <unk> <unk> of executives and their wives <unk> to the indianapolis motor <unk> <unk> by traffic or red lights
the governor could n't make it so the <unk> governor welcomed the special guests
a buffet breakfast was held in the museum where food and drinks are banned to everyday visitors
then in the guests ' honor the <unk> <unk> out four drivers crews and even the official indianapolis N announcer for a <unk> exhibition race
after the race fortune N executives <unk> like <unk> over the cars and drivers
no <unk> the drivers pointed out they still had space on their machines for another sponsor 's name or two
back downtown the <unk> squeezed in a few meetings at the hotel before <unk> the buses again
this time it was for dinner and <unk> a block away
under the stars and <unk> of the <unk> indiana <unk> <unk> nine of the hottest chefs in town fed them indiana <unk> <unk> <unk> <unk> <unk> <unk> and <unk> <unk> with a <unk> <unk>
knowing a <unk> and free <unk> when they eat one the executives gave the chefs a standing <unk>
more than a few <unk> say the <unk> treatment <unk> them to return to a <unk> city for future meetings
but for now they 're looking forward to their winter meeting <unk> in february
south korea registered a trade deficit of $ N million in october reflecting the country 's economic <unk> according to government figures released wednesday
preliminary <unk> by the trade and industry ministry showed another trade deficit in october the fifth monthly setback this year casting a cloud on south korea 's <unk> economy
exports in october stood at $ N billion a mere N N increase from a year earlier while imports increased sharply to $ N billion up N N from last october
south korea 's economic boom which began in N stopped this year because of prolonged labor disputes trade conflicts and sluggish exports
government officials said exports at the end of the year would remain under a government target of $ N billion
despite the gloomy forecast south korea has recorded a trade surplus of $ N million so far this year
from january to october the nation 's accumulated exports increased N N from the same period last year to $ N billion
imports were at $ N billion up N N
newsweek trying to keep pace with rival time magazine announced new advertising rates for N and said it will introduce a new incentive plan for advertisers
the new ad plan from newsweek a unit of the washington post co. is the second incentive plan the magazine has offered advertisers in three years
plans that give advertisers discounts for maintaining or increasing ad spending have become permanent <unk> at the news <unk> and underscore the fierce competition between newsweek time warner inc. 's time magazine and <unk> b. <unk> 's u.s. news & world report
alan <unk> recently named newsweek president said newsweek 's ad rates would increase N N in january
a full <unk> page in newsweek will cost $ N
in mid-october time magazine lowered its guaranteed circulation rate base for N while not increasing ad page rates with a lower circulation base time 's ad rate will be effectively N N higher per subscriber a full page in time costs about $ N
u.s. news has yet to announce its N ad rates
newsweek said it will introduce the circulation credit plan which <unk> space credits to advertisers on renewal advertising
the magazine will reward with page bonuses advertisers who in N meet or exceed their N spending as long as they spent $ N in N and $ N in N
mr. <unk> said the plan is not an attempt to shore up a decline in ad pages in the first nine months of N newsweek 's ad pages totaled N a drop of N N from last year according to publishers information bureau
what matters is what advertisers are paying per page and in that department we are doing fine this fall said mr. <unk>
both newsweek and u.s. news have been gaining circulation in recent years without heavy use of electronic <unk> to subscribers such as telephones or watches
however none of the big three <unk> recorded circulation gains recently
according to audit bureau of <unk> time the largest <unk> had average circulation of N a decrease of N N
newsweek 's circulation for the first six months of N was N flat from the same period last year
u.s. news ' circulation in the same time was N down N N
new england electric system bowed out of the bidding for public service co. of new hampshire saying that the risks were too high and the potential <unk> too far in the future to justify a higher offer
the move leaves united illuminating co. and northeast utilities as the remaining outside bidders for ps of new hampshire which also has proposed an internal reorganization plan in chapter N bankruptcy proceedings under which it would remain an independent company
new england electric based in <unk> mass. had offered $ N billion to acquire ps of new hampshire well below the $ N billion value united illuminating places on its bid and the $ N billion northeast says its bid is worth
united illuminating is based in new haven conn. and northeast is based in hartford conn
ps of new hampshire <unk> n.h. values its internal reorganization plan at about $ N billion
john rowe president and chief executive officer of new england electric said the company 's return on equity could suffer if it made a higher bid and its forecasts related to ps of new hampshire such as growth in electricity demand and improved operating <unk> did n't come true
when we <unk> raising our bid the risks seemed substantial and persistent over the next five years and the rewards seemed a long way out
that got hard to take he added
mr. rowe also noted that political concerns also worried new england electric
no matter who owns ps of new hampshire after it emerges from bankruptcy proceedings its rates will be among the highest in the nation he said
that attracts attention
it was just another one of the risk factors that led to the company 's decision to withdraw from the bidding he added
wilbur ross jr. of rothschild inc. the financial adviser to the troubled company 's equity holders said the withdrawal of new england electric might speed up the reorganization process
the fact that new england proposed lower rate increases N N over seven years against around N N boosts proposed by the other two outside bidders complicated negotiations with state officials mr. ross asserted
now the field is less <unk> he added
separately the federal energy regulatory commission turned down for now a request by northeast seeking approval of its possible purchase of ps of new hampshire
northeast said it would <unk> its request and still hopes for an <unk> review by the ferc so that it could complete the purchase by next summer if its bid is the one approved by the bankruptcy court
ps of new hampshire shares closed yesterday at $ N off N cents in new york stock exchange composite trading
norman <unk> N years old and former president and chief operating officer of toys r us inc. and frederick <unk> jr. N chairman of <unk> banking corp. were elected directors of this consumer electronics and appliances retailing chain
they succeed daniel m. <unk> retired circuit city executive vice president and robert r. <unk> u.s. treasury undersecretary on the <unk> board
commonwealth edison co. was ordered to refund about $ N million to its current and former <unk> for illegal rates collected for cost overruns on a nuclear power plant
the refund was about $ N million more than previously ordered by the illinois commerce commission and trade groups said it may be the largest ever required of a state or local utility
state court judge richard curry ordered edison to make average refunds of about $ N to $ N each to edison customers who have received electric service since april N including about two million customers who have moved during that period
judge curry ordered the refunds to begin feb. N and said that he would n't <unk> any appeals or other attempts to block his order by commonwealth edison
the refund pool may not be held <unk> through another round of appeals judge curry said
commonwealth edison said it is already appealing the underlying commission order and is considering appealing judge curry 's order
the exact amount of the refund will be determined next year based on actual <unk> made until dec. N of this year
commonwealth edison said the ruling could force it to slash its N earnings by $ N a share
for N commonwealth edison reported earnings of $ N million or $ N a share
a commonwealth edison spokesman said that tracking down the two million customers whose addresses have changed during the past N N years would be an administrative nightmare
in new york stock exchange composite trading yesterday commonwealth edison closed at $ N down N cents
the $ N billion <unk> N plant near <unk> ill. was completed in N
in a disputed N ruling the commerce commission said commonwealth edison could raise its electricity rates by $ N million to pay for the plant
but state courts upheld a challenge by consumer groups to the commission 's rate increase and found the rates illegal
the illinois supreme court ordered the commission to audit commonwealth edison 's construction expenses and refund any <unk> expenses
the utility has been collecting for the plant 's construction cost from its N million customers subject to a refund since N
in august the commission ruled that between $ N million and $ N million of the plant 's construction cost was <unk> and should be <unk> plus interest
in his ruling judge curry added an additional $ N million to the commission 's calculations
last month judge curry set the interest rate on the refund at N N
commonwealth edison now faces an additional <unk> refund on its <unk> rate <unk> <unk> that the illinois appellate court has estimated at $ N million
and consumer groups hope that judge curry 's <unk> N order may set a precedent for a second nuclear rate case involving commonwealth edison 's <unk> N plant
commonwealth edison is seeking about $ N million in rate increases to pay for <unk> N
the commission is expected to rule on the <unk> N case by year end
last year commonwealth edison had to refund $ N million for poor performance of its <unk> i nuclear plant
japan 's domestic sales of cars trucks and buses in october rose N N from a year earlier to N units a record for the month the japan automobile dealers ' association said
the strong growth followed year-to-year increases of N N in august and N N in september
the monthly sales have been setting records every month since march
october sales compared with the previous month inched down N N
sales of passenger cars grew N N from a year earlier to N units
sales of medium-sized cars which benefited from price reductions arising from introduction of the consumption tax more than doubled to N units from N in october N
texas instruments japan ltd. a unit of texas instruments inc. said it opened a plant in south korea to manufacture control devices
the new plant located in <unk> about N miles from seoul will help meet increasing and diversifying demand for control products in south korea the company said
the plant will produce control devices used in motor vehicles and household appliances
the survival of spinoff cray computer corp. as a fledgling in the supercomputer business appears to depend heavily on the creativity and <unk> of its chairman and chief designer seymour cray
not only is development of the new company 's initial machine tied directly to mr. cray so is its balance sheet
documents filed with the securities and exchange commission on the pending spinoff disclosed that cray research inc. will withdraw the almost $ N million in financing it is providing the new firm if mr. cray leaves or if the <unk> project he heads is scrapped
the documents also said that although the <unk> mr. cray has been working on the project for more than six years the cray-3 machine is at least another year away from a fully operational prototype
moreover there have been no orders for the cray-3 so far though the company says it is talking with several prospects
while many of the risks were anticipated when <unk> cray research first announced the spinoff in may the <unk> it attached to the financing had n't been made public until yesterday
we did n't have much of a choice cray computer 's chief financial officer gregory <unk> said in an interview
the theory is that seymour is the chief designer of the cray-3 and without him it could not be completed
cray research did not want to fund a project that did not include seymour
the documents also said that cray computer anticipates <unk> perhaps another $ N million in financing beginning next september
but mr. <unk> called that a <unk> scenario
the filing on the details of the spinoff caused cray research stock to jump $ N yesterday to close at $ N in new york stock exchange composite trading
analysts noted yesterday that cray research 's decision to link its $ N million <unk> note to mr. cray 's presence will complicate a valuation of the new company
it has to be considered as an additional risk for the investor said gary p. <unk> of <unk> group inc. minneapolis
cray computer will be a concept stock he said
you either believe seymour can do it again or you do n't
besides the designer 's age other risk factors for mr. cray 's new company include the cray-3 's tricky <unk> chip technology
the sec documents describe those chips which are made of <unk> <unk> as being so fragile and minute they will require special <unk> handling equipment
in addition the cray-3 will contain N processors twice as many as the largest current supercomputer
cray computer also will face intense competition not only from cray research which has about N N of the world-wide supercomputer market and which is expected to roll out the <unk> machine a direct competitor with the cray-3 in N
the spinoff also will compete with international business machines corp. and japan 's big three hitachi ltd. nec corp. and fujitsu ltd
the new company said it believes there are fewer than N potential customers for <unk> priced between $ N million and $ N million presumably the cray-3 price range
under terms of the spinoff cray research stockholders are to receive one cray computer share for every two cray research shares they own in a distribution expected to occur in about two weeks
no price for the new shares has been set
instead the companies will leave it up to the marketplace to decide
cray computer has applied to trade on nasdaq
analysts calculate cray computer 's initial book value at about $ N a share
along with the note cray research is <unk> about $ N million in assets primarily those related to the cray-3 development which has been a drain on cray research 's earnings
<unk> balance sheets clearly show why cray research favored the spinoff
without the cray-3 research and development expenses the company would have been able to report a profit of $ N million for the first half of N rather than the $ N million it posted
on the other hand had it existed then cray computer would have incurred a $ N million loss
mr. cray who could n't be reached for comment will work for the new colorado springs colo. company as an independent contractor the arrangement he had with cray research
regarded as the father of the supercomputer mr. cray was paid $ N at cray research last year
at cray computer he will be paid $ N
besides messrs. cray and <unk> other senior management at the company includes neil <unk> N president and chief executive officer joseph m. <unk> N vice president engineering malcolm a. <unk> N vice president software and douglas r. <unk> N vice president hardware
all came from cray research
cray computer which currently employs N people said it expects a work force of N by the end of N
john r. stevens N years old was named senior executive vice president and chief operating officer both new positions
he will continue to report to donald <unk> president and chief executive officer
mr. stevens was executive vice president of this <unk> holding company
arthur a. hatch N was named executive vice president of the company
he was previously president of the company 's eastern edison co. unit
john d. <unk> N was named to succeed mr. hatch as president of eastern edison
previously he was vice president of eastern edison
robert p. <unk> N was named senior vice president of eastern utilities
he was previously vice president
the u.s. claiming some success in its trade <unk> removed south korea taiwan and saudi arabia from a list of countries it is closely watching for allegedly failing to honor u.s. patents <unk> and other <unk> rights
however five other countries china thailand india brazil and mexico will remain on that so-called priority watch list as a result of an interim review u.s. trade representative carla hills announced
under the new u.s. trade law those countries could face accelerated <unk> investigations and stiff trade sanctions if they do n't improve their protection of intellectual property by next spring
mrs. hills said many of the N countries that she placed under <unk> degrees of scrutiny have made genuine progress on this touchy issue
she said there is growing <unk> around the world that <unk> of <unk> rights <unk> all trading nations and particularly the creativity and <unk> of an <unk> country 's own citizens
u.s. trade negotiators argue that countries with inadequate <unk> for <unk> rights could be hurting themselves by discouraging their own scientists and authors and by <unk> u.s. high-technology firms from investing or marketing their best products there
mrs. hills <unk> south korea for creating an <unk> task force and special enforcement teams of police officers and prosecutors trained to pursue movie and book <unk>
seoul also has instituted effective <unk> procedures to aid these teams she said
taiwan has improved its standing with the u.s. by <unk> a <unk> copyright agreement <unk> its trademark law and introducing legislation to protect foreign movie producers from unauthorized <unk> of their films
that measure could <unk> taipei 's growing number of small <unk> <unk> to pay movie producers for showing their films
saudi arabia for its part has vowed to enact a copyright law compatible with international standards and to apply the law to computer software as well as to literary works mrs. hills said
these three countries are n't completely off the hook though
they will remain on a <unk> list that includes N other countries
those countries including japan italy canada greece and spain are still of some concern to the u.s. but are deemed to pose <unk> problems for american patent and copyright owners than those on the priority list
gary hoffman a washington lawyer specializing in <unk> cases said the threat of u.s. <unk> combined with a growing recognition that protecting intellectual property is in a country 's own interest prompted the improvements made by south korea taiwan and saudi arabia
what this tells us is that u.s. trade law is working he said
he said mexico could be one of the next countries to be removed from the priority list because of its efforts to craft a new patent law
mrs. hills said that the u.s. is still concerned about disturbing developments in turkey and continuing slow progress in malaysia
she did n't elaborate although earlier u.s. trade reports have complained of videocassette <unk> in malaysia and <unk> for u.s. pharmaceutical patents in turkey
the N trade act requires mrs. hills to issue another review of the performance of these countries by april N
so far mrs. hills has n't deemed any cases bad enough to merit an accelerated investigation under the so-called special N provision of the act
argentina said it will ask creditor banks to <unk> its foreign debt of $ N billion the <unk> in the developing world
the declaration by economy minister <unk> <unk> is believed to be the first time such an action has been called for by an <unk> official of such <unk>
the latin american nation has paid very little on its debt since early last year
argentina <unk> to reach a reduction of N N in the value of its external debt mr. <unk> said through his spokesman <unk> <unk>
mr. <unk> met in august with u.s. assistant treasury secretary david mulford
<unk> negotiator carlos <unk> was in washington and new york this week to meet with banks
mr. <unk> recently has said the government of president carlos <unk> who took office july N feels a significant reduction of principal and interest is the only way the debt problem may be solved
but he has not said before that the country wants half the debt <unk>
during its centennial year the wall street journal will report events of the past century that stand as milestones of american business history
three computers that changed the face of personal computing were launched in N
that year the apple ii commodore pet and tandy <unk> came to market
the computers were crude by today 's standards
apple ii owners for example had to use their television sets as screens and <unk> data on <unk>
but apple ii was a major advance from apple i which was built in a garage by stephen <unk> and steven jobs for <unk> such as the <unk> computer club
in addition the apple ii was an affordable $ N
crude as they were these early pcs triggered explosive product development in desktop models for the home and office
big mainframe computers for business had been around for years
but the new N pcs unlike earlier <unk> types such as the <unk> <unk> and <unk> had <unk> and could store about two pages of data in their memories
current pcs are more than N times faster and have memory capacity N times greater than their N counterparts
there were many pioneer pc <unk>
william gates and paul allen in N developed an early <unk> system for pcs and gates became an industry billionaire six years after ibm adapted one of these versions in N
alan f. <unk> currently chairman of seagate technology led the team that developed the disk drives for pcs
dennis <unk> and dale <unk> two atlanta engineers were <unk> of the internal <unk> that allow pcs to share data via the telephone
ibm the world leader in computers did n't offer its first pc until august N as many other companies entered the market
today pc shipments annually total some $ N billion world-wide
<unk> <unk> & co. an australian pharmaceuticals company said its <unk> inc. affiliate acquired <unk> inc. for $ N million
<unk> is a new <unk> pharmaceuticals concern that sells products under the <unk> label
<unk> said it owns N N of <unk> 's voting stock and has an agreement to acquire an additional N N
that stake together with its convertible preferred stock holdings gives <unk> the right to increase its interest to N N of <unk> 's voting stock
oil production from australia 's bass <unk> fields will be raised by N barrels a day to about N barrels with the launch of the <unk> field the first of five small fields scheduled to be brought into production before the end of N
esso australia ltd. a unit of new york-based exxon corp. and broken hill <unk> operate the fields in a joint venture
esso said the <unk> field started production tuesday
output will be gradually increased until it reaches about N barrels a day
the field has reserves of N million barrels
reserves for the five new fields total N million barrels
the <unk> and <unk> fields are expected to start producing early next year and the <unk> and <unk> fields later next year
esso said the fields were developed after the australian government decided in N to make the first N million barrels from new fields free of <unk> tax
<unk> <unk> corp. said it completed the $ N million sale of its southern optical subsidiary to a group led by the unit 's president thomas r. sloan and other managers
following the acquisition of <unk> <unk> by a buy-out group led by shearson lehman hutton earlier this year the maker of <unk> <unk> decided to <unk> itself of certain of its <unk> businesses
the sale of southern optical is a part of the program
the white house said president bush has approved duty-free treatment for imports of certain types of watches that are n't produced in significant quantities in the u.s. the virgin islands and other u.s. <unk>
the action came in response to a petition filed by <unk> inc. for changes in the u.s. <unk> system of preferences for imports from developing nations
previously watch imports were denied such duty-free treatment
<unk> had requested duty-free treatment for many types of watches covered by N different u.s. tariff <unk>
the white house said mr. bush decided to grant duty-free status for N categories but turned down such treatment for other types of watches because of the potential for material injury to watch producers located in the u.s. and the virgin islands
<unk> is a major u.s. producer and seller of watches including <unk> <unk> watches assembled in the philippines and other developing nations covered by the u.s. tariff preferences
u.s. trade officials said the philippines and thailand would be the main beneficiaries of the president 's action
imports of the types of watches that now will be eligible for duty-free treatment totaled about $ N million in N a relatively small share of the $ N billion in u.s. watch imports that year according to an aide to u.s. trade representative carla hills
magna international inc. 's chief financial officer james mcalpine resigned and its chairman frank <unk> is stepping in to help turn the <unk> manufacturer around the company said
mr. <unk> will direct an effort to reduce overhead and curb capital spending until a more satisfactory level of profit is achieved and maintained magna said
stephen <unk> currently vice president finance will succeed mr. mcalpine
an ambitious expansion has left magna with excess capacity and a heavy debt load as the automotive industry enters a downturn
the company has reported declines in operating profit in each of the past three years despite steady sales growth
magna recently cut its quarterly dividend in half and the company 's class a shares are <unk> far below their 52-week high of N canadian dollars us$ N
on the toronto stock exchange yesterday magna shares closed up N canadian cents to c$ N
mr. <unk> founder and controlling shareholder of magna resigned as chief executive officer last year to seek unsuccessfully a seat in canada 's parliament
analysts said mr. <unk> wants to resume a more influential role in running the company
they expect him to cut costs throughout the organization
the company said mr. <unk> will personally direct the restructuring <unk> by <unk> <unk> president and chief executive
neither they nor mr. mcalpine could be reached for comment
magna said mr. mcalpine resigned to pursue a consulting career with magna as one of his clients
lord <unk> <unk> chairman of english china <unk> plc was named a nonexecutive director of this british chemical company
japanese investors nearly <unk> bought up two new mortgage <unk> mutual funds totaling $ N million the u.s. federal national mortgage association said
the purchases show the strong interest of japanese investors in u.s. <unk> instruments fannie mae 's chairman david o. maxwell said at a news conference
he said more than N N of the funds were placed with japanese institutional investors
the rest went to investors from france and hong kong
earlier this year japanese investors snapped up a similar $ N million mortgage-backed securities mutual fund
that fund was put together by blackstone group a new york investment bank
the latest two funds were assembled jointly by goldman sachs & co. of the u.s. and japan 's daiwa securities co
the new seven-year funds one offering a fixed-rate return and the other with a floating-rate return linked to the london interbank offered rate offer two key advantages to japanese investors
first they are designed to eliminate the risk of prepayment mortgage-backed securities can be retired early if interest rates decline and such prepayment forces investors to <unk> their money at lower rates
second they channel monthly mortgage payments into semiannual payments reducing the administrative burden on investors
by addressing those problems mr. maxwell said the new funds have become extremely attractive to japanese and other investors outside the u.s.
such devices have boosted japanese investment in mortgage-backed securities to more than N N of the $ N billion in such instruments outstanding and their purchases are growing at a rapid rate
they also have become large purchasers of fannie mae 's corporate debt buying $ N billion in fannie mae bonds during the first nine months of the year or almost a <unk> of the total amount issued
james l. <unk> <unk> executive vice president was named a director of this oil concern expanding the board to N members
ltv corp. said a federal bankruptcy court judge agreed to extend until march N N the period in which the steel aerospace and energy products company has the exclusive right to file a reorganization plan
the company is operating under chapter N of the federal bankruptcy code giving it court protection from creditors ' lawsuits while it attempts to work out a plan to pay its debts
italian chemical giant montedison <unk> through its montedison acquisition n.v. indirect unit began its $ <unk> tender offer for all the common shares outstanding of erbamont n.v. a maker of pharmaceuticals incorporated in the netherlands
the offer advertised in today 's editions of the wall street journal is scheduled to expire at the end of november
montedison currently owns about N N of erbamont 's common shares outstanding
the offer is being launched <unk> to a previously announced agreement between the companies
japan 's reserves of gold convertible foreign currencies and special drawing rights fell by a hefty $ N billion in october to $ N billion the finance ministry said
the total marks the sixth consecutive monthly decline
the <unk> downturn reflects the intensity of bank of japan <unk> intervention since june when the u.s. currency temporarily surged above the N yen level
the announcement follows a sharper $ N billion decline in the country 's foreign reserves in september to $ N billion
pick a country any country
it 's the latest investment craze sweeping wall street a rash of new closed-end country funds those publicly traded portfolios that invest in stocks of a single foreign country
no fewer than N country funds have been launched or registered with regulators this year triple the level of all of N according to charles e. simon & co. a washington-based research firm
the turf recently has ranged from chile to <unk> to portugal
next week the philippine fund 's launch will be capped by a visit by philippine president <unk> aquino the first time a head of state has kicked off an issue at the big board here
the next province
anything 's possible how about the new guinea fund <unk> george foot a managing partner at <unk> management associates of <unk> mass
the recent explosion of country funds <unk> the closed-end fund mania of the 1920s mr. foot says when narrowly focused funds grew wildly popular
they fell into <unk> after the N crash
unlike traditional <unk> mutual funds most of these <unk> portfolios are the closed-end type issuing a fixed number of shares that trade publicly
the surge brings to nearly N the number of country funds that are or soon will be listed in new york or london
these funds now account for several billions of dollars in assets
people are looking to stake their claims now before the number of available nations runs out says michael porter an analyst at smith barney harris upham & co. new york
behind all the <unk> is some <unk> competition
as individual investors have turned away from the stock market over the years securities firms have scrambled to find new products that brokers find easy to sell
and the firms are stretching their <unk> far and wide to do it
financial planners often urge investors to diversify and to hold a <unk> of international securities
and many emerging markets have <unk> more mature markets such as the u.s. and japan
country funds offer an easy way to get a taste of foreign stocks without the hard research of seeking out individual companies
but it does n't take much to get burned
political and currency gyrations can <unk> the funds
another concern the funds ' share prices tend to swing more than the broader market
when the stock market dropped nearly N N oct. N for instance the mexico fund plunged about N N and the spain fund fell N N
and most country funds were clobbered more than most stocks after the N crash
what 's so wild about the funds ' frenzy right now is that many are trading at historically fat premiums to the value of their underlying portfolios
after trading at an average discount of more than N N in late N and part of last year country funds currently trade at an average premium of N N
the reason share prices of many of these funds this year have climbed much more sharply than the foreign stocks they hold
it 's probably worth paying a premium for funds that invest in markets that are partially closed to foreign investors such as south korea some specialists say
but some european funds recently have skyrocketed spain fund has surged to a startling N N premium
it has been targeted by japanese investors as a good long-term play tied to N 's european economic integration
and several new funds that are n't even fully invested yet have jumped to trade at big premiums
i 'm very alarmed to see these rich <unk> says smith barney 's mr. porter
the newly <unk> premiums reflect the increasingly global marketing of some country funds mr. porter suggests
unlike many u.s. investors those in asia or europe seeking <unk> exposure may be less <unk> to paying higher prices for country funds
there may be an international viewpoint cast on the funds listed here mr. porter says
nonetheless plenty of u.s. analysts and money managers are <unk> at the <unk> trading levels of some country funds
they argue that u.s. investors often can buy american depositary receipts on the big stocks in many funds these so-called adrs represent shares of foreign companies traded in the u.s.
that way investors can essentially buy the funds without paying the premium
for people who insist on jumping in now to buy the funds <unk> 's mr. foot says the only advice i have for these folks is that those who come to the party late had better be ready to leave quickly
the u.s. and soviet union are holding technical talks about possible repayment by moscow of $ N million in <unk> russian debts owed to the u.s. government the state department said

+ 0
- 82
reproduction/Char-aware_NLM/utilities.py View File

@@ -1,82 +0,0 @@
import re

import torch
import torch.nn.functional as F


def batch_generator(x, batch_size):
    # x: [num_words, in_channel, height, width]
    # partitions x into consecutive batches along dim 0; a trailing remainder
    # smaller than batch_size is dropped
    num_step = x.size()[0] // batch_size
    for t in range(num_step):
        yield x[t * batch_size:(t + 1) * batch_size]


def text2vec(words, char_dict, max_word_len):
    """ Convert each word to a list of char ids, padded to max_word_len and wrapped
    with BOW/EOW markers. Returns a list of list of int. """
    word_vec = []
    for word in words:
        vec = [char_dict[ch] for ch in word]
        if len(vec) < max_word_len:
            vec += [char_dict["PAD"] for _ in range(max_word_len - len(vec))]
        vec = [char_dict["BOW"]] + vec + [char_dict["EOW"]]
        word_vec.append(vec)
    return word_vec
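# Illustration (added, not part of the original file): with
# char_dict = {"PAD": 0, "BOW": 1, "EOW": 2, "a": 3, "b": 4} and max_word_len = 3,
# text2vec(["ab"], char_dict, 3) returns [[1, 3, 4, 0, 2]], i.e. BOW, 'a', 'b', PAD, EOW.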


def seq2vec(input_words, char_embedding, char_embedding_dim, char_table):
    """ Convert the input strings into character embeddings """
    # input_words == list of string
    # char_embedding == torch.nn.Embedding
    # char_embedding_dim == int
    # char_table == list of unique chars
    # Returns: tensor of shape [len(input_words), char_embedding_dim, max_word_len+2]
    # Note: char_embedding_lookup is assumed to be provided by another module of this
    # reproduction; it maps a word's characters to their embedding columns.
    max_word_len = max([len(word) for word in input_words])
    print("max_word_len={}".format(max_word_len))
    tensor_list = []

    start_column = torch.ones(char_embedding_dim, 1)
    end_column = torch.ones(char_embedding_dim, 1)

    for word in input_words:
        # convert the word into a [char_embedding_dim, len(word)] matrix of char embeddings
        word_encoding = char_embedding_lookup(word, char_embedding, char_table)
        # add start and end columns
        word_encoding = torch.cat([start_column, word_encoding, end_column], 1)
        # zero-pad on the right so every word spans max_word_len + 2 columns
        word_encoding = F.pad(word_encoding, (0, max_word_len - word_encoding.size()[1] + 2)).data
        # add a leading batch dimension
        word_encoding = word_encoding.unsqueeze(0)

        tensor_list.append(word_encoding)

    return torch.cat(tensor_list, 0)


def read_data(file_name):
    # Return: list of strings
    with open(file_name, 'r') as f:
        corpus = f.read().lower()
    corpus = re.sub(r"<unk>", "unk", corpus)
    return corpus.split()


def get_char_dict(vocabulary):
    # vocabulary == dict of (word, int)
    # Return: dict of (char, int), starting from 1
    char_dict = dict()
    count = 1
    for word in vocabulary:
        for ch in word:
            if ch not in char_dict:
                char_dict[ch] = count
                count += 1
    return char_dict
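# e.g. (illustrative) get_char_dict({"ab": 0, "ba": 1}) == {"a": 1, "b": 2}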


def create_word_char_dict(*file_name):
    text = []
    for file in file_name:
        text += read_data(file)
    word_dict = {word: ix for ix, word in enumerate(set(text))}
    char_dict = get_char_dict(word_dict)
    return word_dict, char_dict
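# A minimal usage sketch of the helpers above. The BOW/EOW/PAD ids and the file names
# are illustrative assumptions; the actual wiring lives in the training script of this
# reproduction.
if __name__ == "__main__":
    word_dict, char_dict = create_word_char_dict("train.txt", "valid.txt", "test.txt")
    n = len(char_dict)
    char_dict["BOW"], char_dict["EOW"], char_dict["PAD"] = n + 1, n + 2, 0
    max_word_len = max(len(w) for w in word_dict)
    train_vec = torch.LongTensor(text2vec(read_data("train.txt"), char_dict, max_word_len))
    for batch in batch_generator(train_vec, 700):
        pass  # each batch is a [700, max_word_len + 2] tensor of character ids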

+ 0
- 336
reproduction/Char-aware_NLM/valid.txt View File

@@ -1,336 +0,0 @@
consumers may want to move their telephones a little closer to the tv set
<unk> <unk> watching abc 's monday night football can now vote during <unk> for the greatest play in N years from among four or five <unk> <unk>
two weeks ago viewers of several nbc <unk> consumer segments started calling a N number for advice on various <unk> issues
and the new syndicated reality show hard copy records viewers ' opinions for possible airing on the next day 's show
interactive telephone technology has taken a new leap in <unk> and television programmers are racing to exploit the possibilities
eventually viewers may grow <unk> with the technology and <unk> the cost
but right now programmers are figuring that viewers who are busy dialing up a range of services may put down their <unk> control <unk> and stay <unk>
we 've been spending a lot of time in los angeles talking to tv production people says mike parks president of call interactive which supplied technology for both abc sports and nbc 's consumer minutes
with the competitiveness of the television market these days everyone is looking for a way to get viewers more excited
one of the leaders behind the expanded use of N numbers is call interactive a joint venture of giants american express co. and american telephone & telegraph co
formed in august the venture <unk> at&t 's newly expanded N service with N <unk> computers in american express 's omaha neb. service center
other long-distance carriers have also begun marketing enhanced N service and special consultants are <unk> up to exploit the new tool
blair entertainment a new york firm that advises tv stations and sells ads for them has just formed a subsidiary N blair to apply the technology to television
the use of N toll numbers has been expanding rapidly in recent years
for a while <unk> <unk> lines and services that <unk> children to dial and <unk> movie or music information earned the service a somewhat <unk> image but new legal restrictions are aimed at trimming excesses
the cost of a N call is set by the <unk> abc sports for example with the cheapest starting at N cents
billing is included in a caller 's regular phone bill
from the fee the local phone company and the long-distance carrier extract their costs to carry the call passing the rest of the money to the <unk> which must cover advertising and other costs
in recent months the technology has become more flexible and able to handle much more volume
before callers of N numbers would just listen and not talk or they 'd vote yes or no by calling one of two numbers
people in the phone business call this technology N <unk>
now callers are led through complex <unk> of choices to retrieve information they want and the hardware can process N calls in N seconds
up to now N numbers have mainly been used on local tv stations and cable channels
<unk> used one to give away the house that rock star jon <unk> <unk> grew up in
for several years turner broadcasting system 's cable news network has invited viewers to respond <unk> to <unk> issues should the u.s. military intervene in panama but even the hottest <unk> on <unk> <unk> only about N calls
the newest uses of the <unk> technology demonstrate the growing variety of applications
capital cities\/abc inc. cbs inc. and general electric co. 's national broadcasting co. unit are expected to announce soon a joint campaign to raise awareness about <unk>
the subject will be written into the <unk> of prime-time shows and viewers will be given a N number to call
callers will be sent educational booklets and the call 's modest cost will be an immediate method of raising money
other network applications have very different goals
abc sports was looking for ways to lift <unk> <unk> ratings for monday night football
kurt <unk> abc sports 's marketing director says that now tens of thousands of fans call its N number each week to vote for the best <unk> return <unk> <unk> etc
profit from the calls goes to charity but abc sports also uses the calls as a sales tool after <unk> callers for voting frank <unk> offers a football <unk> for $ N and N N of callers stay on the line to order it
jackets may be sold next
meanwhile nbc sports recently began scores plus a <unk> 24-hour N line providing a complex array of scores analysis and fan news
a spokesman said its purpose is to bolster the impression that nbc sports is always there for people
nbc 's <unk> consumer minutes have increased advertiser spending during the day the network 's weakest period
each <unk> matches a sponsor and a topic on <unk> unilever n.v. 's <unk> bros. sponsors tips on diet and exercise followed by a <unk> <unk> bros. commercial
viewers can call a N number for additional advice which will be tailored to their needs based on the numbers they <unk> press one if you 're pregnant etc
if the caller stays on the line and leaves a name and address for the sponsor coupons and a newsletter will be <unk> and the sponsor will be able to gather a list of desirable potential customers
<unk> <unk> an <unk> vice president says nbc has been able to charge premium rates for this ad time
she would n't say what the premium is but it 's believed to be about N N above regular <unk> rates
we were able to get advertisers to use their promotion budget for this because they get a chance to do <unk> says ms. <unk>
and we were able to attract some new advertisers because this is something new
mr. parks of call interactive says tv executives are considering the use of N numbers for talk shows game shows news and opinion surveys
experts are predicting a big influx of new shows in N when a service called automatic number information will become widely available
this service <unk> each caller 's phone number and it can be used to generate instant mailing lists
hard copy the new syndicated tabloid show from paramount pictures will use its N number for additional purposes that include research says executive producer mark b. von s. <unk>
for a piece on local heroes of world war ii we can ask people to leave the name and number of anyone they know who won a <unk> he says
that 'll save us time and get people involved
but mr. <unk> sees much bigger changes ahead
these are just baby steps toward real interactive video which i believe will be the biggest thing yet to affect television he says
although it would be costly to shoot multiple versions tv programmers could let audiences vote on different <unk> for a movie
fox broadcasting <unk> with this concept last year when viewers of married with children voted on whether al should say i love you to <unk> on <unk> 's day
someday viewers may also choose different <unk> of news coverage
a <unk> by phone could let you decide i 'm interested in just the beginning of story no. N and i want story no. N in <unk> mr. <unk> says
you 'll start to see shows where viewers program the program
integrated resources inc. the troubled financial-services company that has been trying to sell its core companies to restructure debt said talks with a potential buyer ended
integrated did n't identify the party or say why the talks failed
last week another potential buyer <unk> financial group which had agreed in august to purchase most of integrated 's core companies for $ N million ended talks with integrated
integrated said that it would continue to pursue other alternatives to sell the five core companies and that a group of senior executives plans to make a proposal to purchase three of the companies integrated resources equity corp. resources trust co. and integrated resources asset management corp
a price was n't disclosed
integrated also said it expects to report a second-quarter loss wider than the earlier estimate of about $ N million
the company did n't disclose the new estimate but said the change was related to integrated 's failure to sell its core businesses as well as other events which it did n't detail that occurred after its announcement last week that it was in talks with the unidentified prospective buyer
meanwhile a number of top sales producers from integrated resources equity will meet this afternoon in chicago to discuss their options
the unit is a <unk> constructed group of about N independent brokers and financial planners who sell insurance annuities limited partnerships mutual funds and other investments for integrated and other firms
the sales force is viewed as a critical asset in integrated 's attempt to sell its core companies
<unk> cited concerns about how long integrated would be able to hold together the sales force as one reason its talks with integrated failed
in composite trading on the new york stock exchange yesterday integrated closed at $ N a share down N cents
integrated has been struggling to avoid a bankruptcy-law filing since june when it failed to make interest payments on nearly $ N billion of debt
integrated senior and junior creditors are owed a total of about $ N billion
an earthquake struck northern california killing more than N people
the violent temblor which lasted about N seconds and registered N on the richter scale also caused the collapse of a <unk> section of the san <unk> bay bridge and shook candlestick park
the tremor was centered near <unk> southeast of san francisco and was felt as far as N miles away
numerous injuries were reported
some buildings collapsed gas and water lines <unk> and fires <unk>
the quake which also caused damage in san jose and berkeley knocked out electricity and telephones <unk> roadways and disrupted subway service in the bay area
major injuries were n't reported at candlestick park where the third game of baseball 's world series was canceled and fans <unk> from the stadium
bush vowed to veto a bill allowing federal financing for abortions in cases of rape and incest saying tax dollars should n't be used to compound a violent act with the taking of an <unk> life
his pledge in a letter to democratic sen. byrd came ahead of an expected senate vote on spending legislation containing the provision
east germany 's politburo met amid speculation that the ruling body would oust hard-line leader honecker whose rule has been challenged by mass emigration and calls for democratic freedoms
meanwhile about N refugees flew to <unk> west germany from warsaw the first <unk> in east germany 's <unk> exodus
the world psychiatric association voted at an <unk> <unk> to <unk> <unk> the soviet union
moscow which left the group in N to avoid <unk> over allegations that political <unk> were being certified as <unk> could be suspended if the <unk> of <unk> against <unk> is discovered during a review within a year
nasa postponed the <unk> of the space shuttle atlantis because of rain near the site of the launch <unk> in <unk> <unk> fla
the flight was <unk> for today
the spacecraft 's five <unk> are to <unk> the <unk> galileo space probe on an <unk> mission to jupiter
senate democratic leaders said they had enough votes to defeat a proposed constitutional amendment to ban flag burning
the amendment is aimed at <unk> a supreme court ruling that threw out the conviction of a texas <unk> on grounds that his freedom of speech was violated
federal researchers said lung-cancer mortality rates for people under N years of age have begun to decline particularly for white males
the national cancer institute also projected that overall u.s. mortality rates from lung cancer should begin to drop in several years if cigarette smoking continues to <unk>
bush met with south korean president roh who indicated that seoul plans to further ease trade rules to ensure that its economy becomes as open as the other industrialized nations by the mid-1990s
bush assured roh that the u.s. would stand by its security commitments as long as there is a threat from communist north korea
the bush administration is seeking an understanding with congress to ease restrictions on u.s. involvement in foreign coups that might result in the death of a country 's leader
a white house spokesman said that while bush would n't alter a longstanding ban on such involvement there 's a <unk> needed on its interpretation
india 's gandhi called for parliamentary elections next month
the balloting considered a test for the prime minister and the ruling congress i party comes amid charges of <unk> leadership and government corruption
gandhi 's family has ruled independent india for all but five years of its <unk> history
the soviet union <unk> from a u.n. general assembly vote to reject israel 's credentials
it was the first time in seven years that moscow has n't joined efforts led by <unk> nations to <unk> israel from the world body and was viewed as a sign of improving <unk> ties
israel was <unk> by a vote of N with N <unk>
black activist walter sisulu said the african national congress would n't reject violence as a way to pressure the south african government into concessions that might lead to negotiations over apartheid
the <unk> sisulu was among eight black political activists freed sunday from prison
london has concluded that <unk> president <unk> was n't responsible for the execution of six british <unk> in world war ii although he probably was aware of the <unk>
the report by the defense ministry also rejected allegations that britain covered up evidence of <unk> 's activities as a german army officer
an international group approved a formal ban on ivory trade despite objections from southern african governments which threatened to find alternative channels for selling elephant <unk>
the move by the convention on trade in endangered <unk> meeting in switzerland places the elephant on the <unk> list
an <unk> in colombia killed a federal judge on a <unk> street
an <unk> caller to a local radio station said cocaine traffickers had <unk> the <unk> in <unk> for the <unk> of <unk> wanted on drug charges in the u.s.
<unk> leader <unk> met with egypt 's president <unk> and the two officials pledged to respect each other 's laws security and stability
they stopped short of <unk> diplomatic ties <unk> in N
the reconciliation talks in the <unk> desert town of <unk> followed a meeting monday in the egyptian resort of <unk> <unk>
<unk> group inc. revised its exchange offer for $ N million face amount of N N senior subordinated debt due N and extended the offer to oct. N from oct. N
the <unk> n.j. company said holders would receive for each $ N face amount $ N face amount of a new issue of secured senior subordinated notes convertible into common stock at an initial rate of $ N a share and N common shares
the new notes will bear interest at N N through july N N and thereafter at N N
under the original proposal the maker of specialty coatings and a developer of <unk> technologies offered $ N of notes due N N common shares and $ N in cash for each $ N face amount
completion of the exchange offer is subject to the tender of at least N N of the debt among other things
<unk> which said it does n't plan to further extend the offer said it received $ N face amount of debt under the original offer
the stock of ual corp. continued to be <unk> amid signs that british airways may <unk> at any <unk> <unk> of the aborted $ N billion buy-out of united airlines ' parent
ual stock plummeted a further $ N to $ N on volume of more than N million shares in new york stock exchange composite trading
the plunge followed a drop of $ N monday amid indications the takeover may take weeks to be revived
the stock has fallen $ N or N N in the three trading days since announcement of the collapse of the $ 300-a-share takeover jolted the entire stock market into its <unk> plunge ever
this is a total <unk> for takeover-stock traders one investment banker said
los angeles financier marvin davis who put united in play with a $ N billion bid two months ago last night <unk> both a ray of hope and an extra element of uncertainty by saying he remains interested in acquiring ual
but he dropped his earlier $ 300-a-share <unk> bid saying he must first explore bank financing
even as citicorp and chase manhattan corp. scrambled to line up bank financing for a revised version of the <unk> labor-management bid british airways a N N partner in the buying group indicated it wants to start from <unk>
its partners are united 's pilots who were to own N N and ual management at N N
adding <unk> to injury united 's <unk> machinists ' union which helped scuttle financing for the first bid yesterday asked ual chairman stephen wolf and other ual directors to resign
a similar demand was made by a group that represents some of united 's N <unk> employees
john <unk> machinists union general vice president attacked mr. wolf as greedy and irresponsible for pursuing the buy-out
although mr. wolf and john pope ual 's chief financial officer stood to <unk> $ N million for stock and options in the buy-out ual executives planned to reinvest only $ N million in the new company
the blue-collar machinists longtime rivals of the white-collar pilots say the <unk> would load the company with debt and weaken its finances
confusion about the two banks ' <unk> efforts to round up financing for a new bid that the ual board has n't even seen yet helped send ual stock <unk> downward
and rumors of forced selling by takeover-stock traders triggered a <unk> <unk> in the dow jones industrial average around N a.m. edt yesterday
yesterday 's selling began after a japanese news agency reported that japanese banks which balked at the first bid were ready to reject a revised version at around $ N a share or $ N billion
several reports as the day <unk> gave vague or <unk> indications about whether banks would sign up
citicorp for example said only that it had <unk> of interest of a transaction from both the borrowers and the banks but did n't have an agreement
late in the day mr. wolf issued a <unk> statement calling mr. <unk> 's blast divisive and <unk> for
but he gave few details on the progress toward a new bid saying only we are working toward a revised proposal for majority employee ownership
meanwhile in another sign that a new bid is n't imminent it was learned that the ual board held a telephone meeting monday to hear an update on the situation but that a formal board meeting is n't likely to be <unk> until early next week
in london british airways chairman lord king was quoted in the times as declaring he is not prepared to take my shareholders into a <unk> deal
observers said it appeared that british air was angered at the way the bid has <unk> into confusion as well as by the banks ' effort to round up financing for what one called a deal that is n't a deal
the effort to revive the bid was complicated by the <unk> nature of the <unk> buying group
the pilots were meeting outside chicago yesterday
but british air which was to have supplied $ N million out of $ N million in equity financing apparently was n't involved in the second proposal and could well reject it even if banks obtain financing
a group of united 's <unk> employees said in a statement the fact that wolf and other officers were going to line their pockets with literally millions of dollars while <unk> severe pay cuts on the <unk> employees of united is not only <unk> but <unk>
the machinists also asked for an investigation by the securities and exchange commission into possible <unk> violations in the original bid for ual by mr. davis as well as in the response by ual
last week just before the bank commitments were due the union asked the u.s. labor department to study whether the bid violated legal standards of fairness governing employee investment funds
in his statement mr. wolf said we continue to believe our approach is sound and that it is far better for all employees than the alternative of having an outsider own the company with employees paying for it just the same
mr. wolf has <unk> merger advice from a major wall street securities firm relying instead only on a takeover lawyer peter <unk> of <unk> <unk> slate <unk> & flom
the huge drop in ual stock prompted one takeover stock trader george <unk> managing partner of <unk> <unk> & co. to deny publicly rumors that his firm was going out of business
mr. <unk> said that despite losses on ual stock his firm 's health is excellent
the stock 's decline also has left the ual board in a <unk>
although it may not be legally obligated to sell the company if the buy-out group ca n't revive its bid it may have to explore alternatives if the buyers come back with a bid much lower than the group 's original $ 300-a-share proposal
at a meeting sept. N to consider the labor-management bid the board also was informed by its investment adviser first boston corp. of interest expressed by buy-out funds including kohlberg kravis roberts & co. and <unk> little & co. as well as by robert bass morgan stanley 's buy-out fund and pan am corp
the takeover-stock traders were hoping that mr. davis or one of the other interested parties might <unk> with the situation in disarray or that the board might consider a recapitalization
meanwhile japanese bankers said they were still <unk> about accepting citicorp 's latest proposal
macmillan inc. said it plans a public offering of N million shares of its berlitz international inc. unit at $ N to $ N a share
the offering for the language school unit was announced by robert maxwell chairman and chief executive officer of london-based maxwell communication corp. which owns macmillan
after the offering is completed macmillan will own about N N of the berlitz common stock outstanding
five million shares will be offered in the u.s. and N million additional shares will be offered in <unk> international offerings outside the u.s.
goldman sachs & co. will manage the offering
macmillan said berlitz intends to pay quarterly dividends on the stock
the company said it expects to pay the first dividend of N cents a share in the N first quarter
berlitz will borrow an amount equal to its expected net proceeds from the offerings plus $ N million in connection with a credit agreement with lenders
the total borrowing will be about $ N million the company said
proceeds from the borrowings under the credit agreement will be used to pay an $ N million cash dividend to macmillan and to lend the remainder of about $ N million to maxwell communications in connection with a <unk> note
proceeds from the offering will be used to repay borrowings under the short-term parts of a credit agreement
berlitz which is based in princeton n.j. provides language instruction and translation services through more than N language centers in N countries
in the past five years more than N N of its sales have been outside the u.s.
macmillan has owned berlitz since N
in the first six months of this year berlitz posted net income of $ N million on sales of $ N million compared with net income of $ N million on sales of $ N million
right away you notice the following things about a philip glass concert
it attracts people with funny hair or with no hair in front of me a girl with <unk> <unk> sat <unk> a boy who had <unk> his
whoever constitute the local left bank come out in force dressed in black along with a <unk> of <unk> who want to be on the cutting edge
people in glass houses tend to look <unk>
and if still <unk> at the evening 's end you notice something else the audience at first <unk> and <unk> by the music releases its <unk> feelings in collective <unk>
currently in the middle of a <unk> <unk> tour as a solo <unk> mr. glass has left behind his <unk> equipment and <unk> in favor of going it alone
he sits down at the piano and plays
and plays
either one likes it or one does n't
the typical glass audience which is more likely to be composed of music students than their teachers certainly does
the work though sounds like <unk> for <unk>
philip glass is the <unk> and his music the new clothes of the <unk>
his success is easy to understand
<unk> introducing and explaining his pieces mr. glass looks and sounds more like a <unk> <unk> describing his work than a classical <unk> playing a recital
the piano <unk> which have been labeled <unk> as <unk> <unk> <unk> cyclical <unk> and <unk> are <unk> <unk> therefore <unk> <unk> <unk> therefore <unk> and <unk> <unk> but <unk> therefore both pretty and <unk>
it is music for people who want to hear something different but do n't want to work especially hard at the task
it is <unk> listening for the now generation
mr. glass has <unk> the famous <unk> <unk> less is more
his more is always less
far from being <unk> the music <unk> <unk> us with apparent <unk> not so <unk> <unk> in the <unk> of N time <unk> <unk> and <unk> or <unk> <unk> <unk>
but the music has its <unk> and mr. glass has constructed his solo program around a move from the simple to the relatively complex
opening N from <unk> <unk> the audience to the glass technique never <unk> too far from the piano 's center mr. glass works in the two <unk> on either side of middle c and his fingers seldom leave the <unk>
there is a <unk> musical style here but not a particular performance style
the music is not especially <unk> indeed it 's hard to imagine a bad performance of it
nothing <unk> no <unk> no <unk> <unk> problems challenge the performer
we hear we may think inner voices but they all seem to be saying the same thing
with planet news music meant to <unk> <unk> of allen <unk> 's wichita <unk> <unk> mr. glass gets going
his hands sit <unk> apart on the <unk>
seventh <unk> make you feel as though he may break into a very slow <unk> <unk>
the <unk> <unk> but there is little <unk> even though his fingers begin to <unk> over more of the <unk>
contrasts predictably <unk> first the music is loud then it becomes soft then you realize it becomes <unk> again
the fourth <unk> play an <unk> from <unk> on the beach is like a <unk> but it does n't seem to move much beyond its <unk> ground in three blind mice
when mr. glass decides to get really fancy he <unk> his hands and hits a <unk> bass note with his right hand
he does this in at least three of his solo pieces
you might call it a <unk> or a <unk> <unk>
in mad rush which came from a commission to write a piece of <unk> length mr. glass <unk> and <unk> confessed that this was no problem for me an a <unk> with a b section several times before the piece ends <unk>
not only is the typical <unk> <unk> it is also often multiple in its context s
mad rush began its life as the <unk> to the <unk> lama 's first public address in the u.s. when mr. glass played it on the <unk> at new york 's <unk> of st. john the <unk>
later it was performed on radio <unk> in germany and then <unk> <unk> took it for one of her dance pieces
the point is that any piece can be used as background music for virtually anything
the evening ended with mr. glass 's <unk> another multiple work
parts N N and N come from the <unk> of <unk> morris 's <unk> film the thin blue line and the two other parts from <unk> music to two separate <unk> of the <unk> story of the same name
when used as background in this way the music has an appropriate <unk> as when a <unk> phrase a <unk> minor third <unk> the seemingly endless <unk> of reports interviews and <unk> of witnesses in the morris film
served up as a solo however the music lacks the <unk> provided by a context within another medium
<unk> of mr. glass may agree with the critic richard <unk> 's sense that the N music in twelve parts is as <unk> and <unk> as the <unk> <unk>
but while making the obvious point that both <unk> develop variations from themes this comparison <unk> the intensely <unk> nature of mr. glass 's music
its supposedly <unk> <unk> <unk> a <unk> that makes one <unk> for the <unk> of <unk> <unk> the <unk> radical <unk> of <unk> and <unk> and what in <unk> even seems like <unk> in <unk>
mr. <unk> is professor of english at southern <unk> university and editor of the southwest review
honeywell inc. said it hopes to complete shortly the first of two sales of shares in its japanese joint venture <unk> for about $ N million
the company would n't disclose the buyer of the initial N N stake
proceeds of the sale expected to be completed next week would be used to repurchase as many as N million shares of honeywell stock the company said
honeywell said it is negotiating the sale of a second stake in <unk> but indicated it intends to hold at least N N of the joint venture 's stock long term
a N N stake would allow honeywell to include <unk> earnings in its results
honeywell previously said it intended to reduce its holding in the japanese concern as part of a restructuring plan which also calls for a reduction of <unk> on weapons sales
yesterday a spokeswoman said the company was pleased with our progress in that regard and hopes to provide additional details soon
honeywell said its defense and marine systems group incurred delays in shipping some undisclosed contracts during the third quarter resulting in lower operating profit for that business
overall honeywell reported earnings of $ N million or $ N a share for the three months ended oct. N compared with a loss of $ N million or N cents a share a year earlier
the previous period 's results included a $ N million pretax charge related to <unk> contract costs and a $ N million pretax gain on real estate sales
sales for the latest quarter were flat at $ N billion
for the nine months honeywell reported earnings of $ N million or $ N a share compared with earnings of $ N million or $ N a share a year earlier
sales declined slightly to $ N billion
once again your editorial page <unk> the law to conform to your almost <unk> <unk>
in an <unk> of little <unk> to his central point about private enforcement suits by environmental groups michael s. <unk> <unk> your readers the clean water act is written upon the <unk> the <unk> rather that nothing but zero risk will do it <unk> a legal standard of zero <unk> <unk> environmental <unk> sept. N
this statement surely <unk> your editorial viewpoint that environmental protection is generally silly or excessive but it is simply wrong
the clean water act contains no legal standard of zero <unk>
it requires that <unk> of <unk> into the waters of the united states be authorized by permits that reflect the <unk> limitations developed under section N
whatever may be the problems with this system it <unk> reflects zero risk or zero <unk>
perhaps mr. <unk> was confused by congress 's <unk> statement of the national goal in section N which indeed calls for the elimination of <unk> by N no less
this <unk> statement was not taken seriously when enacted in N and should not now be confused with the <unk> provisions of the statute
thus you do the public a great <unk> when mr. <unk> suggests even <unk> that the clean water act prohibits the preparation of a <unk> and water your <unk> readers may be led to believe that nothing but chance or oversight protects them as they <unk> in the night with their <unk> and waters from the <unk> knock of the sierra club at their doors
robert j. <unk>
national geographic the <unk> u.s. magazine is attracting more readers than ever and offers the glossy <unk> pages that upscale advertisers love
so why did advertising pages plunge by almost N N and ad revenue by N N in the first half
to hear advertisers tell it the magazine just has n't kept up with the times
despite renewed interest by the public in such topics as the environment and the third world it has n't been able to shake its reputation as a magazine boys like to <unk> through in search of <unk> tribe women
worse it lagged behind competitors in offering <unk> <unk> from regional editions to discounts for frequent advertisers
but now the magazine is attempting to fight back with an ambitious plan including a revamped sales strategy and a surprisingly aggressive ad campaign
advertisers do n't think of the magazine first says joan <unk> who joined in april as national advertising director
what we want to do is take a more aggressive stance
people did n't believe we were in tune with the marketplace and in many ways we were n't
the <unk> magazine has never had to woo advertisers with quite so much <unk> before
it largely <unk> on its <unk> <unk> N million subscribers in the first half up from N million a year ago an average age of N for readers at the <unk> of their <unk> years loyalty to the tune of an N N average subscription renewal rate
the magazine had its best year yet in N when it <unk> its centennial and racked up a N N gain in ad pages to N
but this year when the <unk> surrounding its centennial died so too did some advertiser interest
the reason ad executives say is that the entire magazine business has been soft and national geographic has some <unk> that make it especially <unk> during a soft market
perhaps the biggest of those factors is its high ad prices $ N for a <unk> page vs. $ N for the <unk> a comparable publication with a far smaller circulation
when ad dollars are tight the high page cost is a major <unk> for advertisers who generally want to appear regularly in a publication or not at all
even though national geographic offers far more readers than does a magazine like <unk> the page costs you an arm and a leg to develop any frequency says harry glass new york media manager for bozell inc
to combat that problem national geographic like other magazines began offering regional editions allowing advertisers to appear in only a portion of its magazines for example ads can run only in the magazines sent to subscribers in the largest N markets
but the magazine was slower than its competitors to come up with its regional editions and until last year offered fewer of them than did competitors
time magazine for example has more than N separate editions going to different regions top management and other groups
another sticking point for advertisers was national geographic 's tradition of <unk> its ads together usually at the beginning or end of the magazine rather than spreading ads out among its articles as most magazines do
and national geographic 's <unk> size means extra production costs for advertisers
but ms. <unk> says the magazine is fighting back
it now offers N regional editions it very recently began running ads adjacent to articles and it has been <unk> up its sales force
and it just launched a promotional campaign to tell chief executives marketing directors and media executives just that
the centerpiece of the promotion is its new ad campaign into which the magazine will pour about $ N mostly in the next few weeks
the campaign created by <unk> group 's ddb needham agency takes advantage of the <unk> photography that national geographic is known for
in one ad a photo of the interior of the <unk> in paris is <unk> with the headline the only book more respected than <unk> does n't accept advertising
another ad pictures a tree <unk> magnified N times with the headline for impact far beyond your size consider our regional editions
ms. <unk> says she wants the campaign to help attract advertisers in N categories including corporate financial services consumer electronics insurance and food
her goal to top N ad pages in N up from about N this year
whether she can meet that ambitious goal is still far from certain
the ad campaign is meant to <unk> the thought of national geographic she says
we want it to be a <unk> kind of image
wcrs plans <unk> sale
wcrs group hopes to announce perhaps today an agreement to sell the majority of its ad unit to <unk> eurocom a european ad executive said
wcrs has been in discussions with eurocom for several months
however when negotiations <unk> down recently wcrs 's chief executive peter scott met in paris with another french firm <unk> <unk> <unk> <unk> or <unk>
according to the executive <unk> 's involvement prompted renewed <unk> in the <unk> talks and the two agencies were hoping to <unk> out details by today
executives of the two agencies could n't be reached last night
ad notes
new account procter & gamble co. cincinnati awarded the ad accounts for its line of professional <unk> <unk> <unk> and oil products to <unk> <unk> <unk> cincinnati
billings were n't disclosed
professional <unk> products are specially made for the <unk> industry
who 's news stephen <unk> N was named executive vice president deputy creative director at grey advertising new york
he was executive vice president director of broadcast production
the commodity futures trading commission plans to restrict dual trading on commodity exchanges a move almost certain to <unk> exchange officials and traders
the cftc said it will propose the restrictions after the release of a study that shows little economic benefit resulting from dual trading and cites problems associated with the practice
dual trading gives an exchange trader the right to trade both for his own account and for customers
the issue exploded this year after a federal bureau of investigation operation led to charges of widespread trading abuses at the chicago board of trade and chicago mercantile exchange
while not specifically mentioned in the fbi charges dual trading became a focus of attempts to tighten industry regulations
critics contend that traders were putting buying or selling for their own accounts ahead of other traders ' customer orders
traders are likely to oppose such restrictions because dual trading provides a way to make money in slower markets where there is a shortage of customer orders
the exchanges contend that dual trading improves liquidity in the markets because traders can buy or sell even when they do n't have a customer order in hand
the exchanges say liquidity becomes a severe problem for <unk> traded contracts such as those with a long time remaining before expiration
the cftc may take those arguments into account by allowing exceptions to its restrictions
the agency did n't cite specific situations where dual trading might be allowed but smaller exchanges or contracts that need additional liquidity are expected to be among them
wendy <unk> the agency 's chairman told the senate agriculture committee that she expects the study to be released within two weeks and the rule changes to be completed by <unk>
the study by the cftc 's division of economic analysis shows that a trade is a trade a member of the study team said
whether a trade is done on a dual or <unk> basis the member said does n't seem to have much economic impact
currently most traders on commodity exchanges specialize in trading either for customer accounts which makes them brokers or for their own accounts as <unk> <unk>
the tests indicate that dual and <unk> traders are similar in terms of the trade executions and liquidity they provide to the market mrs. <unk> told the senate panel
members of congress have proposed restricting dual trading in bills to <unk> cftc operations
the house 's bill would prohibit dual trading in markets with daily average volume of N contracts or more <unk> those considered too difficult to track without a sophisticated computer system
the senate bill would force the cftc to suspend dual trading if an exchange ca n't show that its oversight system can detect <unk> abuses
so far one test of restricting dual trading has worked well
the chicago merc banned dual trading in its standard & poor 's 500-stock index futures pit in N
under the rules traders decide before a session begins whether they will trade for their own account or for customers
traders who stand on the pit 's top step where most customer orders are executed ca n't trade for themselves
a merc spokesman said the plan has n't made much difference in liquidity in the pit
it 's too soon to tell but people do n't seem to be unhappy with it he said
he said he would n't comment on the cftc plan until the exchange has seen the full proposal
but at a meeting last week tom <unk> the board of trade 's president told commodity lawyers dual trading is definitely worth saving
it adds something to the market
japanese firms push <unk> car <unk>
japanese luxury-car makers are trying to set strict design standards for their dealerships
but some dealers are negotiating <unk> terms while others decline to deal at all
nissan motor co. 's infiniti division likes to insist that every dealer construct and <unk> a building in a japanese style
specifications include a <unk> <unk> <unk> at the center of each showroom and a <unk> bridge <unk> a stream that flows into the building from outside
infiniti has it down to the <unk> says jay <unk> a partner at <unk> power & associates an auto research firm
toyota motor corp. 's lexus division also provides specifications
but only two-thirds of lexus dealers are <unk> new buildings according to the lexus <unk>
some are even coming up with their own novel designs
in louisville ky. for example david peterson has built a lexus dealership with the showroom on the second floor
yet some dealers have turned down infiniti or lexus <unk> because they were unwilling or unable to meet the design requirements
lee seidman of cleveland says infiniti was a bear on <unk> but at least let him <unk> an existing building without the stream
mr. seidman says he turned down a lexus franchise in part because the building was <unk> but very expensive
to head off arguments infiniti offers dealers cash bonuses and <unk> construction loans
<unk> device 's <unk> plays back a lesson
products <unk> have to be first to be winners
that 's the lesson offered through one case study featured in a design exhibit
dictaphone corp. was caught off guard in N when its main competitor <unk> office products of japan introduced a <unk> <unk> recorder half the size of standard <unk> devices
blocked by patent protection from following suit dictaphone decided to go a step further and cut the <unk> in half again down to the length of a <unk>

+ 0
- 36
reproduction/HAN-document_classification/README.md View File

@@ -1,36 +0,0 @@
## Introduction
This is the implementation of [Hierarchical Attention Networks for Document Classification](https://www.cs.cmu.edu/~diyiy/docs/naacl16.pdf) paper in PyTorch.
* Dataset is 600k documents extracted from [Yelp 2018](https://www.yelp.com/dataset) customer reviews
* Use [NLTK](http://www.nltk.org/) and [Stanford CoreNLP](https://stanfordnlp.github.io/CoreNLP/) to tokenize documents and sentences
* Both CPU & GPU support
* The best accuracy is 71%, matching the performance reported in the paper

## Requirement
* python 3.6
* pytorch = 0.3.0
* numpy
* gensim
* nltk
* coreNLP

## Parameters
According to the paper and my experiments, the model parameters are:
|word embedding dimension|GRU hidden size|GRU layer|word/sentence context vector dimension|
|---|---|---|---|
|200|50|1|100|

And the training parameters:
|Epoch|learning rate|momentum|batch size|
|---|---|---|---|
|3|0.01|0.9|64|
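
The tables above map directly onto the `HAN` constructor defined in `model.py`. A minimal sketch of building the model with these settings (output size 5 corresponds to the five Yelp star ratings):

```
from model import HAN

# hyperparameters from the tables above: 200-d word embeddings, GRU hidden size 50,
# a single GRU layer, and 100-d word/sentence context vectors
net = HAN(input_size=200, output_size=5,
          word_hidden_size=50, word_num_layers=1, word_context_size=100,
          sent_hidden_size=50, sent_num_layers=1, sent_context_size=100)
```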

## Run
1. Prepare the dataset. Download the [data set](https://www.yelp.com/dataset), unzip the customer reviews into a file, and use preprocess.py to transform the file into a dataset for model input.
2. Train the model. Word embeddings of the training data are stored in 'yelp.word2vec'. The model will be trained and automatically saved to 'models.dict'.
```
python train.py
```
3. Test the model.
```
python evaluate.py
```

+ 0
- 0
reproduction/HAN-document_classification/__init__.py View File


+ 0
- 45
reproduction/HAN-document_classification/evaluate.py View File

@@ -1,45 +0,0 @@
from model import *
from train import *


def evaluate(net, dataset, batch_size=64, use_cuda=False):
dataloader = DataLoader(dataset, batch_size=batch_size, collate_fn=collate, num_workers=0)
count = 0
if use_cuda:
net.cuda()
for i, batch_samples in enumerate(dataloader):
x, y = batch_samples
doc_list = []
for sample in x:
doc = []
for sent_vec in sample:
if use_cuda:
sent_vec = sent_vec.cuda()
doc.append(Variable(sent_vec, volatile=True))
doc_list.append(pack_sequence(doc))
if use_cuda:
y = y.cuda()
predicts = net(doc_list)
p, idx = torch.max(predicts, dim=1)
idx = idx.data
count += torch.sum(torch.eq(idx, y))
return count


if __name__ == '__main__':
'''
Evaluate the performance of models
'''
from gensim.models import Word2Vec

embed_model = Word2Vec.load('yelp.word2vec')
embedding = Embedding_layer(embed_model.wv, embed_model.wv.vector_size)
del embed_model

net = HAN(input_size=200, output_size=5,
word_hidden_size=50, word_num_layers=1, word_context_size=100,
sent_hidden_size=50, sent_num_layers=1, sent_context_size=100)
net.load_state_dict(torch.load('models.dict'))
test_dataset = YelpDocSet('reviews', 199, 4, embedding)
correct = evaluate(net, test_dataset, use_cuda=True)
print('accuracy {}'.format(correct / len(test_dataset)))

+ 0
- 113
reproduction/HAN-document_classification/model.py View File

@@ -1,113 +0,0 @@
import torch
import torch.nn as nn
from torch.autograd import Variable


def pack_sequence(tensor_seq, padding_value=0.0):
if len(tensor_seq) <= 0:
return
length = [v.size(0) for v in tensor_seq]
max_len = max(length)
size = [len(tensor_seq), max_len]
size.extend(list(tensor_seq[0].size()[1:]))
ans = torch.Tensor(*size).fill_(padding_value)
if tensor_seq[0].data.is_cuda:
ans = ans.cuda()
ans = Variable(ans)
for i, v in enumerate(tensor_seq):
ans[i, :length[i], :] = v
return ans


class HAN(nn.Module):
def __init__(self, input_size, output_size,
word_hidden_size, word_num_layers, word_context_size,
sent_hidden_size, sent_num_layers, sent_context_size):
super(HAN, self).__init__()

self.word_layer = AttentionNet(input_size,
word_hidden_size,
word_num_layers,
word_context_size)
self.sent_layer = AttentionNet(2 * word_hidden_size,
sent_hidden_size,
sent_num_layers,
sent_context_size)
self.output_layer = nn.Linear(2 * sent_hidden_size, output_size)
self.softmax = nn.LogSoftmax(dim=1)

def forward(self, batch_doc):
# input is a sequence of matrix
doc_vec_list = []
for doc in batch_doc:
sent_mat = self.word_layer(doc) # doc's dim (num_sent, seq_len, word_dim)
doc_vec_list.append(sent_mat) # sent_mat's dim (num_sent, vec_dim)
doc_vec = self.sent_layer(pack_sequence(doc_vec_list))
output = self.softmax(self.output_layer(doc_vec))
return output


class AttentionNet(nn.Module):
def __init__(self, input_size, gru_hidden_size, gru_num_layers, context_vec_size):
super(AttentionNet, self).__init__()

self.input_size = input_size
self.gru_hidden_size = gru_hidden_size
self.gru_num_layers = gru_num_layers
self.context_vec_size = context_vec_size

# Encoder
self.gru = nn.GRU(input_size=input_size,
hidden_size=gru_hidden_size,
num_layers=gru_num_layers,
batch_first=True,
bidirectional=True)
# Attention
self.fc = nn.Linear(2 * gru_hidden_size, context_vec_size)
self.tanh = nn.Tanh()
self.softmax = nn.Softmax(dim=1)
# context vector
self.context_vec = nn.Parameter(torch.Tensor(context_vec_size, 1))
self.context_vec.data.uniform_(-0.1, 0.1)

def forward(self, inputs):
# GRU part
h_t, hidden = self.gru(inputs) # inputs's dim (batch_size, seq_len, word_dim)
u = self.tanh(self.fc(h_t))
# Attention part
alpha = self.softmax(torch.matmul(u, self.context_vec)) # u's dim (batch_size, seq_len, context_vec_size)
output = torch.bmm(torch.transpose(h_t, 1, 2), alpha) # alpha's dim (batch_size, seq_len, 1)
return torch.squeeze(output, dim=2) # output's dim (batch_size, 2*hidden_size, 1)


if __name__ == '__main__':
'''
Test the model's correctness
'''
import numpy as np

use_cuda = True
net = HAN(input_size=200, output_size=5,
word_hidden_size=50, word_num_layers=1, word_context_size=100,
sent_hidden_size=50, sent_num_layers=1, sent_context_size=100)
optimizer = torch.optim.SGD(net.parameters(), lr=0.01, momentum=0.9)
criterion = nn.NLLLoss()
test_time = 10
batch_size = 64
if use_cuda:
net.cuda()
print('test training')
for step in range(test_time):
x_data = [torch.randn(np.random.randint(1, 10), 200, 200) for i in range(batch_size)]
y_data = torch.LongTensor([np.random.randint(0, 5) for i in range(batch_size)])
if use_cuda:
x_data = [x_i.cuda() for x_i in x_data]
y_data = y_data.cuda()
x = [Variable(x_i) for x_i in x_data]
y = Variable(y_data)
predict = net(x)
loss = criterion(predict, y)
optimizer.zero_grad()
loss.backward()
optimizer.step()
print(loss.data[0])

+ 0
- 50
reproduction/HAN-document_classification/preprocess.py View File

@@ -1,50 +0,0 @@
'''
Tokenize the Yelp dataset's documents using Stanford CoreNLP
'''

import json
import os
import pickle

import nltk
from nltk.tokenize import stanford

input_filename = 'review.json'

# config for stanford core nlp
os.environ['JAVAHOME'] = 'D:\\java\\bin\\java.exe'
path_to_jar = 'E:\\College\\fudanNLP\\stanford-corenlp-full-2018-02-27\\stanford-corenlp-3.9.1.jar'
tokenizer = stanford.CoreNLPTokenizer()

in_dirname = 'review'
out_dirname = 'reviews'

f = open(input_filename, encoding='utf-8')
samples = []
j = 0
for i, line in enumerate(f.readlines()):
review = json.loads(line)
samples.append((review['stars'], review['text']))
if (i + 1) % 5000 == 0:
print(i)
pickle.dump(samples, open(in_dirname + '/samples%d.pkl' % j, 'wb'))
j += 1
samples = []
pickle.dump(samples, open(in_dirname + '/samples%d.pkl' % j, 'wb'))
# samples = pickle.load(open(out_dirname + '/samples0.pkl', 'rb'))
# print(samples[0])


for fn in os.listdir(in_dirname):
print(fn)
processed = []
for stars, text in pickle.load(open(os.path.join(in_dirname, fn), 'rb')):
tokens = []
sents = nltk.tokenize.sent_tokenize(text)
for s in sents:
tokens.append(tokenizer.tokenize(s))
processed.append((stars, tokens))
# print(tokens)
if len(processed) % 100 == 0:
print(len(processed))
pickle.dump(processed, open(os.path.join(out_dirname, fn), 'wb'))

+ 0
- 171
reproduction/HAN-document_classification/train.py View File

@@ -1,171 +0,0 @@
import os
import pickle

import numpy as np
import torch
from model import *


class SentIter:
def __init__(self, dirname, count):
self.dirname = dirname
self.count = int(count)

def __iter__(self):
for f in os.listdir(self.dirname)[:self.count]:
with open(os.path.join(self.dirname, f), 'rb') as f:
for y, x in pickle.load(f):
for sent in x:
yield sent


def train_word_vec():
# load data
dirname = 'reviews'
sents = SentIter(dirname, 238)
# define models and train
model = models.Word2Vec(size=200, sg=0, workers=4, min_count=5)
model.build_vocab(sents)
model.train(sents, total_examples=model.corpus_count, epochs=10)
model.save('yelp.word2vec')
print(model.wv.similarity('woman', 'man'))
print(model.wv.similarity('nice', 'awful'))


class Embedding_layer:
def __init__(self, wv, vector_size):
self.wv = wv
self.vector_size = vector_size

def get_vec(self, w):
try:
v = self.wv[w]
except KeyError as e:
v = np.random.randn(self.vector_size)
return v


from torch.utils.data import DataLoader, Dataset


class YelpDocSet(Dataset):
def __init__(self, dirname, start_file, num_files, embedding):
self.dirname = dirname
self.num_files = num_files
self._files = os.listdir(dirname)[start_file:start_file + num_files]
self.embedding = embedding
self._cache = [(-1, None) for i in range(5)]

def get_doc(self, n):
file_id = n // 5000
idx = file_id % 5
if self._cache[idx][0] != file_id:
with open(os.path.join(self.dirname, self._files[file_id]), 'rb') as f:
self._cache[idx] = (file_id, pickle.load(f))
y, x = self._cache[idx][1][n % 5000]
sents = []
for s_list in x:
sents.append(' '.join(s_list))
x = '\n'.join(sents)
return x, y - 1

def __len__(self):
return len(self._files) * 5000

def __getitem__(self, n):
file_id = n // 5000
idx = file_id % 5
if self._cache[idx][0] != file_id:
print('load {} to {}'.format(file_id, idx))
with open(os.path.join(self.dirname, self._files[file_id]), 'rb') as f:
self._cache[idx] = (file_id, pickle.load(f))
y, x = self._cache[idx][1][n % 5000]
doc = []
for sent in x:
if len(sent) == 0:
continue
sent_vec = []
for word in sent:
vec = self.embedding.get_vec(word)
sent_vec.append(vec.tolist())
sent_vec = torch.Tensor(sent_vec)
doc.append(sent_vec)
if len(doc) == 0:
doc = [torch.zeros(1, 200)]
return doc, y - 1


def collate(iterable):
y_list = []
x_list = []
for x, y in iterable:
y_list.append(y)
x_list.append(x)
return x_list, torch.LongTensor(y_list)


def train(net, dataset, num_epoch, batch_size, print_size=10, use_cuda=False):
optimizer = torch.optim.SGD(net.parameters(), lr=0.01, momentum=0.9)
criterion = nn.NLLLoss()

dataloader = DataLoader(dataset,
batch_size=batch_size,
collate_fn=collate,
num_workers=0)
running_loss = 0.0

if use_cuda:
net.cuda()
print('start training')
for epoch in range(num_epoch):
for i, batch_samples in enumerate(dataloader):
x, y = batch_samples
doc_list = []
for sample in x:
doc = []
for sent_vec in sample:
if use_cuda:
sent_vec = sent_vec.cuda()
doc.append(Variable(sent_vec))
doc_list.append(pack_sequence(doc))
if use_cuda:
y = y.cuda()
y = Variable(y)
predict = net(doc_list)
loss = criterion(predict, y)
optimizer.zero_grad()
loss.backward()
optimizer.step()
running_loss += loss.data[0]
if i % print_size == print_size - 1:
print('{}, {}'.format(i + 1, running_loss / print_size))
running_loss = 0.0
torch.save(net.state_dict(), 'models.dict')
torch.save(net.state_dict(), 'models.dict')


if __name__ == '__main__':
'''
Train process
'''
from gensim.models import Word2Vec
from gensim import models

train_word_vec()

embed_model = Word2Vec.load('yelp.word2vec')
embedding = Embedding_layer(embed_model.wv, embed_model.wv.vector_size)
del embed_model
start_file = 0
dataset = YelpDocSet('reviews', start_file, 120 - start_file, embedding)
print('training data size {}'.format(len(dataset)))
net = HAN(input_size=200, output_size=5,
word_hidden_size=50, word_num_layers=1, word_context_size=100,
sent_hidden_size=50, sent_num_layers=1, sent_context_size=100)
try:
net.load_state_dict(torch.load('models.dict'))
print("last time trained models has loaded")
except Exception:
print("cannot load models, train the inital models")

train(net, dataset, num_epoch=5, batch_size=64, use_cuda=True)

+ 129
- 0
reproduction/Summmarization/BertSum/callback.py View File

@@ -0,0 +1,129 @@
import os
import torch
import sys
from torch import nn

from fastNLP.core.callback import Callback
from fastNLP.core.utils import _get_model_device

class MyCallback(Callback):
def __init__(self, args):
super(MyCallback, self).__init__()
self.args = args
self.real_step = 0

def on_step_end(self):
if self.step % self.update_every == 0 and self.step > 0:
self.real_step += 1
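# Noam-style warmup: the learning rate grows linearly for the first warmup_steps real steps,
# then decays as real_step ** -0.5, with an overall scale of max_lr * 100.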
cur_lr = self.args.max_lr * 100 * min(self.real_step ** (-0.5), self.real_step * self.args.warmup_steps**(-1.5))
for param_group in self.optimizer.param_groups:
param_group['lr'] = cur_lr

if self.real_step % 1000 == 0:
self.pbar.write('Current learning rate is {:.8f}, real_step: {}'.format(cur_lr, self.real_step))
def on_epoch_end(self):
self.pbar.write('Epoch {} is done !!!'.format(self.epoch))

def _save_model(model, model_name, save_dir, only_param=False):
""" 存储不含有显卡信息的 state_dict 或 model
:param model:
:param model_name:
:param save_dir: 保存的 directory
:param only_param:
:return:
"""
model_path = os.path.join(save_dir, model_name)
if not os.path.isdir(save_dir):
os.makedirs(save_dir, exist_ok=True)
if isinstance(model, nn.DataParallel):
model = model.module
if only_param:
state_dict = model.state_dict()
for key in state_dict:
state_dict[key] = state_dict[key].cpu()
torch.save(state_dict, model_path)
else:
_model_device = _get_model_device(model)
model.cpu()
torch.save(model, model_path)
model.to(_model_device)

class SaveModelCallback(Callback):
"""
由于Trainer在训练过程中只会保存最佳的模型, 该 callback 可实现多种方式的结果存储。
会根据训练开始的时间戳在 save_dir 下建立文件夹,在再文件夹下存放多个模型
-save_dir
-2019-07-03-15-06-36
-epoch0step20{metric_key}{evaluate_performance}.pt # metric是给定的metric_key, evaluate_perfomance是性能
-epoch1step40
-2019-07-03-15-10-00
-epoch:0step:20{metric_key}:{evaluate_performance}.pt # metric是给定的metric_key, evaluate_perfomance是性能
:param str save_dir: 将模型存放在哪个目录下,会在该目录下创建以时间戳命名的目录,并存放模型
:param int top: 保存dev表现top多少模型。-1为保存所有模型
:param bool only_param: 是否只保存模型权重
:param save_on_exception: 发生exception时,是否保存一份当时的模型
"""
def __init__(self, save_dir, top=5, only_param=False, save_on_exception=False):
super().__init__()

if not os.path.isdir(save_dir):
raise NotADirectoryError("{} is not a directory.".format(save_dir))
self.save_dir = save_dir
if top < 0:
self.top = sys.maxsize
else:
self.top = top
self._ordered_save_models = [] # List[Tuple]; Tuple[0] is the metric value, Tuple[1] is the model name. Metrics are ordered from worst to best, so deletions start from the head.

self.only_param = only_param
self.save_on_exception = save_on_exception

def on_train_begin(self):
self.save_dir = os.path.join(self.save_dir, self.trainer.start_time)

def on_valid_end(self, eval_result, metric_key, optimizer, is_better_eval):
metric_value = list(eval_result.values())[0][metric_key]
self._save_this_model(metric_value)

def _insert_into_ordered_save_models(self, pair):
# pair:(metric_value, model_name)
# Returns the model pair to save and the model pair to delete; the first element of each pair is the metric value, the second is the model name.
index = -1
for _pair in self._ordered_save_models:
if _pair[0]>=pair[0] and self.trainer.increase_better:
break
if not self.trainer.increase_better and _pair[0]<=pair[0]:
break
index += 1
save_pair = None
if len(self._ordered_save_models)<self.top or (len(self._ordered_save_models)>=self.top and index!=-1):
save_pair = pair
self._ordered_save_models.insert(index+1, pair)
delete_pair = None
if len(self._ordered_save_models)>self.top:
delete_pair = self._ordered_save_models.pop(0)
return save_pair, delete_pair

def _save_this_model(self, metric_value):
name = "epoch:{}_step:{}_{}:{:.6f}.pt".format(self.epoch, self.step, self.trainer.metric_key, metric_value)
save_pair, delete_pair = self._insert_into_ordered_save_models((metric_value, name))
if save_pair:
try:
_save_model(self.model, model_name=name, save_dir=self.save_dir, only_param=self.only_param)
except Exception as e:
print(f"The following exception:{e} happens when saves model to {self.save_dir}.")
if delete_pair:
try:
delete_model_path = os.path.join(self.save_dir, delete_pair[1])
if os.path.exists(delete_model_path):
os.remove(delete_model_path)
except Exception as e:
print(f"Fail to delete model {name} at {self.save_dir} caused by exception:{e}.")

def on_exception(self, exception):
if self.save_on_exception:
name = "epoch:{}_step:{}_Exception:{}.pt".format(self.epoch, self.step, exception.__class__.__name__)
_save_model(self.model, model_name=name, save_dir=self.save_dir, only_param=self.only_param)



+ 157
- 0
reproduction/Summmarization/BertSum/dataloader.py View File

@@ -0,0 +1,157 @@
from time import time
from datetime import timedelta

from fastNLP.io.dataset_loader import JsonLoader
from fastNLP.modules.encoder._bert import BertTokenizer
from fastNLP.io.base_loader import DataInfo
from fastNLP.core.const import Const

class BertData(JsonLoader):
def __init__(self, max_nsents=60, max_ntokens=100, max_len=512):
fields = {'article': 'article',
'label': 'label'}
super(BertData, self).__init__(fields=fields)

self.max_nsents = max_nsents
self.max_ntokens = max_ntokens
self.max_len = max_len

self.tokenizer = BertTokenizer.from_pretrained('/path/to/uncased_L-12_H-768_A-12')
self.cls_id = self.tokenizer.vocab['[CLS]']
self.sep_id = self.tokenizer.vocab['[SEP]']
self.pad_id = self.tokenizer.vocab['[PAD]']

def _load(self, paths):
dataset = super(BertData, self)._load(paths)
return dataset

def process(self, paths):
def truncate_articles(instance, max_nsents=self.max_nsents, max_ntokens=self.max_ntokens):
article = [' '.join(sent.lower().split()[:max_ntokens]) for sent in instance['article']]
return article[:max_nsents]

def truncate_labels(instance):
label = list(filter(lambda x: x < len(instance['article']), instance['label']))
return label
def bert_tokenize(instance, tokenizer, max_len, pad_value):
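# BertSum input format: the final sequence is [CLS] sent1 [SEP] [CLS] sent2 [SEP] ... [CLS] sentN [SEP];
# the joined article is truncated to max_len - 2 word pieces before the outer [CLS]/[SEP] are added,
# and the token ids are padded with pad_value up to max_len.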
article = instance['article']
article = ' [SEP] [CLS] '.join(article)
word_pieces = tokenizer.tokenize(article)[:(max_len - 2)]
word_pieces = ['[CLS]'] + word_pieces + ['[SEP]']
token_ids = tokenizer.convert_tokens_to_ids(word_pieces)
while len(token_ids) < max_len:
token_ids.append(pad_value)
assert len(token_ids) == max_len
return token_ids

def get_seg_id(instance, max_len, sep_id):
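# Interval segment ids as in BertSum: tokens of the 1st, 3rd, 5th, ... sentence (delimited by sep_id)
# receive segment id 0, tokens of the 2nd, 4th, ... receive 1; remaining positions are padded with 0.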
_segs = [-1] + [i for i, idx in enumerate(instance['article']) if idx == sep_id]
segs = [_segs[i] - _segs[i - 1] for i in range(1, len(_segs))]
segment_id = []
for i, length in enumerate(segs):
if i % 2 == 0:
segment_id += length * [0]
else:
segment_id += length * [1]
while len(segment_id) < max_len:
segment_id.append(0)
return segment_id
def get_cls_id(instance, cls_id):
classification_id = [i for i, idx in enumerate(instance['article']) if idx == cls_id]
return classification_id
def get_labels(instance):
labels = [0] * len(instance['cls_id'])
label_idx = list(filter(lambda x: x < len(instance['cls_id']), instance['label']))
for idx in label_idx:
labels[idx] = 1
return labels

datasets = {}
for name in paths:
datasets[name] = self._load(paths[name])
# remove empty samples
datasets[name].drop(lambda ins: len(ins['article']) == 0 or len(ins['label']) == 0)
# truncate articles
datasets[name].apply(lambda ins: truncate_articles(ins, self.max_nsents, self.max_ntokens), new_field_name='article')
# truncate labels
datasets[name].apply(truncate_labels, new_field_name='label')
# tokenize and convert tokens to id
datasets[name].apply(lambda ins: bert_tokenize(ins, self.tokenizer, self.max_len, self.pad_id), new_field_name='article')
# get segment id
datasets[name].apply(lambda ins: get_seg_id(ins, self.max_len, self.sep_id), new_field_name='segment_id')
# get classification id
datasets[name].apply(lambda ins: get_cls_id(ins, self.cls_id), new_field_name='cls_id')

# get label
datasets[name].apply(get_labels, new_field_name='label')
# rename filed
datasets[name].rename_field('article', Const.INPUTS(0))
datasets[name].rename_field('segment_id', Const.INPUTS(1))
datasets[name].rename_field('cls_id', Const.INPUTS(2))
datasets[name].rename_field('label', Const.TARGET)

# set input and target
datasets[name].set_input(Const.INPUTS(0), Const.INPUTS(1), Const.INPUTS(2))
datasets[name].set_target(Const.TARGET)
# set padding value
datasets[name].set_pad_val(Const.INPUTS(0), 0)

return DataInfo(datasets=datasets)


class BertSumLoader(JsonLoader):
def __init__(self):
fields = {'article': 'article',
'segment_id': 'segment_id',
'cls_id': 'cls_id',
'label': Const.TARGET
}
super(BertSumLoader, self).__init__(fields=fields)

def _load(self, paths):
dataset = super(BertSumLoader, self)._load(paths)
return dataset

def process(self, paths):
def get_seq_len(instance):
return len(instance['article'])

print('Start loading datasets !!!')
start = time()

# load datasets
datasets = {}
for name in paths:
datasets[name] = self._load(paths[name])
datasets[name].apply(get_seq_len, new_field_name='seq_len')

# set input and target
datasets[name].set_input('article', 'segment_id', 'cls_id')
datasets[name].set_target(Const.TARGET)
# set padding value
datasets[name].set_pad_val('article', 0)
datasets[name].set_pad_val('segment_id', 0)
datasets[name].set_pad_val('cls_id', -1)
datasets[name].set_pad_val(Const.TARGET, 0)

print('Finished in {}'.format(timedelta(seconds=time()-start)))

return DataInfo(datasets=datasets)

+ 178
- 0
reproduction/Summmarization/BertSum/metrics.py View File

@@ -0,0 +1,178 @@
import numpy as np
import json
from os.path import join
import torch
import logging
import tempfile
import subprocess as sp
from datetime import timedelta
from time import time

from pyrouge import Rouge155
from pyrouge.utils import log

from fastNLP.core.losses import LossBase
from fastNLP.core.metrics import MetricBase

_ROUGE_PATH = '/path/to/RELEASE-1.5.5'

class MyBCELoss(LossBase):
def __init__(self, pred=None, target=None, mask=None):
super(MyBCELoss, self).__init__()
self._init_param_map(pred=pred, target=target, mask=mask)
self.loss_func = torch.nn.BCELoss(reduction='none')

def get_loss(self, pred, target, mask):
loss = self.loss_func(pred, target.float())
loss = (loss * mask.float()).sum()
return loss

class LossMetric(MetricBase):
def __init__(self, pred=None, target=None, mask=None):
super(LossMetric, self).__init__()
self._init_param_map(pred=pred, target=target, mask=mask)
self.loss_func = torch.nn.BCELoss(reduction='none')
self.avg_loss = 0.0
self.nsamples = 0

def evaluate(self, pred, target, mask):
batch_size = pred.size(0)
loss = self.loss_func(pred, target.float())
loss = (loss * mask.float()).sum()
self.avg_loss += loss
self.nsamples += batch_size

def get_metric(self, reset=True):
self.avg_loss = self.avg_loss / self.nsamples
eval_result = {'loss': self.avg_loss}
if reset:
self.avg_loss = 0
self.nsamples = 0
return eval_result
class RougeMetric(MetricBase):
def __init__(self, data_path, dec_path, ref_path, n_total, n_ext=3, ngram_block=3, pred=None, target=None, mask=None):
super(RougeMetric, self).__init__()
self._init_param_map(pred=pred, target=target, mask=mask)
self.data_path = data_path
self.dec_path = dec_path
self.ref_path = ref_path
self.n_total = n_total
self.n_ext = n_ext
self.ngram_block = ngram_block

self.cur_idx = 0
self.ext = []
self.start = time()

@staticmethod
def eval_rouge(dec_dir, ref_dir):
assert _ROUGE_PATH is not None
log.get_global_console_logger().setLevel(logging.WARNING)
dec_pattern = '(\d+).dec'
ref_pattern = '#ID#.ref'
cmd = '-c 95 -r 1000 -n 2 -m'
with tempfile.TemporaryDirectory() as tmp_dir:
Rouge155.convert_summaries_to_rouge_format(
dec_dir, join(tmp_dir, 'dec'))
Rouge155.convert_summaries_to_rouge_format(
ref_dir, join(tmp_dir, 'ref'))
Rouge155.write_config_static(
join(tmp_dir, 'dec'), dec_pattern,
join(tmp_dir, 'ref'), ref_pattern,
join(tmp_dir, 'settings.xml'), system_id=1
)
cmd = (join(_ROUGE_PATH, 'ROUGE-1.5.5.pl')
+ ' -e {} '.format(join(_ROUGE_PATH, 'data'))
+ cmd
+ ' -a {}'.format(join(tmp_dir, 'settings.xml')))
output = sp.check_output(cmd.split(' '), universal_newlines=True)
R_1 = float(output.split('\n')[3].split(' ')[3])
R_2 = float(output.split('\n')[7].split(' ')[3])
R_L = float(output.split('\n')[11].split(' ')[3])
print(output)
return R_1, R_2, R_L
def evaluate(self, pred, target, mask):
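# Adding the 0/1 mask lifts the scores of real sentences (sigmoid outputs in [0, 1]) by 1, so padded
# positions can never outrank real sentences in the descending argsort below.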
pred = pred + mask.float()
pred = pred.cpu().data.numpy()
ext_ids = np.argsort(-pred, 1)
for sent_id in ext_ids:
self.ext.append(sent_id)
self.cur_idx += 1
print('{}/{} ({:.2f}%) decoded in {} seconds\r'.format(
self.cur_idx, self.n_total, self.cur_idx/self.n_total*100, timedelta(seconds=int(time()-self.start))
), end='')

def get_metric(self, use_ngram_block=True, reset=True):
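# N-gram blocking (trigram blocking by default, as in BertSum): a candidate sentence is skipped if it
# shares any n-gram with sentences already selected, reducing redundancy in the extracted summary.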
def check_n_gram(sentence, n, dic):
tokens = sentence.split(' ')
s_len = len(tokens)
for i in range(s_len):
if i + n > s_len:
break
if ' '.join(tokens[i: i + n]) in dic:
return False
return True # no n_gram overlap

# load original data
data = []
with open(self.data_path) as f:
for line in f:
cur_data = json.loads(line)
if 'text' in cur_data:
new_data = {}
new_data['article'] = cur_data['text']
new_data['abstract'] = cur_data['summary']
data.append(new_data)
else:
data.append(cur_data)
# write decode sentences and references
if use_ngram_block == True:
print('\nStart {}-gram blocking !!!'.format(self.ngram_block))
for i, ext_ids in enumerate(self.ext):
dec, ref = [], []
if use_ngram_block == False:
n_sent = min(len(data[i]['article']), self.n_ext)
for j in range(n_sent):
idx = ext_ids[j]
dec.append(data[i]['article'][idx])
else:
n_sent = len(ext_ids)
dic = {}
for j in range(n_sent):
sent = data[i]['article'][ext_ids[j]]
if check_n_gram(sent, self.ngram_block, dic) == True:
dec.append(sent)
# update dic
tokens = sent.split(' ')
s_len = len(tokens)
for k in range(s_len):
if k + self.ngram_block > s_len:
break
dic[' '.join(tokens[k: k + self.ngram_block])] = 1
if len(dec) >= self.n_ext:
break

for sent in data[i]['abstract']:
ref.append(sent)

with open(join(self.dec_path, '{}.dec'.format(i)), 'w') as f:
for sent in dec:
print(sent, file=f)
with open(join(self.ref_path, '{}.ref'.format(i)), 'w') as f:
for sent in ref:
print(sent, file=f)
print('\nStart evaluating ROUGE score !!!')
R_1, R_2, R_L = RougeMetric.eval_rouge(self.dec_path, self.ref_path)
eval_result = {'ROUGE-1': R_1, 'ROUGE-2': R_2, 'ROUGE-L':R_L}

if reset == True:
self.cur_idx = 0
self.ext = []
self.start = time()
return eval_result

+ 51
- 0
reproduction/Summmarization/BertSum/model.py View File

@@ -0,0 +1,51 @@
import torch
from torch import nn
from torch.nn import init

from fastNLP.modules.encoder._bert import BertModel


class Classifier(nn.Module):
def __init__(self, hidden_size):
super(Classifier, self).__init__()
self.linear = nn.Linear(hidden_size, 1)
self.sigmoid = nn.Sigmoid()

def forward(self, inputs, mask_cls):
h = self.linear(inputs).squeeze(-1) # [batch_size, seq_len]
sent_scores = self.sigmoid(h) * mask_cls.float()
return sent_scores


class BertSum(nn.Module):
def __init__(self, hidden_size=768):
super(BertSum, self).__init__()
self.hidden_size = hidden_size

self.encoder = BertModel.from_pretrained('/path/to/uncased_L-12_H-768_A-12')
self.decoder = Classifier(self.hidden_size)

def forward(self, article, segment_id, cls_id):
# print(article.device)
# print(segment_id.device)
# print(cls_id.device)

input_mask = 1 - (article == 0)   # 1 for real tokens, 0 for padding (pad id assumed to be 0)
mask_cls = 1 - (cls_id == -1)     # 1 for real [CLS] positions, 0 for padded entries (-1)
assert input_mask.size() == article.size()
assert mask_cls.size() == cls_id.size()

bert_out = self.encoder(article, token_type_ids=segment_id, attention_mask=input_mask)
bert_out = bert_out[0][-1] # last layer

sent_emb = bert_out[torch.arange(bert_out.size(0)).unsqueeze(1), cls_id]
sent_emb = sent_emb * mask_cls.unsqueeze(-1).float()
assert sent_emb.size() == (article.size(0), cls_id.size(1), self.hidden_size) # [batch_size, seq_len, hidden_size]

sent_scores = self.decoder(sent_emb, mask_cls) # [batch_size, seq_len]
assert sent_scores.size() == (article.size(0), cls_id.size(1))

return {'pred': sent_scores, 'mask': mask_cls}
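A toy sketch of the [CLS]-gathering step in forward above (shapes are made up; pad values follow the assumptions in the code, i.e. token id 0 for padding and -1 for padded cls_id entries):

import torch
bert_out = torch.randn(2, 7, 4)                  # [batch_size, seq_len, hidden_size]
cls_id = torch.tensor([[0, 3, 5], [0, 4, -1]])   # positions of the [CLS] tokens, padded with -1
mask_cls = (cls_id != -1)
# fancy indexing: row indices of shape [2, 1] broadcast against cls_id of shape [2, 3]
sent_emb = bert_out[torch.arange(2).unsqueeze(1), cls_id]   # [2, 3, 4]
sent_emb = sent_emb * mask_cls.unsqueeze(-1).float()        # zero out the padded slot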

+ 147
- 0
reproduction/Summmarization/BertSum/train_BertSum.py View File

@@ -0,0 +1,147 @@
import sys
import argparse
import os
import json
import torch
from time import time
from datetime import timedelta
from os.path import join, exists
from torch.optim import Adam

from utils import get_data_path, get_rouge_path

from dataloader import BertSumLoader
from model import BertSum
from fastNLP.core.optimizer import AdamW
from metrics import MyBCELoss, LossMetric, RougeMetric
from fastNLP.core.sampler import BucketSampler
from callback import MyCallback, SaveModelCallback
from fastNLP.core.trainer import Trainer
from fastNLP.core.tester import Tester


def configure_training(args):
devices = [int(gpu) for gpu in args.gpus.split(',')]
params = {}
params['label_type'] = args.label_type
params['batch_size'] = args.batch_size
params['accum_count'] = args.accum_count
params['max_lr'] = args.max_lr
params['warmup_steps'] = args.warmup_steps
params['n_epochs'] = args.n_epochs
params['valid_steps'] = args.valid_steps
return devices, params

def train_model(args):
# check if the data_path and save_path exists
data_paths = get_data_path(args.mode, args.label_type)
for name in data_paths:
assert exists(data_paths[name])
if not exists(args.save_path):
os.makedirs(args.save_path)
# load summarization datasets
datasets = BertSumLoader().process(data_paths)
print('Information of dataset is:')
print(datasets)
train_set = datasets.datasets['train']
valid_set = datasets.datasets['val']
# configure training
devices, train_params = configure_training(args)
with open(join(args.save_path, 'params.json'), 'w') as f:
json.dump(train_params, f, indent=4)
print('Devices is:')
print(devices)

# configure model
model = BertSum()
optimizer = Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=0)
callbacks = [MyCallback(args), SaveModelCallback(args.save_path)]
criterion = MyBCELoss()
val_metric = [LossMetric()]
# sampler = BucketSampler(num_buckets=32, batch_size=args.batch_size)
trainer = Trainer(train_data=train_set, model=model, optimizer=optimizer,
loss=criterion, batch_size=args.batch_size, # sampler=sampler,
update_every=args.accum_count, n_epochs=args.n_epochs,
print_every=100, dev_data=valid_set, metrics=val_metric,
metric_key='-loss', validate_every=args.valid_steps,
save_path=args.save_path, device=devices, callbacks=callbacks)
print('Start training with the following hyper-parameters:')
print(train_params)
trainer.train()
def test_model(args):

models = os.listdir(args.save_path)
# load dataset
data_paths = get_data_path(args.mode, args.label_type)
datasets = BertSumLoader().process(data_paths)
print('Information of dataset is:')
print(datasets)
test_set = datasets.datasets['test']
# only need 1 gpu for testing
device = int(args.gpus)
args.batch_size = 1

for cur_model in models:
print('Current model is {}'.format(cur_model))

# load model
model = torch.load(join(args.save_path, cur_model))
# configure testing
original_path, dec_path, ref_path = get_rouge_path(args.label_type)
test_metric = RougeMetric(data_path=original_path, dec_path=dec_path,
ref_path=ref_path, n_total = len(test_set))
tester = Tester(data=test_set, model=model, metrics=[test_metric],
batch_size=args.batch_size, device=device)
tester.test()


if __name__ == '__main__':
parser = argparse.ArgumentParser(
description='training/testing of BertSum (Liu et al. 2019)'
)
parser.add_argument('--mode', required=True,
help='training or testing of BertSum', type=str)

parser.add_argument('--label_type', default='greedy',
help='greedy/limit', type=str)
parser.add_argument('--save_path', required=True,
help='root of the model', type=str)
# example for gpus input: '0,1,2,3'
parser.add_argument('--gpus', required=True,
help='available gpus for training (separated by commas)', type=str)
parser.add_argument('--batch_size', default=18,
help='the training batch size', type=int)
parser.add_argument('--accum_count', default=2,
help='number of update steps to accumulate before performing a backward/update pass.', type=int)
parser.add_argument('--max_lr', default=2e-5,
help='max learning rate for warm up', type=float)
parser.add_argument('--warmup_steps', default=10000,
help='warm up steps for training', type=int)
parser.add_argument('--n_epochs', default=10,
help='total number of training epochs', type=int)
parser.add_argument('--valid_steps', default=1000,
help='number of update steps for checkpoint and validation', type=int)

args = parser.parse_args()
if args.mode == 'train':
print('Training process of BertSum !!!')
train_model(args)
else:
print('Testing process of BertSum !!!')
test_model(args)
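Given the argparse options above, a typical invocation (paths and GPU ids are placeholders) would be `python train_BertSum.py --mode train --label_type greedy --save_path ./save --gpus 0,1,2,3` for training, and `python train_BertSum.py --mode test --label_type greedy --save_path ./save --gpus 0` for testing, since testing only uses a single GPU.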





+ 24
- 0
reproduction/Summmarization/BertSum/utils.py View File

@@ -0,0 +1,24 @@
import os
from os.path import exists

def get_data_path(mode, label_type):
paths = {}
if mode == 'train':
paths['train'] = 'data/' + label_type + '/bert.train.jsonl'
paths['val'] = 'data/' + label_type + '/bert.val.jsonl'
else:
paths['test'] = 'data/' + label_type + '/bert.test.jsonl'
return paths

def get_rouge_path(label_type):
if label_type == 'others':
data_path = 'data/' + label_type + '/bert.test.jsonl'
else:
data_path = 'data/' + label_type + '/test.jsonl'
dec_path = 'dec'
ref_path = 'ref'
if not exists(ref_path):
os.makedirs(ref_path)
if not exists(dec_path):
os.makedirs(dec_path)
return data_path, dec_path, ref_path

+ 0
- 93
reproduction/seqence_labelling/ner/data/Conll2003Loader.py View File

@@ -1,93 +0,0 @@

from fastNLP.core.vocabulary import VocabularyOption
from fastNLP.io.base_loader import DataSetLoader, DataInfo
from typing import Union, Dict
from fastNLP import Vocabulary
from fastNLP import Const
from reproduction.utils import check_dataloader_paths

from fastNLP.io.dataset_loader import ConllLoader
from reproduction.seqence_labelling.ner.data.utils import iob2bioes, iob2


class Conll2003DataLoader(DataSetLoader):
def __init__(self, task:str='ner', encoding_type:str='bioes'):
"""
Load English corpora in the CoNLL-2003 format; information about the dataset is available at
https://www.clips.uantwerpen.be/conll2003/ner/. When task is pos, target in the returned DataSet is taken
from the 2nd column; when task is chunk, from the 3rd column; when task is ner, from the 4th column. All
"-DOCSTART- -X- O O" lines are ignored, so the number of samples is smaller than reported in many papers;
since "-DOCSTART- -X- O O" is only a document separator and should not be predicted, lines starting with
-DOCSTART- are skipped. For the ner and chunk tasks the loaded target follows encoding_type; for the pos
task it is simply the pos column.

:param task: the annotation task to load. One of ner, pos, chunk
"""
assert task in ('ner', 'pos', 'chunk')
index = {'ner':3, 'pos':1, 'chunk':2}[task]
self._loader = ConllLoader(headers=['raw_words', 'target'], indexes=[0, index])
self._tag_converters = None
if task in ('ner', 'chunk'):
self._tag_converters = [iob2]
if encoding_type == 'bioes':
self._tag_converters.append(iob2bioes)

def load(self, path: str):
dataset = self._loader.load(path)
def convert_tag_schema(tags):
for converter in self._tag_converters:
tags = converter(tags)
return tags
if self._tag_converters:
dataset.apply_field(convert_tag_schema, field_name=Const.TARGET, new_field_name=Const.TARGET)
return dataset

def process(self, paths: Union[str, Dict[str, str]], word_vocab_opt:VocabularyOption=None, lower:bool=True):
"""
Read and process the data. Lines starting with '-DOCSTART-' are ignored.

:param paths:
:param word_vocab_opt: initialization options for the vocabulary
:param lower: whether to lowercase all letters
:return:
"""
# read the data
paths = check_dataloader_paths(paths)
data = DataInfo()
input_fields = [Const.TARGET, Const.INPUT, Const.INPUT_LEN]
target_fields = [Const.TARGET, Const.INPUT_LEN]
for name, path in paths.items():
dataset = self.load(path)
dataset.apply_field(lambda words: words, field_name='raw_words', new_field_name=Const.INPUT)
if lower:
dataset.words.lower()
data.datasets[name] = dataset

# construct the word vocabulary
word_vocab = Vocabulary(min_freq=2) if word_vocab_opt is None else Vocabulary(**word_vocab_opt)
word_vocab.from_dataset(data.datasets['train'], field_name=Const.INPUT,
no_create_entry_dataset=[dataset for name, dataset in data.datasets.items() if name!='train'])
word_vocab.index_dataset(*data.datasets.values(), field_name=Const.INPUT, new_field_name=Const.INPUT)
data.vocabs[Const.INPUT] = word_vocab

# cap words
cap_word_vocab = Vocabulary()
cap_word_vocab.from_dataset(data.datasets['train'], field_name='raw_words',
no_create_entry_dataset=[dataset for name, dataset in data.datasets.items() if name!='train'])
cap_word_vocab.index_dataset(*data.datasets.values(), field_name='raw_words', new_field_name='cap_words')
input_fields.append('cap_words')
data.vocabs['cap_words'] = cap_word_vocab

# build the target vocabulary
target_vocab = Vocabulary(unknown=None, padding=None)
target_vocab.from_dataset(*data.datasets.values(), field_name=Const.TARGET)
target_vocab.index_dataset(*data.datasets.values(), field_name=Const.TARGET)
data.vocabs[Const.TARGET] = target_vocab

for name, dataset in data.datasets.items():
dataset.add_seq_len(Const.INPUT, new_field_name=Const.INPUT_LEN)
dataset.set_input(*input_fields)
dataset.set_target(*target_fields)

return data

if __name__ == '__main__':
pass
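A minimal usage sketch of the loader above (the file paths are placeholders):

loader = Conll2003DataLoader(task='ner', encoding_type='bioes')
data_info = loader.process({'train': 'conll2003/train.txt',
                            'dev': 'conll2003/dev.txt',
                            'test': 'conll2003/test.txt'}, lower=True)
print(data_info.vocabs[Const.TARGET])       # BIOES tag vocabulary
print(data_info.datasets['train'][:2])      # first two indexed samples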

+ 0
- 152
reproduction/seqence_labelling/ner/data/OntoNoteLoader.py View File

@@ -1,152 +0,0 @@
from fastNLP.core.vocabulary import VocabularyOption
from fastNLP.io.base_loader import DataSetLoader, DataInfo
from typing import Union, Dict
from fastNLP import DataSet
from fastNLP import Vocabulary
from fastNLP import Const
from reproduction.utils import check_dataloader_paths

from fastNLP.io.dataset_loader import ConllLoader
from reproduction.seqence_labelling.ner.data.utils import iob2bioes, iob2

class OntoNoteNERDataLoader(DataSetLoader):
"""
Reads OntoNotes data that has already been converted to the CoNLL format. For converting OntoNotes to the CoNLL format, see https://github.com/yhcc/OntoNotes-5.0-NER.

"""
def __init__(self, encoding_type:str='bioes'):
assert encoding_type in ('bioes', 'bio')
self.encoding_type = encoding_type
if encoding_type=='bioes':
self.encoding_method = iob2bioes
else:
self.encoding_method = iob2

def load(self, path:str)->DataSet:
"""
Given a file path, read the data. The returned DataSet contains the following fields:
raw_words: List[str]
target: List[str]

:param path:
:return:
"""
dataset = ConllLoader(headers=['raw_words', 'target'], indexes=[3, 10]).load(path)
def convert_to_bio(tags):
bio_tags = []
flag = None
for tag in tags:
label = tag.strip("()*")
if '(' in tag:
bio_label = 'B-' + label
flag = label
elif flag:
bio_label = 'I-' + flag
else:
bio_label = 'O'
if ')' in tag:
flag = None
bio_tags.append(bio_label)
return self.encoding_method(bio_tags)

def convert_word(words):
converted_words = []
for word in words:
word = word.replace('/.', '.') # some trailing periods appear as /.
if not word.startswith('-'):
converted_words.append(word)
continue
# these bracket symbols were escaped in the data; convert them back
tfrs = {'-LRB-':'(',
'-RRB-': ')',
'-LSB-': '[',
'-RSB-': ']',
'-LCB-': '{',
'-RCB-': '}'
}
if word in tfrs:
converted_words.append(tfrs[word])
else:
converted_words.append(word)
return converted_words

dataset.apply_field(convert_word, field_name='raw_words', new_field_name='raw_words')
dataset.apply_field(convert_to_bio, field_name='target', new_field_name='target')

return dataset

def process(self, paths: Union[str, Dict[str, str]], word_vocab_opt:VocabularyOption=None,
lower:bool=True)->DataInfo:
"""
Read and process the data. The returned DataInfo contains:
    vocabs:
        word: Vocabulary
        target: Vocabulary
    datasets:
        train: DataSet
            words: List[int], set as input
            target: int label, set as both input and target
            seq_len: int, the sentence length, set as both input and target
            raw_words: List[str]
        xxx (may vary depending on the paths passed in)

:param paths:
:param word_vocab_opt: initialization options for the vocabulary
:param lower: whether to lowercase
:return:
"""
paths = check_dataloader_paths(paths)
data = DataInfo()
input_fields = [Const.TARGET, Const.INPUT, Const.INPUT_LEN]
target_fields = [Const.TARGET, Const.INPUT_LEN]
for name, path in paths.items():
dataset = self.load(path)
dataset.apply_field(lambda words: words, field_name='raw_words', new_field_name=Const.INPUT)
if lower:
dataset.words.lower()
data.datasets[name] = dataset

# construct the word vocabulary
word_vocab = Vocabulary(min_freq=2) if word_vocab_opt is None else Vocabulary(**word_vocab_opt)
word_vocab.from_dataset(data.datasets['train'], field_name=Const.INPUT,
no_create_entry_dataset=[dataset for name, dataset in data.datasets.items() if name!='train'])
word_vocab.index_dataset(*data.datasets.values(), field_name=Const.INPUT, new_field_name=Const.INPUT)
data.vocabs[Const.INPUT] = word_vocab

# cap words
cap_word_vocab = Vocabulary()
cap_word_vocab.from_dataset(*data.datasets.values(), field_name='raw_words')
cap_word_vocab.index_dataset(*data.datasets.values(), field_name='raw_words', new_field_name='cap_words')
input_fields.append('cap_words')
data.vocabs['cap_words'] = cap_word_vocab

# build the target vocabulary
target_vocab = Vocabulary(unknown=None, padding=None)
target_vocab.from_dataset(*data.datasets.values(), field_name=Const.TARGET)
target_vocab.index_dataset(*data.datasets.values(), field_name=Const.TARGET)
data.vocabs[Const.TARGET] = target_vocab

for name, dataset in data.datasets.items():
dataset.add_seq_len(Const.INPUT, new_field_name=Const.INPUT_LEN)
dataset.set_input(*input_fields)
dataset.set_target(*target_fields)

return data


if __name__ == '__main__':
loader = OntoNoteNERDataLoader()
dataset = loader.load('/hdd/fudanNLP/fastNLP/others/data/v4/english/test.txt')
print(dataset.target.value_count())
print(dataset[:4])


"""
train 115812 2200752
development 15680 304684
test 12217 230111

train 92403 1901772
valid 13606 279180
test 10258 204135
"""

+ 0
- 49
reproduction/seqence_labelling/ner/data/utils.py View File

@@ -1,49 +0,0 @@
from typing import List

def iob2(tags:List[str])->List[str]:
"""
Check that the tags are valid IOB data; IOB1 tags are automatically converted to IOB2.

:param tags: the tags to convert
"""
for i, tag in enumerate(tags):
if tag == "O":
continue
split = tag.split("-")
if len(split) != 2 or split[0] not in ["I", "B"]:
raise TypeError("The encoding schema is not a valid IOB type.")
if split[0] == "B":
continue
elif i == 0 or tags[i - 1] == "O": # conversion IOB1 to IOB2
tags[i] = "B" + tag[1:]
elif tags[i - 1][1:] == tag[1:]:
continue
else: # conversion IOB1 to IOB2
tags[i] = "B" + tag[1:]
return tags

def iob2bioes(tags:List[str])->List[str]:
"""
Convert IOB tags to the BIOES encoding.
:param tags:
:return:
"""
new_tags = []
for i, tag in enumerate(tags):
if tag == 'O':
new_tags.append(tag)
else:
split = tag.split('-')[0]
if split == 'B':
if i+1!=len(tags) and tags[i+1].split('-')[0] == 'I':
new_tags.append(tag)
else:
new_tags.append(tag.replace('B-', 'S-'))
elif split == 'I':
if i + 1<len(tags) and tags[i+1].split('-')[0] == 'I':
new_tags.append(tag)
else:
new_tags.append(tag.replace('I-', 'E-'))
else:
raise TypeError("Invalid IOB format.")
return new_tags
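For reference, a quick example of what the two converters above produce on a toy IOB1 sequence:

tags = ['I-PER', 'I-PER', 'O', 'I-LOC']
tags = iob2(tags)          # -> ['B-PER', 'I-PER', 'O', 'B-LOC']
print(iob2bioes(tags))     # -> ['B-PER', 'E-PER', 'O', 'S-LOC']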

+ 3
- 3
reproduction/text_classification/README.md View File

@@ -20,7 +20,7 @@ model name | yelp_p | yelp_f | sst-2|IMDB
char_cnn | 93.80/95.12 | - | - |- char_cnn | 93.80/95.12 | - | - |-
dpcnn | 95.50/97.36 | - | - |- dpcnn | 95.50/97.36 | - | - |-
HAN |- | - | - |- HAN |- | - | - |-
LSTM| 95.74/- |- |- |88.52/-
AWD-LSTM| 95.96/- |- |- |88.91/-
LSTM+self_attention| 96.34/- | - | - |89.53/-
LSTM| 95.74/- |64.16/- |- |88.52/-
AWD-LSTM| 95.96/- |64.74/- |- |88.91/-
LSTM+self_attention| 96.34/- | 65.78/- | - |89.53/-



+ 4
- 2
reproduction/utils.py View File

@@ -29,13 +29,15 @@ def check_dataloader_paths(paths:Union[str, Dict[str, str]])->Dict[str, str]:
path_pair = ('train', filename) path_pair = ('train', filename)
if 'dev' in filename: if 'dev' in filename:
if path_pair: if path_pair:
raise Exception("File:{} in {} contains bot `{}` and `dev`.".format(filename, paths, path_pair[0]))
raise Exception("File:{} in {} contains both `{}` and `dev`.".format(filename, paths, path_pair[0]))
path_pair = ('dev', filename) path_pair = ('dev', filename)
if 'test' in filename: if 'test' in filename:
if path_pair: if path_pair:
raise Exception("File:{} in {} contains bot `{}` and `test`.".format(filename, paths, path_pair[0]))
raise Exception("File:{} in {} contains both `{}` and `test`.".format(filename, paths, path_pair[0]))
path_pair = ('test', filename) path_pair = ('test', filename)
if path_pair: if path_pair:
if path_pair[0] in files:
raise RuntimeError(f"Multiple file under {paths} have '{path_pair[0]}' in their filename.")
files[path_pair[0]] = os.path.join(paths, path_pair[1]) files[path_pair[0]] = os.path.join(paths, path_pair[1])
return files return files
else: else:

