From 08399e9b6e1a261636a5bbb54ca1e46bb5e15b49 Mon Sep 17 00:00:00 2001
From: LeeSureman <1349342500@QQ.com>
Date: Thu, 10 Oct 2019 13:02:13 +0800
Subject: [PATCH] debug

---
 .../chinese_ner/LatticeLSTM/README.md         |  39 ++-
 .../chinese_ner/LatticeLSTM/check_output.py   | 252 ------------------
 .../chinese_ner/LatticeLSTM/load_data.py      |  86 +++++-
 .../chinese_ner/LatticeLSTM/main.py           |  44 ++-
 .../chinese_ner/LatticeLSTM/models.py         |  17 +-
 .../chinese_ner/LatticeLSTM/modules.py        |   6 +-
 .../chinese_ner/LatticeLSTM/pathes.py         |   3 +-
 7 files changed, 171 insertions(+), 276 deletions(-)
 delete mode 100644 reproduction/seqence_labelling/chinese_ner/LatticeLSTM/check_output.py

diff --git a/reproduction/seqence_labelling/chinese_ner/LatticeLSTM/README.md b/reproduction/seqence_labelling/chinese_ner/LatticeLSTM/README.md
index 12294e6c..55c1bdee 100644
--- a/reproduction/seqence_labelling/chinese_ner/LatticeLSTM/README.md
+++ b/reproduction/seqence_labelling/chinese_ner/LatticeLSTM/README.md
@@ -1,7 +1,11 @@
+[中文](#支持批并行的LatticeLSTM)
+
+[English](#Batch-Parallel-LatticeLSTM)
 # 支持批并行的LatticeLSTM
 + 原论文:https://arxiv.org/abs/1805.02023
 + 在batch=10时,计算速度已明显超过[原版代码](https://github.com/jiesutd/LatticeLSTM)。
 + 在main.py中添加三个embedding的文件路径以及对应数据集的路径即可运行
++ 此代码集合已加入fastNLP
 
 ## 运行环境:
 + python >= 3.7.3
@@ -18,7 +22,7 @@
 ## 性能:
 |数据集| 目前达到的F1分数(test)|原文中的F1分数(test)|
 |:----:|:----:|:----:|
-|Weibo|62.73|58.79|
+|Weibo|58.66|58.79|
 |Resume|95.18|94.46|
 |Ontonote|73.62|73.88|
 
@@ -26,3 +30,36 @@
 ## 如有任何疑问请联系:
 + lixiaonan_xdu@outlook.com
 
+
+---
+
+# Batch Parallel LatticeLSTM
++ Paper: https://arxiv.org/abs/1805.02023
++ With a batch size of 10, it is already noticeably faster than the [original code](https://github.com/jiesutd/LatticeLSTM).
++ Set the paths of the three embedding files and of the corresponding dataset in main.py before running it.
++ This code has been merged into fastNLP.
+
+## Environment:
++ python >= 3.7.3
++ fastNLP >= dev.0.5.0
++ pytorch >= 1.1.0
++ numpy >= 1.16.4
++ fitlog >= 0.2.0
+
+## Datasets:
++ Resume, downloaded from [here](https://github.com/jiesutd/LatticeLSTM)
++ Ontonote
++ [Weibo](https://github.com/hltcoe/golden-horse)
+
+For datasets not listed here, write a loading function in load_data.py whose return format matches that of *load_ontonotes4ner* (see the sketch after the Performance section).
+
+## Performance:
+|Dataset|F1 of this code (test)|F1 in paper (test)|
+|:----:|:----:|:----:|
+|Weibo|58.66|58.79|
+|Resume|95.18|94.46|
+|Ontonote|73.62|73.88|
+
+Note: the Weibo dataset used here is V2, the revised version.
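+
+## Adding a new dataset:
+Every loader in load_data.py returns the same triple `(datasets, vocabs, embeddings)`. The following is a minimal, hypothetical sketch of such an interface function; the function name, the `*.char.bmes` file layout and the embedding arguments are assumptions for illustration, while the field and vocabulary names mirror *load_ontonotes4ner*.
+
+```python
+import os
+from fastNLP import Vocabulary
+from fastNLP.io.loader import ConllLoader
+from fastNLP.embeddings import StaticEmbedding
+from utils import get_bigrams
+
+def load_my_ner(path, char_embedding_path=None, bigram_embedding_path=None):
+    # each split is a CoNLL-style file with one "char tag" pair per line
+    loader = ConllLoader(['chars', 'target'])
+    datasets = {name: loader.load(os.path.join(path, '{}.char.bmes'.format(name))).datasets['train']
+                for name in ['train', 'dev', 'test']}
+
+    for ds in datasets.values():
+        ds.apply_field(get_bigrams, 'chars', 'bigrams')      # character bigrams
+        ds.add_seq_len('chars', new_field_name='seq_len')
+
+    # build vocabularies on train; dev/test tokens do not create new entries
+    vocabs = {'char': Vocabulary(), 'bigram': Vocabulary(),
+              'label': Vocabulary(padding=None, unknown=None)}
+    vocabs['char'].from_dataset(datasets['train'], field_name='chars',
+                                no_create_entry_dataset=[datasets['dev'], datasets['test']])
+    vocabs['bigram'].from_dataset(datasets['train'], field_name='bigrams',
+                                  no_create_entry_dataset=[datasets['dev'], datasets['test']])
+    vocabs['label'].from_dataset(datasets['train'], field_name='target')
+
+    # optional pretrained unigram/bigram embeddings, as in the other loaders
+    embeddings = {}
+    if char_embedding_path is not None:
+        embeddings['char'] = StaticEmbedding(vocabs['char'], model_dir_or_name=char_embedding_path)
+    if bigram_embedding_path is not None:
+        embeddings['bigram'] = StaticEmbedding(vocabs['bigram'], model_dir_or_name=bigram_embedding_path)
+
+    return datasets, vocabs, embeddings
+```
+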
+## If any confusion, please contact: ++ lixiaonan_xdu@outlook.com diff --git a/reproduction/seqence_labelling/chinese_ner/LatticeLSTM/check_output.py b/reproduction/seqence_labelling/chinese_ner/LatticeLSTM/check_output.py deleted file mode 100644 index fa8aeae3..00000000 --- a/reproduction/seqence_labelling/chinese_ner/LatticeLSTM/check_output.py +++ /dev/null @@ -1,252 +0,0 @@ -import torch.nn as nn -from pathes import * -from load_data import load_ontonotes4ner,equip_chinese_ner_with_skip,load_yangjie_rich_pretrain_word_list,load_resume_ner -from fastNLP.embeddings import StaticEmbedding -from models import LatticeLSTM_SeqLabel,LSTM_SeqLabel,LatticeLSTM_SeqLabel_V1 -from fastNLP import CrossEntropyLoss,SpanFPreRecMetric,Trainer,AccuracyMetric,LossInForward -import torch.optim as optim -import argparse -import torch -import sys -from utils_ import LatticeLexiconPadder,SpanFPreRecMetric_YJ -from fastNLP import Tester -import fitlog -from fastNLP.core.callback import FitlogCallback -from utils import set_seed -import os -from fastNLP import LRScheduler -from torch.optim.lr_scheduler import LambdaLR - - - -# sys.path.append('.') -# sys.path.append('..') -# for p in sys.path: -# print(p) -# fitlog.add_hyper_in_file (__file__) # record your hyperparameters -########hyper - -########hyper - -parser = argparse.ArgumentParser() -parser.add_argument('--device',default='cpu') -parser.add_argument('--debug',default=True) - -parser.add_argument('--batch',default=1) -parser.add_argument('--test_batch',default=1024) -parser.add_argument('--optim',default='sgd',help='adam|sgd') -parser.add_argument('--lr',default=0.015) -parser.add_argument('--model',default='lattice',help='lattice|lstm') -parser.add_argument('--skip_before_head',default=False)#in paper it's false -parser.add_argument('--hidden',default=100) -parser.add_argument('--momentum',default=0) -parser.add_argument('--bi',default=True) -parser.add_argument('--dataset',default='ontonote',help='resume|ontonote|weibo|msra') -parser.add_argument('--use_bigram',default=False) - -parser.add_argument('--embed_dropout',default=0) -parser.add_argument('--output_dropout',default=0) -parser.add_argument('--epoch',default=100) -parser.add_argument('--seed',default=100) - -args = parser.parse_args() - -set_seed(args.seed) - -fit_msg_list = [args.model,'bi' if args.bi else 'uni',str(args.batch)] -if args.model == 'lattice': - fit_msg_list.append(str(args.skip_before_head)) -fit_msg = ' '.join(fit_msg_list) -# fitlog.commit(__file__,fit_msg=fit_msg) - - -# fitlog.add_hyper(args) -device = torch.device(args.device) -for k,v in args.__dict__.items(): - print(k,v) - -refresh_data = False -if args.dataset == 'ontonote': - datasets,vocabs,embeddings = load_ontonotes4ner(ontonote4ner_cn_path,yangjie_rich_pretrain_unigram_path,yangjie_rich_pretrain_bigram_path, - _refresh=refresh_data,index_token=False) -elif args.dataset == 'resume': - datasets,vocabs,embeddings = load_resume_ner(resume_ner_path,yangjie_rich_pretrain_unigram_path,yangjie_rich_pretrain_bigram_path, - _refresh=refresh_data,index_token=False) -# exit() -w_list = load_yangjie_rich_pretrain_word_list(yangjie_rich_pretrain_word_path, - _refresh=refresh_data) - - - -cache_name = os.path.join('cache',args.dataset+'_lattice') -datasets,vocabs,embeddings = equip_chinese_ner_with_skip(datasets,vocabs,embeddings,w_list,yangjie_rich_pretrain_word_path, - _refresh=refresh_data,_cache_fp=cache_name) - -print('中:embedding:{}'.format(embeddings['char'](24))) -print('embed lookup 
dropout:{}'.format(embeddings['word'].word_dropout)) - -# for k, v in datasets.items(): -# # v.apply_field(lambda x: list(map(len, x)), 'skips_l2r_word', 'lexicon_count') -# # v.apply_field(lambda x: -# # list(map(lambda y: -# # list(map(lambda z: vocabs['word'].to_index(z), y)), x)), -# # 'skips_l2r_word') - -print(datasets['train'][0]) -print('vocab info:') -for k,v in vocabs.items(): - print('{}:{}'.format(k,len(v))) -# print(datasets['dev'][0]) -# print(datasets['test'][0]) -# print(datasets['train'].get_all_fields().keys()) -for k,v in datasets.items(): - if args.model == 'lattice': - v.set_ignore_type('skips_l2r_word','skips_l2r_source','skips_r2l_word', 'skips_r2l_source') - if args.skip_before_head: - v.set_padder('skips_l2r_word',LatticeLexiconPadder()) - v.set_padder('skips_l2r_source',LatticeLexiconPadder()) - v.set_padder('skips_r2l_word',LatticeLexiconPadder()) - v.set_padder('skips_r2l_source',LatticeLexiconPadder(pad_val_dynamic=True)) - else: - v.set_padder('skips_l2r_word',LatticeLexiconPadder()) - v.set_padder('skips_r2l_word', LatticeLexiconPadder()) - v.set_padder('skips_l2r_source', LatticeLexiconPadder(-1)) - v.set_padder('skips_r2l_source', LatticeLexiconPadder(pad_val_dynamic=True,dynamic_offset=1)) - if args.bi: - v.set_input('chars','bigrams','seq_len', - 'skips_l2r_word','skips_l2r_source','lexicon_count', - 'skips_r2l_word', 'skips_r2l_source','lexicon_count_back', - 'target', - use_1st_ins_infer_dim_type=True) - else: - v.set_input('chars','bigrams','seq_len', - 'skips_l2r_word','skips_l2r_source','lexicon_count', - 'target', - use_1st_ins_infer_dim_type=True) - v.set_target('target','seq_len') - - v['target'].set_pad_val(0) - elif args.model == 'lstm': - v.set_ignore_type('skips_l2r_word','skips_l2r_source') - v.set_padder('skips_l2r_word',LatticeLexiconPadder()) - v.set_padder('skips_l2r_source',LatticeLexiconPadder()) - v.set_input('chars','bigrams','seq_len','target', - use_1st_ins_infer_dim_type=True) - v.set_target('target','seq_len') - - v['target'].set_pad_val(0) - -print(datasets['dev']['skips_l2r_word'][100]) - - -if args.model =='lattice': - model = LatticeLSTM_SeqLabel_V1(embeddings['char'],embeddings['bigram'],embeddings['word'], - hidden_size=args.hidden,label_size=len(vocabs['label']),device=args.device, - embed_dropout=args.embed_dropout,output_dropout=args.output_dropout, - skip_batch_first=True,bidirectional=args.bi,debug=args.debug, - skip_before_head=args.skip_before_head,use_bigram=args.use_bigram, - vocabs=vocabs - ) -elif args.model == 'lstm': - model = LSTM_SeqLabel(embeddings['char'],embeddings['bigram'],embeddings['word'], - hidden_size=args.hidden,label_size=len(vocabs['label']),device=args.device, - bidirectional=args.bi, - embed_dropout=args.embed_dropout,output_dropout=args.output_dropout, - use_bigram=args.use_bigram) - -for k,v in model.state_dict().items(): - print('{}:{}'.format(k,v.size())) - - - -# exit() -weight_dict = torch.load(open('/remote-home/xnli/weight_debug/lattice_yangjie.pkl','rb')) -# print(weight_dict.keys()) -# for k,v in weight_dict.items(): -# print('{}:{}'.format(k,v.size())) -def state_dict_param(model): - param_list = list(model.named_parameters()) - print(len(param_list)) - param_dict = {} - for i in range(len(param_list)): - param_dict[param_list[i][0]] = param_list[i][1] - - return param_dict - - -def copy_yangjie_lattice_weight(target,source_dict): - t = state_dict_param(target) - with torch.no_grad(): - t['encoder.char_cell.weight_ih'].set_(source_dict['lstm.forward_lstm.rnn.weight_ih']) - 
t['encoder.char_cell.weight_hh'].set_(source_dict['lstm.forward_lstm.rnn.weight_hh']) - t['encoder.char_cell.alpha_weight_ih'].set_(source_dict['lstm.forward_lstm.rnn.alpha_weight_ih']) - t['encoder.char_cell.alpha_weight_hh'].set_(source_dict['lstm.forward_lstm.rnn.alpha_weight_hh']) - t['encoder.char_cell.bias'].set_(source_dict['lstm.forward_lstm.rnn.bias']) - t['encoder.char_cell.alpha_bias'].set_(source_dict['lstm.forward_lstm.rnn.alpha_bias']) - t['encoder.word_cell.weight_ih'].set_(source_dict['lstm.forward_lstm.word_rnn.weight_ih']) - t['encoder.word_cell.weight_hh'].set_(source_dict['lstm.forward_lstm.word_rnn.weight_hh']) - t['encoder.word_cell.bias'].set_(source_dict['lstm.forward_lstm.word_rnn.bias']) - - t['encoder_back.char_cell.weight_ih'].set_(source_dict['lstm.backward_lstm.rnn.weight_ih']) - t['encoder_back.char_cell.weight_hh'].set_(source_dict['lstm.backward_lstm.rnn.weight_hh']) - t['encoder_back.char_cell.alpha_weight_ih'].set_(source_dict['lstm.backward_lstm.rnn.alpha_weight_ih']) - t['encoder_back.char_cell.alpha_weight_hh'].set_(source_dict['lstm.backward_lstm.rnn.alpha_weight_hh']) - t['encoder_back.char_cell.bias'].set_(source_dict['lstm.backward_lstm.rnn.bias']) - t['encoder_back.char_cell.alpha_bias'].set_(source_dict['lstm.backward_lstm.rnn.alpha_bias']) - t['encoder_back.word_cell.weight_ih'].set_(source_dict['lstm.backward_lstm.word_rnn.weight_ih']) - t['encoder_back.word_cell.weight_hh'].set_(source_dict['lstm.backward_lstm.word_rnn.weight_hh']) - t['encoder_back.word_cell.bias'].set_(source_dict['lstm.backward_lstm.word_rnn.bias']) - - for k,v in t.items(): - print('{}:{}'.format(k,v)) - -copy_yangjie_lattice_weight(model,weight_dict) - -# print(vocabs['label'].word2idx.keys()) - - - - - - - - -loss = LossInForward() - -f1_metric = SpanFPreRecMetric(vocabs['label'],pred='pred',target='target',seq_len='seq_len',encoding_type='bmeso') -f1_metric_yj = SpanFPreRecMetric_YJ(vocabs['label'],pred='pred',target='target',seq_len='seq_len',encoding_type='bmesoyj') -acc_metric = AccuracyMetric(pred='pred',target='target',seq_len='seq_len') -metrics = [f1_metric,f1_metric_yj,acc_metric] - -if args.optim == 'adam': - optimizer = optim.Adam(model.parameters(),lr=args.lr) -elif args.optim == 'sgd': - optimizer = optim.SGD(model.parameters(),lr=args.lr,momentum=args.momentum) - - - -# tester = Tester(datasets['dev'],model,metrics=metrics,batch_size=args.test_batch,device=device) -# test_result = tester.test() -# print(test_result) -callbacks = [ - LRScheduler(lr_scheduler=LambdaLR(optimizer, lambda ep: 1 / (1 + 0.05)**ep)) -] -print(datasets['train'][:2]) -print(vocabs['char'].to_index(':')) -# StaticEmbedding -# datasets['train'] = datasets['train'][1:] -from fastNLP import SequentialSampler -trainer = Trainer(datasets['train'],model, - optimizer=optimizer, - loss=loss, - metrics=metrics, - dev_data=datasets['dev'], - device=device, - batch_size=args.batch, - n_epochs=args.epoch, - dev_batch_size=args.test_batch, - callbacks=callbacks, - check_code_level=-1, - sampler=SequentialSampler()) - -trainer.train() \ No newline at end of file diff --git a/reproduction/seqence_labelling/chinese_ner/LatticeLSTM/load_data.py b/reproduction/seqence_labelling/chinese_ner/LatticeLSTM/load_data.py index 919f4e61..fcba17db 100644 --- a/reproduction/seqence_labelling/chinese_ner/LatticeLSTM/load_data.py +++ b/reproduction/seqence_labelling/chinese_ner/LatticeLSTM/load_data.py @@ -416,10 +416,92 @@ def load_conllized_ontonote_pkl_yf(path): return task_lst, vocabs 
+@cache_results(_cache_fp='weiboNER old uni+bi', _refresh=False) +def load_weibo_ner_old(path,unigram_embedding_path=None,bigram_embedding_path=None,index_token=True, + normlize={'char':True,'bigram':True,'word':False}): + from fastNLP.io.data_loader import ConllLoader + from utils import get_bigrams + + loader = ConllLoader(['chars','target']) + # from fastNLP.io.file_reader import _read_conll + # from fastNLP.core import Instance,DataSet + # def _load(path): + # ds = DataSet() + # for idx, data in _read_conll(path, indexes=loader.indexes, dropna=loader.dropna, + # encoding='ISO-8859-1'): + # ins = {h: data[i] for i, h in enumerate(loader.headers)} + # ds.append(Instance(**ins)) + # return ds + # from fastNLP.io.utils import check_loader_paths + # paths = check_loader_paths(path) + # datasets = {name: _load(path) for name, path in paths.items()} + datasets = {} + train_path = os.path.join(path,'train.all.bmes') + dev_path = os.path.join(path,'dev.all.bmes') + test_path = os.path.join(path,'test.all.bmes') + datasets['train'] = loader.load(train_path).datasets['train'] + datasets['dev'] = loader.load(dev_path).datasets['train'] + datasets['test'] = loader.load(test_path).datasets['train'] + + for k,v in datasets.items(): + print('{}:{}'.format(k,len(v))) + + vocabs = {} + word_vocab = Vocabulary() + bigram_vocab = Vocabulary() + label_vocab = Vocabulary(padding=None,unknown=None) + + for k,v in datasets.items(): + # ignore the word segmentation tag + v.apply_field(lambda x: [w[0] for w in x],'chars','chars') + v.apply_field(get_bigrams,'chars','bigrams') + + + word_vocab.from_dataset(datasets['train'],field_name='chars',no_create_entry_dataset=[datasets['dev'],datasets['test']]) + label_vocab.from_dataset(datasets['train'],field_name='target') + print('label_vocab:{}\n{}'.format(len(label_vocab),label_vocab.idx2word)) + + + for k,v in datasets.items(): + # v.set_pad_val('target',-100) + v.add_seq_len('chars',new_field_name='seq_len') + + + vocabs['char'] = word_vocab + vocabs['label'] = label_vocab + + + bigram_vocab.from_dataset(datasets['train'],field_name='bigrams',no_create_entry_dataset=[datasets['dev'],datasets['test']]) + if index_token: + word_vocab.index_dataset(*list(datasets.values()), field_name='raw_words', new_field_name='words') + bigram_vocab.index_dataset(*list(datasets.values()),field_name='raw_bigrams',new_field_name='bigrams') + label_vocab.index_dataset(*list(datasets.values()), field_name='raw_target', new_field_name='target') + + # for k,v in datasets.items(): + # v.set_input('chars','bigrams','seq_len','target') + # v.set_target('target','seq_len') + + vocabs['bigram'] = bigram_vocab + + embeddings = {} + + if unigram_embedding_path is not None: + unigram_embedding = StaticEmbedding(word_vocab, model_dir_or_name=unigram_embedding_path, + word_dropout=0.01,normalize=normlize['char']) + embeddings['char'] = unigram_embedding + + if bigram_embedding_path is not None: + bigram_embedding = StaticEmbedding(bigram_vocab, model_dir_or_name=bigram_embedding_path, + word_dropout=0.01,normalize=normlize['bigram']) + embeddings['bigram'] = bigram_embedding + + return datasets, vocabs, embeddings + + @cache_results(_cache_fp='weiboNER uni+bi', _refresh=False) def load_weibo_ner(path,unigram_embedding_path=None,bigram_embedding_path=None,index_token=True, normlize={'char':True,'bigram':True,'word':False}): - from fastNLP.io.data_loader import ConllLoader + from fastNLP.io.loader import ConllLoader from utils import get_bigrams loader = ConllLoader(['chars','target']) @@ -492,7 
+574,7 @@ def load_weibo_ner(path,unigram_embedding_path=None,bigram_embedding_path=None,i @cache_results(_cache_fp='cache/ontonotes4ner',_refresh=False) def load_ontonotes4ner(path,char_embedding_path=None,bigram_embedding_path=None,index_token=True, normalize={'char':True,'bigram':True,'word':False}): - from fastNLP.io.data_loader import ConllLoader + from fastNLP.io.loader import ConllLoader from utils import get_bigrams train_path = os.path.join(path,'train.char.bmes') diff --git a/reproduction/seqence_labelling/chinese_ner/LatticeLSTM/main.py b/reproduction/seqence_labelling/chinese_ner/LatticeLSTM/main.py index f5006bde..a2df5a91 100644 --- a/reproduction/seqence_labelling/chinese_ner/LatticeLSTM/main.py +++ b/reproduction/seqence_labelling/chinese_ner/LatticeLSTM/main.py @@ -1,6 +1,8 @@ import torch.nn as nn +# print(1111111111) # from pathes import * -from load_data import load_ontonotes4ner,equip_chinese_ner_with_skip,load_yangjie_rich_pretrain_word_list,load_resume_ner,load_weibo_ner +from load_data import load_ontonotes4ner,equip_chinese_ner_with_skip,load_yangjie_rich_pretrain_word_list,\ + load_resume_ner,load_weibo_ner,load_weibo_ner_old from fastNLP.embeddings import StaticEmbedding from models import LatticeLSTM_SeqLabel,LSTM_SeqLabel,LatticeLSTM_SeqLabel_V1 from fastNLP import CrossEntropyLoss,SpanFPreRecMetric,Trainer,AccuracyMetric,LossInForward @@ -18,23 +20,24 @@ from fastNLP import LRScheduler from torch.optim.lr_scheduler import LambdaLR parser = argparse.ArgumentParser() -parser.add_argument('--device',default='cuda:4') +parser.add_argument('--device',default='cuda:1') parser.add_argument('--debug',default=False) -parser.add_argument('--norm_embed',default=True) -parser.add_argument('--batch',default=10) +parser.add_argument('--norm_embed',default=False) +parser.add_argument('--batch',default=1) parser.add_argument('--test_batch',default=1024) parser.add_argument('--optim',default='sgd',help='adam|sgd') parser.add_argument('--lr',default=0.045) parser.add_argument('--model',default='lattice',help='lattice|lstm') parser.add_argument('--skip_before_head',default=False)#in paper it's false -parser.add_argument('--hidden',default=100) +parser.add_argument('--hidden',default=113) parser.add_argument('--momentum',default=0) parser.add_argument('--bi',default=True) -parser.add_argument('--dataset',default='ontonote',help='resume|ontonote|weibo|msra') +parser.add_argument('--dataset',default='weibo',help='resume|ontonote|weibo|msra') parser.add_argument('--use_bigram',default=True) parser.add_argument('--embed_dropout',default=0.5) +parser.add_argument('--gaz_dropout',default=-1) parser.add_argument('--output_dropout',default=0.5) parser.add_argument('--epoch',default=100) parser.add_argument('--seed',default=100) @@ -49,8 +52,6 @@ if args.model == 'lattice': fit_msg = ' '.join(fit_msg_list) fitlog.commit(__file__,fit_msg=fit_msg) - -fitlog.add_hyper(args) device = torch.device(args.device) for k,v in args.__dict__.items(): print(k,v) @@ -78,6 +79,10 @@ elif args.dataset == 'weibo': _refresh=refresh_data,index_token=False, ) +elif args.dataset == 'weibo_old': + datasets,vocabs,embeddings = load_weibo_ner_old(weibo_ner_old_path,yangjie_rich_pretrain_unigram_path,yangjie_rich_pretrain_bigram_path, + _refresh=refresh_data,index_token=False, + ) if args.dataset == 'ontonote': args.batch = 10 args.lr = 0.045 @@ -85,9 +90,18 @@ elif args.dataset == 'resume': args.batch = 1 args.lr = 0.015 elif args.dataset == 'weibo': + args.batch = 10 + args.gaz_dropout = 0.1 args.embed_dropout 
= 0.1 args.output_dropout = 0.1 +elif args.dataset == 'weibo_old': + args.embed_dropout = 0.1 + args.output_dropout = 0.1 + +if args.gaz_dropout < 0: + args.gaz_dropout = args.embed_dropout +fitlog.add_hyper(args) w_list = load_yangjie_rich_pretrain_word_list(yangjie_rich_pretrain_word_path, _refresh=refresh_data) @@ -145,7 +159,8 @@ if args.model =='lattice': hidden_size=args.hidden,label_size=len(vocabs['label']),device=args.device, embed_dropout=args.embed_dropout,output_dropout=args.output_dropout, skip_batch_first=True,bidirectional=args.bi,debug=args.debug, - skip_before_head=args.skip_before_head,use_bigram=args.use_bigram + skip_before_head=args.skip_before_head,use_bigram=args.use_bigram, + gaz_dropout=args.gaz_dropout ) elif args.model == 'lstm': model = LSTM_SeqLabel(embeddings['char'],embeddings['bigram'],embeddings['word'], @@ -156,11 +171,12 @@ elif args.model == 'lstm': loss = LossInForward() - -f1_metric = SpanFPreRecMetric(vocabs['label'],pred='pred',target='target',seq_len='seq_len',encoding_type='bmeso') -f1_metric_yj = SpanFPreRecMetric_YJ(vocabs['label'],pred='pred',target='target',seq_len='seq_len',encoding_type='bmesoyj') +encoding_type = 'bmeso' +if args.dataset == 'weibo': + encoding_type = 'bio' +f1_metric = SpanFPreRecMetric(vocabs['label'],pred='pred',target='target',seq_len='seq_len',encoding_type=encoding_type) acc_metric = AccuracyMetric(pred='pred',target='target',seq_len='seq_len') -metrics = [f1_metric,f1_metric_yj,acc_metric] +metrics = [f1_metric,acc_metric] if args.optim == 'adam': optimizer = optim.Adam(model.parameters(),lr=args.lr) @@ -174,7 +190,7 @@ callbacks = [ FitlogCallback({'test':datasets['test'],'train':datasets['train']}), LRScheduler(lr_scheduler=LambdaLR(optimizer, lambda ep: 1 / (1 + 0.03)**ep)) ] - +print('label_vocab:{}\n{}'.format(len(vocabs['label']),vocabs['label'].idx2word)) trainer = Trainer(datasets['train'],model, optimizer=optimizer, loss=loss, diff --git a/reproduction/seqence_labelling/chinese_ner/LatticeLSTM/models.py b/reproduction/seqence_labelling/chinese_ner/LatticeLSTM/models.py index f0f912d9..0b419015 100644 --- a/reproduction/seqence_labelling/chinese_ner/LatticeLSTM/models.py +++ b/reproduction/seqence_labelling/chinese_ner/LatticeLSTM/models.py @@ -3,7 +3,7 @@ from fastNLP.embeddings import StaticEmbedding from fastNLP.modules import LSTM, ConditionalRandomField import torch from fastNLP import seq_len_to_mask -from utils import better_init_rnn +from utils import better_init_rnn,print_info class LatticeLSTM_SeqLabel(nn.Module): @@ -120,7 +120,7 @@ class LatticeLSTM_SeqLabel(nn.Module): class LatticeLSTM_SeqLabel_V1(nn.Module): def __init__(self, char_embed, bigram_embed, word_embed, hidden_size, label_size, bias=True, bidirectional=False, device=None, embed_dropout=0, output_dropout=0, skip_batch_first=True,debug=False, - skip_before_head=False,use_bigram=True,vocabs=None): + skip_before_head=False,use_bigram=True,vocabs=None,gaz_dropout=0): if device is None: self.device = torch.device('cpu') else: @@ -173,6 +173,7 @@ class LatticeLSTM_SeqLabel_V1(nn.Module): self.loss_func = nn.CrossEntropyLoss() self.embed_dropout = nn.Dropout(embed_dropout) + self.gaz_dropout = nn.Dropout(gaz_dropout) self.output_dropout = nn.Dropout(output_dropout) def forward(self, chars, bigrams, seq_len, target, @@ -257,15 +258,22 @@ class LSTM_SeqLabel(nn.Module): better_init_rnn(self.encoder.lstm) + self.output = nn.Linear(self.hidden_size * (2 if self.bidirectional else 1), self.label_size) - self.debug = False + self.debug = True 
self.loss_func = nn.CrossEntropyLoss() self.embed_dropout = nn.Dropout(embed_dropout) self.output_dropout = nn.Dropout(output_dropout) self.crf = ConditionalRandomField(label_size, True) def forward(self, chars, bigrams, seq_len, target): + if self.debug: + + print_info('chars:{}'.format(chars.size())) + print_info('bigrams:{}'.format(bigrams.size())) + print_info('seq_len:{}'.format(seq_len.size())) + print_info('target:{}'.format(target.size())) embed_char = self.char_embed(chars) if self.use_bigram: @@ -291,6 +299,9 @@ class LSTM_SeqLabel(nn.Module): # batch_size, sent_len = pred.shape[0], pred.shape[1] # loss = self.loss_func(pred.reshape(batch_size * sent_len, -1), target.reshape(batch_size * sent_len)) + if self.debug: + print('debug mode:finish') + exit(1208) if self.training: loss = self.crf(pred, target, mask) return {'loss': loss} diff --git a/reproduction/seqence_labelling/chinese_ner/LatticeLSTM/modules.py b/reproduction/seqence_labelling/chinese_ner/LatticeLSTM/modules.py index 84e21dc5..70182250 100644 --- a/reproduction/seqence_labelling/chinese_ner/LatticeLSTM/modules.py +++ b/reproduction/seqence_labelling/chinese_ner/LatticeLSTM/modules.py @@ -326,7 +326,7 @@ class MultiInputLSTMCell_V1(nn.Module): alpha = torch.sigmoid(alpha_wi + alpha_wh + alpha_bias_batch) - skip_mask = seq_len_to_mask(skip_count,max_len=skip_c.size()[1]) + skip_mask = seq_len_to_mask(skip_count,max_len=skip_c.size()[1]).float() skip_mask = 1 - skip_mask @@ -622,8 +622,8 @@ class LatticeLSTMLayer_sup_back_V1(nn.Module): h_1,c_1 = self.char_cell(inp[:,i,:],c_1_skip,skip_count[:,i],(h_0,c_0)) - h_1_mask = h_1.masked_fill(1-mask_for_seq_len[:,i].unsqueeze(-1),0) - c_1_mask = c_1.masked_fill(1 - mask_for_seq_len[:, i].unsqueeze(-1), 0) + h_1_mask = h_1.masked_fill(~ mask_for_seq_len[:,i].unsqueeze(-1),0) + c_1_mask = c_1.masked_fill(~ mask_for_seq_len[:, i].unsqueeze(-1), 0) h_ = torch.cat([h_1_mask.unsqueeze(1),h_],dim=1) diff --git a/reproduction/seqence_labelling/chinese_ner/LatticeLSTM/pathes.py b/reproduction/seqence_labelling/chinese_ner/LatticeLSTM/pathes.py index af1efaf7..fe3f6162 100644 --- a/reproduction/seqence_labelling/chinese_ner/LatticeLSTM/pathes.py +++ b/reproduction/seqence_labelling/chinese_ner/LatticeLSTM/pathes.py @@ -20,4 +20,5 @@ sst2_path = '/remote-home/xnli/data/corpus/text_classification/SST-2/' ontonote4ner_cn_path = '/remote-home/xnli/data/corpus/sequence_labelling/chinese_ner/OntoNote4NER' msra_ner_cn_path = '/remote-home/xnli/data/corpus/sequence_labelling/chinese_ner/MSRANER' resume_ner_path = '/remote-home/xnli/data/corpus/sequence_labelling/chinese_ner/ResumeNER' -weibo_ner_path = '/remote-home/xnli/data/corpus/sequence_labelling/chinese_ner/WeiboNER' \ No newline at end of file +weibo_ner_path = '/remote-home/xnli/data/corpus/sequence_labelling/chinese_ner/WeiboNER' +weibo_ner_old_path = '/remote-home/xnli/data/corpus/sequence_labelling/chinese_ner/WeiboNER_old' \ No newline at end of file