|
|
@@ -1,6 +1,7 @@ |
|
|
|
import sys |
|
|
|
sys.path.append('../../..') |
|
|
|
|
|
|
|
|
|
|
|
from fastNLP.modules.encoder.embedding import CNNCharEmbedding, StaticEmbedding, BertEmbedding, ElmoEmbedding, LSTMCharEmbedding |
|
|
|
from fastNLP.modules.encoder.embedding import CNNCharEmbedding, StaticEmbedding, BertEmbedding, ElmoEmbedding, StackEmbedding |
|
|
|
from fastNLP.core.vocabulary import VocabularyOption |
|
|
|
|
|
|
|
from reproduction.seqence_labelling.ner.model.lstm_cnn_crf import CNNBiLSTMCRF |
|
|
@@ -12,7 +13,10 @@ from torch.optim import SGD, Adam |
|
|
|
from fastNLP import GradientClipCallback |
|
|
|
from fastNLP.core.callback import FitlogCallback, LRScheduler |
|
|
|
from torch.optim.lr_scheduler import LambdaLR |
|
|
|
from reproduction.seqence_labelling.ner.model.swats import SWATS |
|
|
|
from fastNLP.core.optimizer import AdamW |
|
|
|
# from reproduction.seqence_labelling.ner.model.swats import SWATS |
|
|
|
from reproduction.seqence_labelling.chinese_ner.callbacks import SaveModelCallback |
|
|
|
from fastNLP import cache_results |
|
|
|
|
|
|
|
import fitlog |
|
|
|
fitlog.debug() |
|
|
@@ -20,17 +24,20 @@ fitlog.debug() |
|
|
|
from reproduction.seqence_labelling.ner.data.Conll2003Loader import Conll2003DataLoader |
|
|
|
|
|
|
|
encoding_type = 'bioes' |
|
|
|
|
|
|
|
data = Conll2003DataLoader(encoding_type=encoding_type).process('../../../../others/data/conll2003', |
|
|
|
word_vocab_opt=VocabularyOption(min_freq=2), |
|
|
|
lower=False) |
|
|
|
@cache_results('caches/upper_conll2003.pkl') |
|
|
|
def load_data(): |
|
|
|
data = Conll2003DataLoader(encoding_type=encoding_type).process('../../../../others/data/conll2003', |
|
|
|
word_vocab_opt=VocabularyOption(min_freq=1), |
|
|
|
lower=False) |
|
|
|
return data |
|
|
|
data = load_data() |
|
|
|
print(data) |
|
|
|
char_embed = CNNCharEmbedding(vocab=data.vocabs['cap_words'], embed_size=30, char_emb_size=30, filter_nums=[30], |
|
|
|
kernel_sizes=[3]) |
|
|
|
char_embed = CNNCharEmbedding(vocab=data.vocabs['words'], embed_size=30, char_emb_size=30, filter_nums=[30], |
|
|
|
kernel_sizes=[3], word_dropout=0.01, dropout=0.5) |
|
|
|
# char_embed = LSTMCharEmbedding(vocab=data.vocabs['cap_words'], embed_size=30 ,char_emb_size=30) |
|
|
|
word_embed = StaticEmbedding(vocab=data.vocabs[Const.INPUT], |
|
|
|
model_dir_or_name='/hdd/fudanNLP/pretrain_vectors/wiki_en_100_50_case_2.txt', |
|
|
|
requires_grad=True) |
|
|
|
word_embed = StaticEmbedding(vocab=data.vocabs['words'], |
|
|
|
model_dir_or_name='/hdd/fudanNLP/pretrain_vectors/glove.6B.100d.txt', |
|
|
|
requires_grad=True, lower=True, word_dropout=0.01, dropout=0.5) |
|
|
|
word_embed.embedding.weight.data = word_embed.embedding.weight.data/word_embed.embedding.weight.data.std() |
|
|
|
|
|
|
|
# import joblib |
|
|
@@ -46,25 +53,28 @@ word_embed.embedding.weight.data = word_embed.embedding.weight.data/word_embed.e |
|
|
|
# for name, dataset in data.datasets.items(): |
|
|
|
# dataset.apply_field(convert_to_ids, field_name='raw_words', new_field_name=Const.INPUT) |
|
|
|
|
|
|
|
# word_embed = ElmoEmbedding(vocab=data.vocabs['cap_words'], |
|
|
|
# model_dir_or_name='/hdd/fudanNLP/fastNLP/others/pretrained_models/elmo_en', |
|
|
|
# requires_grad=True) |
|
|
|
# elmo_embed = ElmoEmbedding(vocab=data.vocabs['cap_words'], |
|
|
|
# model_dir_or_name='.', |
|
|
|
# requires_grad=True, layers='mix') |
|
|
|
# char_embed = StackEmbedding([elmo_embed, char_embed]) |
|
|
|
|
|
|
|
model = CNNBiLSTMCRF(word_embed, char_embed, hidden_size=200, num_layers=1, tag_vocab=data.vocabs[Const.TARGET], |
|
|
|
encoding_type=encoding_type) |
|
|
|
|
|
|
|
callbacks = [ |
|
|
|
GradientClipCallback(clip_type='value', clip_value=5) |
|
|
|
, FitlogCallback({'test':data.datasets['test']}, verbose=1) |
|
|
|
GradientClipCallback(clip_type='value', clip_value=5), |
|
|
|
FitlogCallback({'test':data.datasets['test']}, verbose=1), |
|
|
|
# SaveModelCallback('save_models/', top=3, only_param=False, save_on_exception=True) |
|
|
|
] |
|
|
|
# optimizer = Adam(model.parameters(), lr=0.005) |
|
|
|
optimizer = SWATS(model.parameters(), verbose=True) |
|
|
|
# optimizer = SGD(model.parameters(), lr=0.008, momentum=0.9) |
|
|
|
# scheduler = LRScheduler(LambdaLR(optimizer, lr_lambda=lambda epoch: 1 / (1 + 0.05 * epoch))) |
|
|
|
# callbacks.append(scheduler) |
|
|
|
# optimizer = Adam(model.parameters(), lr=0.001) |
|
|
|
# optimizer = SWATS(model.parameters(), verbose=True) |
|
|
|
optimizer = SGD(model.parameters(), lr=0.01, momentum=0.9) |
|
|
|
scheduler = LRScheduler(LambdaLR(optimizer, lr_lambda=lambda epoch: 1 / (1 + 0.05 * epoch))) |
|
|
|
callbacks.append(scheduler) |
|
|
|
|
|
|
|
|
|
|
|
trainer = Trainer(train_data=data.datasets['train'], model=model, optimizer=optimizer, sampler=BucketSampler(), |
|
|
|
device=1, dev_data=data.datasets['dev'], batch_size=10, |
|
|
|
trainer = Trainer(train_data=data.datasets['train'], model=model, optimizer=optimizer, sampler=BucketSampler(batch_size=20), |
|
|
|
device=1, dev_data=data.datasets['dev'], batch_size=20, |
|
|
|
metrics=SpanFPreRecMetric(tag_vocab=data.vocabs[Const.TARGET], encoding_type=encoding_type), |
|
|
|
callbacks=callbacks, num_workers=1, n_epochs=100) |
|
|
|
callbacks=callbacks, num_workers=2, n_epochs=100) |
|
|
|
trainer.train() |