@@ -232,12 +232,16 @@ class CrossEntropyLoss(LossBase): | |||||
""" | """ | ||||
def __init__(self, pred=None, target=None, padding_idx=-100): | def __init__(self, pred=None, target=None, padding_idx=-100): | ||||
# TODO 需要做一些检查,F.cross_entropy在计算时,如果pred是(16, 10 ,4), target的形状按道理应该是(16, 10), 但实际需要(16,4) | |||||
super(CrossEntropyLoss, self).__init__() | super(CrossEntropyLoss, self).__init__() | ||||
self._init_param_map(pred=pred, target=target) | self._init_param_map(pred=pred, target=target) | ||||
self.padding_idx = padding_idx | self.padding_idx = padding_idx | ||||
def get_loss(self, pred, target): | def get_loss(self, pred, target): | ||||
if pred.dim()>2: | |||||
if pred.size()[:2]==target.size(): | |||||
# F.cross_entropy在计算时,如果pred是(16, 10 ,4), 会在第二维上去log_softmax, 所以需要交换一下位置 | |||||
pred = pred.transpose(1, 2) | |||||
return F.cross_entropy(input=pred, target=target, | return F.cross_entropy(input=pred, target=target, | ||||
ignore_index=self.padding_idx) | ignore_index=self.padding_idx) | ||||
@@ -451,9 +451,11 @@ class Trainer(object): | |||||
self.data_iterator = train_data | self.data_iterator = train_data | ||||
else: | else: | ||||
raise TypeError("train_data type {} not support".format(type(train_data))) | raise TypeError("train_data type {} not support".format(type(train_data))) | ||||
self.model = _move_model_to_device(model, device=device) | |||||
if check_code_level > -1 and isinstance(self.data_iterator, DataSetIter): | if check_code_level > -1 and isinstance(self.data_iterator, DataSetIter): | ||||
_check_code(dataset=train_data, model=model, losser=losser, metrics=metrics, dev_data=dev_data, | |||||
_check_code(dataset=train_data, model=self.model, losser=losser, metrics=metrics, dev_data=dev_data, | |||||
metric_key=metric_key, check_level=check_code_level, | metric_key=metric_key, check_level=check_code_level, | ||||
batch_size=min(batch_size, DEFAULT_CHECK_BATCH_SIZE)) | batch_size=min(batch_size, DEFAULT_CHECK_BATCH_SIZE)) | ||||
# _check_code 是 fastNLP 帮助你检查代码是否正确的方法 。如果你在错误栈中看到这行注释,请认真检查你的代码 | # _check_code 是 fastNLP 帮助你检查代码是否正确的方法 。如果你在错误栈中看到这行注释,请认真检查你的代码 | ||||
@@ -474,9 +476,7 @@ class Trainer(object): | |||||
self.best_dev_perf = None | self.best_dev_perf = None | ||||
self.n_steps = (len(self.train_data) // self.batch_size + int( | self.n_steps = (len(self.train_data) // self.batch_size + int( | ||||
len(self.train_data) % self.batch_size != 0)) * self.n_epochs | len(self.train_data) % self.batch_size != 0)) * self.n_epochs | ||||
self.model = _move_model_to_device(self.model, device=device) | |||||
if isinstance(optimizer, torch.optim.Optimizer): | if isinstance(optimizer, torch.optim.Optimizer): | ||||
self.optimizer = optimizer | self.optimizer = optimizer | ||||
elif isinstance(optimizer, Optimizer): | elif isinstance(optimizer, Optimizer): | ||||
@@ -204,7 +204,7 @@ class StaticEmbedding(TokenEmbedding): | |||||
model_url = PRETRAIN_URL + model_name | model_url = PRETRAIN_URL + model_name | ||||
model_path = cached_path(model_url) | model_path = cached_path(model_url) | ||||
# 检查是否存在 | # 检查是否存在 | ||||
elif os.path.isfile(model_dir_or_name): | |||||
elif os.path.isfile(os.path.expanduser(os.path.abspath(model_dir_or_name))): | |||||
model_path = model_dir_or_name | model_path = model_dir_or_name | ||||
else: | else: | ||||
raise ValueError(f"Cannot recognize {model_dir_or_name}.") | raise ValueError(f"Cannot recognize {model_dir_or_name}.") | ||||
@@ -455,7 +455,7 @@ class ElmoEmbedding(ContextualEmbedding): | |||||
model_url = PRETRAIN_URL + model_name | model_url = PRETRAIN_URL + model_name | ||||
model_dir = cached_path(model_url) | model_dir = cached_path(model_url) | ||||
# 检查是否存在 | # 检查是否存在 | ||||
elif os.path.isdir(model_dir_or_name): | |||||
elif os.path.isdir(os.path.expanduser(os.path.abspath(model_dir_or_name))): | |||||
model_dir = model_dir_or_name | model_dir = model_dir_or_name | ||||
else: | else: | ||||
raise ValueError(f"Cannot recognize {model_dir_or_name}.") | raise ValueError(f"Cannot recognize {model_dir_or_name}.") | ||||
@@ -553,7 +553,7 @@ class BertEmbedding(ContextualEmbedding): | |||||
model_url = PRETRAIN_URL + model_name | model_url = PRETRAIN_URL + model_name | ||||
model_dir = cached_path(model_url) | model_dir = cached_path(model_url) | ||||
# 检查是否存在 | # 检查是否存在 | ||||
elif os.path.isdir(model_dir_or_name): | |||||
elif os.path.isdir(os.path.expanduser(os.path.abspath(model_dir_or_name))): | |||||
model_dir = model_dir_or_name | model_dir = model_dir_or_name | ||||
else: | else: | ||||
raise ValueError(f"Cannot recognize {model_dir_or_name}.") | raise ValueError(f"Cannot recognize {model_dir_or_name}.") | ||||
@@ -57,12 +57,8 @@ callbacks = [clipper] | |||||
# if pretrain: | # if pretrain: | ||||
# fixer = FixEmbedding([model.char_embedding, model.bigram_embedding], fix_until=fix_until) | # fixer = FixEmbedding([model.char_embedding, model.bigram_embedding], fix_until=fix_until) | ||||
# callbacks.append(fixer) | # callbacks.append(fixer) | ||||
trainer = Trainer(data.datasets['train'], model, optimizer=optimizer, loss=None, | |||||
batch_size=32, sampler=sampler, update_every=5, | |||||
n_epochs=3, print_every=5, | |||||
dev_data=data.datasets['dev'], metrics=RelayMetric(), metric_key='f', | |||||
validate_every=-1, save_path=None, | |||||
prefetch=True, use_tqdm=True, device=device, | |||||
callbacks=callbacks, | |||||
trainer = Trainer(data.datasets['train'], model, optimizer=optimizer, loss=None, batch_size=32, sampler=sampler, | |||||
update_every=5, n_epochs=3, print_every=5, dev_data=data.datasets['dev'], metrics=RelayMetric(), | |||||
metric_key='f', validate_every=-1, save_path=None, use_tqdm=True, device=device, callbacks=callbacks, | |||||
check_code_level=0) | check_code_level=0) | ||||
trainer.train() | trainer.train() |
@@ -12,11 +12,11 @@ class CNNBiLSTMCRF(nn.Module): | |||||
def __init__(self, embed, char_embed, hidden_size, num_layers, tag_vocab, dropout=0.5, encoding_type='bioes'): | def __init__(self, embed, char_embed, hidden_size, num_layers, tag_vocab, dropout=0.5, encoding_type='bioes'): | ||||
super().__init__() | super().__init__() | ||||
self.embedding = Embedding(embed, dropout=0.5) | |||||
self.char_embedding = Embedding(char_embed, dropout=0.5) | |||||
self.embedding = Embedding(embed, dropout=0.5, dropout_word=0) | |||||
self.char_embedding = Embedding(char_embed, dropout=0.5, dropout_word=0.01) | |||||
self.lstm = LSTM(input_size=self.embedding.embedding_dim+self.char_embedding.embedding_dim, | self.lstm = LSTM(input_size=self.embedding.embedding_dim+self.char_embedding.embedding_dim, | ||||
hidden_size=hidden_size//2, num_layers=num_layers, | |||||
bidirectional=True, batch_first=True, dropout=dropout) | |||||
hidden_size=hidden_size//2, num_layers=num_layers, | |||||
bidirectional=True, batch_first=True) | |||||
self.fc = nn.Linear(hidden_size, len(tag_vocab)) | self.fc = nn.Linear(hidden_size, len(tag_vocab)) | ||||
transitions = allowed_transitions(tag_vocab.idx2word, encoding_type=encoding_type, include_start_end=True) | transitions = allowed_transitions(tag_vocab.idx2word, encoding_type=encoding_type, include_start_end=True) | ||||
@@ -25,9 +25,9 @@ class CNNBiLSTMCRF(nn.Module): | |||||
self.dropout = nn.Dropout(dropout, inplace=True) | self.dropout = nn.Dropout(dropout, inplace=True) | ||||
for name, param in self.named_parameters(): | for name, param in self.named_parameters(): | ||||
if 'ward_fc' in name: | |||||
if 'fc' in name: | |||||
if param.data.dim()>1: | if param.data.dim()>1: | ||||
nn.init.xavier_normal_(param) | |||||
nn.init.xavier_uniform_(param) | |||||
else: | else: | ||||
nn.init.constant_(param, 0) | nn.init.constant_(param, 0) | ||||
if 'crf' in name: | if 'crf' in name: | ||||
@@ -1,6 +1,6 @@ | |||||
from fastNLP.modules.encoder.embedding import CNNCharEmbedding, StaticEmbedding, BertEmbedding | |||||
from fastNLP.modules.encoder.embedding import CNNCharEmbedding, StaticEmbedding, BertEmbedding, ElmoEmbedding, LSTMCharEmbedding | |||||
from fastNLP.core.vocabulary import VocabularyOption | from fastNLP.core.vocabulary import VocabularyOption | ||||
from reproduction.seqence_labelling.ner.model.lstm_cnn_crf import CNNBiLSTMCRF | from reproduction.seqence_labelling.ner.model.lstm_cnn_crf import CNNBiLSTMCRF | ||||
@@ -12,6 +12,8 @@ from torch.optim import SGD, Adam | |||||
from fastNLP import GradientClipCallback | from fastNLP import GradientClipCallback | ||||
from fastNLP.core.callback import FitlogCallback, LRScheduler | from fastNLP.core.callback import FitlogCallback, LRScheduler | ||||
from torch.optim.lr_scheduler import LambdaLR | from torch.optim.lr_scheduler import LambdaLR | ||||
from reproduction.seqence_labelling.ner.model.swats import SWATS | |||||
import fitlog | import fitlog | ||||
fitlog.debug() | fitlog.debug() | ||||
@@ -19,28 +21,50 @@ from reproduction.seqence_labelling.ner.data.Conll2003Loader import Conll2003Dat | |||||
encoding_type = 'bioes' | encoding_type = 'bioes' | ||||
data = Conll2003DataLoader(encoding_type=encoding_type).process('/hdd/fudanNLP/fastNLP/others/data/conll2003', | |||||
word_vocab_opt=VocabularyOption(min_freq=2)) | |||||
data = Conll2003DataLoader(encoding_type=encoding_type).process('../../../../others/data/conll2003', | |||||
word_vocab_opt=VocabularyOption(min_freq=2), | |||||
lower=False) | |||||
print(data) | print(data) | ||||
char_embed = CNNCharEmbedding(vocab=data.vocabs['cap_words'], embed_size=30, char_emb_size=30, filter_nums=[30], | char_embed = CNNCharEmbedding(vocab=data.vocabs['cap_words'], embed_size=30, char_emb_size=30, filter_nums=[30], | ||||
kernel_sizes=[3]) | kernel_sizes=[3]) | ||||
# char_embed = LSTMCharEmbedding(vocab=data.vocabs['cap_words'], embed_size=30 ,char_emb_size=30) | |||||
word_embed = StaticEmbedding(vocab=data.vocabs[Const.INPUT], | word_embed = StaticEmbedding(vocab=data.vocabs[Const.INPUT], | ||||
model_dir_or_name='/hdd/fudanNLP/pretrain_vectors/glove.6B.100d.txt', | |||||
model_dir_or_name='/hdd/fudanNLP/pretrain_vectors/wiki_en_100_50_case_2.txt', | |||||
requires_grad=True) | requires_grad=True) | ||||
word_embed.embedding.weight.data = word_embed.embedding.weight.data/word_embed.embedding.weight.data.std() | word_embed.embedding.weight.data = word_embed.embedding.weight.data/word_embed.embedding.weight.data.std() | ||||
# import joblib | |||||
# raw_data = joblib.load('/hdd/fudanNLP/fastNLP/others/NER-with-LS/data/conll_with_data.joblib') | |||||
# def convert_to_ids(raw_words): | |||||
# ids = [] | |||||
# for word in raw_words: | |||||
# id = raw_data['word_to_id'][word] | |||||
# id = raw_data['id_to_emb_map'][id] | |||||
# ids.append(id) | |||||
# return ids | |||||
# word_embed = raw_data['emb_matrix'] | |||||
# for name, dataset in data.datasets.items(): | |||||
# dataset.apply_field(convert_to_ids, field_name='raw_words', new_field_name=Const.INPUT) | |||||
# word_embed = ElmoEmbedding(vocab=data.vocabs['cap_words'], | |||||
# model_dir_or_name='/hdd/fudanNLP/fastNLP/others/pretrained_models/elmo_en', | |||||
# requires_grad=True) | |||||
model = CNNBiLSTMCRF(word_embed, char_embed, hidden_size=200, num_layers=1, tag_vocab=data.vocabs[Const.TARGET], | model = CNNBiLSTMCRF(word_embed, char_embed, hidden_size=200, num_layers=1, tag_vocab=data.vocabs[Const.TARGET], | ||||
encoding_type=encoding_type) | encoding_type=encoding_type) | ||||
optimizer = SGD(model.parameters(), lr=0.01, momentum=0.9) | |||||
scheduler = LRScheduler(LambdaLR(optimizer, lr_lambda=lambda epoch: 1 / (1 + 0.05 * epoch))) | |||||
callbacks = [GradientClipCallback(clip_type='value', clip_value=5), FitlogCallback({'test':data.datasets['test'], | |||||
'train':data.datasets['train']}, verbose=1), | |||||
scheduler] | |||||
callbacks = [ | |||||
GradientClipCallback(clip_type='value', clip_value=5) | |||||
, FitlogCallback({'test':data.datasets['test']}, verbose=1) | |||||
] | |||||
# optimizer = Adam(model.parameters(), lr=0.005) | |||||
optimizer = SWATS(model.parameters(), verbose=True) | |||||
# optimizer = SGD(model.parameters(), lr=0.008, momentum=0.9) | |||||
# scheduler = LRScheduler(LambdaLR(optimizer, lr_lambda=lambda epoch: 1 / (1 + 0.05 * epoch))) | |||||
# callbacks.append(scheduler) | |||||
trainer = Trainer(train_data=data.datasets['train'], model=model, optimizer=optimizer, sampler=BucketSampler(), | trainer = Trainer(train_data=data.datasets['train'], model=model, optimizer=optimizer, sampler=BucketSampler(), | ||||
device=0, dev_data=data.datasets['dev'], batch_size=10, | |||||
device=1, dev_data=data.datasets['dev'], batch_size=10, | |||||
metrics=SpanFPreRecMetric(tag_vocab=data.vocabs[Const.TARGET], encoding_type=encoding_type), | metrics=SpanFPreRecMetric(tag_vocab=data.vocabs[Const.TARGET], encoding_type=encoding_type), | ||||
callbacks=callbacks, num_workers=1, n_epochs=100) | callbacks=callbacks, num_workers=1, n_epochs=100) | ||||
trainer.train() | trainer.train() |
@@ -1,4 +1,6 @@ | |||||
import sys | |||||
sys.path.append('../../..') | |||||
from fastNLP.modules.encoder.embedding import CNNCharEmbedding, StaticEmbedding | from fastNLP.modules.encoder.embedding import CNNCharEmbedding, StaticEmbedding | ||||
@@ -8,8 +10,11 @@ from fastNLP import SpanFPreRecMetric | |||||
from fastNLP import BucketSampler | from fastNLP import BucketSampler | ||||
from fastNLP import Const | from fastNLP import Const | ||||
from torch.optim import SGD, Adam | from torch.optim import SGD, Adam | ||||
from torch.optim.lr_scheduler import LambdaLR | |||||
from fastNLP import GradientClipCallback | from fastNLP import GradientClipCallback | ||||
from fastNLP.core.callback import FitlogCallback | |||||
from fastNLP.core.callback import FitlogCallback, LRScheduler | |||||
from reproduction.seqence_labelling.ner.model.swats import SWATS | |||||
import fitlog | import fitlog | ||||
fitlog.debug() | fitlog.debug() | ||||
@@ -17,23 +22,44 @@ from reproduction.seqence_labelling.ner.data.OntoNoteLoader import OntoNoteNERDa | |||||
encoding_type = 'bioes' | encoding_type = 'bioes' | ||||
data = OntoNoteNERDataLoader(encoding_type=encoding_type).process('/hdd/fudanNLP/fastNLP/others/data/v4/english') | |||||
data = OntoNoteNERDataLoader(encoding_type=encoding_type).process('/hdd/fudanNLP/fastNLP/others/data/v4/english', | |||||
lower=True) | |||||
import joblib | |||||
raw_data = joblib.load('/hdd/fudanNLP/fastNLP/others/NER-with-LS/data/ontonotes_with_data.joblib') | |||||
def convert_to_ids(raw_words): | |||||
ids = [] | |||||
for word in raw_words: | |||||
id = raw_data['word_to_id'][word] | |||||
id = raw_data['id_to_emb_map'][id] | |||||
ids.append(id) | |||||
return ids | |||||
word_embed = raw_data['emb_matrix'] | |||||
for name, dataset in data.datasets.items(): | |||||
dataset.apply_field(convert_to_ids, field_name='raw_words', new_field_name=Const.INPUT) | |||||
print(data) | print(data) | ||||
char_embed = CNNCharEmbedding(vocab=data.vocabs['cap_words'], embed_size=30, char_emb_size=30, filter_nums=[30], | char_embed = CNNCharEmbedding(vocab=data.vocabs['cap_words'], embed_size=30, char_emb_size=30, filter_nums=[30], | ||||
kernel_sizes=[3]) | kernel_sizes=[3]) | ||||
word_embed = StaticEmbedding(vocab=data.vocabs[Const.INPUT], | |||||
model_dir_or_name='/hdd/fudanNLP/pretrain_vectors/glove.6B.100d.txt', | |||||
requires_grad=True) | |||||
# word_embed = StaticEmbedding(vocab=data.vocabs[Const.INPUT], | |||||
# model_dir_or_name='/hdd/fudanNLP/pretrain_vectors/glove.6B.100d.txt', | |||||
# requires_grad=True) | |||||
model = CNNBiLSTMCRF(word_embed, char_embed, hidden_size=400, num_layers=2, tag_vocab=data.vocabs[Const.TARGET], | |||||
model = CNNBiLSTMCRF(word_embed, char_embed, hidden_size=1200, num_layers=1, tag_vocab=data.vocabs[Const.TARGET], | |||||
encoding_type=encoding_type) | encoding_type=encoding_type) | ||||
optimizer = SGD(model.parameters(), lr=0.015, momentum=0.9) | |||||
callbacks = [GradientClipCallback(clip_value=5, clip_type='value'), | |||||
FitlogCallback(data.datasets['test'], verbose=1)] | |||||
optimizer = SGD(model.parameters(), lr=0.01, momentum=0.9) | |||||
scheduler = LRScheduler(LambdaLR(optimizer, lr_lambda=lambda epoch: 1 / (1 + 0.05 * epoch))) | |||||
callbacks.append(scheduler) | |||||
# optimizer = SWATS(model.parameters(), verbose=True) | |||||
# optimizer = Adam(model.parameters(), lr=0.005) | |||||
callbacks = [GradientClipCallback(), FitlogCallback(data.datasets['test'], verbose=1)] | |||||
trainer = Trainer(train_data=data.datasets['train'], model=model, optimizer=optimizer, sampler=BucketSampler(), | |||||
device=1, dev_data=data.datasets['dev'], batch_size=32, | |||||
trainer = Trainer(train_data=data.datasets['train'], model=model, optimizer=optimizer, sampler=BucketSampler(num_buckets=100), | |||||
device=0, dev_data=data.datasets['dev'], batch_size=10, | |||||
metrics=SpanFPreRecMetric(tag_vocab=data.vocabs[Const.TARGET], encoding_type=encoding_type), | metrics=SpanFPreRecMetric(tag_vocab=data.vocabs[Const.TARGET], encoding_type=encoding_type), | ||||
callbacks=callbacks, num_workers=1, n_epochs=100) | callbacks=callbacks, num_workers=1, n_epochs=100) | ||||
trainer.train() | trainer.train() |