[bug fix] fix test case for sstloader & update some train scripts (tag: v0.4.10)
@@ -60,8 +60,8 @@ class SSTLoader(DataSetLoader):
     def _get_one(self, data, subtree):
         tree = Tree.fromstring(data)
         if subtree:
-            return [([x.text for x in self.tokenizer(' '.join(t.leaves()))], t.label()) for t in tree.subtrees() ]
-        return [([x.text for x in self.tokenizer(' '.join(tree.leaves()))], tree.label())]
+            return [(self.tokenizer(' '.join(t.leaves())), t.label()) for t in tree.subtrees() ]
+        return [(self.tokenizer(' '.join(tree.leaves())), tree.label())]

     def process(self,
                 paths, train_subtree=True,
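For context on this loader fix: the removed lines unwrapped tokens via `x.text` (as a spaCy-style tokenizer returns `Token` objects), while the new lines assume `self.tokenizer` already yields plain strings. A minimal sketch of what `_get_one` now returns, using NLTK's `Tree` and `str.split` as a hypothetical stand-in for the tokenizer:

```python
from nltk import Tree

tokenizer = str.split  # hypothetical stand-in: returns plain string tokens
data = '(3 (2 (2 The) (2 movie)) (3 (2 is) (3 great)))'
tree = Tree.fromstring(data)

# subtree=True: one (tokens, label) pair per subtree of the parse
pairs = [(tokenizer(' '.join(t.leaves())), t.label()) for t in tree.subtrees()]
print(pairs[0])  # (['The', 'movie', 'is', 'great'], '3')

# subtree=False: a single pair for the whole sentence
print([(tokenizer(' '.join(tree.leaves())), tree.label())])
```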
@@ -46,8 +46,8 @@ class StarTransEnc(nn.Module):
         super(StarTransEnc, self).__init__()
         self.embedding = get_embeddings(init_embed)
         emb_dim = self.embedding.embedding_dim
-        #self.emb_fc = nn.Linear(emb_dim, hidden_size)
-        self.emb_drop = nn.Dropout(emb_dropout)
+        self.emb_fc = nn.Linear(emb_dim, hidden_size)
+        # self.emb_drop = nn.Dropout(emb_dropout)
         self.encoder = StarTransformer(hidden_size=hidden_size,
                                        num_layers=num_layers,
                                        num_head=num_head,
@@ -65,7 +65,7 @@ class StarTransEnc(nn.Module):
             [batch, hidden] the global relay node; see the paper for details
         """
         x = self.embedding(x)
-        #x = self.emb_fc(self.emb_drop(x))
+        x = self.emb_fc(x)
         nodes, relay = self.encoder(x, mask)
         return nodes, relay
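These two hunks flip the input path: the `nn.Linear` projection from `emb_dim` to `hidden_size` is re-enabled and the embedding dropout is bypassed. A shape-only sketch of the re-enabled path (the dimensions are illustrative assumptions, not the script's actual settings):

```python
import torch
import torch.nn as nn

emb = nn.Embedding(10000, 300)   # emb_dim = 300 (e.g. GloVe-sized)
emb_fc = nn.Linear(300, 512)     # hidden_size = 512 (assumed)

tokens = torch.randint(0, 10000, (2, 7))  # [batch=2, seq_len=7]
x = emb(tokens)                           # [2, 7, 300]
x = emb_fc(x)                             # [2, 7, 512], matches the encoder
```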
@@ -34,8 +34,8 @@ class StarTransformer(nn.Module):
         super(StarTransformer, self).__init__()
         self.iters = num_layers

-        self.norm = nn.ModuleList([nn.LayerNorm(hidden_size) for _ in range(self.iters)])
-        self.emb_fc = nn.Conv2d(hidden_size, hidden_size, 1)
+        self.norm = nn.ModuleList([nn.LayerNorm(hidden_size, eps=1e-6) for _ in range(self.iters)])
+        # self.emb_fc = nn.Conv2d(hidden_size, hidden_size, 1)
         self.emb_drop = nn.Dropout(dropout)
         self.ring_att = nn.ModuleList(
             [_MSA1(hidden_size, nhead=num_head, head_dim=head_dim, dropout=0.0)
@@ -9,26 +9,3 @@ paper: [Star-Transformer](https://arxiv.org/abs/1902.09113)
 |Text Classification|SST|-|51.2|
 |Natural Language Inference|SNLI|-|83.76|

-## Usage
-``` python
-# for sequence labeling(ner, pos tagging, etc)
-from fastNLP.models.star_transformer import STSeqLabel
-model = STSeqLabel(
-    vocab_size=10000, num_cls=50,
-    emb_dim=300)
-
-# for sequence classification
-from fastNLP.models.star_transformer import STSeqCls
-model = STSeqCls(
-    vocab_size=10000, num_cls=50,
-    emb_dim=300)
-
-# for natural language inference
-from fastNLP.models.star_transformer import STNLICls
-model = STNLICls(
-    vocab_size=10000, num_cls=50,
-    emb_dim=300)
-```
@@ -10,7 +10,8 @@ from fastNLP.models.star_transformer import STSeqLabel, STSeqCls, STNLICls
 from fastNLP.core.const import Const as C
 import sys
 #sys.path.append('/remote-home/yfshao/workdir/dev_fastnlp/')
-pre_dir = '/home/ec2-user/fast_data/'
+import os
+pre_dir = os.path.join(os.environ['HOME'], 'workdir/datasets/')

 g_model_select = {
     'pos': STSeqLabel,
@@ -19,7 +20,7 @@ g_model_select = {
     'nli': STNLICls,
 }

-g_emb_file_path = {'en': pre_dir + 'glove.840B.300d.txt',
+g_emb_file_path = {'en': pre_dir + 'word_vector/glove.840B.300d.txt',
                    'zh': pre_dir + 'cc.zh.300.vec'}

 g_args = None
@@ -55,7 +56,7 @@ def get_conll2012_ner():


 def get_sst():
-    path = pre_dir + 'sst'
+    path = pre_dir + 'SST'
     files = ['train.txt', 'dev.txt', 'test.txt']
     return load_sst(path, files)
@@ -171,10 +172,10 @@ def train():
                         sampler=FN.BucketSampler(100, g_args.bsz, C.INPUT_LEN),
                         callbacks=[MyCallback()])

-    trainer.train()
+    print(trainer.train())
     tester = FN.Tester(data=test_data, model=model, metrics=metric,
                        batch_size=128, device=device)
-    tester.test()
+    print(tester.test())


 def test():
@@ -106,7 +106,7 @@ class IDCNN(nn.Module):
         if self.crf is not None and target is not None:
             loss = self.crf(y.transpose(1, 2), t, mask)
         else:
-            t.masked_fill_(mask == 0, -100)
+            # t.masked_fill_(mask == 0, -100)
             loss = F.cross_entropy(y, t, ignore_index=-100)
         return loss
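The `masked_fill_` line is redundant if padded positions of the target already hold `-100` upstream, because `F.cross_entropy` skips every target equal to `ignore_index` when averaging. A self-contained illustration of that PyTorch behavior:

```python
import torch
import torch.nn.functional as F

y = torch.randn(1, 4, 5)                   # [batch, num_classes, seq_len]
t = torch.tensor([[2, 0, 1, -100, -100]])  # last two positions are padding

# padded positions contribute nothing to the averaged loss
loss = F.cross_entropy(y, t, ignore_index=-100)
print(loss)
```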
@@ -130,13 +130,3 @@ class IDCNN(nn.Module):
             C.OUTPUT: pred,
         }

-    def predict(self, words, seq_len, chars=None):
-        res = self.forward(
-            words=words,
-            seq_len=seq_len,
-            chars=chars,
-            target=None
-        )[C.OUTPUT]
-        return {
-            C.OUTPUT: res
-        }
@@ -1,4 +1,5 @@
 from reproduction.seqence_labelling.ner.data.OntoNoteLoader import OntoNoteNERDataLoader
+from reproduction.seqence_labelling.ner.data.Conll2003Loader import Conll2003DataLoader
 from fastNLP.core.callback import FitlogCallback, LRScheduler
 from fastNLP import GradientClipCallback
 from torch.optim.lr_scheduler import LambdaLR, CosineAnnealingLR
@@ -7,10 +8,12 @@ from fastNLP import Const
 from fastNLP import RandomSampler, BucketSampler
 from fastNLP import SpanFPreRecMetric
 from fastNLP import Trainer
+from fastNLP.core.metrics import MetricBase
 from reproduction.seqence_labelling.ner.model.dilated_cnn import IDCNN
 from fastNLP.core.utils import Option
 from fastNLP.modules.encoder.embedding import CNNCharEmbedding, StaticEmbedding
 from fastNLP.core.utils import cache_results
+from fastNLP.core.vocabulary import VocabularyOption
 import sys
 import torch.cuda
 import os
@@ -24,43 +27,53 @@ encoding_type = 'bioes'

 def get_path(path):
     return os.path.join(os.environ['HOME'], path)

-data_path = get_path('workdir/datasets/ontonotes-v4')
-
 ops = Option(
     batch_size=128,
     num_epochs=100,
-    lr=3e-4,
+    lr=5e-4,
     repeats=3,
     num_layers=3,
     num_filters=400,
-    use_crf=True,
+    use_crf=False,
     gradient_clip=5,
 )

-@cache_results('ontonotes-cache')
+@cache_results('ontonotes-min_freq0-case-cache')
 def load_data():
-    data = OntoNoteNERDataLoader(encoding_type=encoding_type).process(data_path,
-                                                                      lower=True)
+    print('loading data')
+    # data = OntoNoteNERDataLoader(encoding_type=encoding_type).process(
+    #     data_path = get_path('workdir/datasets/ontonotes-v4')
+    #     lower=False,
+    #     word_vocab_opt=VocabularyOption(min_freq=0),
+    # )
+    data = Conll2003DataLoader(task='ner', encoding_type=encoding_type).process(
+        paths=get_path('workdir/datasets/conll03'),
+        lower=False, word_vocab_opt=VocabularyOption(min_freq=0)
+    )
     # char_embed = CNNCharEmbedding(vocab=data.vocabs['cap_words'], embed_size=30, char_emb_size=30, filter_nums=[30],
     #                               kernel_sizes=[3])
+    print('loading embedding')
     word_embed = StaticEmbedding(vocab=data.vocabs[Const.INPUT],
                                  model_dir_or_name='en-glove-840b-300',
                                  requires_grad=True)
     return data, [word_embed]

 data, embeds = load_data()
 print(data)
+print(data.datasets['train'][0])
+print(list(data.vocabs.keys()))

-for ds in data.datasets.values():
-    ds.rename_field('cap_words', 'chars')
-    ds.set_input('chars')
+# for ds in data.datasets.values():
+#     ds.rename_field('cap_words', 'chars')
+#     ds.set_input('chars')

 word_embed = embeds[0]
-char_embed = CNNCharEmbedding(data.vocabs['cap_words'])
+word_embed.embedding.weight.data /= word_embed.embedding.weight.data.std()
+# char_embed = CNNCharEmbedding(data.vocabs['cap_words'])
+char_embed = None

 # for ds in data.datasets:
 #     ds.rename_field('')
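One note on the renamed decorator: `cache_results` persists the processed data under the given file name, so renaming the cache ('ontonotes-cache' → 'ontonotes-min_freq0-case-cache') is what forces preprocessing to rerun with the new casing and `min_freq` options instead of silently reloading stale data. A simplified stand-in for that caching pattern (not fastNLP's actual implementation):

```python
import os
import pickle

def cache_results(cache_path):
    """Cache a function's return value in a pickle keyed by file name."""
    def decorator(func):
        def wrapper(*args, **kwargs):
            if os.path.exists(cache_path):   # hit: reuse the pickled result
                with open(cache_path, 'rb') as f:
                    return pickle.load(f)
            result = func(*args, **kwargs)   # miss: compute and store
            with open(cache_path, 'wb') as f:
                pickle.dump(result, f)
            return result
        return wrapper
    return decorator
```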
@@ -75,13 +88,42 @@ model = IDCNN(init_embed=word_embed,
               kernel_size=3,
               use_crf=ops.use_crf, use_projection=True,
               block_loss=True,
-              input_dropout=0.33, hidden_dropout=0.2, inner_dropout=0.2)
+              input_dropout=0.5, hidden_dropout=0.0, inner_dropout=0.0)

 print(model)

 callbacks = [GradientClipCallback(clip_value=ops.gradient_clip, clip_type='norm'),]

+metrics = []
+metrics.append(
+    SpanFPreRecMetric(
+        tag_vocab=data.vocabs[Const.TARGET], encoding_type=encoding_type,
+        pred=Const.OUTPUT, target=Const.TARGET, seq_len=Const.INPUT_LEN,
+    )
+)
+
+class LossMetric(MetricBase):
+    def __init__(self, loss=None):
+        super(LossMetric, self).__init__()
+        self._init_param_map(loss=loss)
+        self.total_loss = 0.0
+        self.steps = 0
+
+    def evaluate(self, loss):
+        self.total_loss += float(loss)
+        self.steps += 1
+
+    def get_metric(self, reset=True):
+        result = {'loss': self.total_loss / (self.steps + 1e-12)}
+        if reset:
+            self.total_loss = 0.0
+            self.steps = 0
+        return result
+
+metrics.append(
+    LossMetric(loss=Const.LOSS)
+)
+
-optimizer = Adam(model.parameters(), lr=ops.lr, weight_decay=0)
+optimizer = Adam(model.parameters(), lr=ops.lr, weight_decay=1e-4)

 # scheduler = LRScheduler(LambdaLR(optimizer, lr_lambda=lambda epoch: 1 / (1 + 0.05 * epoch)))
 # callbacks.append(LRScheduler(CosineAnnealingLR(optimizer, 15)))
 # optimizer = SWATS(model.parameters(), verbose=True)
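A usage sketch for the `LossMetric` added above: it accumulates batch losses and reports their mean at evaluation time. The numbers below are made up, and calling `evaluate` directly bypasses fastNLP's parameter mapping, which is fine for a quick check:

```python
m = LossMetric(loss=Const.LOSS)
for batch_loss in (0.9, 0.7, 0.5):
    m.evaluate(loss=batch_loss)
print(m.get_metric())  # {'loss': 0.7} (approximately; counters then reset)
```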
@@ -92,8 +134,7 @@ device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

 trainer = Trainer(train_data=data.datasets['train'], model=model, optimizer=optimizer,
                   sampler=BucketSampler(num_buckets=50, batch_size=ops.batch_size),
                   device=device, dev_data=data.datasets['dev'], batch_size=ops.batch_size,
-                  metrics=SpanFPreRecMetric(
-                      tag_vocab=data.vocabs[Const.TARGET], encoding_type=encoding_type),
+                  metrics=metrics,
                   check_code_level=-1,
                   callbacks=callbacks, num_workers=2, n_epochs=ops.num_epochs)
 trainer.train()
@@ -11,7 +11,7 @@ from reproduction.text_classification.model.dpcnn import DPCNN
 from data.yelpLoader import yelpLoader
 from fastNLP.core.sampler import BucketSampler
 import torch.nn as nn
-from fastNLP.core import LRScheduler
+from fastNLP.core import LRScheduler, Callback
 from fastNLP.core.const import Const as C
 from fastNLP.core.vocabulary import VocabularyOption
 from utils.util_init import set_rng_seeds
@@ -25,14 +25,14 @@ os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"

 class Config():
     seed = 12345
-    model_dir_or_name = "dpcnn-yelp-p"
+    model_dir_or_name = "dpcnn-yelp-f"
     embedding_grad = True
     train_epoch = 30
     batch_size = 100
-    task = "yelp_p"
+    task = "yelp_f"
     #datadir = 'workdir/datasets/SST'
-    datadir = 'workdir/datasets/yelp_polarity'
-    # datadir = 'workdir/datasets/yelp_full'
+    # datadir = 'workdir/datasets/yelp_polarity'
+    datadir = 'workdir/datasets/yelp_full'
     #datafile = {"train": "train.txt", "dev": "dev.txt", "test": "test.txt"}
     datafile = {"train": "train.csv", "test": "test.csv"}
     lr = 1e-3
@@ -73,6 +73,8 @@ def load_data():

 datainfo, embedding = load_data()
+embedding.embedding.weight.data /= embedding.embedding.weight.data.std()
+print(embedding.embedding.weight.mean(), embedding.embedding.weight.std())

 # 2. Or directly reuse one of fastNLP's models
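The added rescaling divides the pretrained weights by their own standard deviation so they come out at unit scale (the same trick the NER script applies to `word_embed`). A toy illustration of the effect, with random weights standing in for GloVe:

```python
import torch

w = torch.randn(100, 300) * 0.4  # stand-in for pretrained weights, std ≈ 0.4
w /= w.std()
print(w.mean(), w.std())         # mean ≈ 0, std ≈ 1.0
```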
@@ -92,11 +94,12 @@ optimizer = SGD([param for param in model.parameters() if param.requires_grad ==
                lr=ops.lr, momentum=0.9, weight_decay=ops.weight_decay)

 callbacks = []

-# callbacks.append(LRScheduler(CosineAnnealingLR(optimizer, 5)))
-callbacks.append(
-    LRScheduler(LambdaLR(optimizer, lambda epoch: ops.lr if epoch <
-                         ops.train_epoch * 0.8 else ops.lr * 0.1))
-)
+callbacks.append(LRScheduler(CosineAnnealingLR(optimizer, 5)))
+# callbacks.append(
+#     LRScheduler(LambdaLR(optimizer, lambda epoch: ops.lr if epoch <
+#                          ops.train_epoch * 0.8 else ops.lr * 0.1))
+# )

 # callbacks.append(
 #     FitlogCallback(data=datainfo.datasets, verbose=1)
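For reference, a standalone sketch of the scheduler swap above: `CosineAnnealingLR(optimizer, 5)` decays the learning rate smoothly with `T_max=5`, whereas the now-commented `LambdaLR` held it flat until 80% of training and then cut it to a tenth. (Note `LambdaLR` multiplies the base rate by the lambda's return value, so returning `ops.lr` rather than `1.0` effectively scaled the rate by `ops.lr` twice.) A toy optimizer makes the cosine decay visible:

```python
import torch
from torch.optim import SGD
from torch.optim.lr_scheduler import CosineAnnealingLR

opt = SGD([torch.zeros(1, requires_grad=True)], lr=1e-3)
sched = CosineAnnealingLR(opt, T_max=5)
for epoch in range(6):
    print(epoch, sched.get_last_lr())  # decays from 1e-3 toward 0 over 5 epochs
    opt.step()
    sched.step()
```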