
add dpcnn

tags/v0.4.10
yunfan 6 years ago
commit 32917dac7f
2 changed files with 171 additions and 1 deletion
  1. +91 -1 reproduction/text_classification/model/dpcnn.py
  2. +80 -0 reproduction/text_classification/train_dpcnn.py

+91 -1 reproduction/text_classification/model/dpcnn.py

@@ -1 +1,91 @@
# TODO
import torch
import torch.nn as nn
from fastNLP.modules.utils import get_embeddings
from fastNLP.core import Const as C

class DPCNN(nn.Module):
    def __init__(self, init_embed, num_cls, n_filters=256, kernel_size=3, n_layers=7, embed_dropout=0.1, dropout=0.1):
        super().__init__()
        # region embedding projects the word embeddings to n_filters channels
        self.region_embed = RegionEmbedding(init_embed, out_dim=n_filters, kernel_sizes=[3, 5, 9])
        embed_dim = self.region_embed.embedding_dim
        # stack of residual conv blocks; pooling between blocks builds the pyramid
        self.conv_list = nn.ModuleList()
        for i in range(n_layers):
            self.conv_list.append(nn.Sequential(
                nn.ReLU(),
                nn.Conv1d(n_filters, n_filters, kernel_size, padding=kernel_size // 2),
                nn.Conv1d(n_filters, n_filters, kernel_size, padding=kernel_size // 2),
            ))
        self.pool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1)
        self.embed_drop = nn.Dropout(embed_dropout)
        self.classifier = nn.Sequential(
            nn.Dropout(dropout),
            nn.Linear(n_filters, num_cls),
        )
        self.reset_parameters()

    def reset_parameters(self):
        for m in self.modules():
            if isinstance(m, (nn.Conv1d, nn.Conv2d, nn.Linear)):
                nn.init.normal_(m.weight, mean=0, std=0.01)
                if m.bias is not None:
                    nn.init.normal_(m.bias, mean=0, std=0.01)

    def forward(self, words, seq_len=None):
        words = words.long()
        # get region embeddings
        x = self.region_embed(words)
        x = self.embed_drop(x)

        # no pooling before the first conv block; every block is residual
        x = self.conv_list[0](x) + x
        for conv in self.conv_list[1:]:
            x = self.pool(x)
            x = conv(x) + x

        # B, C, L => B, C
        x, _ = torch.max(x, dim=2)
        x = self.classifier(x)
        return {C.OUTPUT: x}

    def predict(self, words, seq_len=None):
        x = self.forward(words, seq_len)[C.OUTPUT]
        return {C.OUTPUT: torch.argmax(x, 1)}


class RegionEmbedding(nn.Module):
    def __init__(self, init_embed, out_dim=300, kernel_sizes=None):
        super().__init__()
        if kernel_sizes is None:
            kernel_sizes = [5, 9]
        assert isinstance(kernel_sizes, list), 'kernel_sizes should be List(int)'
        self.embed = get_embeddings(init_embed)
        try:
            embed_dim = self.embed.embedding_dim
        except Exception:
            embed_dim = self.embed.embed_size
        # one conv per kernel size; self.linears holds a 1x1 conv for the raw
        # embeddings plus one for each conv output
        self.region_embeds = nn.ModuleList()
        for ksz in kernel_sizes:
            self.region_embeds.append(nn.Sequential(
                nn.Conv1d(embed_dim, embed_dim, ksz, padding=ksz // 2),
            ))
        self.linears = nn.ModuleList([nn.Conv1d(embed_dim, out_dim, 1)
                                      for _ in range(len(kernel_sizes) + 1)])
        self.embedding_dim = embed_dim

    def forward(self, x):
        x = self.embed(x)
        x = x.transpose(1, 2)
        # B, C, L
        out = self.linears[0](x)
        for conv, fc in zip(self.region_embeds, self.linears[1:]):
            conv_i = conv(x)
            out = out + fc(conv_i)
        # B, C, L
        return out


if __name__ == '__main__':
    x = torch.randint(0, 10000, size=(5, 15), dtype=torch.long)
    model = DPCNN((10000, 300), 20)
    y = model(x)[C.OUTPUT]  # forward returns a dict keyed by C.OUTPUT
    print(y.size(), y.mean(1), y.std(1))
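The stride-2 max pooling between the residual conv blocks is what makes the network a "pyramid": every block after the first roughly halves the sequence length. A minimal, illustrative sketch of that length schedule under the defaults above (n_layers=7, MaxPool1d(kernel_size=3, stride=2, padding=1)); the pooled_length helper is hypothetical and not part of this commit:

def pooled_length(length, kernel_size=3, stride=2, padding=1):
    # standard output-length formula for 1d pooling (dilation=1, ceil_mode=False)
    return (length + 2 * padding - kernel_size) // stride + 1

lengths = [15]  # same toy sequence length as the __main__ check above
for _ in range(6):  # the first conv block is applied without pooling
    lengths.append(pooled_length(lengths[-1]))
print(lengths)  # [15, 8, 4, 2, 1, 1, 1]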

+80 -0 reproduction/text_classification/train_dpcnn.py

@@ -0,0 +1,80 @@
# First, add the following paths to the environment variables; this is currently only open for internal testing, so the paths have to be declared manually.
import os
os.environ['FASTNLP_BASE_URL'] = 'http://10.141.222.118:8888/file/download/'
os.environ['FASTNLP_CACHE_DIR'] = '/remote-home/hyan01/fastnlp_caches'
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"

from fastNLP.core.const import Const as C
from fastNLP.core import LRScheduler
import torch.nn as nn
from fastNLP.io.dataset_loader import SSTLoader
from reproduction.text_classification.model.dpcnn import DPCNN
from fastNLP.modules.encoder.embedding import StaticEmbedding, CNNCharEmbedding, StackEmbedding
from fastNLP import CrossEntropyLoss, AccuracyMetric
from fastNLP.core.trainer import Trainer
from torch.optim import SGD
import torch.cuda
from torch.optim.lr_scheduler import CosineAnnealingLR

## hyper-parameters
class Config():
    model_dir_or_name = "en-base-uncased"
    embedding_grad = False
    train_epoch = 30
    batch_size = 100
    num_classes = 5
    task = "SST"
    datadir = '/remote-home/yfshao/workdir/datasets/SST'
    datafile = {"train": "train.txt", "dev": "dev.txt", "test": "test.txt"}
    lr = 1e-3

    def __init__(self):
        self.datapath = {k: os.path.join(self.datadir, v)
                         for k, v in self.datafile.items()}


ops = Config()


## 1. Task-related information: load the DataInfo with the dataset loader
datainfo = SSTLoader(fine_grained=True).process(paths=ops.datapath, train_ds='train')

print(len(datainfo.datasets['train']))
print(len(datainfo.datasets['dev']))


## 2. Define the embedding and model (or directly reuse a model from fastNLP)
vocab = datainfo.vocabs['words']

# embedding = StackEmbedding([StaticEmbedding(vocab), CNNCharEmbedding(vocab, 100)])
embedding = StaticEmbedding(vocab)
print(len(vocab))
print(len(datainfo.vocabs['target']))
model = DPCNN(init_embed=embedding, num_cls=ops.num_classes)

## 3. Declare the loss, metric and optimizer
loss = CrossEntropyLoss(pred=C.OUTPUT, target=C.TARGET)
metric = AccuracyMetric(pred=C.OUTPUT, target=C.TARGET)
optimizer = SGD([param for param in model.parameters() if param.requires_grad],
                lr=ops.lr, momentum=0.9, weight_decay=0)

callbacks = []
callbacks.append(LRScheduler(CosineAnnealingLR(optimizer, 5)))

device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
print(device)

for ds in datainfo.datasets.values():
    ds.apply_field(len, C.INPUT, C.INPUT_LEN)
    ds.set_input(C.INPUT, C.INPUT_LEN)
    ds.set_target(C.TARGET)

## 4. Define the train function
def train(model, datainfo, loss, metrics, optimizer, num_epochs=ops.train_epoch):
    trainer = Trainer(datainfo.datasets['train'], model, optimizer=optimizer, loss=loss,
                      metrics=[metrics], dev_data=datainfo.datasets['dev'], device=device,
                      check_code_level=-1, batch_size=ops.batch_size, callbacks=callbacks,
                      n_epochs=num_epochs)
    print(trainer.train())


if __name__ == "__main__":
    train(model, datainfo, loss, metric, optimizer)
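The script above only trains on the train split and validates on dev. A minimal sketch of how the held-out test split could be evaluated afterwards with fastNLP's Tester, assuming SSTLoader.process also loaded a 'test' dataset (keyword arguments may differ slightly between fastNLP versions):

from fastNLP import Tester

def evaluate_on_test(model, datainfo, metric):
    # run the trained model over the test split with the same AccuracyMetric
    tester = Tester(data=datainfo.datasets['test'], model=model,
                    metrics=metric, batch_size=ops.batch_size)
    return tester.test()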
