|
|
@@ -1,24 +1,36 @@ |
|
|
|
|
|
|
|
|
|
|
|
import unittest
|
|
|
|
from ..data.dataloader import SummarizationLoader
|
|
|
|
|
|
|
|
import sys
|
|
|
|
sys.path.append('..')
|
|
|
|
|
|
|
|
from data.dataloader import SummarizationLoader
|
|
|
|
|
|
|
|
# Settings passed as keyword arguments to SummarizationLoader.process()
# by the test cases in this module.
vocab_size = 100000
vocab_path = "testdata/vocab"
sent_max_len = 100
doc_max_timesteps = 50
|
|
|
|
|
|
|
|
class TestSummarizationLoader(unittest.TestCase):
    """Smoke tests for ``SummarizationLoader.process``.

    Every case runs a fresh loader over the same train/valid/test JSONL
    files under ``testdata/`` using the module-level vocabulary and length
    settings, then prints the result. No assertions are made on the
    output, so a case fails only when loading (or reading the printed
    attributes) raises.
    """

    def _process(self, **extra):
        """Run a new loader over the test files and return its result.

        Args:
            **extra: Additional keyword arguments forwarded unchanged to
                ``SummarizationLoader.process`` (e.g. ``domain=True``).

        Returns:
            Whatever ``SummarizationLoader.process`` returns.
        """
        paths = {
            "train": "testdata/train.jsonl",
            "valid": "testdata/val.jsonl",
            "test": "testdata/test.jsonl",
        }
        return SummarizationLoader().process(
            paths=paths,
            vocab_size=vocab_size,
            vocab_path=vocab_path,
            sent_max_len=sent_max_len,
            doc_max_timesteps=doc_max_timesteps,
            **extra
        )

    def test_case1(self):
        """Process with the default options and print the datasets."""
        data = self._process()
        print(data.datasets)

    def test_case2(self):
        """Process with ``domain=True`` and print datasets and vocabs."""
        data = self._process(domain=True)
        print(data.datasets, data.vocabs)

    def test_case3(self):
        """Process with ``tag=True`` and print datasets and vocabs."""
        data = self._process(tag=True)
        print(data.datasets, data.vocabs)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|