|
-
- import unittest
-
- import sys
- sys.path.append('..')
-
- from data.dataloader import SummarizationLoader
-
- vocab_size = 100000
- vocab_path = "testdata/vocab"
- sent_max_len = 100
- doc_max_timesteps = 50
-
- class TestSummarizationLoader(unittest.TestCase):
-
- def test_case1(self):
- sum_loader = SummarizationLoader()
- paths = {"train":"testdata/train.jsonl", "valid":"testdata/val.jsonl", "test":"testdata/test.jsonl"}
- data = sum_loader.process(paths=paths, vocab_size=vocab_size, vocab_path=vocab_path, sent_max_len=sent_max_len, doc_max_timesteps=doc_max_timesteps)
- print(data.datasets)
-
- def test_case2(self):
- sum_loader = SummarizationLoader()
- paths = {"train": "testdata/train.jsonl", "valid": "testdata/val.jsonl", "test": "testdata/test.jsonl"}
- data = sum_loader.process(paths=paths, vocab_size=vocab_size, vocab_path=vocab_path, sent_max_len=sent_max_len, doc_max_timesteps=doc_max_timesteps, domain=True)
- print(data.datasets, data.vocabs)
-
- def test_case3(self):
- sum_loader = SummarizationLoader()
- paths = {"train": "testdata/train.jsonl", "valid": "testdata/val.jsonl", "test": "testdata/test.jsonl"}
- data = sum_loader.process(paths=paths, vocab_size=vocab_size, vocab_path=vocab_path, sent_max_len=sent_max_len, doc_max_timesteps=doc_max_timesteps, tag=True)
- print(data.datasets, data.vocabs)
-
-
-
-
|