|
|
@@ -36,16 +36,37 @@ class TestCNClassificationPipe(unittest.TestCase): |
|
|
|
class TestRunClassificationPipe(unittest.TestCase): |
|
|
|
def test_process_from_file(self): |
|
|
|
data_set_dict = { |
|
|
|
'yelp.p': ('test/data_for_tests/io/yelp_review_polarity', YelpPolarityPipe, (6, 6, 6), (1176, 2), False), |
|
|
|
'yelp.f': ('test/data_for_tests/io/yelp_review_full', YelpFullPipe, (6, 6, 6), (1166, 5), False), |
|
|
|
'sst-2': ('test/data_for_tests/io/SST-2', SST2Pipe, (5, 5, 5), (139, 2), True), |
|
|
|
'sst': ('test/data_for_tests/io/SST', SSTPipe, (6, 354, 6), (232, 5), False), |
|
|
|
'imdb': ('test/data_for_tests/io/imdb', IMDBPipe, (6, 6, 6), (1670, 2), False), |
|
|
|
'ag': ('test/data_for_tests/io/ag', AGsNewsPipe, (5, 4), (257, 4), False), |
|
|
|
'dbpedia': ('test/data_for_tests/io/dbpedia', DBPediaPipe, (5, 14), (496, 14), False), |
|
|
|
'ChnSentiCorp': ('test/data_for_tests/io/ChnSentiCorp', ChnSentiCorpPipe, (6, 6, 6), (529, 1296, 1483, 2), False), |
|
|
|
'Chn-THUCNews': ('test/data_for_tests/io/THUCNews', THUCNewsPipe, (9, 9, 9), (1864, 9), False), |
|
|
|
'Chn-WeiboSenti100k': ('test/data_for_tests/io/WeiboSenti100k', WeiboSenti100kPipe, (7, 6, 6), (452, 2), False), |
|
|
|
'yelp.p': ('test/data_for_tests/io/yelp_review_polarity', YelpPolarityPipe, |
|
|
|
{'train': 6, 'dev': 6, 'test': 6}, {'words': 1176, 'target': 2}, |
|
|
|
False), |
|
|
|
'yelp.f': ('test/data_for_tests/io/yelp_review_full', YelpFullPipe, |
|
|
|
{'train': 6, 'dev': 6, 'test': 6}, {'words': 1166, 'target': 5}, |
|
|
|
False), |
|
|
|
'sst-2': ('test/data_for_tests/io/SST-2', SST2Pipe, |
|
|
|
{'train': 5, 'dev': 5, 'test': 5}, {'words': 139, 'target': 2}, |
|
|
|
True), |
|
|
|
'sst': ('test/data_for_tests/io/SST', SSTPipe, |
|
|
|
{'train': 354, 'dev': 6, 'test': 6}, {'words': 232, 'target': 5}, |
|
|
|
False), |
|
|
|
'imdb': ('test/data_for_tests/io/imdb', IMDBPipe, |
|
|
|
{'train': 6, 'dev': 6, 'test': 6}, {'words': 1670, 'target': 2}, |
|
|
|
False), |
|
|
|
'ag': ('test/data_for_tests/io/ag', AGsNewsPipe, |
|
|
|
{'train': 4, 'test': 5}, {'words': 257, 'target': 4}, |
|
|
|
False), |
|
|
|
'dbpedia': ('test/data_for_tests/io/dbpedia', DBPediaPipe, |
|
|
|
{'train': 14, 'test': 5}, {'words': 496, 'target': 14}, |
|
|
|
False), |
|
|
|
'ChnSentiCorp': ('test/data_for_tests/io/ChnSentiCorp', ChnSentiCorpPipe, |
|
|
|
{'train': 6, 'dev': 6, 'test': 6}, |
|
|
|
{'chars': 529, 'bigrams': 1296, 'trigrams': 1483, 'target': 2}, |
|
|
|
False), |
|
|
|
'Chn-THUCNews': ('test/data_for_tests/io/THUCNews', THUCNewsPipe, |
|
|
|
{'train': 9, 'dev': 9, 'test': 9}, {'chars': 1864, 'target': 9}, |
|
|
|
False), |
|
|
|
'Chn-WeiboSenti100k': ('test/data_for_tests/io/WeiboSenti100k', WeiboSenti100kPipe, |
|
|
|
{'train': 6, 'dev': 6, 'test': 7}, {'chars': 452, 'target': 2}, |
|
|
|
False), |
|
|
|
} |
|
|
|
for k, v in data_set_dict.items(): |
|
|
|
path, pipe, data_set, vocab, warns = v |
|
|
@@ -61,12 +82,12 @@ class TestRunClassificationPipe(unittest.TestCase): |
|
|
|
|
|
|
|
self.assertTrue(isinstance(data_bundle, DataBundle)) |
|
|
|
self.assertEqual(len(data_set), data_bundle.num_dataset) |
|
|
|
for x, y in zip(data_set, data_bundle.iter_datasets()): |
|
|
|
name, dataset = y |
|
|
|
self.assertEqual(x, len(dataset)) |
|
|
|
for name, dataset in data_bundle.iter_datasets(): |
|
|
|
self.assertTrue(name in data_set.keys()) |
|
|
|
self.assertEqual(data_set[name], len(dataset)) |
|
|
|
|
|
|
|
self.assertEqual(len(vocab), data_bundle.num_vocab) |
|
|
|
for x, y in zip(vocab, data_bundle.iter_vocabs()): |
|
|
|
name, vocabs = y |
|
|
|
self.assertEqual(x, len(vocabs)) |
|
|
|
for name, vocabs in data_bundle.iter_vocabs(): |
|
|
|
self.assertTrue(name in vocab.keys()) |
|
|
|
self.assertEqual(vocab[name], len(vocabs)) |
|
|
|
|