
Removed tests that could not run

tags/v0.4.10
ChenXin 6 years ago
parent commit 14fe885ecf
3 changed files with 0 additions and 218 deletions
  1. +0 -6 legacy/test/api/test_pipeline.py
  2. +0 -101 legacy/test/api/test_processor.py
  3. +0 -111 legacy/test/automl/test_enas.py

+ 0 - 6 legacy/test/api/test_pipeline.py

@@ -1,6 +0,0 @@
import unittest


class TestPipeline(unittest.TestCase):
    def test_case(self):
        pass

+ 0 - 101 legacy/test/api/test_processor.py

@@ -1,101 +0,0 @@
import random
import unittest

import numpy as np

from fastNLP import Vocabulary, Instance
from fastNLP.api.processor import FullSpaceToHalfSpaceProcessor, PreAppendProcessor, SliceProcessor, Num2TagProcessor, \
    IndexerProcessor, VocabProcessor, SeqLenProcessor, ModelProcessor, Index2WordProcessor, SetTargetProcessor, \
    SetInputProcessor, VocabIndexerProcessor
from fastNLP.core.dataset import DataSet


class TestProcessor(unittest.TestCase):
    def test_FullSpaceToHalfSpaceProcessor(self):
        ds = DataSet({"word": ["00, u1, u), (u2, u2"]})
        proc = FullSpaceToHalfSpaceProcessor("word")
        ds = proc(ds)
        self.assertEqual(ds.field_arrays["word"].content, ["00, u1, u), (u2, u2"])

    def test_PreAppendProcessor(self):
        ds = DataSet({"word": [["1234", "3456"], ["8789", "3464"]]})
        proc = PreAppendProcessor(data="abc", field_name="word")
        ds = proc(ds)
        self.assertEqual(ds.field_arrays["word"].content, [["abc", "1234", "3456"], ["abc", "8789", "3464"]])

    def test_SliceProcessor(self):
        ds = DataSet({"xx": [[random.randint(0, 10) for _ in range(30)]] * 40})
        proc = SliceProcessor(10, 20, 2, "xx", new_added_field_name="yy")
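        # slice(10, 20, 2) keeps indices 10, 12, 14, 16, 18 -- five elements per instance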
        ds = proc(ds)
        self.assertEqual(len(ds.field_arrays["yy"].content[0]), 5)

    def test_Num2TagProcessor(self):
        ds = DataSet({"num": [["99.9982", "2134.0"], ["0.002", "234"]]})
        proc = Num2TagProcessor("<num>", "num")
        ds = proc(ds)
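        # every numeric token should have been replaced by the "<num>" tag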
        for data in ds.field_arrays["num"].content:
            for d in data:
                self.assertEqual(d, "<num>")

    def test_VocabProcessor_and_IndexerProcessor(self):
        ds = DataSet({"xx": [[str(random.randint(0, 10)) for _ in range(30)]] * 40})
        vocab_proc = VocabProcessor("xx")
        vocab_proc(ds)
        vocab = vocab_proc.vocab
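        # VocabProcessor should have collected the distinct tokens "0".."10" into a Vocabulary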
        self.assertTrue(isinstance(vocab, Vocabulary))
        self.assertTrue(len(vocab) > 5)

        proc = IndexerProcessor(vocab, "xx", "yy")
        ds = proc(ds)
        for data in ds.field_arrays["yy"].content[0]:
            self.assertTrue(isinstance(data, int))

    def test_SeqLenProcessor(self):
        ds = DataSet({"xx": [[str(random.randint(0, 10)) for _ in range(30)]] * 10})
        proc = SeqLenProcessor("xx", "len")
        ds = proc(ds)
        for data in ds.field_arrays["len"].content:
            self.assertEqual(data, 30)

    def test_ModelProcessor(self):
        from fastNLP.models.cnn_text_classification import CNNText
        model = CNNText((100, 100), 5)
        ins_list = []
        for _ in range(64):
            seq_len = np.random.randint(5, 30)
            ins_list.append(Instance(word_seq=[np.random.randint(0, 100) for _ in range(seq_len)], seq_lens=seq_len))
        data_set = DataSet(ins_list)
        data_set.set_input("word_seq", "seq_lens")
        proc = ModelProcessor(model)
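        # ModelProcessor runs the model's forward pass over the DataSet and writes
        # the outputs back as new fields (here the prediction field "pred")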
        data_set = proc(data_set)
        self.assertTrue("pred" in data_set)

    def test_Index2WordProcessor(self):
        vocab = Vocabulary()
        vocab.add_word_lst(["a", "b", "c", "d", "e"])
        proc = Index2WordProcessor(vocab, "tag_id", "tag")
        data_set = DataSet([Instance(tag_id=[np.random.randint(0, 7) for _ in range(32)])])
        data_set = proc(data_set)
        self.assertTrue("tag" in data_set)

    def test_SetTargetProcessor(self):
        proc = SetTargetProcessor("a", "b", "c")
        data_set = DataSet({"a": [1, 2, 3], "b": [1, 2, 3], "c": [1, 2, 3]})
        data_set = proc(data_set)
        self.assertTrue(data_set["a"].is_target)
        self.assertTrue(data_set["b"].is_target)
        self.assertTrue(data_set["c"].is_target)

    def test_SetInputProcessor(self):
        proc = SetInputProcessor("a", "b", "c")
        data_set = DataSet({"a": [1, 2, 3], "b": [1, 2, 3], "c": [1, 2, 3]})
        data_set = proc(data_set)
        self.assertTrue(data_set["a"].is_input)
        self.assertTrue(data_set["b"].is_input)
        self.assertTrue(data_set["c"].is_input)

    def test_VocabIndexerProcessor(self):
        proc = VocabIndexerProcessor("word_seq", "word_ids")
        data_set = DataSet([Instance(word_seq=["a", "b", "c", "d", "e"])])
        data_set = proc(data_set)
        self.assertTrue("word_ids" in data_set)

+ 0 - 111 legacy/test/automl/test_enas.py

@@ -1,111 +0,0 @@
import unittest

from fastNLP import DataSet
from fastNLP import Instance
from fastNLP import Vocabulary
from fastNLP.core.losses import CrossEntropyLoss
from fastNLP.core.metrics import AccuracyMetric


class TestENAS(unittest.TestCase):
    def testENAS(self):
        # Read data from the csv into a DataSet
        sample_path = "tutorials/sample_data/tutorial_sample_dataset.csv"
        dataset = DataSet.read_csv(sample_path, headers=('raw_sentence', 'label'),
                                   sep='\t')
        print(len(dataset))
        print(dataset[0])
        print(dataset[-3])

        dataset.append(Instance(raw_sentence='fake data', label='0'))
        # Lowercase the raw sentences
        dataset.apply(lambda x: x['raw_sentence'].lower(), new_field_name='raw_sentence')
        # Convert labels to int
        dataset.apply(lambda x: int(x['label']), new_field_name='label')

        # Split sentences on whitespace
        def split_sent(ins):
            return ins['raw_sentence'].split()

        dataset.apply(split_sent, new_field_name='words')

        # Add sequence-length information
        dataset.apply(lambda x: len(x['words']), new_field_name='seq_len')
        print(len(dataset))
        print(dataset[0])

        # Filter out instances with DataSet.drop(func)
        dataset.drop(lambda x: x['seq_len'] <= 3, inplace=True)
        print(len(dataset))

        # Specify which fields of the DataSet should be converted to tensors
        # set target: the gold labels used when computing the loss and evaluating the model
        dataset.set_target("label")
        # set input: the fields fed to the model's forward
        dataset.set_input("words", "seq_len")

        # Split into a test set and a training set
        test_data, train_data = dataset.split(0.5)
        print(len(test_data))
        print(len(train_data))

        # Build the vocabulary with Vocabulary.add(word)
        vocab = Vocabulary(min_freq=2)
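        # with min_freq=2, words seen fewer than twice are dropped from the
        # vocabulary and fall back to the unknown token when indexed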
        train_data.apply(lambda x: [vocab.add(word) for word in x['words']])
        vocab.build_vocab()

        # Index the sentences with Vocabulary.to_index(word)
        train_data.apply(lambda x: [vocab.to_index(word) for word in x['words']], new_field_name='words')
        test_data.apply(lambda x: [vocab.to_index(word) for word in x['words']], new_field_name='words')
        print(test_data[0])

        # These data-preprocessing tools are also usable for projects such as
        # reinforcement learning or GANs
        from fastNLP.core.batch import Batch
        from fastNLP.core.sampler import RandomSampler

        batch_iterator = Batch(dataset=train_data, batch_size=2, sampler=RandomSampler())
        for batch_x, batch_y in batch_iterator:
            print("batch_x has: ", batch_x)
            print("batch_y has: ", batch_y)
            break

        from fastNLP.automl.enas_model import ENASModel
        from fastNLP.automl.enas_controller import Controller
        model = ENASModel(embed_num=len(vocab), num_classes=5)
        controller = Controller()
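        # In ENAS (Efficient Neural Architecture Search), the Controller samples
        # candidate architectures while the child model shares parameters across
        # them; the trainer below presumably alternates between training the two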

        from fastNLP.automl.enas_trainer import ENASTrainer

        # Rename the corresponding DataSet fields so that they match the
        # parameter names of the model's forward
        train_data.rename_field('words', 'word_seq')  # input field must match the forward arguments
        train_data.rename_field('label', 'label_seq')
        test_data.rename_field('words', 'word_seq')
        test_data.rename_field('label', 'label_seq')

        loss = CrossEntropyLoss(pred="output", target="label_seq")
        metric = AccuracyMetric(pred="predict", target="label_seq")

        trainer = ENASTrainer(model=model, controller=controller, train_data=train_data, dev_data=test_data,
                              loss=loss,
                              metrics=metric,
                              check_code_level=-1,
                              save_path=None,
                              batch_size=32,
                              print_every=1,
                              n_epochs=3,
                              final_epochs=1)
        trainer.train()
        print('Train finished!')

        # Use Tester to evaluate the model on test_data
        from fastNLP import Tester

        tester = Tester(data=test_data, model=model, metrics=AccuracyMetric(pred="predict", target="label_seq"),
                        batch_size=4)

        acc = tester.test()
        print(acc)


if __name__ == '__main__':
    unittest.main()
