@@ -0,0 +1,10 @@ | |||
bc/msnbc/00/msnbc_0000 0 0 Hi UH (TOP(FRAG(INTJ*) - - - Dan_Abrams * - | |||
bc/msnbc/00/msnbc_0000 0 1 everyone NN (NP*) - - - Dan_Abrams * - | |||
bc/msnbc/00/msnbc_0000 0 2 /. . *)) - - - Dan_Abrams * - | |||
bc/msnbc/00/msnbc_0000 0 0 first RB (TOP(S(ADVP* - - - Dan_Abrams * (ARGM-TMP* * * * - | |||
bc/msnbc/00/msnbc_0000 0 1 up RB * - - - Dan_Abrams * * * * * - | |||
bc/msnbc/00/msnbc_0000 0 2 on IN (PP* - - - Dan_Abrams * * * * * - | |||
bc/msnbc/00/msnbc_0000 0 3 the DT (NP* - - - Dan_Abrams * * * * * - | |||
bc/msnbc/00/msnbc_0000 0 4 docket NN *)) docket - - Dan_Abrams * * * * * - |
@@ -0,0 +1,10 @@ | |||
bc/msnbc/00/msnbc_0007 0 0 Dealing VBG (TOP(VP* deal 01 - speaker_1 * (V*) - | |||
bc/msnbc/00/msnbc_0007 0 1 with IN (PP* - - - speaker_1 * (ARG1* - | |||
bc/msnbc/00/msnbc_0007 0 2 serial JJ (NP(NP* - - - speaker_1 * * (156 | |||
bc/msnbc/00/msnbc_0007 0 3 crimes NNS *) crime - 1 speaker_1 * * 156) | |||
bc/msnbc/00/msnbc_0007 0 4 per FW (ADVP* - - - speaker_1 * * - | |||
bc/msnbc/00/msnbc_0007 0 5 se FW *))) - - - speaker_1 * *) - | |||
bc/msnbc/00/msnbc_0007 0 6 /. . *)) - - - speaker_1 * * - | |||
bc/msnbc/00/msnbc_0007 0 0 We PRP (TOP(S(NP*) - - - speaker_1 * (ARG0*) * (90) |
@@ -0,0 +1,50 @@ | |||
bc/msnbc/00/msnbc_0003 0 0 The DT (TOP(S(NP* - - - Chris_Matthews * * (ARG1* * * * * - | |||
bc/msnbc/00/msnbc_0003 0 1 move NN *) move 02 2 Chris_Matthews * (V*) *) * * * * - | |||
bc/msnbc/00/msnbc_0003 0 2 comes VBZ (VP* come 03 2 Chris_Matthews * * (V*) * * * * - | |||
bc/msnbc/00/msnbc_0003 0 3 a DT (SBAR(NP* - - - Chris_Matthews (DATE* * (ARGM-TMP* * * * * - | |||
bc/msnbc/00/msnbc_0003 0 4 month NN *) month - 2 Chris_Matthews *) * * * * * * - | |||
bc/msnbc/00/msnbc_0003 0 5 before IN * - - - Chris_Matthews * * * * * * * - | |||
bc/msnbc/00/msnbc_0003 0 6 the DT (S(NP* - - - Chris_Matthews * * * * (ARG1* (ARG0* * - | |||
bc/msnbc/00/msnbc_0003 0 7 Senate NNP *) - - - Chris_Matthews (ORG) * * * *) *) * - | |||
bc/msnbc/00/msnbc_0003 0 8 is VBZ (VP* be 03 - Chris_Matthews * * * (V*) * * * - | |||
bc/msnbc/00/msnbc_0003 0 9 scheduled VBN (VP* schedule 01 - Chris_Matthews * * * * (V*) * * - | |||
bc/msnbc/00/msnbc_0003 0 10 to TO (S(VP* - - - Chris_Matthews * * * * (ARG2* * * - | |||
bc/msnbc/00/msnbc_0003 0 11 hold VB (VP* hold 04 8 Chris_Matthews * * * * * (V*) * - | |||
bc/msnbc/00/msnbc_0003 0 12 confirmation NN (NP(NP* - - - Chris_Matthews * * * * * (ARG1* (ARG2*) - | |||
bc/msnbc/00/msnbc_0003 0 13 hearings NNS *) hearing 01 1 Chris_Matthews * * * * * * (V*) - | |||
bc/msnbc/00/msnbc_0003 0 14 on IN (PP* - - - Chris_Matthews * * * * * * (ARG1* - | |||
bc/msnbc/00/msnbc_0003 0 15 President NNP (NP(NP(NP* - - - Chris_Matthews * * * * * * * (194 | |||
bc/msnbc/00/msnbc_0003 0 16 Bush NNP * - - - Chris_Matthews (PERSON) * * * * * * - | |||
bc/msnbc/00/msnbc_0003 0 17 's POS *) - - - Chris_Matthews * * * * * * * 194) | |||
bc/msnbc/00/msnbc_0003 0 18 Supreme NNP (NML* - - - Chris_Matthews (ORG* * * * * * * - | |||
bc/msnbc/00/msnbc_0003 0 19 Court NNP *) - - - Chris_Matthews *) * * * * * * - | |||
bc/msnbc/00/msnbc_0003 0 20 nominee NN *) - - - Chris_Matthews * * * * * * * - | |||
bc/msnbc/00/msnbc_0003 0 21 John NNP (NP* - - - Chris_Matthews (PERSON* * * * * * * - | |||
bc/msnbc/00/msnbc_0003 0 22 Roberts NNP *)))))))))))) - - - Chris_Matthews *) * *) * *) *) *) - | |||
bc/msnbc/00/msnbc_0003 0 23 /. . *)) - - - Chris_Matthews * * * * * * * - | |||
bc/msnbc/00/msnbc_0003 0 0 Senator NNP (TOP(S(NP(NP* - - - Chris_Matthews * (ARG1* * * (162 | |||
bc/msnbc/00/msnbc_0003 0 1 Chris NNP * - - - Chris_Matthews (PERSON* * * * - | |||
bc/msnbc/00/msnbc_0003 0 2 Dodd NNP *) - - - Chris_Matthews *) * * * - | |||
bc/msnbc/00/msnbc_0003 0 3 of IN (PP* - - - Chris_Matthews * * * * - | |||
bc/msnbc/00/msnbc_0003 0 4 Connecticut NNP (NP*))) - - - Chris_Matthews (GPE) *) * * 162) | |||
bc/msnbc/00/msnbc_0003 0 5 was VBD (VP* be 01 1 Chris_Matthews * (V*) * * - | |||
bc/msnbc/00/msnbc_0003 0 6 among IN (PP* - - - Chris_Matthews * (ARG2* * * - | |||
bc/msnbc/00/msnbc_0003 0 7 those DT (NP(NP* - - - Chris_Matthews * * (ARG0* * - | |||
bc/msnbc/00/msnbc_0003 0 8 Democrats NNPS *) - - - Chris_Matthews (NORP) * *) * - | |||
bc/msnbc/00/msnbc_0003 0 9 who WP (SBAR(WHNP*) - - - Chris_Matthews * * (R-ARG0*) * - | |||
bc/msnbc/00/msnbc_0003 0 10 spoke VBD (S(VP* speak 03 5 Chris_Matthews * * (V*) * - | |||
bc/msnbc/00/msnbc_0003 0 11 out RP (PRT*) - - - Chris_Matthews * * * * - | |||
bc/msnbc/00/msnbc_0003 0 12 against IN (PP* - - - Chris_Matthews * * (ARG1* * - | |||
bc/msnbc/00/msnbc_0003 0 13 Bolton NNP (NP(NP* - - - Chris_Matthews (PERSON) * * (ARG1* (31|(130 | |||
bc/msnbc/00/msnbc_0003 0 14 's POS *) - - - Chris_Matthews * * * *) 31) | |||
bc/msnbc/00/msnbc_0003 0 15 appointment NN *)) appointment 01 1 Chris_Matthews * * *) (V*) 130) | |||
bc/msnbc/00/msnbc_0003 0 16 today NN (NP*))))))) today - 2 Chris_Matthews (DATE) *) (ARGM-TMP*) * (121) | |||
bc/msnbc/00/msnbc_0003 0 17 /. . *)) - - - Chris_Matthews * * * * - | |||
bc/msnbc/00/msnbc_0003 0 0 I PRP (TOP(S(NP*) - - - Christopher_Dodd * * (ARG0*) * (162) | |||
bc/msnbc/00/msnbc_0003 0 1 just RB (ADVP*) - - - Christopher_Dodd * * (ARGM-ADV*) * - | |||
bc/msnbc/00/msnbc_0003 0 2 do VBP (VP* do 01 - Christopher_Dodd * (V*) * * - | |||
bc/msnbc/00/msnbc_0003 0 3 n't RB * - - - Christopher_Dodd * * (ARGM-NEG*) * - | |||
bc/msnbc/00/msnbc_0003 0 4 think VB (VP* think 01 1 Christopher_Dodd * * (V*) * - |
@@ -0,0 +1,49 @@ | |||
-DOCSTART- -X- -X- O | |||
CRICKET NNP B-NP O | |||
- : O O | |||
LEICESTERSHIRE NNP B-NP B-ORG | |||
TAKE NNP I-NP O | |||
OVER IN B-PP O | |||
AT NNP B-NP O | |||
TOP NNP I-NP O | |||
AFTER NNP I-NP O | |||
INNINGS NNP I-NP O | |||
VICTORY NN I-NP O | |||
. . O O | |||
LONDON NNP B-NP B-LOC | |||
1996-08-30 CD I-NP O | |||
Phil NNP B-NP B-PER | |||
Simmons NNP I-NP I-PER | |||
took VBD B-VP O | |||
four CD B-NP O | |||
for IN B-PP O | |||
38 CD B-NP O | |||
on IN B-PP O | |||
Friday NNP B-NP O | |||
as IN B-PP O | |||
Leicestershire NNP B-NP B-ORG | |||
beat VBD B-VP O | |||
Somerset NNP B-NP B-ORG | |||
by IN B-PP O | |||
an DT B-NP O | |||
innings NN I-NP O | |||
and CC O O | |||
39 CD B-NP O | |||
runs NNS I-NP O | |||
in IN B-PP O | |||
two CD B-NP O | |||
days NNS I-NP O | |||
to TO B-VP O | |||
take VB I-VP O | |||
over IN B-PP O | |||
at IN B-PP O | |||
the DT B-NP O | |||
head NN I-NP O | |||
of IN B-PP O | |||
the DT B-NP O | |||
county NN I-NP O | |||
championship NN I-NP O | |||
. . O O |
@@ -0,0 +1,51 @@ | |||
-DOCSTART- -X- -X- O | |||
SOCCER NN B-NP O | |||
- : O O | |||
JAPAN NNP B-NP B-LOC | |||
GET VB B-VP O | |||
LUCKY NNP B-NP O | |||
WIN NNP I-NP O | |||
, , O O | |||
THE NP B-NP B-PER | |||
CHINA NNP I-NP I-PER | |||
IN IN B-PP O | |||
SURPRISE DT B-NP O | |||
DEFEAT NN I-NP O | |||
. . O O | |||
Nadim NNP B-NP B-PER | |||
Ladki NNP I-NP I-PER | |||
AL-AIN NNP B-NP B-LOC | |||
, , O O | |||
United NNP B-NP B-LOC | |||
Arab NNP I-NP I-LOC | |||
Emirates NNPS I-NP I-LOC | |||
1996-12-06 CD I-NP O | |||
Japan NNP B-NP B-LOC | |||
began VBD B-VP O | |||
the DT B-NP O | |||
defence NN I-NP O | |||
of IN B-PP O | |||
their PRP$ B-NP O | |||
Asian JJ I-NP B-MISC | |||
Cup NNP I-NP I-MISC | |||
title NN I-NP O | |||
with IN B-PP O | |||
a DT B-NP O | |||
lucky JJ I-NP O | |||
2-1 CD I-NP O | |||
win VBP B-VP O | |||
against IN B-PP O | |||
Syria NNP B-NP B-LOC | |||
in IN B-PP O | |||
a DT B-NP O | |||
Group NNP I-NP O | |||
C NNP I-NP O | |||
championship NN I-NP O | |||
match NN I-NP O | |||
on IN B-PP O | |||
Friday NNP B-NP O | |||
. . O O |
@@ -0,0 +1,48 @@ | |||
-DOCSTART- -X- -X- O | |||
EU NNP B-NP B-ORG | |||
rejects VBZ B-VP O | |||
German JJ B-NP B-MISC | |||
call NN I-NP O | |||
to TO B-VP O | |||
boycott VB I-VP O | |||
British JJ B-NP B-MISC | |||
lamb NN I-NP O | |||
. . O O | |||
Peter NNP B-NP B-PER | |||
Blackburn NNP I-NP I-PER | |||
BRUSSELS NNP B-NP B-LOC | |||
1996-08-22 CD I-NP O | |||
The DT B-NP O | |||
European NNP I-NP B-ORG | |||
Commission NNP I-NP I-ORG | |||
said VBD B-VP O | |||
on IN B-PP O | |||
Thursday NNP B-NP O | |||
it PRP B-NP O | |||
disagreed VBD B-VP O | |||
with IN B-PP O | |||
German JJ B-NP B-MISC | |||
advice NN I-NP O | |||
to TO B-PP O | |||
consumers NNS B-NP O | |||
to TO B-VP O | |||
shun VB I-VP O | |||
British JJ B-NP B-MISC | |||
lamb NN I-NP O | |||
until IN B-SBAR O | |||
scientists NNS B-NP O | |||
determine VBP B-VP O | |||
whether IN B-SBAR O | |||
mad JJ B-NP O | |||
cow NN I-NP O | |||
disease NN I-NP O | |||
can MD B-VP O | |||
be VB I-VP O | |||
transmitted VBN I-VP O | |||
to TO B-PP O | |||
sheep NN B-NP O | |||
. . O O |
@@ -26,6 +26,12 @@ class TestWeiboNER(unittest.TestCase): | |||
class TestConll2003Loader(unittest.TestCase): | |||
def test__load(self): | |||
def test_load(self): | |||
Conll2003Loader()._load('test/data_for_tests/conll_2003_example.txt') | |||
class TestConllLoader(unittest.TestCase): | |||
def test_conll(self): | |||
db = Conll2003Loader().load('test/data_for_tests/io/conll2003') | |||
print(db) | |||
@@ -1,6 +1,7 @@ | |||
import unittest | |||
import os | |||
from fastNLP.io import MsraNERPipe, PeopleDailyPipe, WeiboNERPipe, Conll2003Pipe, Conll2003NERPipe | |||
from fastNLP.io import MsraNERPipe, PeopleDailyPipe, WeiboNERPipe, Conll2003Pipe, Conll2003NERPipe, \ | |||
OntoNotesNERPipe | |||
@unittest.skipIf('TRAVIS' in os.environ, "Skip in travis") | |||
@@ -38,3 +39,14 @@ class TestNERPipe(unittest.TestCase): | |||
print(data_bundle) | |||
data_bundle = pipe(encoding_type='bioes').process_from_file(f'test/data_for_tests/io/{k}') | |||
print(data_bundle) | |||
class TestConll2003Pipe(unittest.TestCase): | |||
def test_conll(self): | |||
with self.assertWarns(Warning): | |||
data_bundle = Conll2003Pipe().process_from_file('test/data_for_tests/io/conll2003') | |||
print(data_bundle) | |||
def test_OntoNotes(self): | |||
data_bundle = OntoNotesNERPipe().process_from_file('test/data_for_tests/io/OntoNotes') | |||
print(data_bundle) |
@@ -19,5 +19,6 @@ class TestRunCWSPipe(unittest.TestCase): | |||
dataset_names = ['msra', 'cityu', 'as', 'pku'] | |||
for dataset_name in dataset_names: | |||
with self.subTest(dataset_name=dataset_name): | |||
data_bundle = CWSPipe().process_from_file(f'test/data_for_tests/io/cws_{dataset_name}') | |||
data_bundle = CWSPipe(bigrams=True, trigrams=True).\ | |||
process_from_file(f'test/data_for_tests/io/cws_{dataset_name}') | |||
print(data_bundle) |