Browse Source

新增pos output processor

tags/v0.2.0
yh yunfan 5 years ago
parent
commit
1496031182
1 changed files with 24 additions and 0 deletions
  1. +24
    -0
      reproduction/pos_tag_model/process/pos_processor.py

+ 24
- 0
reproduction/pos_tag_model/process/pos_processor.py View File

@@ -60,6 +60,30 @@ class CombineWordAndPosProcessor(Processor):


return dataset return dataset


class PosOutputStrProcessor(Processor):
    """Join parallel word and POS-tag sequences into a single output string.

    For every instance in the dataset, the sequence stored under
    ``word_field_name`` is paired element-wise with the sequence stored under
    ``pos_field_name``. Each pair is rendered as ``word + sep + pos`` and the
    pairs are joined with single spaces. The resulting string is written to
    the instance under the key ``'word_pos_output'``.
    """

    def __init__(self, word_field_name, pos_field_name):
        """Remember which fields hold the words and the POS tags.

        :param word_field_name: key of the field holding the word sequence.
        :param pos_field_name: key of the field holding the tag sequence.
        """
        # The base class is initialised with (None, None), exactly as before;
        # this processor tracks its own two input field names instead.
        super(PosOutputStrProcessor, self).__init__(None, None)

        self.word_field_name = word_field_name
        self.pos_field_name = pos_field_name
        # Separator placed between a word and its tag. It must be stored as
        # ``sep`` because that is the attribute ``process`` reads; storing it
        # under any other name makes ``process`` fail with AttributeError.
        self.sep = '_'

    def process(self, dataset):
        """Add a ``'word_pos_output'`` string to every instance of ``dataset``.

        :param dataset: the ``DataSet`` whose instances are updated in place.
        :return: the same ``dataset`` object that was passed in.
        :raises AssertionError: if ``dataset`` is not a ``DataSet``.
        """
        assert isinstance(dataset, DataSet), "Only Dataset class is allowed, not {}.".format(type(dataset))

        for ins in dataset:
            word_list = ins[self.word_field_name]
            pos_list = ins[self.pos_field_name]

            # zip() stops at the shorter sequence, so unmatched trailing
            # words or tags are dropped rather than raising an error.
            ins['word_pos_output'] = ' '.join(
                word + self.sep + pos for word, pos in zip(word_list, pos_list)
            )

        return dataset


if __name__ == '__main__': if __name__ == '__main__':
chars = ['迈', '向', '充', '满', '希', '望', '的', '新', '世', '纪', '—', '—', '一', '九', '九', '八', '年', '新', '年', '讲', '话', '(', '附', '图', '片', '1', '张', ')'] chars = ['迈', '向', '充', '满', '希', '望', '的', '新', '世', '纪', '—', '—', '一', '九', '九', '八', '年', '新', '年', '讲', '话', '(', '附', '图', '片', '1', '张', ')']
bmes_pos = ['B-v', 'E-v', 'B-v', 'E-v', 'B-n', 'E-n', 'S-u', 'S-a', 'B-n', 'E-n', 'B-w', 'E-w', 'B-t', 'M-t', 'M-t', 'M-t', 'E-t', 'B-t', 'E-t', 'B-n', 'E-n', 'S-w', 'S-v', 'B-n', 'E-n', 'S-m', 'S-q', 'S-w'] bmes_pos = ['B-v', 'E-v', 'B-v', 'E-v', 'B-n', 'E-n', 'S-u', 'S-a', 'B-n', 'E-n', 'B-w', 'E-w', 'B-t', 'M-t', 'M-t', 'M-t', 'E-t', 'B-t', 'E-t', 'B-n', 'E-n', 'S-w', 'S-v', 'B-n', 'E-n', 'S-m', 'S-q', 'S-w']


Loading…
Cancel
Save