- from typing import List
def iob2(tags: List[str]) -> List[str]:
    """Validate an IOB tag sequence and normalise IOB1 tags to IOB2.

    Every tag must be either ``"O"`` or ``"<prefix>-<label>"`` where the
    prefix is ``"B"`` or ``"I"``.  Only the first hyphen separates the prefix
    from the label, so labels that themselves contain hyphens (for example
    ``"B-geo-loc"``) are accepted.

    An ``I-`` tag is rewritten to ``B-`` when it opens an entity, i.e. when it
    is the first tag, follows ``"O"``, or follows a tag with a different label.

    :param tags: the tag sequence to check and convert.  The list is modified
        in place.
    :return: the same list object that was passed in, now in IOB2 form.
    :raises TypeError: if a tag is neither ``"O"`` nor a ``B-``/``I-`` tag.
    """
    for i, tag in enumerate(tags):
        if tag == "O":
            continue
        # Split on the first hyphen only so hyphenated labels stay intact.
        prefix, separator, _label = tag.partition("-")
        if not separator or prefix not in ("I", "B"):
            raise TypeError("The encoding schema is not a valid IOB type.")
        if prefix == "B":
            continue
        # tag[1:] is "-<label>"; comparing it with the previous tag's suffix
        # tells whether this "I-" continues the same entity or starts a new one.
        starts_entity = (
            i == 0
            or tags[i - 1] == "O"
            or tags[i - 1][1:] != tag[1:]
        )
        if starts_entity:  # conversion IOB1 to IOB2
            tags[i] = "B" + tag[1:]
    return tags
def iob2bioes(tags: List[str]) -> List[str]:
    """Convert an IOB tag sequence to the BIOES encoding.

    ``"O"`` tags are copied unchanged.  A ``B-`` tag becomes ``S-`` and an
    ``I-`` tag becomes ``E-`` when the next tag does not start with ``I``
    (or there is no next tag); otherwise the tag is kept as is.  Only the
    prefix of the following tag is inspected, not its label.

    :param tags: the IOB tag sequence; it is not modified.
    :return: a new list holding the BIOES-encoded tags.
    :raises TypeError: if a tag other than ``"O"`` does not start with the
        prefix ``B`` or ``I``.
    """
    # Prefix rewritten when the current tag is the last token of its entity.
    closing_prefix = {"B": "S-", "I": "E-"}
    total = len(tags)
    new_tags = []
    for i, tag in enumerate(tags):
        if tag == "O":
            new_tags.append(tag)
            continue
        prefix = tag.split("-")[0]
        if prefix not in closing_prefix:
            raise TypeError("Invalid IOB format.")
        continues = i + 1 < total and tags[i + 1].split("-")[0] == "I"
        if continues:
            new_tags.append(tag)
        else:
            # count=1: rewrite only the leading prefix, never a "B-"/"I-"
            # that happens to occur inside the entity label itself.
            new_tags.append(tag.replace(prefix + "-", closing_prefix[prefix], 1))
    return new_tags