
Add files via upload

tags/v0.1.0
Xu Yige (GitHub), 6 years ago
commit fc70bfe449
2 changed files with 92 additions and 1 deletion:
  1. fastNLP/loader/config_loader.py   +30  -1
  2. fastNLP/loader/dataset_loader.py  +62  -0

fastNLP/loader/config_loader.py  (+30, -1)

@@ -1,4 +1,8 @@
-from loader.base_loader import BaseLoader
+from fastNLP.loader.base_loader import BaseLoader
+
+import configparser
+import traceback
+import json
 
 
 class ConfigLoader(BaseLoader):
@@ -11,3 +15,28 @@ class ConfigLoader(BaseLoader):
    @staticmethod
    def parse(string):
        raise NotImplementedError

    @staticmethod
    def loadConfig(filePath, sections):
        """
        :param filePath: the path of the config file
        :param sections: a dict mapping section names to section objects whose
            attributes will be overwritten with the values read from the file
        :return: None; the section objects are updated in place
        """
        cfg = configparser.ConfigParser()
        cfg.read(filePath)
        for s in sections:
            attr_list = [i for i in type(sections[s]).__dict__.keys() if
                         not callable(getattr(sections[s], i)) and not i.startswith("__")]
            gen_sec = cfg[s]
            for attr in attr_list:
                try:
                    val = json.loads(gen_sec[attr])
                    print(s, attr, val, type(val))
                    assert type(val) == type(getattr(sections[s], attr)), \
                        'type mismatch: expected %s but got %s' % \
                        (type(getattr(sections[s], attr)), type(val))
                    setattr(sections[s], attr, val)
                except Exception:
                    traceback.print_exc()
                    raise ValueError('something wrong in "%s" entry' % attr)
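
The new loadConfig reads the class-level attributes of each section object as typed defaults, parses the matching config entries with json.loads, checks that the parsed type matches the default's type, and writes the values back onto the object. A minimal usage sketch, assuming the package layout of this commit; the PosConfigSection class, the [POS] section name, and the pos.cfg file are invented for illustration:

    # Hypothetical section class: class-level attributes act as typed defaults.
    from fastNLP.loader.config_loader import ConfigLoader

    class PosConfigSection(object):
        epochs = 20            # int default, so the cfg value must parse as an int
        lr = 0.01              # float default
        save_path = "./model"  # str default; string values must be JSON-quoted in the file

    # pos.cfg would contain something like:
    #   [POS]
    #   epochs = 50
    #   lr = 0.001
    #   save_path = "./output/pos_model"
    section = PosConfigSection()
    ConfigLoader.loadConfig("pos.cfg", {"POS": section})
    print(section.epochs, section.lr, section.save_path)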

fastNLP/loader/dataset_loader.py  (+62, -0)

@@ -1,4 +1,5 @@
from fastNLP.loader.base_loader import BaseLoader
import os


class DatasetLoader(BaseLoader):
@@ -8,6 +9,67 @@ class DatasetLoader(BaseLoader):
        super(DatasetLoader, self).__init__(data_name, data_path)


class POSDatasetLoader(DatasetLoader):
    """loader for POS data sets"""

    def __init__(self, data_name, data_path):
        super(POSDatasetLoader, self).__init__(data_name, data_path)
        # self.data_set = self.load()

    def load(self):
        assert os.path.exists(self.data_path)
        with open(self.data_path, "r", encoding="utf-8") as f:
            lines = f.readlines()
        return self.parse(lines)

    @staticmethod
    def parse(lines):
        """
        :param lines: lines from the data set, one sentence per line with
            space-separated "word/tag" tokens
        :return: list(list(list())): the three levels of lists are
            token, sentence, and data set
        """
        dataset = list()
        for line in lines:
            sentence = list()
            words = line.split(" ")
            for w in words:
                tokens = list()
                tokens.append(w.split('/')[0])
                tokens.append(w.split('/')[1])
                sentence.append(tokens)
            dataset.append(sentence)
        return dataset
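
POSDatasetLoader.parse expects one sentence per line, with space-separated "word/tag" tokens. A small sketch of the expected round trip, assuming the package layout of this commit; the sample sentences are invented:

    from fastNLP.loader.dataset_loader import POSDatasetLoader

    lines = ["I/PRP love/VBP NLP/NNP", "It/PRP works/VBZ"]
    print(POSDatasetLoader.parse(lines))
    # [[['I', 'PRP'], ['love', 'VBP'], ['NLP', 'NNP']],
    #  [['It', 'PRP'], ['works', 'VBZ']]]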

class ClassficationDatasetLoader(DatasetLoader):
    """loader for classification data sets"""

    def __init__(self, data_name, data_path):
        super(ClassficationDatasetLoader, self).__init__(data_name, data_path)

    def load(self):
        assert os.path.exists(self.data_path)
        with open(self.data_path, "r", encoding="utf-8") as f:
            lines = f.readlines()
        return self.parse(lines)

    @staticmethod
    def parse(lines):
        """
        :param lines: lines from the data set, one example per line with the
            label first and the words of the sentence after it
        :return: list(list(list())): the three levels of lists are
            word, sentence, and data set
        """
        dataset = list()
        for line in lines:
            label = line.split(" ")[0]
            words = line.split(" ")[1:]
            sentence = [words, label]
            dataset.append(sentence)
        return dataset
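
ClassficationDatasetLoader.parse expects one example per line, with the label first and the words of the sentence after it, and returns [words, label] pairs. A small sketch, assuming the package layout of this commit; the sample lines are invented:

    from fastNLP.loader.dataset_loader import ClassficationDatasetLoader

    lines = ["pos this movie is great", "neg rather dull and slow"]
    print(ClassficationDatasetLoader.parse(lines))
    # [[['this', 'movie', 'is', 'great'], 'pos'],
    #  [['rather', 'dull', 'and', 'slow'], 'neg']]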

class ConllLoader(DatasetLoader):
    """loader for conll format files"""


