|
- from pathlib import Path
- from sklearn.model_selection import train_test_split
- import torch
- import csv
-
class CVSSDataset(torch.utils.data.Dataset):
    """Map-style dataset that pairs per-sample encodings with labels.

    Each sample is returned as a dict of tensors: one entry per key found in
    ``encodings`` plus a ``'labels'`` entry taken from ``labels``.
    """

    def __init__(self, encodings, labels):
        """Keep references to the encodings and labels (no copy is made).

        Args:
            encodings: Mapping (anything exposing ``.items()``) from feature
                name to a container indexable by sample position.
                NOTE(review): presumably tokenizer output such as
                ``input_ids`` / ``attention_mask`` -- confirm with the caller.
            labels: Container of per-sample labels, indexable by sample
                position; its length defines the dataset length.
        """
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        """Return sample ``idx`` as a dict mapping feature name to tensor.

        Every feature value and the label are wrapped with ``torch.tensor``,
        so a new tensor is built on each access.
        """
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        """Return the number of samples, i.e. the number of labels."""
        return len(self.labels)
-
def read_cvss_txt(split_dir, list_classes):
    """Load texts and class indices from ``LOW`` / ``HIGH`` sub-directories.

    Every regular file found directly inside ``split_dir/LOW`` and
    ``split_dir/HIGH`` is read as UTF-8 text. Its label is the position of
    the directory name in ``list_classes``.

    Args:
        split_dir: Directory (str or path-like) containing the ``LOW`` and
            ``HIGH`` sub-directories.
        list_classes: Iterable of class names; the index of a name is the
            integer label. If a name occurs more than once, the first
            position is used.

    Returns:
        A ``(texts, labels)`` tuple of two lists of equal length, where
        ``labels[i]`` is the class index of ``texts[i]``.

    Raises:
        FileNotFoundError: If a sub-directory whose name is listed in
            ``list_classes`` does not exist.
    """
    split_dir = Path(split_dir)

    # Resolve each class name to its first index once, instead of rescanning
    # list_classes for every file.
    label_index = {}
    for position, class_name in enumerate(list_classes):
        label_index.setdefault(class_name, position)

    texts = []
    labels = []
    for label_dir in ("LOW", "HIGH"):
        label = label_index.get(label_dir)
        if label is None:
            # The directory name is not a known class: skip it entirely so
            # that texts and labels never get out of step.
            continue
        # Sort for a reproducible sample order; iterdir() order is arbitrary.
        for text_file in sorted((split_dir / label_dir).iterdir()):
            if not text_file.is_file():
                continue
            texts.append(text_file.read_text(encoding="utf-8"))
            labels.append(label)

    return texts, labels
-
def read_cvss_csv(file_name, num_label, list_classes):
    """Load texts and class indices from a comma-separated UTF-8 file.

    The text is taken from column 0 of each row and the class name from
    column ``num_label``. The label is the position of that class name in
    ``list_classes``. Rows whose class name is not in ``list_classes`` (for
    example a header row) and blank lines are skipped, so the two returned
    lists always stay aligned.

    Args:
        file_name: Path of the CSV file to read.
        num_label: Column index holding the class name.
        list_classes: Iterable of class names; the index of a name is the
            integer label. If a name occurs more than once, the first
            position is used.

    Returns:
        A ``(texts, labels)`` tuple of two lists of equal length, where
        ``labels[i]`` is the class index of ``texts[i]``.

    Raises:
        OSError: If the file cannot be opened for reading.
        IndexError: If a non-empty row has no column ``num_label``.
    """
    # Resolve each class name to its first index once, instead of rescanning
    # list_classes for every row.
    label_index = {}
    for position, class_name in enumerate(list_classes):
        label_index.setdefault(class_name, position)

    texts = []
    labels = []

    # Read-only mode is enough (no write permission needed). newline='' is
    # what the csv module requires to handle newlines inside quoted fields.
    # The with-block closes the file even if a row raises.
    with open(file_name, 'r', encoding='UTF-8', newline='') as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',', quotechar='"')
        for row in csv_reader:
            if not row:
                # csv.reader yields [] for blank lines.
                continue
            label = label_index.get(row[num_label])
            if label is None:
                continue
            texts.append(row[0])
            labels.append(label)

    return texts, labels
|