You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

CVSSDataset.py 1.5 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051
  1. from pathlib import Path
  2. from sklearn.model_selection import train_test_split
  3. import torch
  4. import csv
  5. class CVSSDataset(torch.utils.data.Dataset):
  6. def __init__(self, encodings, labels):
  7. self.encodings = encodings
  8. self.labels = labels
  9. def __getitem__(self, idx):
  10. item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  11. item['labels'] = torch.tensor(self.labels[idx])
  12. return item
  13. def __len__(self):
  14. return len(self.labels)
  15. def read_cvss_txt(split_dir, list_classes):
  16. split_dir = Path(split_dir)
  17. texts = []
  18. labels = []
  19. for label_dir in ["LOW", "HIGH"]:
  20. for text_file in (split_dir/label_dir).iterdir():
  21. texts.append(text_file.read_text())
  22. for i in range(len(list_classes)):
  23. if list_classes[i] == label_dir:
  24. labels.append(i)
  25. else:
  26. continue
  27. return texts, labels
  28. def read_cvss_csv(file_name, num_label, list_classes):
  29. texts = []
  30. labels = []
  31. csv_file = open(file_name, 'r+',encoding='UTF-8')
  32. csv_reader = csv.reader(csv_file, delimiter=',', quotechar='"')
  33. for row in csv_reader:
  34. texts.append(row[0])
  35. for i in range(len(list_classes)):
  36. if list_classes[i] == row[num_label]:
  37. labels.append(i)
  38. else:
  39. continue
  40. csv_file.close()
  41. return texts, labels

在信息安全领域,漏洞评估和管理是关键任务之一。本作品探讨了如何利用预训练文本大模型来评估和研判漏洞的严重等级,具体基于通用漏洞评分系统(CVSS)。传统漏洞评分方法依赖于手动分析和专家评审,而基于自然语言处理的文本大模型凭借其深度学习能力,可以自动化地处理和分析大量的安全相关文本数据,从而提高漏洞评估的效率和准确性。结合词干提取与词形还原,能够更好地发挥自然语言处理文本大模型的预测能力,并提高其准确度。