You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they may include dashes ('-') and can be up to 35 characters long.

jsonToCsv.py 2.7 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566
  1. import json
  2. import csv
  3. import pandas as pd
  4. import re
  5. def main():
  6. jsonToCsv()
  7. dataProcess()
  8. dataProcess2()
  9. dataProcess3()
  10. def jsonToCsv():
  11. with open('../data/SIR_validation_set.json', 'r', encoding='UTF-8') as json_file:
  12. data = json.load(json_file)
  13. with open('../data_process_cache/SIR_validation_set.csv', 'w', newline='', encoding='UTF-8') as csv_file:
  14. if data:
  15. fieldnames = data[0].keys()
  16. writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
  17. writer.writeheader()
  18. writer.writerows(data)
  19. def dataProcess():
  20. columns_to_keep = ['description', 'vectorString']
  21. with open('../data_process_cache/SIR_validation_set.csv', 'r', encoding='UTF-8') as infile:
  22. reader = csv.DictReader(infile)
  23. filtered_rows = [{col: row[col] for col in columns_to_keep} for row in reader]
  24. # 将选择的列写入到新 CSV 文件
  25. with open('../data_process_cache/validation_vecStr.csv', 'w', newline='', encoding='utf-8') as outfile:
  26. if filtered_rows:
  27. writer = csv.DictWriter(outfile, fieldnames=columns_to_keep)
  28. writer.writerows(filtered_rows) # 写入选择的列
  29. def dataProcess2():
  30. import pandas as pd
  31. df = pd.read_csv('../data_process_cache/validation_vecStr.csv', header=None, encoding='UTF-8')
  32. df_expanded = df[1].str.split('/', expand=True)
  33. df = pd.concat([df, df_expanded], axis=1)
  34. print(df.head())
  35. df.to_csv('../data_process_cache/output_validation.csv', index=False, header=False, encoding='UTF-8')
  36. df = pd.read_csv('../data_process_cache/output_validation.csv', header=None, encoding='UTF-8')
  37. df = df.drop(columns=[1, 2])
  38. df.to_csv('../data_process_cache/output_validation.csv',index=False, header=False, encoding='UTF-8')
  39. def dataProcess3():
  40. df = pd.read_csv('../data_process_cache/output_validation.csv', header=None, encoding='UTF-8')
  41. print(df.head())
  42. df.replace({'AV:L': 'LOCAL', 'AV:N': 'NETWORK', 'AV:A': 'ADJACENT', 'AV:P': 'PHYSICAL'}, inplace=True)
  43. df.replace({'AC:L': 'LOW', 'AC:H': 'HIGH'}, inplace=True)
  44. df.replace({'PR:N': 'NONE', 'PR:L': 'LOW', 'PR:H': 'HIGH'}, inplace=True)
  45. df.replace({'UI:N': 'NONE', 'UI:R': 'REQUIRED'}, inplace=True)
  46. df.replace({'S:U': 'UNCHANGED', 'S:C': 'CHANGED'}, inplace=True)
  47. df.replace({'C:N': 'NONE', 'C:L': 'LOW', 'C:H': 'HIGH'}, inplace=True)
  48. df.replace({'I:N': 'NONE', 'I:L': 'LOW', 'I:H': 'HIGH'}, inplace=True)
  49. df.replace({'A:N': 'NONE', 'A:L': 'LOW', 'A:H': 'HIGH'}, inplace=True)
  50. df.to_csv('../dataset/SIR_validation_set.csv', index=False, header=False, encoding='UTF-8')
# Script entry point: run the whole JSON-to-CSV preprocessing pipeline.
if __name__ == '__main__':
    main()

在信息安全领域,漏洞评估和管理是关键任务之一。本作品探讨了如何利用预训练文本大模型来评估和研判漏洞的严重等级,具体基于通用漏洞评分系统。传统漏洞评分方法依赖于手动分析和专家评审。而基于自然语言处理文本大模型通过其深度学习能力,可以自动化地处理和分析大量的安全相关文本数据,从而提高漏洞评估的效率和准确性。结合词干提取、词性还原能够更好地发挥自然语言处理文本大模型的预测能力与准确度。