You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

hard.ipynb 32 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999
  1. {
  2. "cells": [
  3. {
  4. "cell_type": "code",
  5. "execution_count": 1,
  6. "id": "991235a0-5e53-4c8c-99ab-fdd24415983a",
  7. "metadata": {},
  8. "outputs": [],
  9. "source": [
  10. "import pandas as pd\n",
  11. "import json"
  12. ]
  13. },
  14. {
  15. "cell_type": "code",
  16. "execution_count": 2,
  17. "id": "495e20b0-5de1-4a37-85dd-549b39fb3562",
  18. "metadata": {},
  19. "outputs": [],
  20. "source": [
  21. "data = pd.read_json('SIR_train_set.json')"
  22. ]
  23. },
  24. {
  25. "cell_type": "code",
  26. "execution_count": 3,
  27. "id": "e9c6b2b8-ac62-428e-a3e7-b08f0a608a93",
  28. "metadata": {},
  29. "outputs": [
  30. {
  31. "data": {
  32. "text/html": [
  33. "<div>\n",
  34. "<style scoped>\n",
  35. " .dataframe tbody tr th:only-of-type {\n",
  36. " vertical-align: middle;\n",
  37. " }\n",
  38. "\n",
  39. " .dataframe tbody tr th {\n",
  40. " vertical-align: top;\n",
  41. " }\n",
  42. "\n",
  43. " .dataframe thead th {\n",
  44. " text-align: right;\n",
  45. " }\n",
  46. "</style>\n",
  47. "<table border=\"1\" class=\"dataframe\">\n",
  48. " <thead>\n",
  49. " <tr style=\"text-align: right;\">\n",
  50. " <th></th>\n",
  51. " <th>exploitabilityScore</th>\n",
  52. " <th>baseScore</th>\n",
  53. " <th>impactScore</th>\n",
  54. " <th>severity</th>\n",
  55. " </tr>\n",
  56. " </thead>\n",
  57. " <tbody>\n",
  58. " <tr>\n",
  59. " <th>0</th>\n",
  60. " <td>3.9</td>\n",
  61. " <td>7.5</td>\n",
  62. " <td>3.6</td>\n",
  63. " <td>HIGH</td>\n",
  64. " </tr>\n",
  65. " <tr>\n",
  66. " <th>1</th>\n",
  67. " <td>3.9</td>\n",
  68. " <td>9.8</td>\n",
  69. " <td>5.9</td>\n",
  70. " <td>CRITICAL</td>\n",
  71. " </tr>\n",
  72. " <tr>\n",
  73. " <th>2</th>\n",
  74. " <td>3.9</td>\n",
  75. " <td>7.5</td>\n",
  76. " <td>3.6</td>\n",
  77. " <td>HIGH</td>\n",
  78. " </tr>\n",
  79. " <tr>\n",
  80. " <th>3</th>\n",
  81. " <td>2.2</td>\n",
  82. " <td>8.1</td>\n",
  83. " <td>5.9</td>\n",
  84. " <td>HIGH</td>\n",
  85. " </tr>\n",
  86. " <tr>\n",
  87. " <th>4</th>\n",
  88. " <td>2.8</td>\n",
  89. " <td>8.8</td>\n",
  90. " <td>5.9</td>\n",
  91. " <td>HIGH</td>\n",
  92. " </tr>\n",
  93. " <tr>\n",
  94. " <th>...</th>\n",
  95. " <td>...</td>\n",
  96. " <td>...</td>\n",
  97. " <td>...</td>\n",
  98. " <td>...</td>\n",
  99. " </tr>\n",
  100. " <tr>\n",
  101. " <th>5619</th>\n",
  102. " <td>3.9</td>\n",
  103. " <td>7.5</td>\n",
  104. " <td>3.6</td>\n",
  105. " <td>HIGH</td>\n",
  106. " </tr>\n",
  107. " <tr>\n",
  108. " <th>5620</th>\n",
  109. " <td>2.8</td>\n",
  110. " <td>6.1</td>\n",
  111. " <td>2.7</td>\n",
  112. " <td>MEDIUM</td>\n",
  113. " </tr>\n",
  114. " <tr>\n",
  115. " <th>5621</th>\n",
  116. " <td>2.8</td>\n",
  117. " <td>6.5</td>\n",
  118. " <td>3.6</td>\n",
  119. " <td>MEDIUM</td>\n",
  120. " </tr>\n",
  121. " <tr>\n",
  122. " <th>5622</th>\n",
  123. " <td>2.8</td>\n",
  124. " <td>6.5</td>\n",
  125. " <td>3.6</td>\n",
  126. " <td>MEDIUM</td>\n",
  127. " </tr>\n",
  128. " <tr>\n",
  129. " <th>5623</th>\n",
  130. " <td>2.3</td>\n",
  131. " <td>5.4</td>\n",
  132. " <td>2.7</td>\n",
  133. " <td>MEDIUM</td>\n",
  134. " </tr>\n",
  135. " </tbody>\n",
  136. "</table>\n",
  137. "<p>5624 rows × 4 columns</p>\n",
  138. "</div>"
  139. ],
  140. "text/plain": [
  141. " exploitabilityScore baseScore impactScore severity\n",
  142. "0 3.9 7.5 3.6 HIGH\n",
  143. "1 3.9 9.8 5.9 CRITICAL\n",
  144. "2 3.9 7.5 3.6 HIGH\n",
  145. "3 2.2 8.1 5.9 HIGH\n",
  146. "4 2.8 8.8 5.9 HIGH\n",
  147. "... ... ... ... ...\n",
  148. "5619 3.9 7.5 3.6 HIGH\n",
  149. "5620 2.8 6.1 2.7 MEDIUM\n",
  150. "5621 2.8 6.5 3.6 MEDIUM\n",
  151. "5622 2.8 6.5 3.6 MEDIUM\n",
  152. "5623 2.3 5.4 2.7 MEDIUM\n",
  153. "\n",
  154. "[5624 rows x 4 columns]"
  155. ]
  156. },
  157. "execution_count": 3,
  158. "metadata": {},
  159. "output_type": "execute_result"
  160. }
  161. ],
  162. "source": [
  163. "columns_1 = ['exploitabilityScore', 'baseScore', 'impactScore', 'severity']\n",
  164. "train_data = data[columns_1]\n",
  165. "train_data"
  166. ]
  167. },
  168. {
  169. "cell_type": "code",
  170. "execution_count": 4,
  171. "id": "c2b23c41-5667-4103-ae3e-628bd72c222b",
  172. "metadata": {},
  173. "outputs": [
  174. {
  175. "data": {
  176. "text/plain": [
  177. "0 CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:N/A:N\n",
  178. "1 CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H\n",
  179. "2 CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:N/A:N\n",
  180. "3 CVSS:3.0/AV:N/AC:H/PR:N/UI:N/S:U/C:H/I:H/A:H\n",
  181. "4 CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:U/C:H/I:H/A:H\n",
  182. " ... \n",
  183. "5619 CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:N/I:N/A:H\n",
  184. "5620 CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:C/C:L/I:L/A:N\n",
  185. "5621 CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:U/C:N/I:H/A:N\n",
  186. "5622 CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:U/C:N/I:H/A:N\n",
  187. "5623 CVSS:3.1/AV:N/AC:L/PR:L/UI:R/S:C/C:L/I:L/A:N\n",
  188. "Name: vectorString, Length: 5624, dtype: object"
  189. ]
  190. },
  191. "execution_count": 4,
  192. "metadata": {},
  193. "output_type": "execute_result"
  194. }
  195. ],
  196. "source": [
  197. "vectorString = data['vectorString']\n",
  198. "vectorString"
  199. ]
  200. },
  201. {
  202. "cell_type": "code",
  203. "execution_count": 5,
  204. "id": "83092a68-11a6-4a0d-b8d2-447be678076f",
  205. "metadata": {},
  206. "outputs": [
  207. {
  208. "data": {
  209. "text/plain": [
  210. "{'AV': {'network': 2897, 'adjacent': 5516, 'local': 2334},\n",
  211. " 'AC': {'low': 2659, 'medium': 5396, 'high': 2152},\n",
  212. " 'Au': {'none': 3904, 'single': 2309, 'multiple': 3674},\n",
  213. " 'C': {'none': 3904, 'partial': 7704, 'complete': 3143},\n",
  214. " 'I': {'none': 3904, 'partial': 7704, 'complete': 3143},\n",
  215. " 'A': {'none': 3904, 'partial': 7704, 'complete': 3143},\n",
  216. " 'severity': {'low': 2659, 'medium': 5396, 'high': 2152}}"
  217. ]
  218. },
  219. "execution_count": 5,
  220. "metadata": {},
  221. "output_type": "execute_result"
  222. }
  223. ],
  224. "source": [
  225. "# 打开并读取JSON文件\n",
  226. "with open('label_word_ids_CVSS2.json', 'r') as file:\n",
  227. " # 解析JSON文件\n",
  228. " cvss2 = json.load(file)\n",
  229. "cvss2"
  230. ]
  231. },
  232. {
  233. "cell_type": "code",
  234. "execution_count": 6,
  235. "id": "e420f177-ce45-45cf-b1ce-89a8876e6e6e",
  236. "metadata": {},
  237. "outputs": [
  238. {
  239. "data": {
  240. "text/plain": [
  241. "9"
  242. ]
  243. },
  244. "execution_count": 6,
  245. "metadata": {},
  246. "output_type": "execute_result"
  247. }
  248. ],
  249. "source": [
  250. "# 打开并读取JSON文件\n",
  251. "with open('label_word_ids.json', 'r') as file:\n",
  252. " # 解析JSON文件\n",
  253. " cvss = json.load(file)\n",
  254. "cvss['AV']\n",
  255. "len(cvss)"
  256. ]
  257. },
  258. {
  259. "cell_type": "code",
  260. "execution_count": 22,
  261. "id": "f32a7cd0-5b49-448d-90f5-1abcb1e13890",
  262. "metadata": {},
  263. "outputs": [
  264. {
  265. "data": {
  266. "text/plain": [
  267. "{'AV': {'N': 0.20251660258650825,\n",
  268. " 'A': 0.3855994407549808,\n",
  269. " 'L': 0.1631597343586159,\n",
  270. " 'P': 0.24872422229989516},\n",
  271. " 'AC': {'L': 0.5526917480773228, 'H': 0.4473082519226772},\n",
  272. " 'PR': {'N': 0.44796328169822147,\n",
  273. " 'L': 0.3051061388410786,\n",
  274. " 'H': 0.24693057946069993},\n",
  275. " 'UI': {'N': 0.5477760628595482, 'R': 0.4522239371404518},\n",
  276. " 'S': {'U': 0.8439380911435942, 'C': 0.15606190885640583},\n",
  277. " 'C': {'N': 0.44796328169822147,\n",
  278. " 'L': 0.3051061388410786,\n",
  279. " 'H': 0.24693057946069993},\n",
  280. " 'I': {'N': 0.44796328169822147,\n",
  281. " 'L': 0.3051061388410786,\n",
  282. " 'H': 0.24693057946069993},\n",
  283. " 'A': {'N': 0.44796328169822147,\n",
  284. " 'L': 0.3051061388410786,\n",
  285. " 'H': 0.24693057946069993},\n",
  286. " 'severity': {'low': 0.18472974850632207,\n",
  287. " 'medium': 0.37487842156454076,\n",
  288. " 'high': 0.14950673891899402,\n",
  289. " 'critical': 0.2908850910101431}}"
  290. ]
  291. },
  292. "execution_count": 22,
  293. "metadata": {},
  294. "output_type": "execute_result"
  295. }
  296. ],
  297. "source": [
  298. "columns_2 = ['AV', 'AC', 'PR', 'UI', 'S', 'C', 'I', 'A']\n",
  299. "for column in columns_2:\n",
  300. " cvss[column] = {k[0].upper(): v for k, v in cvss[column].items()}\n",
  301. "# 计算每一行的总值\n",
  302. "summ = {key: sum(values.values()) for key, values in cvss.items()}\n",
  303. "\n",
  304. "# 计算每个值除以总值\n",
  305. "cvss = {\n",
  306. " key: {subkey: value / summ[key] for subkey, value in values.items()}\n",
  307. " for key, values in cvss.items()\n",
  308. "}\n",
  309. "cvss"
  310. ]
  311. },
  312. {
  313. "cell_type": "code",
  314. "execution_count": 23,
  315. "id": "34b69fad-1643-4732-9184-8411eb99e314",
  316. "metadata": {},
  317. "outputs": [
  318. {
  319. "data": {
  320. "text/html": [
  321. "<div>\n",
  322. "<style scoped>\n",
  323. " .dataframe tbody tr th:only-of-type {\n",
  324. " vertical-align: middle;\n",
  325. " }\n",
  326. "\n",
  327. " .dataframe tbody tr th {\n",
  328. " vertical-align: top;\n",
  329. " }\n",
  330. "\n",
  331. " .dataframe thead th {\n",
  332. " text-align: right;\n",
  333. " }\n",
  334. "</style>\n",
  335. "<table border=\"1\" class=\"dataframe\">\n",
  336. " <thead>\n",
  337. " <tr style=\"text-align: right;\">\n",
  338. " <th></th>\n",
  339. " <th>AV</th>\n",
  340. " <th>AC</th>\n",
  341. " <th>PR</th>\n",
  342. " <th>UI</th>\n",
  343. " <th>S</th>\n",
  344. " <th>C</th>\n",
  345. " <th>I</th>\n",
  346. " <th>A</th>\n",
  347. " </tr>\n",
  348. " </thead>\n",
  349. " <tbody>\n",
  350. " <tr>\n",
  351. " <th>0</th>\n",
  352. " <td>N</td>\n",
  353. " <td>L</td>\n",
  354. " <td>N</td>\n",
  355. " <td>N</td>\n",
  356. " <td>U</td>\n",
  357. " <td>H</td>\n",
  358. " <td>N</td>\n",
  359. " <td>N</td>\n",
  360. " </tr>\n",
  361. " <tr>\n",
  362. " <th>1</th>\n",
  363. " <td>N</td>\n",
  364. " <td>L</td>\n",
  365. " <td>N</td>\n",
  366. " <td>N</td>\n",
  367. " <td>U</td>\n",
  368. " <td>H</td>\n",
  369. " <td>H</td>\n",
  370. " <td>H</td>\n",
  371. " </tr>\n",
  372. " <tr>\n",
  373. " <th>2</th>\n",
  374. " <td>N</td>\n",
  375. " <td>L</td>\n",
  376. " <td>N</td>\n",
  377. " <td>N</td>\n",
  378. " <td>U</td>\n",
  379. " <td>H</td>\n",
  380. " <td>N</td>\n",
  381. " <td>N</td>\n",
  382. " </tr>\n",
  383. " <tr>\n",
  384. " <th>3</th>\n",
  385. " <td>N</td>\n",
  386. " <td>H</td>\n",
  387. " <td>N</td>\n",
  388. " <td>N</td>\n",
  389. " <td>U</td>\n",
  390. " <td>H</td>\n",
  391. " <td>H</td>\n",
  392. " <td>H</td>\n",
  393. " </tr>\n",
  394. " <tr>\n",
  395. " <th>4</th>\n",
  396. " <td>N</td>\n",
  397. " <td>L</td>\n",
  398. " <td>N</td>\n",
  399. " <td>R</td>\n",
  400. " <td>U</td>\n",
  401. " <td>H</td>\n",
  402. " <td>H</td>\n",
  403. " <td>H</td>\n",
  404. " </tr>\n",
  405. " <tr>\n",
  406. " <th>...</th>\n",
  407. " <td>...</td>\n",
  408. " <td>...</td>\n",
  409. " <td>...</td>\n",
  410. " <td>...</td>\n",
  411. " <td>...</td>\n",
  412. " <td>...</td>\n",
  413. " <td>...</td>\n",
  414. " <td>...</td>\n",
  415. " </tr>\n",
  416. " <tr>\n",
  417. " <th>5619</th>\n",
  418. " <td>N</td>\n",
  419. " <td>L</td>\n",
  420. " <td>N</td>\n",
  421. " <td>N</td>\n",
  422. " <td>U</td>\n",
  423. " <td>N</td>\n",
  424. " <td>N</td>\n",
  425. " <td>H</td>\n",
  426. " </tr>\n",
  427. " <tr>\n",
  428. " <th>5620</th>\n",
  429. " <td>N</td>\n",
  430. " <td>L</td>\n",
  431. " <td>N</td>\n",
  432. " <td>R</td>\n",
  433. " <td>C</td>\n",
  434. " <td>L</td>\n",
  435. " <td>L</td>\n",
  436. " <td>N</td>\n",
  437. " </tr>\n",
  438. " <tr>\n",
  439. " <th>5621</th>\n",
  440. " <td>N</td>\n",
  441. " <td>L</td>\n",
  442. " <td>N</td>\n",
  443. " <td>R</td>\n",
  444. " <td>U</td>\n",
  445. " <td>N</td>\n",
  446. " <td>H</td>\n",
  447. " <td>N</td>\n",
  448. " </tr>\n",
  449. " <tr>\n",
  450. " <th>5622</th>\n",
  451. " <td>N</td>\n",
  452. " <td>L</td>\n",
  453. " <td>N</td>\n",
  454. " <td>R</td>\n",
  455. " <td>U</td>\n",
  456. " <td>N</td>\n",
  457. " <td>H</td>\n",
  458. " <td>N</td>\n",
  459. " </tr>\n",
  460. " <tr>\n",
  461. " <th>5623</th>\n",
  462. " <td>N</td>\n",
  463. " <td>L</td>\n",
  464. " <td>L</td>\n",
  465. " <td>R</td>\n",
  466. " <td>C</td>\n",
  467. " <td>L</td>\n",
  468. " <td>L</td>\n",
  469. " <td>N</td>\n",
  470. " </tr>\n",
  471. " </tbody>\n",
  472. "</table>\n",
  473. "<p>5624 rows × 8 columns</p>\n",
  474. "</div>"
  475. ],
  476. "text/plain": [
  477. " AV AC PR UI S C I A\n",
  478. "0 N L N N U H N N\n",
  479. "1 N L N N U H H H\n",
  480. "2 N L N N U H N N\n",
  481. "3 N H N N U H H H\n",
  482. "4 N L N R U H H H\n",
  483. "... .. .. .. .. .. .. .. ..\n",
  484. "5619 N L N N U N N H\n",
  485. "5620 N L N R C L L N\n",
  486. "5621 N L N R U N H N\n",
  487. "5622 N L N R U N H N\n",
  488. "5623 N L L R C L L N\n",
  489. "\n",
  490. "[5624 rows x 8 columns]"
  491. ]
  492. },
  493. "execution_count": 23,
  494. "metadata": {},
  495. "output_type": "execute_result"
  496. }
  497. ],
  498. "source": [
  499. "#转换数据\n",
  500. "def transform_value(val):\n",
  501. " return val.split(':')[1]\n",
  502. "temp = []\n",
  503. "for i in range(vectorString.size):\n",
  504. " part = vectorString[i].split('/')\n",
  505. " list_items = part[1::]\n",
  506. " temp.append(list_items)\n",
  507. "train_data_temp = pd.DataFrame(temp, columns=columns_2)\n",
  508. "train_data_temp = train_data_temp.applymap(transform_value)\n",
  509. "train_data_temp"
  510. ]
  511. },
  512. {
  513. "cell_type": "code",
  514. "execution_count": 24,
  515. "id": "ad6d24ea-2f6b-465c-9516-77bd88f0d01b",
  516. "metadata": {},
  517. "outputs": [
  518. {
  519. "data": {
  520. "text/html": [
  521. "<div>\n",
  522. "<style scoped>\n",
  523. " .dataframe tbody tr th:only-of-type {\n",
  524. " vertical-align: middle;\n",
  525. " }\n",
  526. "\n",
  527. " .dataframe tbody tr th {\n",
  528. " vertical-align: top;\n",
  529. " }\n",
  530. "\n",
  531. " .dataframe thead th {\n",
  532. " text-align: right;\n",
  533. " }\n",
  534. "</style>\n",
  535. "<table border=\"1\" class=\"dataframe\">\n",
  536. " <thead>\n",
  537. " <tr style=\"text-align: right;\">\n",
  538. " <th></th>\n",
  539. " <th>AV</th>\n",
  540. " <th>AC</th>\n",
  541. " <th>PR</th>\n",
  542. " <th>UI</th>\n",
  543. " <th>S</th>\n",
  544. " <th>C</th>\n",
  545. " <th>I</th>\n",
  546. " <th>A</th>\n",
  547. " </tr>\n",
  548. " </thead>\n",
  549. " <tbody>\n",
  550. " <tr>\n",
  551. " <th>0</th>\n",
  552. " <td>0.202517</td>\n",
  553. " <td>0.552692</td>\n",
  554. " <td>0.447963</td>\n",
  555. " <td>0.547776</td>\n",
  556. " <td>0.843938</td>\n",
  557. " <td>0.246931</td>\n",
  558. " <td>0.447963</td>\n",
  559. " <td>0.447963</td>\n",
  560. " </tr>\n",
  561. " <tr>\n",
  562. " <th>1</th>\n",
  563. " <td>0.202517</td>\n",
  564. " <td>0.552692</td>\n",
  565. " <td>0.447963</td>\n",
  566. " <td>0.547776</td>\n",
  567. " <td>0.843938</td>\n",
  568. " <td>0.246931</td>\n",
  569. " <td>0.246931</td>\n",
  570. " <td>0.246931</td>\n",
  571. " </tr>\n",
  572. " <tr>\n",
  573. " <th>2</th>\n",
  574. " <td>0.202517</td>\n",
  575. " <td>0.552692</td>\n",
  576. " <td>0.447963</td>\n",
  577. " <td>0.547776</td>\n",
  578. " <td>0.843938</td>\n",
  579. " <td>0.246931</td>\n",
  580. " <td>0.447963</td>\n",
  581. " <td>0.447963</td>\n",
  582. " </tr>\n",
  583. " <tr>\n",
  584. " <th>3</th>\n",
  585. " <td>0.202517</td>\n",
  586. " <td>0.447308</td>\n",
  587. " <td>0.447963</td>\n",
  588. " <td>0.547776</td>\n",
  589. " <td>0.843938</td>\n",
  590. " <td>0.246931</td>\n",
  591. " <td>0.246931</td>\n",
  592. " <td>0.246931</td>\n",
  593. " </tr>\n",
  594. " <tr>\n",
  595. " <th>4</th>\n",
  596. " <td>0.202517</td>\n",
  597. " <td>0.552692</td>\n",
  598. " <td>0.447963</td>\n",
  599. " <td>0.452224</td>\n",
  600. " <td>0.843938</td>\n",
  601. " <td>0.246931</td>\n",
  602. " <td>0.246931</td>\n",
  603. " <td>0.246931</td>\n",
  604. " </tr>\n",
  605. " <tr>\n",
  606. " <th>...</th>\n",
  607. " <td>...</td>\n",
  608. " <td>...</td>\n",
  609. " <td>...</td>\n",
  610. " <td>...</td>\n",
  611. " <td>...</td>\n",
  612. " <td>...</td>\n",
  613. " <td>...</td>\n",
  614. " <td>...</td>\n",
  615. " </tr>\n",
  616. " <tr>\n",
  617. " <th>5619</th>\n",
  618. " <td>0.202517</td>\n",
  619. " <td>0.552692</td>\n",
  620. " <td>0.447963</td>\n",
  621. " <td>0.547776</td>\n",
  622. " <td>0.843938</td>\n",
  623. " <td>0.447963</td>\n",
  624. " <td>0.447963</td>\n",
  625. " <td>0.246931</td>\n",
  626. " </tr>\n",
  627. " <tr>\n",
  628. " <th>5620</th>\n",
  629. " <td>0.202517</td>\n",
  630. " <td>0.552692</td>\n",
  631. " <td>0.447963</td>\n",
  632. " <td>0.452224</td>\n",
  633. " <td>0.156062</td>\n",
  634. " <td>0.305106</td>\n",
  635. " <td>0.305106</td>\n",
  636. " <td>0.447963</td>\n",
  637. " </tr>\n",
  638. " <tr>\n",
  639. " <th>5621</th>\n",
  640. " <td>0.202517</td>\n",
  641. " <td>0.552692</td>\n",
  642. " <td>0.447963</td>\n",
  643. " <td>0.452224</td>\n",
  644. " <td>0.843938</td>\n",
  645. " <td>0.447963</td>\n",
  646. " <td>0.246931</td>\n",
  647. " <td>0.447963</td>\n",
  648. " </tr>\n",
  649. " <tr>\n",
  650. " <th>5622</th>\n",
  651. " <td>0.202517</td>\n",
  652. " <td>0.552692</td>\n",
  653. " <td>0.447963</td>\n",
  654. " <td>0.452224</td>\n",
  655. " <td>0.843938</td>\n",
  656. " <td>0.447963</td>\n",
  657. " <td>0.246931</td>\n",
  658. " <td>0.447963</td>\n",
  659. " </tr>\n",
  660. " <tr>\n",
  661. " <th>5623</th>\n",
  662. " <td>0.202517</td>\n",
  663. " <td>0.552692</td>\n",
  664. " <td>0.305106</td>\n",
  665. " <td>0.452224</td>\n",
  666. " <td>0.156062</td>\n",
  667. " <td>0.305106</td>\n",
  668. " <td>0.305106</td>\n",
  669. " <td>0.447963</td>\n",
  670. " </tr>\n",
  671. " </tbody>\n",
  672. "</table>\n",
  673. "<p>5624 rows × 8 columns</p>\n",
  674. "</div>"
  675. ],
  676. "text/plain": [
  677. " AV AC PR UI S C I \\\n",
  678. "0 0.202517 0.552692 0.447963 0.547776 0.843938 0.246931 0.447963 \n",
  679. "1 0.202517 0.552692 0.447963 0.547776 0.843938 0.246931 0.246931 \n",
  680. "2 0.202517 0.552692 0.447963 0.547776 0.843938 0.246931 0.447963 \n",
  681. "3 0.202517 0.447308 0.447963 0.547776 0.843938 0.246931 0.246931 \n",
  682. "4 0.202517 0.552692 0.447963 0.452224 0.843938 0.246931 0.246931 \n",
  683. "... ... ... ... ... ... ... ... \n",
  684. "5619 0.202517 0.552692 0.447963 0.547776 0.843938 0.447963 0.447963 \n",
  685. "5620 0.202517 0.552692 0.447963 0.452224 0.156062 0.305106 0.305106 \n",
  686. "5621 0.202517 0.552692 0.447963 0.452224 0.843938 0.447963 0.246931 \n",
  687. "5622 0.202517 0.552692 0.447963 0.452224 0.843938 0.447963 0.246931 \n",
  688. "5623 0.202517 0.552692 0.305106 0.452224 0.156062 0.305106 0.305106 \n",
  689. "\n",
  690. " A \n",
  691. "0 0.447963 \n",
  692. "1 0.246931 \n",
  693. "2 0.447963 \n",
  694. "3 0.246931 \n",
  695. "4 0.246931 \n",
  696. "... ... \n",
  697. "5619 0.246931 \n",
  698. "5620 0.447963 \n",
  699. "5621 0.447963 \n",
  700. "5622 0.447963 \n",
  701. "5623 0.447963 \n",
  702. "\n",
  703. "[5624 rows x 8 columns]"
  704. ]
  705. },
  706. "execution_count": 24,
  707. "metadata": {},
  708. "output_type": "execute_result"
  709. }
  710. ],
  711. "source": [
  712. "# 替换 DataFrame 中的值\n",
  713. "train_data_temp.replace(cvss, inplace=True)\n",
  714. "train_data_temp"
  715. ]
  716. },
  717. {
  718. "cell_type": "code",
  719. "execution_count": 28,
  720. "id": "d33ba418-4964-4d8b-8410-e420cde1589b",
  721. "metadata": {},
  722. "outputs": [
  723. {
  724. "data": {
  725. "text/html": [
  726. "<div>\n",
  727. "<style scoped>\n",
  728. " .dataframe tbody tr th:only-of-type {\n",
  729. " vertical-align: middle;\n",
  730. " }\n",
  731. "\n",
  732. " .dataframe tbody tr th {\n",
  733. " vertical-align: top;\n",
  734. " }\n",
  735. "\n",
  736. " .dataframe thead th {\n",
  737. " text-align: right;\n",
  738. " }\n",
  739. "</style>\n",
  740. "<table border=\"1\" class=\"dataframe\">\n",
  741. " <thead>\n",
  742. " <tr style=\"text-align: right;\">\n",
  743. " <th></th>\n",
  744. " <th>AV</th>\n",
  745. " <th>AC</th>\n",
  746. " <th>PR</th>\n",
  747. " <th>UI</th>\n",
  748. " <th>S</th>\n",
  749. " <th>C</th>\n",
  750. " <th>I</th>\n",
  751. " <th>A</th>\n",
  752. " <th>exploitabilityScore</th>\n",
  753. " <th>baseScore</th>\n",
  754. " <th>impactScore</th>\n",
  755. " <th>severity</th>\n",
  756. " </tr>\n",
  757. " </thead>\n",
  758. " <tbody>\n",
  759. " <tr>\n",
  760. " <th>0</th>\n",
  761. " <td>0.202517</td>\n",
  762. " <td>0.552692</td>\n",
  763. " <td>0.447963</td>\n",
  764. " <td>0.547776</td>\n",
  765. " <td>0.843938</td>\n",
  766. " <td>0.246931</td>\n",
  767. " <td>0.447963</td>\n",
  768. " <td>0.447963</td>\n",
  769. " <td>3.9</td>\n",
  770. " <td>7.5</td>\n",
  771. " <td>3.6</td>\n",
  772. " <td>HIGH</td>\n",
  773. " </tr>\n",
  774. " <tr>\n",
  775. " <th>1</th>\n",
  776. " <td>0.202517</td>\n",
  777. " <td>0.552692</td>\n",
  778. " <td>0.447963</td>\n",
  779. " <td>0.547776</td>\n",
  780. " <td>0.843938</td>\n",
  781. " <td>0.246931</td>\n",
  782. " <td>0.246931</td>\n",
  783. " <td>0.246931</td>\n",
  784. " <td>3.9</td>\n",
  785. " <td>9.8</td>\n",
  786. " <td>5.9</td>\n",
  787. " <td>CRITICAL</td>\n",
  788. " </tr>\n",
  789. " <tr>\n",
  790. " <th>2</th>\n",
  791. " <td>0.202517</td>\n",
  792. " <td>0.552692</td>\n",
  793. " <td>0.447963</td>\n",
  794. " <td>0.547776</td>\n",
  795. " <td>0.843938</td>\n",
  796. " <td>0.246931</td>\n",
  797. " <td>0.447963</td>\n",
  798. " <td>0.447963</td>\n",
  799. " <td>3.9</td>\n",
  800. " <td>7.5</td>\n",
  801. " <td>3.6</td>\n",
  802. " <td>HIGH</td>\n",
  803. " </tr>\n",
  804. " <tr>\n",
  805. " <th>3</th>\n",
  806. " <td>0.202517</td>\n",
  807. " <td>0.447308</td>\n",
  808. " <td>0.447963</td>\n",
  809. " <td>0.547776</td>\n",
  810. " <td>0.843938</td>\n",
  811. " <td>0.246931</td>\n",
  812. " <td>0.246931</td>\n",
  813. " <td>0.246931</td>\n",
  814. " <td>2.2</td>\n",
  815. " <td>8.1</td>\n",
  816. " <td>5.9</td>\n",
  817. " <td>HIGH</td>\n",
  818. " </tr>\n",
  819. " <tr>\n",
  820. " <th>4</th>\n",
  821. " <td>0.202517</td>\n",
  822. " <td>0.552692</td>\n",
  823. " <td>0.447963</td>\n",
  824. " <td>0.452224</td>\n",
  825. " <td>0.843938</td>\n",
  826. " <td>0.246931</td>\n",
  827. " <td>0.246931</td>\n",
  828. " <td>0.246931</td>\n",
  829. " <td>2.8</td>\n",
  830. " <td>8.8</td>\n",
  831. " <td>5.9</td>\n",
  832. " <td>HIGH</td>\n",
  833. " </tr>\n",
  834. " <tr>\n",
  835. " <th>...</th>\n",
  836. " <td>...</td>\n",
  837. " <td>...</td>\n",
  838. " <td>...</td>\n",
  839. " <td>...</td>\n",
  840. " <td>...</td>\n",
  841. " <td>...</td>\n",
  842. " <td>...</td>\n",
  843. " <td>...</td>\n",
  844. " <td>...</td>\n",
  845. " <td>...</td>\n",
  846. " <td>...</td>\n",
  847. " <td>...</td>\n",
  848. " </tr>\n",
  849. " <tr>\n",
  850. " <th>5619</th>\n",
  851. " <td>0.202517</td>\n",
  852. " <td>0.552692</td>\n",
  853. " <td>0.447963</td>\n",
  854. " <td>0.547776</td>\n",
  855. " <td>0.843938</td>\n",
  856. " <td>0.447963</td>\n",
  857. " <td>0.447963</td>\n",
  858. " <td>0.246931</td>\n",
  859. " <td>3.9</td>\n",
  860. " <td>7.5</td>\n",
  861. " <td>3.6</td>\n",
  862. " <td>HIGH</td>\n",
  863. " </tr>\n",
  864. " <tr>\n",
  865. " <th>5620</th>\n",
  866. " <td>0.202517</td>\n",
  867. " <td>0.552692</td>\n",
  868. " <td>0.447963</td>\n",
  869. " <td>0.452224</td>\n",
  870. " <td>0.156062</td>\n",
  871. " <td>0.305106</td>\n",
  872. " <td>0.305106</td>\n",
  873. " <td>0.447963</td>\n",
  874. " <td>2.8</td>\n",
  875. " <td>6.1</td>\n",
  876. " <td>2.7</td>\n",
  877. " <td>MEDIUM</td>\n",
  878. " </tr>\n",
  879. " <tr>\n",
  880. " <th>5621</th>\n",
  881. " <td>0.202517</td>\n",
  882. " <td>0.552692</td>\n",
  883. " <td>0.447963</td>\n",
  884. " <td>0.452224</td>\n",
  885. " <td>0.843938</td>\n",
  886. " <td>0.447963</td>\n",
  887. " <td>0.246931</td>\n",
  888. " <td>0.447963</td>\n",
  889. " <td>2.8</td>\n",
  890. " <td>6.5</td>\n",
  891. " <td>3.6</td>\n",
  892. " <td>MEDIUM</td>\n",
  893. " </tr>\n",
  894. " <tr>\n",
  895. " <th>5622</th>\n",
  896. " <td>0.202517</td>\n",
  897. " <td>0.552692</td>\n",
  898. " <td>0.447963</td>\n",
  899. " <td>0.452224</td>\n",
  900. " <td>0.843938</td>\n",
  901. " <td>0.447963</td>\n",
  902. " <td>0.246931</td>\n",
  903. " <td>0.447963</td>\n",
  904. " <td>2.8</td>\n",
  905. " <td>6.5</td>\n",
  906. " <td>3.6</td>\n",
  907. " <td>MEDIUM</td>\n",
  908. " </tr>\n",
  909. " <tr>\n",
  910. " <th>5623</th>\n",
  911. " <td>0.202517</td>\n",
  912. " <td>0.552692</td>\n",
  913. " <td>0.305106</td>\n",
  914. " <td>0.452224</td>\n",
  915. " <td>0.156062</td>\n",
  916. " <td>0.305106</td>\n",
  917. " <td>0.305106</td>\n",
  918. " <td>0.447963</td>\n",
  919. " <td>2.3</td>\n",
  920. " <td>5.4</td>\n",
  921. " <td>2.7</td>\n",
  922. " <td>MEDIUM</td>\n",
  923. " </tr>\n",
  924. " </tbody>\n",
  925. "</table>\n",
  926. "<p>5624 rows × 12 columns</p>\n",
  927. "</div>"
  928. ],
  929. "text/plain": [
  930. " AV AC PR UI S C I \\\n",
  931. "0 0.202517 0.552692 0.447963 0.547776 0.843938 0.246931 0.447963 \n",
  932. "1 0.202517 0.552692 0.447963 0.547776 0.843938 0.246931 0.246931 \n",
  933. "2 0.202517 0.552692 0.447963 0.547776 0.843938 0.246931 0.447963 \n",
  934. "3 0.202517 0.447308 0.447963 0.547776 0.843938 0.246931 0.246931 \n",
  935. "4 0.202517 0.552692 0.447963 0.452224 0.843938 0.246931 0.246931 \n",
  936. "... ... ... ... ... ... ... ... \n",
  937. "5619 0.202517 0.552692 0.447963 0.547776 0.843938 0.447963 0.447963 \n",
  938. "5620 0.202517 0.552692 0.447963 0.452224 0.156062 0.305106 0.305106 \n",
  939. "5621 0.202517 0.552692 0.447963 0.452224 0.843938 0.447963 0.246931 \n",
  940. "5622 0.202517 0.552692 0.447963 0.452224 0.843938 0.447963 0.246931 \n",
  941. "5623 0.202517 0.552692 0.305106 0.452224 0.156062 0.305106 0.305106 \n",
  942. "\n",
  943. " A exploitabilityScore baseScore impactScore severity \n",
  944. "0 0.447963 3.9 7.5 3.6 HIGH \n",
  945. "1 0.246931 3.9 9.8 5.9 CRITICAL \n",
  946. "2 0.447963 3.9 7.5 3.6 HIGH \n",
  947. "3 0.246931 2.2 8.1 5.9 HIGH \n",
  948. "4 0.246931 2.8 8.8 5.9 HIGH \n",
  949. "... ... ... ... ... ... \n",
  950. "5619 0.246931 3.9 7.5 3.6 HIGH \n",
  951. "5620 0.447963 2.8 6.1 2.7 MEDIUM \n",
  952. "5621 0.447963 2.8 6.5 3.6 MEDIUM \n",
  953. "5622 0.447963 2.8 6.5 3.6 MEDIUM \n",
  954. "5623 0.447963 2.3 5.4 2.7 MEDIUM \n",
  955. "\n",
  956. "[5624 rows x 12 columns]"
  957. ]
  958. },
  959. "execution_count": 28,
  960. "metadata": {},
  961. "output_type": "execute_result"
  962. }
  963. ],
  964. "source": [
  965. "train_data = pd.concat([train_data_temp, train_data], axis=1)\n",
  966. "train_data"
  967. ]
  968. },
  969. {
  970. "cell_type": "code",
  971. "execution_count": null,
  972. "id": "1d4077ea-ab43-4bd6-9a19-e74c98cad80d",
  973. "metadata": {},
  974. "outputs": [],
  975. "source": []
  976. }
  977. ],
  978. "metadata": {
  979. "kernelspec": {
  980. "display_name": "Python 3 (ipykernel)",
  981. "language": "python",
  982. "name": "python3"
  983. },
  984. "language_info": {
  985. "codemirror_mode": {
  986. "name": "ipython",
  987. "version": 3
  988. },
  989. "file_extension": ".py",
  990. "mimetype": "text/x-python",
  991. "name": "python",
  992. "nbconvert_exporter": "python",
  993. "pygments_lexer": "ipython3",
  994. "version": "3.11.4"
  995. }
  996. },
  997. "nbformat": 4,
  998. "nbformat_minor": 5
  999. }

在信息安全领域,漏洞评估和管理是关键任务之一。本作品探讨了如何利用预训练文本大模型来评估和研判漏洞的严重等级,其评分依据为通用漏洞评分系统(CVSS)。传统的漏洞评分方法依赖于手动分析和专家评审;而基于自然语言处理的文本大模型凭借其深度学习能力,可以自动化地处理和分析大量安全相关的文本数据,从而提高漏洞评估的效率和准确性。结合词干提取与词形还原,能够更好地发挥自然语言处理文本大模型的预测能力,并提升其准确度。