{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "991235a0-5e53-4c8c-99ab-fdd24415983a", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import json" ] }, { "cell_type": "code", "execution_count": 2, "id": "495e20b0-5de1-4a37-85dd-549b39fb3562", "metadata": {}, "outputs": [], "source": [
"# Read the training records into a DataFrame.\n",
"data = pd.read_json(path_or_buf='SIR_train_set.json')"
] }, { "cell_type": "code", "execution_count": 3, "id": "e9c6b2b8-ac62-428e-a3e7-b08f0a608a93", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
exploitabilityScorebaseScoreimpactScoreseverity
03.97.53.6HIGH
13.99.85.9CRITICAL
23.97.53.6HIGH
32.28.15.9HIGH
42.88.85.9HIGH
...............
56193.97.53.6HIGH
56202.86.12.7MEDIUM
56212.86.53.6MEDIUM
56222.86.53.6MEDIUM
56232.35.42.7MEDIUM
\n", "

5624 rows × 4 columns

\n", "
" ], "text/plain": [ " exploitabilityScore baseScore impactScore severity\n", "0 3.9 7.5 3.6 HIGH\n", "1 3.9 9.8 5.9 CRITICAL\n", "2 3.9 7.5 3.6 HIGH\n", "3 2.2 8.1 5.9 HIGH\n", "4 2.8 8.8 5.9 HIGH\n", "... ... ... ... ...\n", "5619 3.9 7.5 3.6 HIGH\n", "5620 2.8 6.1 2.7 MEDIUM\n", "5621 2.8 6.5 3.6 MEDIUM\n", "5622 2.8 6.5 3.6 MEDIUM\n", "5623 2.3 5.4 2.7 MEDIUM\n", "\n", "[5624 rows x 4 columns]" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [
"# Keep the three score columns and the severity label of the loaded records.\n",
"columns_1 = [\n",
"    'exploitabilityScore',\n",
"    'baseScore',\n",
"    'impactScore',\n",
"    'severity',\n",
"]\n",
"train_data = data.loc[:, columns_1]\n",
"train_data"
] }, { "cell_type": "code", "execution_count": 4, "id": "c2b23c41-5667-4103-ae3e-628bd72c222b", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:N/A:N\n", "1 CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H\n", "2 CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:N/A:N\n", "3 CVSS:3.0/AV:N/AC:H/PR:N/UI:N/S:U/C:H/I:H/A:H\n", "4 CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:U/C:H/I:H/A:H\n", " ... 
\n", "5619 CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:N/I:N/A:H\n", "5620 CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:C/C:L/I:L/A:N\n", "5621 CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:U/C:N/I:H/A:N\n", "5622 CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:U/C:N/I:H/A:N\n", "5623 CVSS:3.1/AV:N/AC:L/PR:L/UI:R/S:C/C:L/I:L/A:N\n", "Name: vectorString, Length: 5624, dtype: object" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [
"# Vector strings such as 'CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:N/A:N', one per record.\n",
"vectorString = data['vectorString']\n",
"vectorString"
] }, { "cell_type": "code", "execution_count": 5, "id": "83092a68-11a6-4a0d-b8d2-447be678076f", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'AV': {'network': 2897, 'adjacent': 5516, 'local': 2334},\n", " 'AC': {'low': 2659, 'medium': 5396, 'high': 2152},\n", " 'Au': {'none': 3904, 'single': 2309, 'multiple': 3674},\n", " 'C': {'none': 3904, 'partial': 7704, 'complete': 3143},\n", " 'I': {'none': 3904, 'partial': 7704, 'complete': 3143},\n", " 'A': {'none': 3904, 'partial': 7704, 'complete': 3143},\n", " 'severity': {'low': 2659, 'medium': 5396, 'high': 2152}}" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [
"def load_label_word_ids(path):\n",
"    \"\"\"Load one label-word-id table from the JSON file at `path`.\n",
"\n",
"    The file is decoded as UTF-8 instead of the platform default encoding,\n",
"    so the result does not depend on the machine's locale.\n",
"\n",
"    Returns:\n",
"        Whatever object the JSON document contains (here a dict of dicts).\n",
"    \"\"\"\n",
"    with open(path, 'r', encoding='utf-8') as file:\n",
"        return json.load(file)\n",
"\n",
"\n",
"# Table stored in label_word_ids_CVSS2.json, shown here for inspection.\n",
"cvss2 = load_label_word_ids('label_word_ids_CVSS2.json')\n",
"cvss2"
] }, { "cell_type": "code", "execution_count": 6, "id": "e420f177-ce45-45cf-b1ce-89a8876e6e6e", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "9" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [
"# Table stored in label_word_ids.json; the cell output is its number of top-level keys.\n",
"cvss = load_label_word_ids('label_word_ids.json')\n",
"len(cvss)"
] }, { "cell_type": "code", "execution_count": 22, "id": "f32a7cd0-5b49-448d-90f5-1abcb1e13890", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'AV': {'N': 0.20251660258650825,\n", " 'A': 0.3855994407549808,\n", " 'L': 0.1631597343586159,\n", " 'P': 0.24872422229989516},\n", " 'AC': {'L': 0.5526917480773228, 
'H': 0.4473082519226772},\n", " 'PR': {'N': 0.44796328169822147,\n", " 'L': 0.3051061388410786,\n", " 'H': 0.24693057946069993},\n", " 'UI': {'N': 0.5477760628595482, 'R': 0.4522239371404518},\n", " 'S': {'U': 0.8439380911435942, 'C': 0.15606190885640583},\n", " 'C': {'N': 0.44796328169822147,\n", " 'L': 0.3051061388410786,\n", " 'H': 0.24693057946069993},\n", " 'I': {'N': 0.44796328169822147,\n", " 'L': 0.3051061388410786,\n", " 'H': 0.24693057946069993},\n", " 'A': {'N': 0.44796328169822147,\n", " 'L': 0.3051061388410786,\n", " 'H': 0.24693057946069993},\n", " 'severity': {'low': 0.18472974850632207,\n", " 'medium': 0.37487842156454076,\n", " 'high': 0.14950673891899402,\n", " 'critical': 0.2908850910101431}}" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [
"columns_2 = ['AV', 'AC', 'PR', 'UI', 'S', 'C', 'I', 'A']\n",
"\n",
"\n",
"def abbreviate_labels(values):\n",
"    \"\"\"Re-key a {label: value} dict by the upper-cased first letter of each label.\n",
"\n",
"    Example: {'network': 1, 'local': 2} -> {'N': 1, 'L': 2}.\n",
"\n",
"    Raises:\n",
"        ValueError: if two labels start with the same letter; a plain dict\n",
"            comprehension would silently keep only the last of them.\n",
"    \"\"\"\n",
"    abbreviated = {}\n",
"    for label, value in values.items():\n",
"        code = label[0].upper()\n",
"        if code in abbreviated:\n",
"            raise ValueError(f'labels {list(values)} share the code {code!r}')\n",
"        abbreviated[code] = value\n",
"    return abbreviated\n",
"\n",
"\n",
"def to_shares(values):\n",
"    \"\"\"Divide every value by the sum of all values, so the results add up to 1.\"\"\"\n",
"    total = sum(values.values())\n",
"    return {label: value / total for label, value in values.items()}\n",
"\n",
"\n",
"missing = [column for column in columns_2 if column not in cvss]\n",
"if missing:\n",
"    raise KeyError(f'cvss has no entry for {missing}')\n",
"\n",
"# Build a new dict instead of editing the loaded one in place. Only the metrics\n",
"# listed in columns_2 are abbreviated; any other key keeps its full labels.\n",
"cvss = {\n",
"    metric: to_shares(abbreviate_labels(values) if metric in columns_2 else values)\n",
"    for metric, values in cvss.items()\n",
"}\n",
"cvss"
] }, { "cell_type": "code", "execution_count": 23, "id": "34b69fad-1643-4732-9184-8411eb99e314", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AVACPRUISCIA
0NLNNUHNN
1NLNNUHHH
2NLNNUHNN
3NHNNUHHH
4NLNRUHHH
...........................
5619NLNNUNNH
5620NLNRCLLN
5621NLNRUNHN
5622NLNRUNHN
5623NLLRCLLN
\n", "

5624 rows × 8 columns

\n", "
" ], "text/plain": [ " AV AC PR UI S C I A\n", "0 N L N N U H N N\n", "1 N L N N U H H H\n", "2 N L N N U H N N\n", "3 N H N N U H H H\n", "4 N L N R U H H H\n", "... .. .. .. .. .. .. .. ..\n", "5619 N L N N U N N H\n", "5620 N L N R C L L N\n", "5621 N L N R U N H N\n", "5622 N L N R U N H N\n", "5623 N L L R C L L N\n", "\n", "[5624 rows x 8 columns]" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [
"# Turn every vector string into one row of metric codes.\n",
"def transform_value(val):\n",
"    \"\"\"Return the value part of one 'NAME:VALUE' item, e.g. 'AV:N' -> 'N'.\"\"\"\n",
"    return val.split(':')[1]\n",
"\n",
"\n",
"def parse_vector_string(vector):\n",
"    \"\"\"Map metric name to code for one vector string.\n",
"\n",
"    Example: 'CVSS:3.1/AV:N/AC:L' -> {'AV': 'N', 'AC': 'L'}.\n",
"    The first '/'-separated item (the 'CVSS:<version>' prefix) is skipped.\n",
"    \"\"\"\n",
"    items = vector.split('/')[1:]\n",
"    return {item.split(':')[0]: transform_value(item) for item in items}\n",
"\n",
"\n",
"# Metrics are matched to columns by name rather than by position, so a vector\n",
"# that lists them in another order or carries extra metrics still fills the\n",
"# right columns; a metric missing from a vector becomes NaN. Iterating over the\n",
"# values and reusing the Series index keeps the rows aligned with the frame the\n",
"# Series came from, whatever that index looks like.\n",
"train_data_temp = pd.DataFrame(\n",
"    [parse_vector_string(vector) for vector in vectorString],\n",
"    index=vectorString.index,\n",
"    columns=columns_2,\n",
")\n",
"train_data_temp"
] }, { "cell_type": "code", "execution_count": 24, "id": "ad6d24ea-2f6b-465c-9516-77bd88f0d01b", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AVACPRUISCIA
00.2025170.5526920.4479630.5477760.8439380.2469310.4479630.447963
10.2025170.5526920.4479630.5477760.8439380.2469310.2469310.246931
20.2025170.5526920.4479630.5477760.8439380.2469310.4479630.447963
30.2025170.4473080.4479630.5477760.8439380.2469310.2469310.246931
40.2025170.5526920.4479630.4522240.8439380.2469310.2469310.246931
...........................
56190.2025170.5526920.4479630.5477760.8439380.4479630.4479630.246931
56200.2025170.5526920.4479630.4522240.1560620.3051060.3051060.447963
56210.2025170.5526920.4479630.4522240.8439380.4479630.2469310.447963
56220.2025170.5526920.4479630.4522240.8439380.4479630.2469310.447963
56230.2025170.5526920.3051060.4522240.1560620.3051060.3051060.447963
\n", "

5624 rows × 8 columns

\n", "
" ], "text/plain": [ " AV AC PR UI S C I \\\n", "0 0.202517 0.552692 0.447963 0.547776 0.843938 0.246931 0.447963 \n", "1 0.202517 0.552692 0.447963 0.547776 0.843938 0.246931 0.246931 \n", "2 0.202517 0.552692 0.447963 0.547776 0.843938 0.246931 0.447963 \n", "3 0.202517 0.447308 0.447963 0.547776 0.843938 0.246931 0.246931 \n", "4 0.202517 0.552692 0.447963 0.452224 0.843938 0.246931 0.246931 \n", "... ... ... ... ... ... ... ... \n", "5619 0.202517 0.552692 0.447963 0.547776 0.843938 0.447963 0.447963 \n", "5620 0.202517 0.552692 0.447963 0.452224 0.156062 0.305106 0.305106 \n", "5621 0.202517 0.552692 0.447963 0.452224 0.843938 0.447963 0.246931 \n", "5622 0.202517 0.552692 0.447963 0.452224 0.843938 0.447963 0.246931 \n", "5623 0.202517 0.552692 0.305106 0.452224 0.156062 0.305106 0.305106 \n", "\n", " A \n", "0 0.447963 \n", "1 0.246931 \n", "2 0.447963 \n", "3 0.246931 \n", "4 0.246931 \n", "... ... \n", "5619 0.246931 \n", "5620 0.447963 \n", "5621 0.447963 \n", "5622 0.447963 \n", "5623 0.447963 \n", "\n", "[5624 rows x 8 columns]" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [
"def encode_metric_column(codes, weights):\n",
"    \"\"\"Replace the codes of one metric column by their numeric weights.\n",
"\n",
"    Args:\n",
"        codes: Series holding the codes of one metric.\n",
"        weights: dict mapping each code to its weight.\n",
"\n",
"    Returns:\n",
"        A float Series. A column that is already numeric is returned as is,\n",
"        so running this cell a second time changes nothing.\n",
"\n",
"    Raises:\n",
"        ValueError: if a code has no weight; keeping it would leave text\n",
"            inside an otherwise numeric column.\n",
"    \"\"\"\n",
"    if pd.api.types.is_numeric_dtype(codes):\n",
"        return codes\n",
"    encoded = codes.map(weights)\n",
"    # map() yields NaN for codes absent from `weights`; NaN inputs are not errors.\n",
"    unknown = codes[encoded.isna() & codes.notna()].unique()\n",
"    if len(unknown):\n",
"        raise ValueError(f'{codes.name}: no weight for {list(unknown)}')\n",
"    return encoded.astype(float)\n",
"\n",
"\n",
"# Build a new frame instead of replacing in place. Each column is looked up\n",
"# under its own name only, so other keys of `cvss` are never applied to it.\n",
"train_data_temp = pd.DataFrame(\n",
"    {\n",
"        metric: encode_metric_column(train_data_temp[metric], cvss[metric])\n",
"        for metric in train_data_temp.columns\n",
"    },\n",
"    index=train_data_temp.index,\n",
")\n",
"train_data_temp"
] }, { "cell_type": "code", "execution_count": 28, "id": "d33ba418-4964-4d8b-8410-e420cde1589b", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AVACPRUISCIAexploitabilityScorebaseScoreimpactScoreseverity
00.2025170.5526920.4479630.5477760.8439380.2469310.4479630.4479633.97.53.6HIGH
10.2025170.5526920.4479630.5477760.8439380.2469310.2469310.2469313.99.85.9CRITICAL
20.2025170.5526920.4479630.5477760.8439380.2469310.4479630.4479633.97.53.6HIGH
30.2025170.4473080.4479630.5477760.8439380.2469310.2469310.2469312.28.15.9HIGH
40.2025170.5526920.4479630.4522240.8439380.2469310.2469310.2469312.88.85.9HIGH
.......................................
56190.2025170.5526920.4479630.5477760.8439380.4479630.4479630.2469313.97.53.6HIGH
56200.2025170.5526920.4479630.4522240.1560620.3051060.3051060.4479632.86.12.7MEDIUM
56210.2025170.5526920.4479630.4522240.8439380.4479630.2469310.4479632.86.53.6MEDIUM
56220.2025170.5526920.4479630.4522240.8439380.4479630.2469310.4479632.86.53.6MEDIUM
56230.2025170.5526920.3051060.4522240.1560620.3051060.3051060.4479632.35.42.7MEDIUM
\n", "

5624 rows × 12 columns

\n", "
" ], "text/plain": [ " AV AC PR UI S C I \\\n", "0 0.202517 0.552692 0.447963 0.547776 0.843938 0.246931 0.447963 \n", "1 0.202517 0.552692 0.447963 0.547776 0.843938 0.246931 0.246931 \n", "2 0.202517 0.552692 0.447963 0.547776 0.843938 0.246931 0.447963 \n", "3 0.202517 0.447308 0.447963 0.547776 0.843938 0.246931 0.246931 \n", "4 0.202517 0.552692 0.447963 0.452224 0.843938 0.246931 0.246931 \n", "... ... ... ... ... ... ... ... \n", "5619 0.202517 0.552692 0.447963 0.547776 0.843938 0.447963 0.447963 \n", "5620 0.202517 0.552692 0.447963 0.452224 0.156062 0.305106 0.305106 \n", "5621 0.202517 0.552692 0.447963 0.452224 0.843938 0.447963 0.246931 \n", "5622 0.202517 0.552692 0.447963 0.452224 0.843938 0.447963 0.246931 \n", "5623 0.202517 0.552692 0.305106 0.452224 0.156062 0.305106 0.305106 \n", "\n", " A exploitabilityScore baseScore impactScore severity \n", "0 0.447963 3.9 7.5 3.6 HIGH \n", "1 0.246931 3.9 9.8 5.9 CRITICAL \n", "2 0.447963 3.9 7.5 3.6 HIGH \n", "3 0.246931 2.2 8.1 5.9 HIGH \n", "4 0.246931 2.8 8.8 5.9 HIGH \n", "... ... ... ... ... ... 
\n", "5619 0.246931 3.9 7.5 3.6 HIGH \n", "5620 0.447963 2.8 6.1 2.7 MEDIUM \n", "5621 0.447963 2.8 6.5 3.6 MEDIUM \n", "5622 0.447963 2.8 6.5 3.6 MEDIUM \n", "5623 0.447963 2.3 5.4 2.7 MEDIUM \n", "\n", "[5624 rows x 12 columns]" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [
"# Join the encoded metrics with the score and severity columns. The right-hand\n",
"# side is selected from `data` again rather than from `train_data` itself, so\n",
"# running this cell more than once cannot append duplicate columns.\n",
"train_data = pd.concat([train_data_temp, data[columns_1]], axis=1)\n",
"# concat(axis=1) aligns rows on the index; a different row count means the two\n",
"# frames do not share an index and NaN-padded rows were added.\n",
"assert len(train_data) == len(train_data_temp) == len(data), 'metric rows and score rows are not aligned'\n",
"train_data"
] }, { "cell_type": "code", "execution_count": null, "id": "1d4077ea-ab43-4bd6-9a19-e74c98cad80d", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.4" } }, "nbformat": 4, "nbformat_minor": 5 }