| @@ -1,7 +1,8 @@ | |||||
| #! /bin/env python3 | #! /bin/env python3 | ||||
| import argparse | import argparse | ||||
| import os | import os | ||||
| from typing import List | |||||
| from pathlib import Path | |||||
| from typing import List, Optional, Tuple | |||||
| import pygments | import pygments | ||||
| from marko.block import FencedCode | from marko.block import FencedCode | ||||
| @@ -16,6 +17,8 @@ from revChatGPT.V1 import Chatbot | |||||
| ENV_UUID = "GPTTRACE_CONV_UUID" | ENV_UUID = "GPTTRACE_CONV_UUID" | ||||
| ENV_ACCESS_TOKEN = "GPTTRACE_ACCESS_TOKEN" | ENV_ACCESS_TOKEN = "GPTTRACE_ACCESS_TOKEN" | ||||
| PROMPTS_DIR = Path("./prompts") | |||||
| def pretty_print(input, lexer=MarkdownLexer): | def pretty_print(input, lexer=MarkdownLexer): | ||||
| tokens = list(pygments.lex(input, lexer=lexer())) | tokens = list(pygments.lex(input, lexer=lexer())) | ||||
| @@ -29,6 +32,7 @@ def main(): | |||||
| ) | ) | ||||
| group = parser.add_mutually_exclusive_group() | group = parser.add_mutually_exclusive_group() | ||||
| group.add_argument( | group.add_argument( | ||||
| "-i", "--info", help="Let ChatGPT explain what's eBPF", action="store_true" | "-i", "--info", help="Let ChatGPT explain what's eBPF", action="store_true" | ||||
| ) | ) | ||||
| @@ -40,19 +44,10 @@ def main(): | |||||
| metavar="TEXT", | metavar="TEXT", | ||||
| ) | ) | ||||
| group.add_argument( | group.add_argument( | ||||
| "-g", | |||||
| "--generate", | |||||
| help="Generate eBPF programs using your input with ChatGPT", | |||||
| action="store", | |||||
| metavar="TEXT", | |||||
| ) | |||||
| "-g", "--generate", help="Generate eBPF programs using your input with ChatGPT", action="store", metavar="TEXT") | |||||
| group.add_argument( | |||||
| "--train", help="Train ChatGPT with conversions we provided", action="store_true") | |||||
| parser.add_argument( | |||||
| "-v", | |||||
| "--verbose", | |||||
| help="Print the prompt and receive message", | |||||
| action="store_true", | |||||
| ) | |||||
| parser.add_argument( | parser.add_argument( | ||||
| "-u", | "-u", | ||||
| "--uuid", | "--uuid", | ||||
| @@ -78,10 +73,8 @@ def main(): | |||||
| elif args.execute is not None: | elif args.execute is not None: | ||||
| desc: str = args.execute | desc: str = args.execute | ||||
| print("Sending query to ChatGPT: " + desc) | print("Sending query to ChatGPT: " + desc) | ||||
| ret_val = generate_result( | |||||
| chatbot, construct_running_prompt(desc), conv_uuid, args.verbose | |||||
| ) | |||||
| pretty_print(ret_val) | |||||
| ret_val, _ = generate_result( | |||||
| chatbot, construct_running_prompt(desc), conv_uuid, args.verbose) | |||||
| # print(ret_val) | # print(ret_val) | ||||
| parsed = make_executable_command(ret_val) | parsed = make_executable_command(ret_val) | ||||
| # print(f"Command to run: {parsed}") | # print(f"Command to run: {parsed}") | ||||
| @@ -90,15 +83,32 @@ def main(): | |||||
| elif args.generate is not None: | elif args.generate is not None: | ||||
| desc: str = args.generate | desc: str = args.generate | ||||
| print("Sending query to ChatGPT: " + desc) | print("Sending query to ChatGPT: " + desc) | ||||
| ret_val = generate_result( | |||||
| chatbot, construct_generate_prompt(desc), conv_uuid, True | |||||
| ) | |||||
| # print(ret_val) | |||||
| ret_val, _ = generate_result( | |||||
| chatbot, construct_generate_prompt(desc), conv_uuid) | |||||
| pretty_print(ret_val) | |||||
| parsed = extract_code_blocks(ret_val) | parsed = extract_code_blocks(ret_val) | ||||
| # print(f"Command to run: {parsed}") | # print(f"Command to run: {parsed}") | ||||
| with open("generated.bpf.c", "w") as f: | with open("generated.bpf.c", "w") as f: | ||||
| for code in parsed: | for code in parsed: | ||||
| f.write(code) | f.write(code) | ||||
| elif args.train: | |||||
| prompts = os.listdir(PROMPTS_DIR) | |||||
| prompts.sort() | |||||
| # conv_uuid could be None, in which case we will create a new session and use it in the next steps | |||||
| session = conv_uuid | |||||
| for file in prompts: | |||||
| info = f"Training ChatGPT with `{file}`" | |||||
| print("-"*len(info)) | |||||
| print(info) | |||||
| print("-"*len(info)) | |||||
| with open(PROMPTS_DIR/file, "r") as f: | |||||
| input_data = f.read() | |||||
| if args.verbose: | |||||
| print(input_data) | |||||
| print("-"*len(info)) | |||||
| _, session = generate_result( | |||||
| chatbot, input_data, conv_uuid, args.verbose) | |||||
| print(f"Trained session: {session}") | |||||
| else: | else: | ||||
| parser.print_help() | parser.print_help() | ||||
| @@ -110,6 +120,7 @@ No explanation required, no instruction required, don't tell me how to compile a | |||||
| What I want is a eBPF program for: {text}.""" | What I want is a eBPF program for: {text}.""" | ||||
| def construct_running_prompt(text: str) -> str: | def construct_running_prompt(text: str) -> str: | ||||
| return f"""You are now a translater from human language to {os.uname()[0]} shell bpftrace command. | return f"""You are now a translater from human language to {os.uname()[0]} shell bpftrace command. | ||||
| No explanation required. | No explanation required. | ||||
| @@ -134,22 +145,24 @@ def make_executable_command(command: str) -> str: | |||||
| return command | return command | ||||
| def generate_result( | |||||
| bot: Chatbot, text: str, session: str = None, print_out: bool = False | |||||
| ) -> str: | |||||
| def generate_result(bot: Chatbot, text: str, session: Optional[str] = None, print_out: bool = False) -> Tuple[str, str]: | |||||
| from io import StringIO | from io import StringIO | ||||
| prev_text = "" | prev_text = "" | ||||
| buf = StringIO() | buf = StringIO() | ||||
| for data in bot.ask(text, conversation_id=session): | |||||
| message = data["message"][len(prev_text) :] | |||||
| received_session = "" | |||||
| for data in bot.ask( | |||||
| text, conversation_id=session | |||||
| ): | |||||
| received_session = data["conversation_id"] | |||||
| message = data["message"][len(prev_text):] | |||||
| if print_out: | if print_out: | ||||
| print(message, end="", flush=True) | print(message, end="", flush=True) | ||||
| buf.write(message) | buf.write(message) | ||||
| prev_text = data["message"] | prev_text = data["message"] | ||||
| if print_out: | if print_out: | ||||
| print() | print() | ||||
| return buf.getvalue() | |||||
| return buf.getvalue(), received_session | |||||
| def extract_code_blocks(text: str) -> List[str]: | def extract_code_blocks(text: str) -> List[str]: | ||||
| @@ -1,12 +1,20 @@ | |||||
| # GPTtrace 🤖 | # GPTtrace 🤖 | ||||
| [](https://opensource.org/licenses/MIT) | |||||
| [](https://github.com/eunomia-bpf/eunomia-bpf/actions) | |||||
| [](https://deepsource.io/gh/eunomia-bpf/eunomia-bpf/?ref=repository-badge) | |||||
| [](https://www.codefactor.io/repository/github/eunomia-bpf/eunomia-bpf) | |||||
| Generate eBPF programs and tracing with ChatGPT and natural language | Generate eBPF programs and tracing with ChatGPT and natural language | ||||
| ## Key Features 💡 | ## Key Features 💡 | ||||
| ### Interact and Tracing your Linux with natural language, it can tell how to write eBPF programs in `BCC`, `libbpf` styles. | ### Interact and Tracing your Linux with natural language, it can tell how to write eBPF programs in `BCC`, `libbpf` styles. | ||||
|  | |||||
| Example: tracing with the query "Count page faults by process" | |||||
|  | |||||
| ### Generate eBPF programs with natural language | ### Generate eBPF programs with natural language | ||||
| @@ -43,12 +51,18 @@ optional arguments: | |||||
| ### Use prompts to teach ChatGPT to write eBPF programs | ### Use prompts to teach ChatGPT to write eBPF programs | ||||
| TODO | |||||
| ```sh | |||||
| ./GPTtrace.py --train | |||||
| ``` | |||||
| This will use the material in the `prompts` directory to teach ChatGPT to write eBPF programs in bpftrace, libbpf, and BCC styles. You can also do this manually by sending the prompts to ChatGPT on its website. | |||||
| ### start your tracing! 🚀 | ### start your tracing! 🚀 | ||||
| For example: | |||||
| ```sh | ```sh | ||||
| ./GPTtrace -e "Count page faults by process" | |||||
| ./GPTtrace.py -e "Count page faults by process" | |||||
| ``` | ``` | ||||
| ## Installation 🔧 | ## Installation 🔧 | ||||
| @@ -1,34 +1,30 @@ | |||||
| You are now a translater from human language to shell bpftrace command. | |||||
| Here are some examples of what you can do with bpftrace shell command: | |||||
| # Files opened by process | |||||
| bpftrace -e 'tracepoint:syscalls:sys_enter_open { printf("%s %s\n", comm, str(args->filename)); }' | |||||
| # Syscall count by program | |||||
| bpftrace -e 'tracepoint:raw_syscalls:sys_enter { @[comm] = count(); }' | |||||
| # Read bytes by process: | |||||
| bpftrace -e 'tracepoint:syscalls:sys_exit_read /args->ret/ { @[comm] = sum(args->ret); }' | |||||
| # Read size distribution by process: | |||||
| bpftrace -e 'tracepoint:syscalls:sys_exit_read { @[comm] = hist(args->ret); }' | |||||
| # Show per-second syscall rates: | |||||
| bpftrace -e 'tracepoint:raw_syscalls:sys_enter { @ = count(); } interval:s:1 { print(@); clear(@); }' | |||||
| # Trace disk size by process | |||||
| bpftrace -e 'tracepoint:block:block_rq_issue { printf("%d %s %d\n", pid, comm, args->bytes); }' | |||||
| # Count page faults by process | |||||
| bpftrace -e 'software:faults:1 { @[comm] = count(); }' | |||||
| # Count LLC cache misses by process name and PID (uses PMCs): | |||||
| bpftrace -e 'hardware:cache-misses:1000000 { @[comm, pid] = count(); }' | |||||
| # Profile user-level stacks at 99 Hertz, for PID 189: | |||||
| bpftrace -e 'profile:hz:99 /pid == 189/ { @[ustack] = count(); }' | |||||
| # Files opened, for processes in the root cgroup-v2 | |||||
| bpftrace -e 'tracepoint:syscalls:sys_enter_openat /cgroup == cgroupid("/sys/fs/cgroup/unified/mycg")/ { printf("%s\n", str(args->filename)); }' | |||||
| After you read and learn about bpftrace, I will ask you to write a bpftrace command to do something. | |||||
| This is a libbpf program: | |||||
| A minimal example of a BPF application that installs a tracepoint handler which is triggered by the write syscall. | |||||
| `minimal` is just that – a minimal practical BPF application example. It | |||||
| doesn't use or require BPF CO-RE, so should run on quite old kernels. It | |||||
| installs a tracepoint handler which is triggered once every second. It uses | |||||
| `bpf_printk()` BPF helper to communicate with the world. | |||||
| ```c | |||||
| #include <linux/bpf.h> | |||||
| #include <bpf/bpf_helpers.h> | |||||
| #include <bpf/bpf_tracing.h> | |||||
| typedef unsigned int u32; | |||||
| typedef int pid_t; | |||||
| const pid_t pid_filter = 0; | |||||
| char LICENSE[] SEC("license") = "Dual BSD/GPL"; | |||||
| SEC("tp/syscalls/sys_enter_write") | |||||
| int handle_tp(void *ctx) | |||||
| { | |||||
| pid_t pid = bpf_get_current_pid_tgid() >> 32; | |||||
| if (pid_filter && pid != pid_filter) | |||||
| return 0; | |||||
| bpf_printk("BPF triggered from PID %d.\n", pid); | |||||
| return 0; | |||||
| } | |||||
| ``` | |||||
| @@ -1,30 +1,8 @@ | |||||
| This is an libbpf program: | |||||
| This is a BCC code example: | |||||
| a minimal example of a BPF application installs a tracepoint handler which is triggered by write syscall | |||||
| Here's the code for hello_world.py: | |||||
| `minimal` is just that – a minimal practical BPF application example. It | |||||
| doesn't use or require BPF CO-RE, so should run on quite old kernels. It | |||||
| installs a tracepoint handler which is triggered once every second. It uses | |||||
| `bpf_printk()` BPF helper to communicate with the world. | |||||
| ```c | |||||
| #include <linux/bpf.h> | |||||
| #include <bpf/bpf_helpers.h> | |||||
| #include <bpf/bpf_tracing.h> | |||||
| typedef unsigned int u32; | |||||
| typedef int pid_t; | |||||
| const pid_t pid_filter = 0; | |||||
| char LICENSE[] SEC("license") = "Dual BSD/GPL"; | |||||
| SEC("tp/syscalls/sys_enter_write") | |||||
| int handle_tp(void *ctx) | |||||
| { | |||||
| pid_t pid = bpf_get_current_pid_tgid() >> 32; | |||||
| if (pid_filter && pid != pid_filter) | |||||
| return 0; | |||||
| bpf_printk("BPF triggered from PID %d.\n", pid); | |||||
| return 0; | |||||
| } | |||||
| ``` | |||||
| ```python | |||||
| from bcc import BPF | |||||
| BPF(text='int kprobe__sys_clone(void *ctx) { bpf_trace_printk("Hello, World!\\n"); return 0; }').trace_print() | |||||
| ``` | |||||
| @@ -1,8 +1,34 @@ | |||||
| This is bcc code example: | |||||
| You are now a translator from human language to bpftrace shell commands. | |||||
| Here are some examples of what you can do with bpftrace shell command: | |||||
| Here's the code for hello_world.py: | |||||
| # Files opened by process | |||||
| bpftrace -e 'tracepoint:syscalls:sys_enter_open { printf("%s %s\n", comm, str(args->filename)); }' | |||||
| ```python | |||||
| from bcc import BPF | |||||
| BPF(text='int kprobe__sys_clone(void *ctx) { bpf_trace_printk("Hello, World!\\n"); return 0; }').trace_print() | |||||
| ``` | |||||
| # Syscall count by program | |||||
| bpftrace -e 'tracepoint:raw_syscalls:sys_enter { @[comm] = count(); }' | |||||
| # Read bytes by process: | |||||
| bpftrace -e 'tracepoint:syscalls:sys_exit_read /args->ret/ { @[comm] = sum(args->ret); }' | |||||
| # Read size distribution by process: | |||||
| bpftrace -e 'tracepoint:syscalls:sys_exit_read { @[comm] = hist(args->ret); }' | |||||
| # Show per-second syscall rates: | |||||
| bpftrace -e 'tracepoint:raw_syscalls:sys_enter { @ = count(); } interval:s:1 { print(@); clear(@); }' | |||||
| # Trace disk size by process | |||||
| bpftrace -e 'tracepoint:block:block_rq_issue { printf("%d %s %d\n", pid, comm, args->bytes); }' | |||||
| # Count page faults by process | |||||
| bpftrace -e 'software:faults:1 { @[comm] = count(); }' | |||||
| # Count LLC cache misses by process name and PID (uses PMCs): | |||||
| bpftrace -e 'hardware:cache-misses:1000000 { @[comm, pid] = count(); }' | |||||
| # Profile user-level stacks at 99 Hertz, for PID 189: | |||||
| bpftrace -e 'profile:hz:99 /pid == 189/ { @[ustack] = count(); }' | |||||
| # Files opened, for processes in the root cgroup-v2 | |||||
| bpftrace -e 'tracepoint:syscalls:sys_enter_openat /cgroup == cgroupid("/sys/fs/cgroup/unified/mycg")/ { printf("%s\n", str(args->filename)); }' | |||||
| After you read and learn about bpftrace, I will ask you to write a bpftrace command to do something. | |||||