| @@ -1,7 +1,8 @@ | |||
| #! /bin/env python3 | |||
| import argparse | |||
| import os | |||
| from typing import List | |||
| from pathlib import Path | |||
| from typing import List, Optional, Tuple | |||
| import pygments | |||
| from marko.block import FencedCode | |||
| @@ -16,6 +17,8 @@ from revChatGPT.V1 import Chatbot | |||
| ENV_UUID = "GPTTRACE_CONV_UUID" | |||
| ENV_ACCESS_TOKEN = "GPTTRACE_ACCESS_TOKEN" | |||
| PROMPTS_DIR = Path("./prompts") | |||
| def pretty_print(input, lexer=MarkdownLexer): | |||
| tokens = list(pygments.lex(input, lexer=lexer())) | |||
| @@ -29,6 +32,7 @@ def main(): | |||
| ) | |||
| group = parser.add_mutually_exclusive_group() | |||
| group.add_argument( | |||
| "-i", "--info", help="Let ChatGPT explain what's eBPF", action="store_true" | |||
| ) | |||
| @@ -40,19 +44,10 @@ def main(): | |||
| metavar="TEXT", | |||
| ) | |||
| group.add_argument( | |||
| "-g", | |||
| "--generate", | |||
| help="Generate eBPF programs using your input with ChatGPT", | |||
| action="store", | |||
| metavar="TEXT", | |||
| ) | |||
| "-g", "--generate", help="Generate eBPF programs using your input with ChatGPT", action="store", metavar="TEXT") | |||
| group.add_argument( | |||
| "--train", help="Train ChatGPT with conversions we provided", action="store_true") | |||
| parser.add_argument( | |||
| "-v", | |||
| "--verbose", | |||
| help="Print the prompt and receive message", | |||
| action="store_true", | |||
| ) | |||
| parser.add_argument( | |||
| "-u", | |||
| "--uuid", | |||
| @@ -78,10 +73,8 @@ def main(): | |||
| elif args.execute is not None: | |||
| desc: str = args.execute | |||
| print("Sending query to ChatGPT: " + desc) | |||
| ret_val = generate_result( | |||
| chatbot, construct_running_prompt(desc), conv_uuid, args.verbose | |||
| ) | |||
| pretty_print(ret_val) | |||
| ret_val, _ = generate_result( | |||
| chatbot, construct_running_prompt(desc), conv_uuid, args.verbose) | |||
| # print(ret_val) | |||
| parsed = make_executable_command(ret_val) | |||
| # print(f"Command to run: {parsed}") | |||
| @@ -90,15 +83,32 @@ def main(): | |||
| elif args.generate is not None: | |||
| desc: str = args.generate | |||
| print("Sending query to ChatGPT: " + desc) | |||
| ret_val = generate_result( | |||
| chatbot, construct_generate_prompt(desc), conv_uuid, True | |||
| ) | |||
| # print(ret_val) | |||
| ret_val, _ = generate_result( | |||
| chatbot, construct_generate_prompt(desc), conv_uuid) | |||
| pretty_print(ret_val) | |||
| parsed = extract_code_blocks(ret_val) | |||
| # print(f"Command to run: {parsed}") | |||
| with open("generated.bpf.c", "w") as f: | |||
| for code in parsed: | |||
| f.write(code) | |||
| elif args.train: | |||
| prompts = os.listdir(PROMPTS_DIR) | |||
| prompts.sort() | |||
| # conv_uuid may be None, in which case a new session is created and is meant to be reused for the following prompts (NOTE(review): the call below passes conv_uuid, not session - confirm) | |||
| session = conv_uuid | |||
| for file in prompts: | |||
| info = f"Training ChatGPT with `{file}`" | |||
| print("-"*len(info)) | |||
| print(info) | |||
| print("-"*len(info)) | |||
| with open(PROMPTS_DIR/file, "r") as f: | |||
| input_data = f.read() | |||
| if args.verbose: | |||
| print(input_data) | |||
| print("-"*len(info)) | |||
| _, session = generate_result( | |||
| chatbot, input_data, conv_uuid, args.verbose) | |||
| print(f"Trained session: {session}") | |||
| else: | |||
| parser.print_help() | |||
| @@ -110,6 +120,7 @@ No explanation required, no instruction required, don't tell me how to compile a | |||
| What I want is a eBPF program for: {text}.""" | |||
| def construct_running_prompt(text: str) -> str: | |||
| return f"""You are now a translater from human language to {os.uname()[0]} shell bpftrace command. | |||
| No explanation required. | |||
| @@ -134,22 +145,24 @@ def make_executable_command(command: str) -> str: | |||
| return command | |||
| def generate_result( | |||
| bot: Chatbot, text: str, session: str = None, print_out: bool = False | |||
| ) -> str: | |||
| def generate_result(bot: Chatbot, text: str, session: Optional[str] = None, print_out: bool = False) -> Tuple[str, str]: | |||
| from io import StringIO | |||
| prev_text = "" | |||
| buf = StringIO() | |||
| for data in bot.ask(text, conversation_id=session): | |||
| message = data["message"][len(prev_text) :] | |||
| received_session = "" | |||
| for data in bot.ask( | |||
| text, conversation_id=session | |||
| ): | |||
| received_session = data["conversation_id"] | |||
| message = data["message"][len(prev_text):] | |||
| if print_out: | |||
| print(message, end="", flush=True) | |||
| buf.write(message) | |||
| prev_text = data["message"] | |||
| if print_out: | |||
| print() | |||
| return buf.getvalue() | |||
| return buf.getvalue(), received_session | |||
| def extract_code_blocks(text: str) -> List[str]: | |||
| @@ -1,12 +1,20 @@ | |||
| # GPTtrace 🤖 | |||
| [](https://opensource.org/licenses/MIT) | |||
| [](https://github.com/eunomia-bpf/eunomia-bpf/actions) | |||
| [](https://deepsource.io/gh/eunomia-bpf/eunomia-bpf/?ref=repository-badge) | |||
| [](https://www.codefactor.io/repository/github/eunomia-bpf/eunomia-bpf) | |||
| Generate eBPF programs and tracing with ChatGPT and natural language | |||
| ## Key Features 💡 | |||
| ### Interact with and trace your Linux system using natural language; it can also tell you how to write eBPF programs in `BCC` and `libbpf` styles. | |||
|  | |||
| Example: tracing with the query "Count page faults by process" | |||
|  | |||
| ### Generate eBPF programs with natural language | |||
| @@ -43,12 +51,18 @@ optional arguments: | |||
| ### Use prompts to teach ChatGPT to write eBPF programs | |||
| TODO | |||
| ```sh | |||
| ./GPTtrace.py --train | |||
| ``` | |||
| This will use the material in the `prompts` directory to teach ChatGPT to write eBPF programs in bpftrace, libbpf, and BCC styles. You can also do this manually by sending the prompts to ChatGPT on its website. | |||
| ### start your tracing! 🚀 | |||
| For example: | |||
| ```sh | |||
| ./GPTtrace -e "Count page faults by process" | |||
| ./GPTtrace.py -e "Count page faults by process" | |||
| ``` | |||
| ## Installation 🔧 | |||
| @@ -1,34 +1,30 @@ | |||
| You are now a translater from human language to shell bpftrace command. | |||
| Here are some examples of what you can do with bpftrace shell command: | |||
| # Files opened by process | |||
| bpftrace -e 'tracepoint:syscalls:sys_enter_open { printf("%s %s\n", comm, str(args->filename)); }' | |||
| # Syscall count by program | |||
| bpftrace -e 'tracepoint:raw_syscalls:sys_enter { @[comm] = count(); }' | |||
| # Read bytes by process: | |||
| bpftrace -e 'tracepoint:syscalls:sys_exit_read /args->ret/ { @[comm] = sum(args->ret); }' | |||
| # Read size distribution by process: | |||
| bpftrace -e 'tracepoint:syscalls:sys_exit_read { @[comm] = hist(args->ret); }' | |||
| # Show per-second syscall rates: | |||
| bpftrace -e 'tracepoint:raw_syscalls:sys_enter { @ = count(); } interval:s:1 { print(@); clear(@); }' | |||
| # Trace disk size by process | |||
| bpftrace -e 'tracepoint:block:block_rq_issue { printf("%d %s %d\n", pid, comm, args->bytes); }' | |||
| # Count page faults by process | |||
| bpftrace -e 'software:faults:1 { @[comm] = count(); }' | |||
| # Count LLC cache misses by process name and PID (uses PMCs): | |||
| bpftrace -e 'hardware:cache-misses:1000000 { @[comm, pid] = count(); }' | |||
| # Profile user-level stacks at 99 Hertz, for PID 189: | |||
| bpftrace -e 'profile:hz:99 /pid == 189/ { @[ustack] = count(); }' | |||
| # Files opened, for processes in the root cgroup-v2 | |||
| bpftrace -e 'tracepoint:syscalls:sys_enter_openat /cgroup == cgroupid("/sys/fs/cgroup/unified/mycg")/ { printf("%s\n", str(args->filename)); }' | |||
| After you read and learn about bpftrace, I will ask you to write a bpftrace command to do something. | |||
| This is a libbpf program: | |||
| A minimal example of a BPF application that installs a tracepoint handler triggered by the write syscall. | |||
| `minimal` is just that – a minimal practical BPF application example. It | |||
| doesn't use or require BPF CO-RE, so should run on quite old kernels. It | |||
| installs a tracepoint handler which is triggered once every second. It uses | |||
| `bpf_printk()` BPF helper to communicate with the world. | |||
| ```c | |||
| #include <linux/bpf.h> | |||
| #include <bpf/bpf_helpers.h> | |||
| #include <bpf/bpf_tracing.h> | |||
| typedef unsigned int u32; | |||
| typedef int pid_t; | |||
| const pid_t pid_filter = 0; | |||
| char LICENSE[] SEC("license") = "Dual BSD/GPL"; | |||
| SEC("tp/syscalls/sys_enter_write") | |||
| int handle_tp(void *ctx) | |||
| { | |||
| pid_t pid = bpf_get_current_pid_tgid() >> 32; | |||
| if (pid_filter && pid != pid_filter) | |||
| return 0; | |||
| bpf_printk("BPF triggered from PID %d.\n", pid); | |||
| return 0; | |||
| } | |||
| ``` | |||
| @@ -1,30 +1,8 @@ | |||
| This is an libbpf program: | |||
| This is a BCC code example: | |||
| a minimal example of a BPF application installs a tracepoint handler which is triggered by write syscall | |||
| Here's the code for hello_world.py: | |||
| `minimal` is just that – a minimal practical BPF application example. It | |||
| doesn't use or require BPF CO-RE, so should run on quite old kernels. It | |||
| installs a tracepoint handler which is triggered once every second. It uses | |||
| `bpf_printk()` BPF helper to communicate with the world. | |||
| ```c | |||
| #include <linux/bpf.h> | |||
| #include <bpf/bpf_helpers.h> | |||
| #include <bpf/bpf_tracing.h> | |||
| typedef unsigned int u32; | |||
| typedef int pid_t; | |||
| const pid_t pid_filter = 0; | |||
| char LICENSE[] SEC("license") = "Dual BSD/GPL"; | |||
| SEC("tp/syscalls/sys_enter_write") | |||
| int handle_tp(void *ctx) | |||
| { | |||
| pid_t pid = bpf_get_current_pid_tgid() >> 32; | |||
| if (pid_filter && pid != pid_filter) | |||
| return 0; | |||
| bpf_printk("BPF triggered from PID %d.\n", pid); | |||
| return 0; | |||
| } | |||
| ``` | |||
| ```python | |||
| from bcc import BPF | |||
| BPF(text='int kprobe__sys_clone(void *ctx) { bpf_trace_printk("Hello, World!\\n"); return 0; }').trace_print() | |||
| ``` | |||
| @@ -1,8 +1,34 @@ | |||
| This is bcc code example: | |||
| You are now a translator from human language to shell bpftrace command. | |||
| Here are some examples of what you can do with bpftrace shell command: | |||
| Here's the code for hello_world.py: | |||
| # Files opened by process | |||
| bpftrace -e 'tracepoint:syscalls:sys_enter_open { printf("%s %s\n", comm, str(args->filename)); }' | |||
| ```python | |||
| from bcc import BPF | |||
| BPF(text='int kprobe__sys_clone(void *ctx) { bpf_trace_printk("Hello, World!\\n"); return 0; }').trace_print() | |||
| ``` | |||
| # Syscall count by program | |||
| bpftrace -e 'tracepoint:raw_syscalls:sys_enter { @[comm] = count(); }' | |||
| # Read bytes by process: | |||
| bpftrace -e 'tracepoint:syscalls:sys_exit_read /args->ret/ { @[comm] = sum(args->ret); }' | |||
| # Read size distribution by process: | |||
| bpftrace -e 'tracepoint:syscalls:sys_exit_read { @[comm] = hist(args->ret); }' | |||
| # Show per-second syscall rates: | |||
| bpftrace -e 'tracepoint:raw_syscalls:sys_enter { @ = count(); } interval:s:1 { print(@); clear(@); }' | |||
| # Trace disk size by process | |||
| bpftrace -e 'tracepoint:block:block_rq_issue { printf("%d %s %d\n", pid, comm, args->bytes); }' | |||
| # Count page faults by process | |||
| bpftrace -e 'software:faults:1 { @[comm] = count(); }' | |||
| # Count LLC cache misses by process name and PID (uses PMCs): | |||
| bpftrace -e 'hardware:cache-misses:1000000 { @[comm, pid] = count(); }' | |||
| # Profile user-level stacks at 99 Hertz, for PID 189: | |||
| bpftrace -e 'profile:hz:99 /pid == 189/ { @[ustack] = count(); }' | |||
| # Files opened, for processes in the root cgroup-v2 | |||
| bpftrace -e 'tracepoint:syscalls:sys_enter_openat /cgroup == cgroupid("/sys/fs/cgroup/unified/mycg")/ { printf("%s\n", str(args->filename)); }' | |||
| After you read and learn about bpftrace, I will ask you to write a bpftrace command to do something. | |||