From 4c498cabceb8b80c9d1d0a1a658783bfc4acf29f Mon Sep 17 00:00:00 2001 From: officeyutong Date: Sat, 18 Feb 2023 12:37:55 +0000 Subject: [PATCH] update: support training ChatGPT --- GPTtrace.py | 40 +++++++++++++++++++++++++++----- prompts/1.md | 64 ++++++++++++++++++++++++---------------------------- prompts/2.md | 34 +++++----------------------- prompts/3.md | 38 ++++++++++++++++++++++++++----- 4 files changed, 102 insertions(+), 74 deletions(-) diff --git a/GPTtrace.py b/GPTtrace.py index ec1a560..459a848 100755 --- a/GPTtrace.py +++ b/GPTtrace.py @@ -3,14 +3,17 @@ import os import argparse from revChatGPT.V1 import Chatbot -from typing import List +from typing import List, Optional, Tuple from marko.parser import Parser from marko.block import FencedCode from marko.inline import RawText +from pathlib import Path ENV_UUID = "GPTTRACE_CONV_UUID" ENV_ACCESS_TOKEN = "GPTTRACE_ACCESS_TOKEN" +PROMPTS_DIR = Path("./prompts") + def main(): parser = argparse.ArgumentParser( @@ -18,13 +21,16 @@ def main(): description='Use ChatGPT to write eBPF programs (bpftrace, etc.)') group = parser.add_mutually_exclusive_group() + group.add_argument( "-i", "--info", help="Let ChatGPT explain what's eBPF", action="store_true") group.add_argument( "-e", "--execute", help="Generate commands using your input with ChatGPT, and run it", action="store", metavar="TEXT") group.add_argument( "-g", "--generate", help="Generate eBPF programs using your input with ChatGPT", action="store", metavar="TEXT") - + group.add_argument( + "--train", help="Train ChatGPT with conversions we provided", action="store_true") + parser.add_argument( "-v", "--verbose", help="Print the prompt and receive message", action="store_true") parser.add_argument( @@ -45,7 +51,7 @@ def main(): elif args.execute is not None: desc: str = args.execute print("Sending query to ChatGPT: " + desc) - ret_val = generate_result( + ret_val, _ = generate_result( chatbot, construct_running_prompt(desc), conv_uuid, args.verbose) # print(ret_val) parsed = make_executable_command(ret_val) @@ -55,7 +61,7 @@ def main(): elif args.generate is not None: desc: str = args.generate print("Sending query to ChatGPT: " + desc) - ret_val = generate_result( + ret_val, _ = generate_result( chatbot, construct_generate_prompt(desc), conv_uuid, True) # print(ret_val) parsed = extract_code_blocks(ret_val) @@ -63,15 +69,35 @@ def main(): with open("generated.bpf.c", "w") as f: for code in parsed: f.write(code) + elif args.train: + prompts = os.listdir(PROMPTS_DIR) + prompts.sort() + # conv_uuid could be None, in which we will create a new session and use it in the next steps + session = conv_uuid + for file in prompts: + info = f"Training ChatGPT with `{file}`" + print("-"*len(info)) + print(info) + print("-"*len(info)) + with open(PROMPTS_DIR/file, "r") as f: + input_data = f.read() + if args.verbose: + print(input_data) + print("-"*len(info)) + _, session = generate_result( + chatbot, input_data, conv_uuid, args.verbose) + print(f"Trained session: {session}") else: parser.print_help() + def construct_generate_prompt(text: str) -> str: return f'''You are now a translater from human language to {os.uname()[0]} eBPF programs. Please write eBPF programs for me. No explanation required, no instruction required, don't tell me how to compile and run. What I want is a eBPF program for: {text}.''' + def construct_running_prompt(text: str) -> str: return f'''You are now a translater from human language to {os.uname()[0]} shell bpftrace command. No explanation required. @@ -96,13 +122,15 @@ def make_executable_command(command: str) -> str: return command -def generate_result(bot: Chatbot, text: str, session: str = None, print_out: bool = False) -> str: +def generate_result(bot: Chatbot, text: str, session: Optional[str] = None, print_out: bool = False) -> Tuple[str, str]: from io import StringIO prev_text = "" buf = StringIO() + received_session = "" for data in bot.ask( text, conversation_id=session ): + received_session = data["conversation_id"] message = data["message"][len(prev_text):] if print_out: print(message, end="", flush=True) @@ -110,7 +138,7 @@ def generate_result(bot: Chatbot, text: str, session: str = None, print_out: boo prev_text = data["message"] if print_out: print() - return buf.getvalue() + return buf.getvalue(), received_session def extract_code_blocks(text: str) -> List[str]: diff --git a/prompts/1.md b/prompts/1.md index b0a6460..e7d8c5c 100644 --- a/prompts/1.md +++ b/prompts/1.md @@ -1,34 +1,30 @@ -You are now a translater from human language to shell bpftrace command. -Here are some examples of what you can do with bpftrace shell command: - -# Files opened by process -bpftrace -e 'tracepoint:syscalls:sys_enter_open { printf("%s %s\n", comm, str(args->filename)); }' - -# Syscall count by program -bpftrace -e 'tracepoint:raw_syscalls:sys_enter { @[comm] = count(); }' - -# Read bytes by process: -bpftrace -e 'tracepoint:syscalls:sys_exit_read /args->ret/ { @[comm] = sum(args->ret); }' - -# Read size distribution by process: -bpftrace -e 'tracepoint:syscalls:sys_exit_read { @[comm] = hist(args->ret); }' - -# Show per-second syscall rates: -bpftrace -e 'tracepoint:raw_syscalls:sys_enter { @ = count(); } interval:s:1 { print(@); clear(@); }' - -# Trace disk size by process -bpftrace -e 'tracepoint:block:block_rq_issue { printf("%d %s %d\n", pid, comm, args->bytes); }' - -# Count page faults by process -bpftrace -e 'software:faults:1 { @[comm] = count(); }' - -# Count LLC cache misses by process name and PID (uses PMCs): -bpftrace -e 'hardware:cache-misses:1000000 { @[comm, pid] = count(); }' - -# Profile user-level stacks at 99 Hertz, for PID 189: -bpftrace -e 'profile:hz:99 /pid == 189/ { @[ustack] = count(); }' - -# Files opened, for processes in the root cgroup-v2 -bpftrace -e 'tracepoint:syscalls:sys_enter_openat /cgroup == cgroupid("/sys/fs/cgroup/unified/mycg")/ { printf("%s\n", str(args->filename)); }' - -After you read and learn about bpftrace, I will ask you to write a bpftrace command to do something. +This is an libbpf program: + +a minimal example of a BPF application installs a tracepoint handler which is triggered by write syscall + +`minimal` is just that – a minimal practical BPF application example. It +doesn't use or require BPF CO-RE, so should run on quite old kernels. It +installs a tracepoint handler which is triggered once every second. It uses +`bpf_printk()` BPF helper to communicate with the world. + +```c +#include +#include +#include + +typedef unsigned int u32; +typedef int pid_t; +const pid_t pid_filter = 0; + +char LICENSE[] SEC("license") = "Dual BSD/GPL"; + +SEC("tp/syscalls/sys_enter_write") +int handle_tp(void *ctx) +{ + pid_t pid = bpf_get_current_pid_tgid() >> 32; + if (pid_filter && pid != pid_filter) + return 0; + bpf_printk("BPF triggered from PID %d.\n", pid); + return 0; +} +``` \ No newline at end of file diff --git a/prompts/2.md b/prompts/2.md index e7d8c5c..e6ac645 100644 --- a/prompts/2.md +++ b/prompts/2.md @@ -1,30 +1,8 @@ -This is an libbpf program: +This is bcc code example: -a minimal example of a BPF application installs a tracepoint handler which is triggered by write syscall +Here's the code for hello_world.py: -`minimal` is just that – a minimal practical BPF application example. It -doesn't use or require BPF CO-RE, so should run on quite old kernels. It -installs a tracepoint handler which is triggered once every second. It uses -`bpf_printk()` BPF helper to communicate with the world. - -```c -#include -#include -#include - -typedef unsigned int u32; -typedef int pid_t; -const pid_t pid_filter = 0; - -char LICENSE[] SEC("license") = "Dual BSD/GPL"; - -SEC("tp/syscalls/sys_enter_write") -int handle_tp(void *ctx) -{ - pid_t pid = bpf_get_current_pid_tgid() >> 32; - if (pid_filter && pid != pid_filter) - return 0; - bpf_printk("BPF triggered from PID %d.\n", pid); - return 0; -} -``` \ No newline at end of file +```python +from bcc import BPF +BPF(text='int kprobe__sys_clone(void *ctx) { bpf_trace_printk("Hello, World!\\n"); return 0; }').trace_print() +``` diff --git a/prompts/3.md b/prompts/3.md index e6ac645..b0a6460 100644 --- a/prompts/3.md +++ b/prompts/3.md @@ -1,8 +1,34 @@ -This is bcc code example: +You are now a translater from human language to shell bpftrace command. +Here are some examples of what you can do with bpftrace shell command: -Here's the code for hello_world.py: +# Files opened by process +bpftrace -e 'tracepoint:syscalls:sys_enter_open { printf("%s %s\n", comm, str(args->filename)); }' -```python -from bcc import BPF -BPF(text='int kprobe__sys_clone(void *ctx) { bpf_trace_printk("Hello, World!\\n"); return 0; }').trace_print() -``` +# Syscall count by program +bpftrace -e 'tracepoint:raw_syscalls:sys_enter { @[comm] = count(); }' + +# Read bytes by process: +bpftrace -e 'tracepoint:syscalls:sys_exit_read /args->ret/ { @[comm] = sum(args->ret); }' + +# Read size distribution by process: +bpftrace -e 'tracepoint:syscalls:sys_exit_read { @[comm] = hist(args->ret); }' + +# Show per-second syscall rates: +bpftrace -e 'tracepoint:raw_syscalls:sys_enter { @ = count(); } interval:s:1 { print(@); clear(@); }' + +# Trace disk size by process +bpftrace -e 'tracepoint:block:block_rq_issue { printf("%d %s %d\n", pid, comm, args->bytes); }' + +# Count page faults by process +bpftrace -e 'software:faults:1 { @[comm] = count(); }' + +# Count LLC cache misses by process name and PID (uses PMCs): +bpftrace -e 'hardware:cache-misses:1000000 { @[comm, pid] = count(); }' + +# Profile user-level stacks at 99 Hertz, for PID 189: +bpftrace -e 'profile:hz:99 /pid == 189/ { @[ustack] = count(); }' + +# Files opened, for processes in the root cgroup-v2 +bpftrace -e 'tracepoint:syscalls:sys_enter_openat /cgroup == cgroupid("/sys/fs/cgroup/unified/mycg")/ { printf("%s\n", str(args->filename)); }' + +After you read and learn about bpftrace, I will ask you to write a bpftrace command to do something.