update: support training ChatGPT

2 years ago · 4c498cabce
--- a/GPTtrace.py
+++ b/GPTtrace.py
@@ -3,14 +3,17 @@ import os
 import argparse
 from revChatGPT.V1 import Chatbot
 from typing import List
 from typing import List, Optional, Tuple
 from marko.parser import Parser
 from marko.block import FencedCode
 from marko.inline import RawText
 from pathlib import Path
 ENV_UUID = "GPTTRACE_CONV_UUID"
 ENV_ACCESS_TOKEN = "GPTTRACE_ACCESS_TOKEN"
 PROMPTS_DIR = Path("./prompts")
 def main():
    parser = argparse.ArgumentParser(
@@ -18,13 +21,16 @@ def main():
        description='Use ChatGPT to write eBPF programs (bpftrace, etc.)')
    group = parser.add_mutually_exclusive_group()
    group.add_argument(
        "-i", "--info", help="Let ChatGPT explain what's eBPF", action="store_true")
    group.add_argument(
        "-e", "--execute", help="Generate commands using your input with ChatGPT, and run it", action="store", metavar="TEXT")
    group.add_argument(
        "-g", "--generate", help="Generate eBPF programs using your input with ChatGPT", action="store", metavar="TEXT")
    group.add_argument(
        "--train", help="Train ChatGPT with conversions we provided", action="store_true")
    parser.add_argument(
        "-v", "--verbose", help="Print the prompt and receive message", action="store_true")
    parser.add_argument(
@@ -45,7 +51,7 @@ def main():
    elif args.execute is not None:
        desc: str = args.execute
        print("Sending query to ChatGPT: " + desc)
        ret_val = generate_result(
        ret_val, _ = generate_result(
            chatbot, construct_running_prompt(desc), conv_uuid, args.verbose)
        # print(ret_val)
        parsed = make_executable_command(ret_val)
@@ -55,7 +61,7 @@ def main():
    elif args.generate is not None:
        desc: str = args.generate
        print("Sending query to ChatGPT: " + desc)
        ret_val = generate_result(
        ret_val, _ = generate_result(
            chatbot, construct_generate_prompt(desc), conv_uuid, True)
        # print(ret_val)
        parsed = extract_code_blocks(ret_val)
@@ -63,15 +69,35 @@ def main():
        with open("generated.bpf.c", "w") as f:
            for code in parsed:
                f.write(code)
    elif args.train:
        prompts = os.listdir(PROMPTS_DIR)
        prompts.sort()
        # conv_uuid could be None, in which we will create a new session and use it in the next steps
        session = conv_uuid
        for file in prompts:
            info = f"Training ChatGPT with `{file}`"
            print("-"*len(info))
            print(info)
            print("-"*len(info))
            with open(PROMPTS_DIR/file, "r") as f:
                input_data = f.read()
            if args.verbose:
                print(input_data)
            print("-"*len(info))
            _, session = generate_result(
                chatbot, input_data, conv_uuid, args.verbose)
        print(f"Trained session: {session}")
    else:
        parser.print_help()
 def construct_generate_prompt(text: str) -> str:
    """Build the prompt that asks ChatGPT to write an eBPF program.

    The system name reported by ``os.uname()[0]`` and the caller's
    description ``text`` are interpolated into a fixed instruction template.

    Args:
        text: Description of what the eBPF program should do.

    Returns:
        The complete prompt string.
    """
    return f'''You are now a translater from human language to {os.uname()[0]} eBPF programs.
 Please write eBPF programs for me.
 No explanation required, no instruction required, don't tell me how to compile and run.
 What I want is a eBPF program for: {text}.'''
 def construct_running_prompt(text: str) -> str:
    return f'''You are now a translater from human language to {os.uname()[0]} shell bpftrace command. 
 No explanation required.
@@ -96,13 +122,15 @@ def make_executable_command(command: str) -> str:
    return command
 def generate_result(bot: Chatbot, text: str, session: str = None, print_out: bool = False) -> str:
 def generate_result(bot: Chatbot, text: str, session: Optional[str] = None, print_out: bool = False) -> Tuple[str, str]:
    from io import StringIO
    prev_text = ""
    buf = StringIO()
    received_session = ""
    for data in bot.ask(
        text, conversation_id=session
    ):
        received_session = data["conversation_id"]
        message = data["message"][len(prev_text):]
        if print_out:
            print(message, end="", flush=True)
@@ -110,7 +138,7 @@ def generate_result(bot: Chatbot, text: str, session: str = None, print_out: boo
        prev_text = data["message"]
    if print_out:
        print()
    return buf.getvalue()
    return buf.getvalue(), received_session
 def extract_code_blocks(text: str) -> List[str]:
--- a/prompts/1.md
+++ b/prompts/1.md
@@ -1,34 +1,30 @@
 You are now a translator from human language to shell bpftrace command. 
 Here are some examples of what you can do with bpftrace shell command:
 # Files opened by process
 bpftrace -e 'tracepoint:syscalls:sys_enter_open { printf("%s %s\n", comm, str(args->filename)); }'
 # Syscall count by program
 bpftrace -e 'tracepoint:raw_syscalls:sys_enter { @[comm] = count(); }'
 # Read bytes by process:
 bpftrace -e 'tracepoint:syscalls:sys_exit_read /args->ret/ { @[comm] = sum(args->ret); }'
 # Read size distribution by process:
 bpftrace -e 'tracepoint:syscalls:sys_exit_read { @[comm] = hist(args->ret); }'
 # Show per-second syscall rates:
 bpftrace -e 'tracepoint:raw_syscalls:sys_enter { @ = count(); } interval:s:1 { print(@); clear(@); }'
 # Trace disk size by process
 bpftrace -e 'tracepoint:block:block_rq_issue { printf("%d %s %d\n", pid, comm, args->bytes); }'
 # Count page faults by process
 bpftrace -e 'software:faults:1 { @[comm] = count(); }'
 # Count LLC cache misses by process name and PID (uses PMCs):
 bpftrace -e 'hardware:cache-misses:1000000 { @[comm, pid] = count(); }'
 # Profile user-level stacks at 99 Hertz, for PID 189:
 bpftrace -e 'profile:hz:99 /pid == 189/ { @[ustack] = count(); }'
 # Files opened, for processes in the root cgroup-v2
 bpftrace -e 'tracepoint:syscalls:sys_enter_openat /cgroup == cgroupid("/sys/fs/cgroup/unified/mycg")/ { printf("%s\n", str(args->filename)); }'
 After you read and learn about bpftrace, I will ask you to write a bpftrace command to do something.
 This is a libbpf program:
 A minimal example of a BPF application that installs a tracepoint handler which is triggered by the write syscall.
 `minimal` is just that – a minimal practical BPF application example. It
 doesn't use or require BPF CO-RE, so should run on quite old kernels. It
 installs a tracepoint handler which is triggered once every second. It uses
 `bpf_printk()` BPF helper to communicate with the world. 
 ```c
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
 typedef unsigned int u32;
 typedef int pid_t;
 const pid_t pid_filter = 0;
 char LICENSE[] SEC("license") = "Dual BSD/GPL";
 SEC("tp/syscalls/sys_enter_write")
 int handle_tp(void *ctx)
 {
 	pid_t pid = bpf_get_current_pid_tgid() >> 32;
 	if (pid_filter && pid != pid_filter)
 		return 0;
 	bpf_printk("BPF triggered from PID %d.\n", pid);
 	return 0;
 }
 ```
--- a/prompts/2.md
+++ b/prompts/2.md
@@ -1,30 +1,8 @@
 This is a libbpf program:
 This is a bcc code example:
 A minimal example of a BPF application that installs a tracepoint handler which is triggered by the write syscall.
 Here's the code for hello_world.py:
 `minimal` is just that – a minimal practical BPF application example. It
 doesn't use or require BPF CO-RE, so should run on quite old kernels. It
 installs a tracepoint handler which is triggered once every second. It uses
 `bpf_printk()` BPF helper to communicate with the world. 
 ```c
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
 typedef unsigned int u32;
 typedef int pid_t;
 const pid_t pid_filter = 0;
 char LICENSE[] SEC("license") = "Dual BSD/GPL";
 SEC("tp/syscalls/sys_enter_write")
 int handle_tp(void *ctx)
 {
 	pid_t pid = bpf_get_current_pid_tgid() >> 32;
 	if (pid_filter && pid != pid_filter)
 		return 0;
 	bpf_printk("BPF triggered from PID %d.\n", pid);
 	return 0;
 }
 ```
 ```python
 from bcc import BPF
 BPF(text='int kprobe__sys_clone(void *ctx) { bpf_trace_printk("Hello, World!\\n"); return 0; }').trace_print()
 ```
--- a/prompts/3.md
+++ b/prompts/3.md
@@ -1,8 +1,34 @@
 This is a bcc code example:
 You are now a translator from human language to shell bpftrace command. 
 Here are some examples of what you can do with bpftrace shell command:
 Here's the code for hello_world.py:
 # Files opened by process
 bpftrace -e 'tracepoint:syscalls:sys_enter_open { printf("%s %s\n", comm, str(args->filename)); }'
 ```python
 from bcc import BPF
 BPF(text='int kprobe__sys_clone(void *ctx) { bpf_trace_printk("Hello, World!\\n"); return 0; }').trace_print()
 ```
 # Syscall count by program
 bpftrace -e 'tracepoint:raw_syscalls:sys_enter { @[comm] = count(); }'
 # Read bytes by process:
 bpftrace -e 'tracepoint:syscalls:sys_exit_read /args->ret/ { @[comm] = sum(args->ret); }'
 # Read size distribution by process:
 bpftrace -e 'tracepoint:syscalls:sys_exit_read { @[comm] = hist(args->ret); }'
 # Show per-second syscall rates:
 bpftrace -e 'tracepoint:raw_syscalls:sys_enter { @ = count(); } interval:s:1 { print(@); clear(@); }'
 # Trace disk size by process
 bpftrace -e 'tracepoint:block:block_rq_issue { printf("%d %s %d\n", pid, comm, args->bytes); }'
 # Count page faults by process
 bpftrace -e 'software:faults:1 { @[comm] = count(); }'
 # Count LLC cache misses by process name and PID (uses PMCs):
 bpftrace -e 'hardware:cache-misses:1000000 { @[comm, pid] = count(); }'
 # Profile user-level stacks at 99 Hertz, for PID 189:
 bpftrace -e 'profile:hz:99 /pid == 189/ { @[ustack] = count(); }'
 # Files opened, for processes in the root cgroup-v2
 bpftrace -e 'tracepoint:syscalls:sys_enter_openat /cgroup == cgroupid("/sys/fs/cgroup/unified/mycg")/ { printf("%s\n", str(args->filename)); }'
 After you read and learn about bpftrace, I will ask you to write a bpftrace command to do something.