Merge branch 'main' of https://github.com/eunomia-bpf/GPTtrace

2 years ago · 38fc63fc6d
--- a/GPTtrace.py
+++ b/GPTtrace.py
@@ -1,7 +1,8 @@
 #! /bin/env python3
 import argparse
 import os
 from typing import List
 from pathlib import Path
 from typing import List, Optional, Tuple

 import pygments
 from marko.block import FencedCode
@@ -16,6 +17,8 @@ from revChatGPT.V1 import Chatbot
 ENV_UUID = "GPTTRACE_CONV_UUID"
 ENV_ACCESS_TOKEN = "GPTTRACE_ACCESS_TOKEN"

 PROMPTS_DIR = Path("./prompts")


 def pretty_print(input, lexer=MarkdownLexer):
    tokens = list(pygments.lex(input, lexer=lexer()))
@@ -29,6 +32,7 @@ def main():
    )

    group = parser.add_mutually_exclusive_group()

    group.add_argument(
        "-i", "--info", help="Let ChatGPT explain what's eBPF", action="store_true"
    )
@@ -40,19 +44,10 @@ def main():
        metavar="TEXT",
    )
    group.add_argument(
        "-g",
        "--generate",
        help="Generate eBPF programs using your input with ChatGPT",
        action="store",
        metavar="TEXT",
    )
        "-g", "--generate", help="Generate eBPF programs using your input with ChatGPT", action="store", metavar="TEXT")
    group.add_argument(
        "--train", help="Train ChatGPT with conversions we provided", action="store_true")

    parser.add_argument(
        "-v",
        "--verbose",
        help="Print the prompt and receive message",
        action="store_true",
    )
    parser.add_argument(
        "-u",
        "--uuid",
@@ -78,10 +73,8 @@ def main():
    elif args.execute is not None:
        desc: str = args.execute
        print("Sending query to ChatGPT: " + desc)
        ret_val = generate_result(
            chatbot, construct_running_prompt(desc), conv_uuid, args.verbose
        )
        pretty_print(ret_val)
        ret_val, _ = generate_result(
            chatbot, construct_running_prompt(desc), conv_uuid, args.verbose)
        # print(ret_val)
        parsed = make_executable_command(ret_val)
        # print(f"Command to run: {parsed}")
@@ -90,15 +83,32 @@ def main():
    elif args.generate is not None:
        desc: str = args.generate
        print("Sending query to ChatGPT: " + desc)
        ret_val = generate_result(
            chatbot, construct_generate_prompt(desc), conv_uuid, True
        )
        # print(ret_val)
        ret_val, _ = generate_result(
            chatbot, construct_generate_prompt(desc), conv_uuid)
        pretty_print(ret_val)
        parsed = extract_code_blocks(ret_val)
        # print(f"Command to run: {parsed}")
        with open("generated.bpf.c", "w") as f:
            for code in parsed:
                f.write(code)
    elif args.train:
        prompts = os.listdir(PROMPTS_DIR)
        prompts.sort()
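        # conv_uuid may be None, in which case a new session is created by the first request and should be reused for the following prompts.
        # NOTE(review): the call below passes `conv_uuid`, not `session`, so the newly created session does not appear to be reused - confirm.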
        session = conv_uuid
        for file in prompts:
            info = f"Training ChatGPT with `{file}`"
            print("-"*len(info))
            print(info)
            print("-"*len(info))
            with open(PROMPTS_DIR/file, "r") as f:
                input_data = f.read()
            if args.verbose:
                print(input_data)
            print("-"*len(info))
            _, session = generate_result(
                chatbot, input_data, conv_uuid, args.verbose)
        print(f"Trained session: {session}")
    else:
        parser.print_help()

@@ -110,6 +120,7 @@ No explanation required, no instruction required, don't tell me how to compile a
 What I want is a eBPF program for: {text}."""



 def construct_running_prompt(text: str) -> str:
    return f"""You are now a translater from human language to {os.uname()[0]} shell bpftrace command.
 No explanation required.
@@ -134,22 +145,24 @@ def make_executable_command(command: str) -> str:
    return command


 def generate_result(
    bot: Chatbot, text: str, session: str = None, print_out: bool = False
 ) -> str:
 def generate_result(bot: Chatbot, text: str, session: Optional[str] = None, print_out: bool = False) -> Tuple[str, str]:
    from io import StringIO

    prev_text = ""
    buf = StringIO()
    for data in bot.ask(text, conversation_id=session):
        message = data["message"][len(prev_text) :]
    received_session = ""
    for data in bot.ask(
        text, conversation_id=session
    ):
        received_session = data["conversation_id"]
        message = data["message"][len(prev_text):]
        if print_out:
            print(message, end="", flush=True)
        buf.write(message)
        prev_text = data["message"]
    if print_out:
        print()
    return buf.getvalue()
    return buf.getvalue(), received_session


 def extract_code_blocks(text: str) -> List[str]:
--- a/README.md
+++ b/README.md
@@ -1,12 +1,20 @@
 # GPTtrace 🤖

 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
 [![Actions Status](https://github.com/eunomia-bpf/eunomia-bpf/workflows/Ubuntu/badge.svg)](https://github.com/eunomia-bpf/eunomia-bpf/actions)
 [![DeepSource](https://deepsource.io/gh/eunomia-bpf/eunomia-bpf.svg/?label=active+issues&show_trend=true&token=rcSI3J1-gpwLIgZWtKZC-N6C)](https://deepsource.io/gh/eunomia-bpf/eunomia-bpf/?ref=repository-badge)
 [![CodeFactor](https://www.codefactor.io/repository/github/eunomia-bpf/eunomia-bpf/badge)](https://www.codefactor.io/repository/github/eunomia-bpf/eunomia-bpf)


 Generate eBPF programs and tracing with ChatGPT and natural language

 ## Key Features 💡

 ### Interact with and trace your Linux system using natural language; it can also explain how to write eBPF programs in `BCC` and `libbpf` styles.

 ![result](doc/result.png)
 Example: tracing with the query "Count page faults by process"

 ![result](doc/result.gif)

 ### Generate eBPF programs with natural language

@@ -43,12 +51,18 @@ optional arguments:

 ### Use prompts to teach ChatGPT to write eBPF programs

 TODO
 ```sh
 ./GPTtrace.py --train
 ```

 This will use the material in the `prompts` directory to teach ChatGPT to write eBPF programs in bpftrace, libbpf, and BCC styles. You can also do this manually by sending the prompts to ChatGPT through its website.

 ### start your tracing! 🚀

 For example:

 ```sh
 ./GPTtrace -e "Count page faults by process"
 ./GPTtrace.py -e "Count page faults by process"
 ```

 ## Installation 🔧
--- a/doc/result.gif
+++ b/doc/result.gif
--- a/prompts/1.md
+++ b/prompts/1.md
@@ -1,34 +1,30 @@
 You are now a translator from human language to shell bpftrace commands.
 Here are some examples of what you can do with bpftrace shell commands:

 # Files opened by process
 bpftrace -e 'tracepoint:syscalls:sys_enter_open { printf("%s %s\n", comm, str(args->filename)); }'

 # Syscall count by program
 bpftrace -e 'tracepoint:raw_syscalls:sys_enter { @[comm] = count(); }'

 # Read bytes by process:
 bpftrace -e 'tracepoint:syscalls:sys_exit_read /args->ret/ { @[comm] = sum(args->ret); }'

 # Read size distribution by process:
 bpftrace -e 'tracepoint:syscalls:sys_exit_read { @[comm] = hist(args->ret); }'

 # Show per-second syscall rates:
 bpftrace -e 'tracepoint:raw_syscalls:sys_enter { @ = count(); } interval:s:1 { print(@); clear(@); }'

 # Trace disk size by process
 bpftrace -e 'tracepoint:block:block_rq_issue { printf("%d %s %d\n", pid, comm, args->bytes); }'

 # Count page faults by process
 bpftrace -e 'software:faults:1 { @[comm] = count(); }'

 # Count LLC cache misses by process name and PID (uses PMCs):
 bpftrace -e 'hardware:cache-misses:1000000 { @[comm, pid] = count(); }'

 # Profile user-level stacks at 99 Hertz, for PID 189:
 bpftrace -e 'profile:hz:99 /pid == 189/ { @[ustack] = count(); }'

 # Files opened, for processes in the root cgroup-v2
 bpftrace -e 'tracepoint:syscalls:sys_enter_openat /cgroup == cgroupid("/sys/fs/cgroup/unified/mycg")/ { printf("%s\n", str(args->filename)); }'

 After you read and learn about bpftrace, I will ask you to write a bpftrace command to do something.
 This is a libbpf program:

 A minimal example of a BPF application that installs a tracepoint handler triggered by the write syscall.

 `minimal` is just that – a minimal practical BPF application example. It
 doesn't use or require BPF CO-RE, so should run on quite old kernels. It
 installs a tracepoint handler which is triggered once every second. It uses
 `bpf_printk()` BPF helper to communicate with the world. 

 ```c
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>

 typedef unsigned int u32;
 typedef int pid_t;
 const pid_t pid_filter = 0;

 char LICENSE[] SEC("license") = "Dual BSD/GPL";

 SEC("tp/syscalls/sys_enter_write")
 int handle_tp(void *ctx)
 {
 	pid_t pid = bpf_get_current_pid_tgid() >> 32;
 	if (pid_filter && pid != pid_filter)
 		return 0;
 	bpf_printk("BPF triggered from PID %d.\n", pid);
 	return 0;
 }
 ```
--- a/prompts/2.md
+++ b/prompts/2.md
@@ -1,30 +1,8 @@
 This is a libbpf program:
 This is a bcc code example:

 A minimal example of a BPF application that installs a tracepoint handler triggered by the write syscall.
 Here's the code for hello_world.py:

 `minimal` is just that – a minimal practical BPF application example. It
 doesn't use or require BPF CO-RE, so should run on quite old kernels. It
 installs a tracepoint handler which is triggered once every second. It uses
 `bpf_printk()` BPF helper to communicate with the world. 

 ```c
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>

 typedef unsigned int u32;
 typedef int pid_t;
 const pid_t pid_filter = 0;

 char LICENSE[] SEC("license") = "Dual BSD/GPL";

 SEC("tp/syscalls/sys_enter_write")
 int handle_tp(void *ctx)
 {
 	pid_t pid = bpf_get_current_pid_tgid() >> 32;
 	if (pid_filter && pid != pid_filter)
 		return 0;
 	bpf_printk("BPF triggered from PID %d.\n", pid);
 	return 0;
 }
 ```
 ```python
 from bcc import BPF
 BPF(text='int kprobe__sys_clone(void *ctx) { bpf_trace_printk("Hello, World!\\n"); return 0; }').trace_print()
 ```
--- a/prompts/3.md
+++ b/prompts/3.md
@@ -1,8 +1,34 @@
 This is a bcc code example:
 You are now a translator from human language to shell bpftrace commands.
 Here are some examples of what you can do with bpftrace shell commands:

 Here's the code for hello_world.py:
 # Files opened by process
 bpftrace -e 'tracepoint:syscalls:sys_enter_open { printf("%s %s\n", comm, str(args->filename)); }'

 ```python
 from bcc import BPF
 BPF(text='int kprobe__sys_clone(void *ctx) { bpf_trace_printk("Hello, World!\\n"); return 0; }').trace_print()
 ```
 # Syscall count by program
 bpftrace -e 'tracepoint:raw_syscalls:sys_enter { @[comm] = count(); }'

 # Read bytes by process:
 bpftrace -e 'tracepoint:syscalls:sys_exit_read /args->ret/ { @[comm] = sum(args->ret); }'

 # Read size distribution by process:
 bpftrace -e 'tracepoint:syscalls:sys_exit_read { @[comm] = hist(args->ret); }'

 # Show per-second syscall rates:
 bpftrace -e 'tracepoint:raw_syscalls:sys_enter { @ = count(); } interval:s:1 { print(@); clear(@); }'

 # Trace disk size by process
 bpftrace -e 'tracepoint:block:block_rq_issue { printf("%d %s %d\n", pid, comm, args->bytes); }'

 # Count page faults by process
 bpftrace -e 'software:faults:1 { @[comm] = count(); }'

 # Count LLC cache misses by process name and PID (uses PMCs):
 bpftrace -e 'hardware:cache-misses:1000000 { @[comm, pid] = count(); }'

 # Profile user-level stacks at 99 Hertz, for PID 189:
 bpftrace -e 'profile:hz:99 /pid == 189/ { @[ustack] = count(); }'

 # Files opened, for processes in the root cgroup-v2
 bpftrace -e 'tracepoint:syscalls:sys_enter_openat /cgroup == cgroupid("/sys/fs/cgroup/unified/mycg")/ { printf("%s\n", str(args->filename)); }'

 After you read and learn about bpftrace, I will ask you to write a bpftrace command to do something.