Browse Source

Merge branch 'main' of https://github.com/eunomia-bpf/GPTtrace

pull/3/head
Littlefisher619 2 years ago
parent
commit
38fc63fc6d
6 changed files with 125 additions and 98 deletions
  1. +40
    -27
      GPTtrace.py
  2. +17
    -3
      README.md
  3. BIN
      doc/result.gif
  4. +30
    -34
      prompts/1.md
  5. +6
    -28
      prompts/2.md
  6. +32
    -6
      prompts/3.md

+ 40
- 27
GPTtrace.py View File

@@ -1,7 +1,8 @@
#! /bin/env python3
import argparse
import os
from typing import List
from pathlib import Path
from typing import List, Optional, Tuple

import pygments
from marko.block import FencedCode
@@ -16,6 +17,8 @@ from revChatGPT.V1 import Chatbot
ENV_UUID = "GPTTRACE_CONV_UUID"
ENV_ACCESS_TOKEN = "GPTTRACE_ACCESS_TOKEN"

PROMPTS_DIR = Path("./prompts")


def pretty_print(input, lexer=MarkdownLexer):
tokens = list(pygments.lex(input, lexer=lexer()))
@@ -29,6 +32,7 @@ def main():
)

group = parser.add_mutually_exclusive_group()

group.add_argument(
"-i", "--info", help="Let ChatGPT explain what's eBPF", action="store_true"
)
@@ -40,19 +44,10 @@ def main():
metavar="TEXT",
)
group.add_argument(
"-g",
"--generate",
help="Generate eBPF programs using your input with ChatGPT",
action="store",
metavar="TEXT",
)
"-g", "--generate", help="Generate eBPF programs using your input with ChatGPT", action="store", metavar="TEXT")
group.add_argument(
"--train", help="Train ChatGPT with conversions we provided", action="store_true")

parser.add_argument(
"-v",
"--verbose",
help="Print the prompt and receive message",
action="store_true",
)
parser.add_argument(
"-u",
"--uuid",
@@ -78,10 +73,8 @@ def main():
elif args.execute is not None:
desc: str = args.execute
print("Sending query to ChatGPT: " + desc)
ret_val = generate_result(
chatbot, construct_running_prompt(desc), conv_uuid, args.verbose
)
pretty_print(ret_val)
ret_val, _ = generate_result(
chatbot, construct_running_prompt(desc), conv_uuid, args.verbose)
# print(ret_val)
parsed = make_executable_command(ret_val)
# print(f"Command to run: {parsed}")
@@ -90,15 +83,32 @@ def main():
elif args.generate is not None:
desc: str = args.generate
print("Sending query to ChatGPT: " + desc)
ret_val = generate_result(
chatbot, construct_generate_prompt(desc), conv_uuid, True
)
# print(ret_val)
ret_val, _ = generate_result(
chatbot, construct_generate_prompt(desc), conv_uuid)
pretty_print(ret_val)
parsed = extract_code_blocks(ret_val)
# print(f"Command to run: {parsed}")
with open("generated.bpf.c", "w") as f:
for code in parsed:
f.write(code)
elif args.train:
prompts = os.listdir(PROMPTS_DIR)
prompts.sort()
# conv_uuid could be None, in which case we will create a new session and use it in the next steps
session = conv_uuid
for file in prompts:
info = f"Training ChatGPT with `{file}`"
print("-"*len(info))
print(info)
print("-"*len(info))
with open(PROMPTS_DIR/file, "r") as f:
input_data = f.read()
if args.verbose:
print(input_data)
print("-"*len(info))
_, session = generate_result(
chatbot, input_data, conv_uuid, args.verbose)
print(f"Trained session: {session}")
else:
parser.print_help()

@@ -110,6 +120,7 @@ No explanation required, no instruction required, don't tell me how to compile a
What I want is a eBPF program for: {text}."""



def construct_running_prompt(text: str) -> str:
return f"""You are now a translater from human language to {os.uname()[0]} shell bpftrace command.
No explanation required.
@@ -134,22 +145,24 @@ def make_executable_command(command: str) -> str:
return command


def generate_result(
bot: Chatbot, text: str, session: str = None, print_out: bool = False
) -> str:
def generate_result(bot: Chatbot, text: str, session: Optional[str] = None, print_out: bool = False) -> Tuple[str, str]:
from io import StringIO

prev_text = ""
buf = StringIO()
for data in bot.ask(text, conversation_id=session):
message = data["message"][len(prev_text) :]
received_session = ""
for data in bot.ask(
text, conversation_id=session
):
received_session = data["conversation_id"]
message = data["message"][len(prev_text):]
if print_out:
print(message, end="", flush=True)
buf.write(message)
prev_text = data["message"]
if print_out:
print()
return buf.getvalue()
return buf.getvalue(), received_session


def extract_code_blocks(text: str) -> List[str]:


+ 17
- 3
README.md View File

@@ -1,12 +1,20 @@
# GPTtrace 🤖

[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
[![Actions Status](https://github.com/eunomia-bpf/eunomia-bpf/workflows/Ubuntu/badge.svg)](https://github.com/eunomia-bpf/eunomia-bpf/actions)
[![DeepSource](https://deepsource.io/gh/eunomia-bpf/eunomia-bpf.svg/?label=active+issues&show_trend=true&token=rcSI3J1-gpwLIgZWtKZC-N6C)](https://deepsource.io/gh/eunomia-bpf/eunomia-bpf/?ref=repository-badge)
[![CodeFactor](https://www.codefactor.io/repository/github/eunomia-bpf/eunomia-bpf/badge)](https://www.codefactor.io/repository/github/eunomia-bpf/eunomia-bpf)


Generate eBPF programs and trace your system with ChatGPT and natural language

## Key Features 💡

### Interact with and trace your Linux system using natural language; it can also tell you how to write eBPF programs in `BCC` and `libbpf` styles.

![result](doc/result.png)
Example: tracing with the query "Count page faults by process"

![result](doc/result.gif)

### Generate eBPF programs with natural language

@@ -43,12 +51,18 @@ optional arguments:

### Use prompts to teach ChatGPT to write eBPF programs

TODO
```sh
./GPTtrace.py --train
```

This will use the material in the `prompts` directory to teach ChatGPT to write eBPF programs in bpftrace, libbpf, and BCC styles. You can also do this manually by sending the prompts to ChatGPT on its website.

### Start your tracing! 🚀

For example:

```sh
./GPTtrace -e "Count page faults by process"
./GPTtrace.py -e "Count page faults by process"
```

## Installation 🔧


BIN
doc/result.gif View File

Before After
Width: 600  |  Height: 338  |  Size: 311 kB

+ 30
- 34
prompts/1.md View File

@@ -1,34 +1,30 @@
You are now a translater from human language to shell bpftrace command.
Here are some examples of what you can do with bpftrace shell command:

# Files opened by process
bpftrace -e 'tracepoint:syscalls:sys_enter_open { printf("%s %s\n", comm, str(args->filename)); }'

# Syscall count by program
bpftrace -e 'tracepoint:raw_syscalls:sys_enter { @[comm] = count(); }'

# Read bytes by process:
bpftrace -e 'tracepoint:syscalls:sys_exit_read /args->ret/ { @[comm] = sum(args->ret); }'

# Read size distribution by process:
bpftrace -e 'tracepoint:syscalls:sys_exit_read { @[comm] = hist(args->ret); }'

# Show per-second syscall rates:
bpftrace -e 'tracepoint:raw_syscalls:sys_enter { @ = count(); } interval:s:1 { print(@); clear(@); }'

# Trace disk size by process
bpftrace -e 'tracepoint:block:block_rq_issue { printf("%d %s %d\n", pid, comm, args->bytes); }'

# Count page faults by process
bpftrace -e 'software:faults:1 { @[comm] = count(); }'

# Count LLC cache misses by process name and PID (uses PMCs):
bpftrace -e 'hardware:cache-misses:1000000 { @[comm, pid] = count(); }'

# Profile user-level stacks at 99 Hertz, for PID 189:
bpftrace -e 'profile:hz:99 /pid == 189/ { @[ustack] = count(); }'

# Files opened, for processes in the root cgroup-v2
bpftrace -e 'tracepoint:syscalls:sys_enter_openat /cgroup == cgroupid("/sys/fs/cgroup/unified/mycg")/ { printf("%s\n", str(args->filename)); }'

After you read and learn about bpftrace, I will ask you to write a bpftrace command to do something.
This is a libbpf program:

A minimal example of a BPF application that installs a tracepoint handler which is triggered by the write syscall.

`minimal` is just that – a minimal practical BPF application example. It
doesn't use or require BPF CO-RE, so should run on quite old kernels. It
installs a tracepoint handler which is triggered once every second. It uses
`bpf_printk()` BPF helper to communicate with the world.

```c
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

typedef unsigned int u32;
typedef int pid_t;
const pid_t pid_filter = 0;

char LICENSE[] SEC("license") = "Dual BSD/GPL";

SEC("tp/syscalls/sys_enter_write")
int handle_tp(void *ctx)
{
pid_t pid = bpf_get_current_pid_tgid() >> 32;
if (pid_filter && pid != pid_filter)
return 0;
bpf_printk("BPF triggered from PID %d.\n", pid);
return 0;
}
```

+ 6
- 28
prompts/2.md View File

@@ -1,30 +1,8 @@
This is an libbpf program:
This is bcc code example:

a minimal example of a BPF application installs a tracepoint handler which is triggered by write syscall
Here's the code for hello_world.py:

`minimal` is just that – a minimal practical BPF application example. It
doesn't use or require BPF CO-RE, so should run on quite old kernels. It
installs a tracepoint handler which is triggered once every second. It uses
`bpf_printk()` BPF helper to communicate with the world.

```c
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

typedef unsigned int u32;
typedef int pid_t;
const pid_t pid_filter = 0;

char LICENSE[] SEC("license") = "Dual BSD/GPL";

SEC("tp/syscalls/sys_enter_write")
int handle_tp(void *ctx)
{
pid_t pid = bpf_get_current_pid_tgid() >> 32;
if (pid_filter && pid != pid_filter)
return 0;
bpf_printk("BPF triggered from PID %d.\n", pid);
return 0;
}
```
```python
from bcc import BPF
BPF(text='int kprobe__sys_clone(void *ctx) { bpf_trace_printk("Hello, World!\\n"); return 0; }').trace_print()
```

+ 32
- 6
prompts/3.md View File

@@ -1,8 +1,34 @@
This is bcc code example:
You are now a translator from human language to shell bpftrace command.
Here are some examples of what you can do with bpftrace shell command:

Here's the code for hello_world.py:
# Files opened by process
bpftrace -e 'tracepoint:syscalls:sys_enter_open { printf("%s %s\n", comm, str(args->filename)); }'

```python
from bcc import BPF
BPF(text='int kprobe__sys_clone(void *ctx) { bpf_trace_printk("Hello, World!\\n"); return 0; }').trace_print()
```
# Syscall count by program
bpftrace -e 'tracepoint:raw_syscalls:sys_enter { @[comm] = count(); }'

# Read bytes by process:
bpftrace -e 'tracepoint:syscalls:sys_exit_read /args->ret/ { @[comm] = sum(args->ret); }'

# Read size distribution by process:
bpftrace -e 'tracepoint:syscalls:sys_exit_read { @[comm] = hist(args->ret); }'

# Show per-second syscall rates:
bpftrace -e 'tracepoint:raw_syscalls:sys_enter { @ = count(); } interval:s:1 { print(@); clear(@); }'

# Trace disk size by process
bpftrace -e 'tracepoint:block:block_rq_issue { printf("%d %s %d\n", pid, comm, args->bytes); }'

# Count page faults by process
bpftrace -e 'software:faults:1 { @[comm] = count(); }'

# Count LLC cache misses by process name and PID (uses PMCs):
bpftrace -e 'hardware:cache-misses:1000000 { @[comm, pid] = count(); }'

# Profile user-level stacks at 99 Hertz, for PID 189:
bpftrace -e 'profile:hz:99 /pid == 189/ { @[ustack] = count(); }'

# Files opened, for processes in the root cgroup-v2
bpftrace -e 'tracepoint:syscalls:sys_enter_openat /cgroup == cgroupid("/sys/fs/cgroup/unified/mycg")/ { printf("%s\n", str(args->filename)); }'

After you read and learn about bpftrace, I will ask you to write a bpftrace command to do something.

Loading…
Cancel
Save