Browse Source

Merge branch 'main' of https://github.com/eunomia-bpf/GPTtrace

pull/3/head
Littlefisher619 2 years ago
parent
commit
38fc63fc6d
6 changed files with 125 additions and 98 deletions
  1. +40
    -27
      GPTtrace.py
  2. +17
    -3
      README.md
  3. BIN
      doc/result.gif
  4. +30
    -34
      prompts/1.md
  5. +6
    -28
      prompts/2.md
  6. +32
    -6
      prompts/3.md

+ 40
- 27
GPTtrace.py View File

@@ -1,7 +1,8 @@
#! /bin/env python3 #! /bin/env python3
import argparse import argparse
import os import os
from typing import List
from pathlib import Path
from typing import List, Optional, Tuple


import pygments import pygments
from marko.block import FencedCode from marko.block import FencedCode
@@ -16,6 +17,8 @@ from revChatGPT.V1 import Chatbot
ENV_UUID = "GPTTRACE_CONV_UUID" ENV_UUID = "GPTTRACE_CONV_UUID"
ENV_ACCESS_TOKEN = "GPTTRACE_ACCESS_TOKEN" ENV_ACCESS_TOKEN = "GPTTRACE_ACCESS_TOKEN"


PROMPTS_DIR = Path("./prompts")



def pretty_print(input, lexer=MarkdownLexer): def pretty_print(input, lexer=MarkdownLexer):
tokens = list(pygments.lex(input, lexer=lexer())) tokens = list(pygments.lex(input, lexer=lexer()))
@@ -29,6 +32,7 @@ def main():
) )


group = parser.add_mutually_exclusive_group() group = parser.add_mutually_exclusive_group()

group.add_argument( group.add_argument(
"-i", "--info", help="Let ChatGPT explain what's eBPF", action="store_true" "-i", "--info", help="Let ChatGPT explain what's eBPF", action="store_true"
) )
@@ -40,19 +44,10 @@ def main():
metavar="TEXT", metavar="TEXT",
) )
group.add_argument( group.add_argument(
"-g",
"--generate",
help="Generate eBPF programs using your input with ChatGPT",
action="store",
metavar="TEXT",
)
"-g", "--generate", help="Generate eBPF programs using your input with ChatGPT", action="store", metavar="TEXT")
group.add_argument(
"--train", help="Train ChatGPT with conversions we provided", action="store_true")


parser.add_argument(
"-v",
"--verbose",
help="Print the prompt and receive message",
action="store_true",
)
parser.add_argument( parser.add_argument(
"-u", "-u",
"--uuid", "--uuid",
@@ -78,10 +73,8 @@ def main():
elif args.execute is not None: elif args.execute is not None:
desc: str = args.execute desc: str = args.execute
print("Sending query to ChatGPT: " + desc) print("Sending query to ChatGPT: " + desc)
ret_val = generate_result(
chatbot, construct_running_prompt(desc), conv_uuid, args.verbose
)
pretty_print(ret_val)
ret_val, _ = generate_result(
chatbot, construct_running_prompt(desc), conv_uuid, args.verbose)
# print(ret_val) # print(ret_val)
parsed = make_executable_command(ret_val) parsed = make_executable_command(ret_val)
# print(f"Command to run: {parsed}") # print(f"Command to run: {parsed}")
@@ -90,15 +83,32 @@ def main():
elif args.generate is not None: elif args.generate is not None:
desc: str = args.generate desc: str = args.generate
print("Sending query to ChatGPT: " + desc) print("Sending query to ChatGPT: " + desc)
ret_val = generate_result(
chatbot, construct_generate_prompt(desc), conv_uuid, True
)
# print(ret_val)
ret_val, _ = generate_result(
chatbot, construct_generate_prompt(desc), conv_uuid)
pretty_print(ret_val)
parsed = extract_code_blocks(ret_val) parsed = extract_code_blocks(ret_val)
# print(f"Command to run: {parsed}") # print(f"Command to run: {parsed}")
with open("generated.bpf.c", "w") as f: with open("generated.bpf.c", "w") as f:
for code in parsed: for code in parsed:
f.write(code) f.write(code)
elif args.train:
prompts = os.listdir(PROMPTS_DIR)
prompts.sort()
# conv_uuid could be None, in which we will create a new session and use it in the next steps
session = conv_uuid
for file in prompts:
info = f"Training ChatGPT with `{file}`"
print("-"*len(info))
print(info)
print("-"*len(info))
with open(PROMPTS_DIR/file, "r") as f:
input_data = f.read()
if args.verbose:
print(input_data)
print("-"*len(info))
_, session = generate_result(
chatbot, input_data, conv_uuid, args.verbose)
print(f"Trained session: {session}")
else: else:
parser.print_help() parser.print_help()


@@ -110,6 +120,7 @@ No explanation required, no instruction required, don't tell me how to compile a
What I want is a eBPF program for: {text}.""" What I want is a eBPF program for: {text}."""





def construct_running_prompt(text: str) -> str: def construct_running_prompt(text: str) -> str:
return f"""You are now a translater from human language to {os.uname()[0]} shell bpftrace command. return f"""You are now a translater from human language to {os.uname()[0]} shell bpftrace command.
No explanation required. No explanation required.
@@ -134,22 +145,24 @@ def make_executable_command(command: str) -> str:
return command return command




def generate_result(
bot: Chatbot, text: str, session: str = None, print_out: bool = False
) -> str:
def generate_result(bot: Chatbot, text: str, session: Optional[str] = None, print_out: bool = False) -> Tuple[str, str]:
from io import StringIO from io import StringIO


prev_text = "" prev_text = ""
buf = StringIO() buf = StringIO()
for data in bot.ask(text, conversation_id=session):
message = data["message"][len(prev_text) :]
received_session = ""
for data in bot.ask(
text, conversation_id=session
):
received_session = data["conversation_id"]
message = data["message"][len(prev_text):]
if print_out: if print_out:
print(message, end="", flush=True) print(message, end="", flush=True)
buf.write(message) buf.write(message)
prev_text = data["message"] prev_text = data["message"]
if print_out: if print_out:
print() print()
return buf.getvalue()
return buf.getvalue(), received_session




def extract_code_blocks(text: str) -> List[str]: def extract_code_blocks(text: str) -> List[str]:


+ 17
- 3
README.md View File

@@ -1,12 +1,20 @@
# GPTtrace 🤖 # GPTtrace 🤖


[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
[![Actions Status](https://github.com/eunomia-bpf/eunomia-bpf/workflows/Ubuntu/badge.svg)](https://github.com/eunomia-bpf/eunomia-bpf/actions)
[![DeepSource](https://deepsource.io/gh/eunomia-bpf/eunomia-bpf.svg/?label=active+issues&show_trend=true&token=rcSI3J1-gpwLIgZWtKZC-N6C)](https://deepsource.io/gh/eunomia-bpf/eunomia-bpf/?ref=repository-badge)
[![CodeFactor](https://www.codefactor.io/repository/github/eunomia-bpf/eunomia-bpf/badge)](https://www.codefactor.io/repository/github/eunomia-bpf/eunomia-bpf)


Generate eBPF programs and tracing with ChatGPT and natural language Generate eBPF programs and tracing with ChatGPT and natural language


## Key Features 💡 ## Key Features 💡


### Interact and Tracing your Linux with natural language, it can tell how to write eBPF programs in `BCC`, `libbpf` styles. ### Interact and Tracing your Linux with natural language, it can tell how to write eBPF programs in `BCC`, `libbpf` styles.


![result](doc/result.png)
Example: tracing with the query "Count page faults by process":

![result](doc/result.gif)


### Generate eBPF programs with natural language ### Generate eBPF programs with natural language


@@ -43,12 +51,18 @@ optional arguments:


### Use prompts to teach ChatGPT to write eBPF programs ### Use prompts to teach ChatGPT to write eBPF programs


TODO
```sh
./GPTtrace.py --train
```

This will use the material in the `prompts` directory to teach ChatGPT to write eBPF programs in bpftrace, libbpf, and BCC styles. You can also do this manually by sending the prompts to ChatGPT on its website.


### start your tracing! 🚀 ### start your tracing! 🚀


For example:

```sh ```sh
./GPTtrace -e "Count page faults by process"
./GPTtrace.py -e "Count page faults by process"
``` ```


## Installation 🔧 ## Installation 🔧


BIN
doc/result.gif View File

Before After
Width: 600  |  Height: 338  |  Size: 311 kB

+ 30
- 34
prompts/1.md View File

@@ -1,34 +1,30 @@
You are now a translater from human language to shell bpftrace command.
Here are some examples of what you can do with bpftrace shell command:

# Files opened by process
bpftrace -e 'tracepoint:syscalls:sys_enter_open { printf("%s %s\n", comm, str(args->filename)); }'

# Syscall count by program
bpftrace -e 'tracepoint:raw_syscalls:sys_enter { @[comm] = count(); }'

# Read bytes by process:
bpftrace -e 'tracepoint:syscalls:sys_exit_read /args->ret/ { @[comm] = sum(args->ret); }'

# Read size distribution by process:
bpftrace -e 'tracepoint:syscalls:sys_exit_read { @[comm] = hist(args->ret); }'

# Show per-second syscall rates:
bpftrace -e 'tracepoint:raw_syscalls:sys_enter { @ = count(); } interval:s:1 { print(@); clear(@); }'

# Trace disk size by process
bpftrace -e 'tracepoint:block:block_rq_issue { printf("%d %s %d\n", pid, comm, args->bytes); }'

# Count page faults by process
bpftrace -e 'software:faults:1 { @[comm] = count(); }'

# Count LLC cache misses by process name and PID (uses PMCs):
bpftrace -e 'hardware:cache-misses:1000000 { @[comm, pid] = count(); }'

# Profile user-level stacks at 99 Hertz, for PID 189:
bpftrace -e 'profile:hz:99 /pid == 189/ { @[ustack] = count(); }'

# Files opened, for processes in the root cgroup-v2
bpftrace -e 'tracepoint:syscalls:sys_enter_openat /cgroup == cgroupid("/sys/fs/cgroup/unified/mycg")/ { printf("%s\n", str(args->filename)); }'

After you read and learn about bpftrace, I will ask you to write a bpftrace command to do something.
This is a libbpf program:

A minimal example of a BPF application that installs a tracepoint handler triggered by the write syscall.

`minimal` is just that – a minimal practical BPF application example. It
doesn't use or require BPF CO-RE, so it should run on quite old kernels. It
installs a tracepoint handler which is triggered on every `write` syscall. It
uses the `bpf_printk()` BPF helper to communicate with the world.

```c
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

typedef unsigned int u32;
typedef int pid_t;
const pid_t pid_filter = 0;

char LICENSE[] SEC("license") = "Dual BSD/GPL";

SEC("tp/syscalls/sys_enter_write")
int handle_tp(void *ctx)
{
pid_t pid = bpf_get_current_pid_tgid() >> 32;
if (pid_filter && pid != pid_filter)
return 0;
bpf_printk("BPF triggered from PID %d.\n", pid);
return 0;
}
```

+ 6
- 28
prompts/2.md View File

@@ -1,30 +1,8 @@
This is an libbpf program:
This is a bcc code example:


a minimal example of a BPF application installs a tracepoint handler which is triggered by write syscall
Here's the code for hello_world.py:


`minimal` is just that – a minimal practical BPF application example. It
doesn't use or require BPF CO-RE, so should run on quite old kernels. It
installs a tracepoint handler which is triggered once every second. It uses
`bpf_printk()` BPF helper to communicate with the world.

```c
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

typedef unsigned int u32;
typedef int pid_t;
const pid_t pid_filter = 0;

char LICENSE[] SEC("license") = "Dual BSD/GPL";

SEC("tp/syscalls/sys_enter_write")
int handle_tp(void *ctx)
{
pid_t pid = bpf_get_current_pid_tgid() >> 32;
if (pid_filter && pid != pid_filter)
return 0;
bpf_printk("BPF triggered from PID %d.\n", pid);
return 0;
}
```
```python
from bcc import BPF
BPF(text='int kprobe__sys_clone(void *ctx) { bpf_trace_printk("Hello, World!\\n"); return 0; }').trace_print()
```

+ 32
- 6
prompts/3.md View File

@@ -1,8 +1,34 @@
This is bcc code example:
You are now a translator from human language to shell bpftrace commands.
Here are some examples of what you can do with bpftrace shell commands:


Here's the code for hello_world.py:
# Files opened by process
bpftrace -e 'tracepoint:syscalls:sys_enter_open { printf("%s %s\n", comm, str(args->filename)); }'


```python
from bcc import BPF
BPF(text='int kprobe__sys_clone(void *ctx) { bpf_trace_printk("Hello, World!\\n"); return 0; }').trace_print()
```
# Syscall count by program
bpftrace -e 'tracepoint:raw_syscalls:sys_enter { @[comm] = count(); }'

# Read bytes by process:
bpftrace -e 'tracepoint:syscalls:sys_exit_read /args->ret/ { @[comm] = sum(args->ret); }'

# Read size distribution by process:
bpftrace -e 'tracepoint:syscalls:sys_exit_read { @[comm] = hist(args->ret); }'

# Show per-second syscall rates:
bpftrace -e 'tracepoint:raw_syscalls:sys_enter { @ = count(); } interval:s:1 { print(@); clear(@); }'

# Trace disk size by process
bpftrace -e 'tracepoint:block:block_rq_issue { printf("%d %s %d\n", pid, comm, args->bytes); }'

# Count page faults by process
bpftrace -e 'software:faults:1 { @[comm] = count(); }'

# Count LLC cache misses by process name and PID (uses PMCs):
bpftrace -e 'hardware:cache-misses:1000000 { @[comm, pid] = count(); }'

# Profile user-level stacks at 99 Hertz, for PID 189:
bpftrace -e 'profile:hz:99 /pid == 189/ { @[ustack] = count(); }'

# Files opened, for processes in the root cgroup-v2
bpftrace -e 'tracepoint:syscalls:sys_enter_openat /cgroup == cgroupid("/sys/fs/cgroup/unified/mycg")/ { printf("%s\n", str(args->filename)); }'

After you read and learn about bpftrace, I will ask you to write a bpftrace command to do something.

Loading…
Cancel
Save