Browse Source

update: support training ChatGPT

pull/3/head
officeyutong 2 years ago
parent
commit
4c498cabce
4 changed files with 102 additions and 74 deletions
  1. +34
    -6
      GPTtrace.py
  2. +30
    -34
      prompts/1.md
  3. +6
    -28
      prompts/2.md
  4. +32
    -6
      prompts/3.md

+ 34
- 6
GPTtrace.py View File

@@ -3,14 +3,17 @@ import os
import argparse import argparse


from revChatGPT.V1 import Chatbot from revChatGPT.V1 import Chatbot
from typing import List
from typing import List, Optional, Tuple
from marko.parser import Parser from marko.parser import Parser
from marko.block import FencedCode from marko.block import FencedCode
from marko.inline import RawText from marko.inline import RawText
from pathlib import Path


ENV_UUID = "GPTTRACE_CONV_UUID" ENV_UUID = "GPTTRACE_CONV_UUID"
ENV_ACCESS_TOKEN = "GPTTRACE_ACCESS_TOKEN" ENV_ACCESS_TOKEN = "GPTTRACE_ACCESS_TOKEN"


PROMPTS_DIR = Path("./prompts")



def main(): def main():
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
@@ -18,13 +21,16 @@ def main():
description='Use ChatGPT to write eBPF programs (bpftrace, etc.)') description='Use ChatGPT to write eBPF programs (bpftrace, etc.)')


group = parser.add_mutually_exclusive_group() group = parser.add_mutually_exclusive_group()

group.add_argument( group.add_argument(
"-i", "--info", help="Let ChatGPT explain what's eBPF", action="store_true") "-i", "--info", help="Let ChatGPT explain what's eBPF", action="store_true")
group.add_argument( group.add_argument(
"-e", "--execute", help="Generate commands using your input with ChatGPT, and run it", action="store", metavar="TEXT") "-e", "--execute", help="Generate commands using your input with ChatGPT, and run it", action="store", metavar="TEXT")
group.add_argument( group.add_argument(
"-g", "--generate", help="Generate eBPF programs using your input with ChatGPT", action="store", metavar="TEXT") "-g", "--generate", help="Generate eBPF programs using your input with ChatGPT", action="store", metavar="TEXT")
group.add_argument(
"--train", help="Train ChatGPT with conversions we provided", action="store_true")

parser.add_argument( parser.add_argument(
"-v", "--verbose", help="Print the prompt and receive message", action="store_true") "-v", "--verbose", help="Print the prompt and receive message", action="store_true")
parser.add_argument( parser.add_argument(
@@ -45,7 +51,7 @@ def main():
elif args.execute is not None: elif args.execute is not None:
desc: str = args.execute desc: str = args.execute
print("Sending query to ChatGPT: " + desc) print("Sending query to ChatGPT: " + desc)
ret_val = generate_result(
ret_val, _ = generate_result(
chatbot, construct_running_prompt(desc), conv_uuid, args.verbose) chatbot, construct_running_prompt(desc), conv_uuid, args.verbose)
# print(ret_val) # print(ret_val)
parsed = make_executable_command(ret_val) parsed = make_executable_command(ret_val)
@@ -55,7 +61,7 @@ def main():
elif args.generate is not None: elif args.generate is not None:
desc: str = args.generate desc: str = args.generate
print("Sending query to ChatGPT: " + desc) print("Sending query to ChatGPT: " + desc)
ret_val = generate_result(
ret_val, _ = generate_result(
chatbot, construct_generate_prompt(desc), conv_uuid, True) chatbot, construct_generate_prompt(desc), conv_uuid, True)
# print(ret_val) # print(ret_val)
parsed = extract_code_blocks(ret_val) parsed = extract_code_blocks(ret_val)
@@ -63,15 +69,35 @@ def main():
with open("generated.bpf.c", "w") as f: with open("generated.bpf.c", "w") as f:
for code in parsed: for code in parsed:
f.write(code) f.write(code)
elif args.train:
prompts = os.listdir(PROMPTS_DIR)
prompts.sort()
# conv_uuid could be None, in which case we will create a new session and use it in the next steps
session = conv_uuid
for file in prompts:
info = f"Training ChatGPT with `{file}`"
print("-"*len(info))
print(info)
print("-"*len(info))
with open(PROMPTS_DIR/file, "r") as f:
input_data = f.read()
if args.verbose:
print(input_data)
print("-"*len(info))
_, session = generate_result(
chatbot, input_data, conv_uuid, args.verbose)
print(f"Trained session: {session}")
else: else:
parser.print_help() parser.print_help()



def construct_generate_prompt(text: str) -> str: def construct_generate_prompt(text: str) -> str:
return f'''You are now a translater from human language to {os.uname()[0]} eBPF programs. return f'''You are now a translater from human language to {os.uname()[0]} eBPF programs.
Please write eBPF programs for me. Please write eBPF programs for me.
No explanation required, no instruction required, don't tell me how to compile and run. No explanation required, no instruction required, don't tell me how to compile and run.
What I want is a eBPF program for: {text}.''' What I want is a eBPF program for: {text}.'''



def construct_running_prompt(text: str) -> str: def construct_running_prompt(text: str) -> str:
return f'''You are now a translater from human language to {os.uname()[0]} shell bpftrace command. return f'''You are now a translater from human language to {os.uname()[0]} shell bpftrace command.
No explanation required. No explanation required.
@@ -96,13 +122,15 @@ def make_executable_command(command: str) -> str:
return command return command




def generate_result(bot: Chatbot, text: str, session: str = None, print_out: bool = False) -> str:
def generate_result(bot: Chatbot, text: str, session: Optional[str] = None, print_out: bool = False) -> Tuple[str, str]:
from io import StringIO from io import StringIO
prev_text = "" prev_text = ""
buf = StringIO() buf = StringIO()
received_session = ""
for data in bot.ask( for data in bot.ask(
text, conversation_id=session text, conversation_id=session
): ):
received_session = data["conversation_id"]
message = data["message"][len(prev_text):] message = data["message"][len(prev_text):]
if print_out: if print_out:
print(message, end="", flush=True) print(message, end="", flush=True)
@@ -110,7 +138,7 @@ def generate_result(bot: Chatbot, text: str, session: str = None, print_out: boo
prev_text = data["message"] prev_text = data["message"]
if print_out: if print_out:
print() print()
return buf.getvalue()
return buf.getvalue(), received_session




def extract_code_blocks(text: str) -> List[str]: def extract_code_blocks(text: str) -> List[str]:


+ 30
- 34
prompts/1.md View File

@@ -1,34 +1,30 @@
You are now a translator from human language to shell bpftrace commands.
Here are some examples of what you can do with bpftrace shell commands:

# Files opened by process
bpftrace -e 'tracepoint:syscalls:sys_enter_open { printf("%s %s\n", comm, str(args->filename)); }'

# Syscall count by program
bpftrace -e 'tracepoint:raw_syscalls:sys_enter { @[comm] = count(); }'

# Read bytes by process:
bpftrace -e 'tracepoint:syscalls:sys_exit_read /args->ret/ { @[comm] = sum(args->ret); }'

# Read size distribution by process:
bpftrace -e 'tracepoint:syscalls:sys_exit_read { @[comm] = hist(args->ret); }'

# Show per-second syscall rates:
bpftrace -e 'tracepoint:raw_syscalls:sys_enter { @ = count(); } interval:s:1 { print(@); clear(@); }'

# Trace disk size by process
bpftrace -e 'tracepoint:block:block_rq_issue { printf("%d %s %d\n", pid, comm, args->bytes); }'

# Count page faults by process
bpftrace -e 'software:faults:1 { @[comm] = count(); }'

# Count LLC cache misses by process name and PID (uses PMCs):
bpftrace -e 'hardware:cache-misses:1000000 { @[comm, pid] = count(); }'

# Profile user-level stacks at 99 Hertz, for PID 189:
bpftrace -e 'profile:hz:99 /pid == 189/ { @[ustack] = count(); }'

# Files opened, for processes in the root cgroup-v2
bpftrace -e 'tracepoint:syscalls:sys_enter_openat /cgroup == cgroupid("/sys/fs/cgroup/unified/mycg")/ { printf("%s\n", str(args->filename)); }'

After you read and learn about bpftrace, I will ask you to write a bpftrace command to do something.
This is a libbpf program:

A minimal example of a BPF application that installs a tracepoint handler triggered by the write syscall.

`minimal` is just that – a minimal practical BPF application example. It
doesn't use or require BPF CO-RE, so should run on quite old kernels. It
installs a tracepoint handler which is triggered once every second. It uses
`bpf_printk()` BPF helper to communicate with the world.

```c
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

typedef unsigned int u32;
typedef int pid_t;
const pid_t pid_filter = 0;

char LICENSE[] SEC("license") = "Dual BSD/GPL";

SEC("tp/syscalls/sys_enter_write")
int handle_tp(void *ctx)
{
pid_t pid = bpf_get_current_pid_tgid() >> 32;
if (pid_filter && pid != pid_filter)
return 0;
bpf_printk("BPF triggered from PID %d.\n", pid);
return 0;
}
```

+ 6
- 28
prompts/2.md View File

@@ -1,30 +1,8 @@
This is a libbpf program:
This is a bcc code example:


A minimal example of a BPF application that installs a tracepoint handler triggered by the write syscall.
Here's the code for hello_world.py:


`minimal` is just that – a minimal practical BPF application example. It
doesn't use or require BPF CO-RE, so should run on quite old kernels. It
installs a tracepoint handler which is triggered once every second. It uses
`bpf_printk()` BPF helper to communicate with the world.

```c
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

typedef unsigned int u32;
typedef int pid_t;
const pid_t pid_filter = 0;

char LICENSE[] SEC("license") = "Dual BSD/GPL";

SEC("tp/syscalls/sys_enter_write")
int handle_tp(void *ctx)
{
pid_t pid = bpf_get_current_pid_tgid() >> 32;
if (pid_filter && pid != pid_filter)
return 0;
bpf_printk("BPF triggered from PID %d.\n", pid);
return 0;
}
```
```python
from bcc import BPF
BPF(text='int kprobe__sys_clone(void *ctx) { bpf_trace_printk("Hello, World!\\n"); return 0; }').trace_print()
```

+ 32
- 6
prompts/3.md View File

@@ -1,8 +1,34 @@
This is a bcc code example:
You are now a translator from human language to shell bpftrace commands.
Here are some examples of what you can do with bpftrace shell commands:


Here's the code for hello_world.py:
# Files opened by process
bpftrace -e 'tracepoint:syscalls:sys_enter_open { printf("%s %s\n", comm, str(args->filename)); }'


```python
from bcc import BPF
BPF(text='int kprobe__sys_clone(void *ctx) { bpf_trace_printk("Hello, World!\\n"); return 0; }').trace_print()
```
# Syscall count by program
bpftrace -e 'tracepoint:raw_syscalls:sys_enter { @[comm] = count(); }'

# Read bytes by process:
bpftrace -e 'tracepoint:syscalls:sys_exit_read /args->ret/ { @[comm] = sum(args->ret); }'

# Read size distribution by process:
bpftrace -e 'tracepoint:syscalls:sys_exit_read { @[comm] = hist(args->ret); }'

# Show per-second syscall rates:
bpftrace -e 'tracepoint:raw_syscalls:sys_enter { @ = count(); } interval:s:1 { print(@); clear(@); }'

# Trace disk size by process
bpftrace -e 'tracepoint:block:block_rq_issue { printf("%d %s %d\n", pid, comm, args->bytes); }'

# Count page faults by process
bpftrace -e 'software:faults:1 { @[comm] = count(); }'

# Count LLC cache misses by process name and PID (uses PMCs):
bpftrace -e 'hardware:cache-misses:1000000 { @[comm, pid] = count(); }'

# Profile user-level stacks at 99 Hertz, for PID 189:
bpftrace -e 'profile:hz:99 /pid == 189/ { @[ustack] = count(); }'

# Files opened, for processes in the root cgroup-v2
bpftrace -e 'tracepoint:syscalls:sys_enter_openat /cgroup == cgroupid("/sys/fs/cgroup/unified/mycg")/ { printf("%s\n", str(args->filename)); }'

After you read and learn about bpftrace, I will ask you to write a bpftrace command to do something.

Loading…
Cancel
Save