/* malloc(size_t n) Returns a pointer to a newly allocated chunk of at least n bytes, or null if no space is available. Additionally, on failure, errno is set to ENOMEM on ANSI C systems. If n is zero, malloc returns a minimum-sized chunk. (The minimum size is 16 bytes on most 32-bit systems, and 24 or 32 bytes on 64-bit systems.) On most systems, size_t is an unsigned type, so calls with negative arguments are interpreted as requests for huge amounts of space, which will often fail. The maximum supported value of n differs across systems, but is in all cases less than the maximum representable value of a size_t. */
/* free(void* p) Releases the chunk of memory pointed to by p, that had been previously allocated using malloc or a related routine such as realloc. It has no effect if p is null. It can have arbitrary (i.e., bad!) effects if p has already been freed. Unless disabled (using mallopt), freeing very large spaces will when possible, automatically trigger operations that give back unused memory to the system, thus reducing program footprint. */
As the comment shows, free releases the chunk of memory pointed to by p, which may have been obtained through malloc or through a related function such as realloc.
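A minimal usage sketch of the contract described in the two comments above (NULL plus ENOMEM on failure, free(NULL) being a no-op); this is ordinary application code, not glibc internals:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void) {
    /* Request 100 bytes; the chunk actually reserved is at least that big. */
    char *buf = malloc(100);
    if (buf == NULL) {          /* on failure errno is set to ENOMEM */
        perror("malloc");
        return 1;
    }
    strcpy(buf, "hello heap");
    printf("%s\n", buf);

    free(buf);                  /* release the chunk back to the allocator */
    buf = NULL;                 /* avoid accidental double free / use-after-free */
    free(NULL);                 /* explicitly allowed: has no effect */
    return 0;
}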
sploitfun@sploitfun-VirtualBox:~/ptmalloc.ppt/mthread$ ./mthread
Welcome to per thread arena example::6501
Before malloc in main thread
After malloc and before free in main thread
After free in main thread
...
sploitfun@sploitfun-VirtualBox:~/lsploits/hof/ptmalloc.ppt/mthread$ cat /proc/6501/maps
08048000-08049000 r-xp 00000000 08:01 539625 /home/sploitfun/ptmalloc.ppt/mthread/mthread
08049000-0804a000 r--p 00000000 08:01 539625 /home/sploitfun/ptmalloc.ppt/mthread/mthread
0804a000-0804b000 rw-p 00001000 08:01 539625 /home/sploitfun/ptmalloc.ppt/mthread/mthread
0804b000-0806c000 rw-p 00000000 00:00 0 [heap]
b7e05000-b7e07000 rw-p 00000000 00:00 0
...
sploitfun@sploitfun-VirtualBox:~/ptmalloc.ppt/mthread$
sploitfun@sploitfun-VirtualBox:~/ptmalloc.ppt/mthread$ ./mthread
Welcome to per thread arena example::6501
Before malloc in main thread
After malloc and before free in main thread
After free in main thread
Before malloc in thread 1
...
sploitfun@sploitfun-VirtualBox:~/ptmalloc.ppt/mthread$ cat /proc/6501/maps
08048000-08049000 r-xp 00000000 08:01 539625 /home/sploitfun/ptmalloc.ppt/mthread/mthread
08049000-0804a000 r--p 00000000 08:01 539625 /home/sploitfun/ptmalloc.ppt/mthread/mthread
0804a000-0804b000 rw-p 00001000 08:01 539625 /home/sploitfun/ptmalloc.ppt/mthread/mthread
0804b000-0806c000 rw-p 00000000 00:00 0 [heap]
b7604000-b7605000 ---p 00000000 00:00 0
b7605000-b7e07000 rw-p 00000000 00:00 0 [stack:6594]
...
sploitfun@sploitfun-VirtualBox:~/ptmalloc.ppt/mthread$
sploitfun@sploitfun-VirtualBox:~/ptmalloc.ppt/mthread$ ./mthread
Welcome to per thread arena example::6501
Before malloc in main thread
After malloc and before free in main thread
After free in main thread
Before malloc in thread 1
After malloc and before free in thread 1
...
sploitfun@sploitfun-VirtualBox:~/ptmalloc.ppt/mthread$ cat /proc/6501/maps
08048000-08049000 r-xp 00000000 08:01 539625 /home/sploitfun/ptmalloc.ppt/mthread/mthread
08049000-0804a000 r--p 00000000 08:01 539625 /home/sploitfun/ptmalloc.ppt/mthread/mthread
0804a000-0804b000 rw-p 00001000 08:01 539625 /home/sploitfun/ptmalloc.ppt/mthread/mthread
0804b000-0806c000 rw-p 00000000 00:00 0 [heap]
b7500000-b7521000 rw-p 00000000 00:00 0
b7521000-b7600000 ---p 00000000 00:00 0
b7604000-b7605000 ---p 00000000 00:00 0
b7605000-b7e07000 rw-p 00000000 00:00 0 [stack:6594]
...
sploitfun@sploitfun-VirtualBox:~/ptmalloc.ppt/mthread$
sploitfun@sploitfun-VirtualBox:~/ptmalloc.ppt/mthread$ ./mthread
Welcome to per thread arena example::6501
Before malloc in main thread
After malloc and before free in main thread
After free in main thread
Before malloc in thread 1
After malloc and before free in thread 1
After free in thread 1
...
sploitfun@sploitfun-VirtualBox:~/ptmalloc.ppt/mthread$ cat /proc/6501/maps
08048000-08049000 r-xp 00000000 08:01 539625 /home/sploitfun/ptmalloc.ppt/mthread/mthread
08049000-0804a000 r--p 00000000 08:01 539625 /home/sploitfun/ptmalloc.ppt/mthread/mthread
0804a000-0804b000 rw-p 00001000 08:01 539625 /home/sploitfun/ptmalloc.ppt/mthread/mthread
0804b000-0806c000 rw-p 00000000 00:00 0 [heap]
b7500000-b7521000 rw-p 00000000 00:00 0
b7521000-b7600000 ---p 00000000 00:00 0
b7604000-b7605000 ---p 00000000 00:00 0
b7605000-b7e07000 rw-p 00000000 00:00 0 [stack:6594]
...
sploitfun@sploitfun-VirtualBox:~/ptmalloc.ppt/mthread$
typedef struct _heap_info {
    mstate ar_ptr;           /* Arena for this heap. */
    struct _heap_info *prev; /* Previous heap. */
    size_t size;             /* Current size in bytes. */
    size_t mprotect_size;    /* Size in bytes that has been mprotected
                                PROT_READ|PROT_WRITE. */
    /* Make sure the following data is properly aligned, particularly
       that sizeof (heap_info) + 2 * SIZE_SZ is a multiple of
       MALLOC_ALIGNMENT. */
    char pad[-6 * SIZE_SZ & MALLOC_ALIGN_MASK];
} heap_info;
struct malloc_state {
    /* Serialize access. */
    mutex_t mutex;
    /* Flags (formerly in max_fast). */
    int flags;
    /* Fastbins */
    mfastbinptr fastbinsY[NFASTBINS];
    /* Base of the topmost chunk -- not otherwise kept in a bin */
    mchunkptr top;
    /* The remainder from the most recent split of a small request */
    mchunkptr last_remainder;
    /* Normal bins packed as described above */
    mchunkptr bins[NBINS * 2 - 2];
    /* Bitmap of bins */
    unsigned int binmap[BINMAPSIZE];
    /* Linked list */
    struct malloc_state *next;
    /* Linked list for free arenas. */
    struct malloc_state *next_free;
    /* Memory allocated from the system in this arena. */
    INTERNAL_SIZE_T system_mem;
    INTERNAL_SIZE_T max_system_mem;
};
struct malloc_chunk {
    /* #define INTERNAL_SIZE_T size_t */
    INTERNAL_SIZE_T prev_size;        /* Size of previous chunk (if free). */
    INTERNAL_SIZE_T size;             /* Size in bytes, including overhead. */
    struct malloc_chunk* fd;          /* double links -- used only if free.
                                         These two pointers exist only in free chunks. */
    struct malloc_chunk* bk;
    /* Only used for large blocks: pointer to next larger size. */
    struct malloc_chunk* fd_nextsize; /* double links -- used only if free. */
    struct malloc_chunk* bk_nextsize;
};
/*
  This struct declaration is misleading (but accurate and necessary).
  It declares a "view" into memory allowing access to necessary
  fields at known offsets from a given base. See explanation below.
*/
struct malloc_chunk {

  INTERNAL_SIZE_T      prev_size;   /* Size of previous chunk (if free). */
  INTERNAL_SIZE_T      size;        /* Size in bytes, including overhead. */

  struct malloc_chunk* fd;          /* double links -- used only if free. */
  struct malloc_chunk* bk;

  /* Only used for large blocks: pointer to next larger size. */
  struct malloc_chunk* fd_nextsize; /* double links -- used only if free. */
  struct malloc_chunk* bk_nextsize;
};
An allocated, in-use chunk has the layout shown in the following two diagrams (the A bit in figure one and the N bit in figure two have the same meaning; only the notation differs):
    chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
            |             Size of previous chunk, if unallocated (P clear)  |
            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
            |             Size of chunk, in bytes                     |A|M|P|
      mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
            |             User data starts here...                          .
            .                                                               .
            .             (malloc_usable_size() bytes)                      .
            .                                                               |
nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
            |             (size of chunk, but used for application data)    |
            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
            |             Size of next chunk, in bytes                |A|0|1|
            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
free_chunk
A freed chunk is recorded on a linked list (possibly a circular doubly linked list, possibly a singly linked list). Its layout is as follows:
    chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
            |             Size of previous chunk, if unallocated (P clear)  |
            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   `head:'  |             Size of chunk, in bytes                     |A|0|P|
      mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
            |             Forward pointer to next chunk in list             |
            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
            |             Back pointer to previous chunk in list            |
            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
            |             Unused space (may be 0 bytes long)                .
            .                                                               .
            .                                                               |
nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   `foot:'  |             Size of chunk, in bytes                           |
            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
            |             Size of next chunk, in bytes                |A|0|0|
            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
/* The three exceptions to all this are: 1. The special chunk `top' doesn't bother using the trailing size field since there is no next contiguous chunk that would have to index off it. After initialization, `top' is forced to always exist. If it would become less than MINSIZE bytes long, it is replenished. 2. Chunks allocated via mmap, which have the second-lowest-order bit M (IS_MMAPPED) set in their size fields. Because they are allocated one-by-one, each must contain its own trailing size field. If the M bit is set, the other bits are ignored (because mmapped chunks are neither in an arena, nor adjacent to a freed chunk). The M bit is also used for chunks which originally came from a dumped heap via malloc_set_state in hooks.c. 3. Chunks in fastbins are treated as allocated chunks from the point of view of the chunk allocator. They are consolidated with their neighbors only in bulk, in malloc_consolidate. */
/* Top The top-most available chunk (i.e., the one bordering the end of available memory) is treated specially. It is never included in any bin, is used only if no other chunk is available, and is released back to the system if it is very large (see M_TRIM_THRESHOLD). Because top initially points to its own bin with initial zero size, thus forcing extension on the first malloc request, we avoid having any special code in malloc to check whether it even exists yet. But we still need to do so when getting memory from system, so we make initial_top treat the bin as a legal but unusable chunk during the interval between initialization and the first call to sysmalloc. (This is somewhat delicate, since it relies on the 2 preceding words to be zero during this interval as well.) */
/* Conveniently, the unsorted bin can be used as dummy top on first call */ #define initial_top(M) (unsorted_chunks(M))
The first time a program calls malloc, the heap is split into two parts: one part is handed to the user, and the rest becomes the top chunk. The so-called top chunk is simply the chunk at the highest physical address of the current heap. It does not belong to any bin; its role is that when none of the bins can satisfy the requested size, and the top chunk is at least as large as the request, the top chunk is split into two parts: the chunk returned to the user, and the remainder (which becomes the new top chunk). Otherwise the heap is extended before allocating: in the main arena the heap is extended with sbrk, while in a thread arena a new heap is allocated with mmap.
/* Check if a request is so large that it would wrap around zero when padded and aligned. To simplify some other code, the bound is made low enough so that adding MINSIZE will also not wrap around zero. */
/* size field is or'ed with NON_MAIN_ARENA if the chunk was obtained from a non-main arena. This is only set immediately before handing the chunk to the user, if necessary. */ #define NON_MAIN_ARENA 0x4
/* Check for chunk from main arena. */ #define chunk_main_arena(p) (((p)->mchunk_size & NON_MAIN_ARENA) == 0)
/* Mark a chunk as not being on the main arena. */ #define set_non_main_arena(p) ((p)->mchunk_size |= NON_MAIN_ARENA)
/* Bits to mask off when extracting size Note: IS_MMAPPED is intentionally not masked off from size field in macros for which mmapped chunks should never be seen. This should cause helpful core dumps to occur if it is tried by accident by people extending or adapting this malloc. */ #define SIZE_BITS (PREV_INUSE | IS_MMAPPED | NON_MAIN_ARENA)
Getting the chunk size
/* Get size, ignoring use bits */ #define chunksize(p) (chunksize_nomask(p) & ~(SIZE_BITS))
/* Like chunksize, but do not mask SIZE_BITS. */ #define chunksize_nomask(p) ((p)->mchunk_size)
Getting the next physically adjacent chunk
/* Ptr to next physical malloc_chunk. */ #define next_chunk(p) ((mchunkptr)(((char *) (p)) + chunksize(p)))
Getting information about the previous chunk
/* Size of the chunk below P. Only valid if !prev_inuse (P). */ #define prev_size(p) ((p)->mchunk_prev_size)
/* Set the size of the chunk below P. Only valid if !prev_inuse (P). */ #define set_prev_size(p, sz) ((p)->mchunk_prev_size = (sz))
/* Ptr to previous physical malloc_chunk. Only valid if !prev_inuse (P). */ #define prev_chunk(p) ((mchunkptr)(((char *) (p)) - prev_size(p)))
/* Set size at head, without disturbing its use bit */ // SIZE_BITS = 7 #define set_head_size(p, s) \ ((p)->mchunk_size = (((p)->mchunk_size & SIZE_BITS) | (s)))
/* Set size/use field */ #define set_head(p, s) ((p)->mchunk_size = (s))
/* Set size at footer (only when chunk is not in use) */ #define set_foot(p, s) \ (((mchunkptr)((char *) (p) + (s)))->mchunk_prev_size = (s))
Getting the chunk at a given offset
/* Treat space at ptr + offset as a chunk */ #define chunk_at_offset(p, s) ((mchunkptr)(((char *) (p)) + (s)))
Operations on the in-use state of the chunk at a given offset
/* check/set/clear inuse bits in known places */ #define inuse_bit_at_offset(p, s) \ (((mchunkptr)(((char *) (p)) + (s)))->mchunk_size & PREV_INUSE)
/*
   Fastbins

   An array of lists holding recently freed small chunks. Fastbins are not
   doubly linked. It is faster to single-link them, and since chunks are
   never removed from the middles of these lists, double linking is not
   necessary. Also, unlike regular bins, they are not even processed in
   FIFO order (they use faster LIFO) since ordering doesn't much matter in
   the transient contexts in which fastbins are normally used.

   Chunks in fastbins keep their inuse bit set, so they cannot be
   consolidated with other free chunks. malloc_consolidate releases all
   chunks in fastbins and consolidates them with other free chunks.
 */
typedef struct malloc_chunk *mfastbinptr;
/* This is in malloc_state. /* Fastbins */ mfastbinptr fastbinsY[ NFASTBINS ]; */
To use the fast bins more efficiently, glibc organizes each of these bins as a singly linked list (only the fd pointer is used), and each bin follows a LIFO policy: the most recently freed chunk is handed out first, which is better for locality. In other words, when the size the user needs is no larger than the fast bin maximum, ptmalloc first checks whether the corresponding fast bin contains a free chunk of that size; if it does, the chunk is taken directly from that bin. Only if it does not will ptmalloc carry out the subsequent steps.
Whether a fast chunk is being added or removed, the operation always happens at the same end of the list (the end the fastbin array entry points to); a fast chunk in the middle of a list is never manipulated.
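A small sketch of the LIFO behaviour described above, assuming a glibc whose fast bins are hit directly (e.g. 2.23; on 2.26 and later a tcache sits in front of them, so the addresses observed differ):

#include <stdio.h>
#include <stdlib.h>

int main(void) {
    /* Two fast-bin sized requests (0x20-byte chunks on 64-bit). */
    void *a = malloc(0x18);
    void *b = malloc(0x18);
    printf("a=%p b=%p\n", a, b);

    free(a);                 /* fastbin[0x20]: a */
    free(b);                 /* fastbin[0x20]: b -> a  (b is now at the front) */

    void *c = malloc(0x18);  /* LIFO: expected to return b */
    void *d = malloc(0x18);  /* then a */
    printf("c=%p d=%p\n", c, d);
    return 0;
}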
/* The maximum fastbin request size we support */ #define MAX_FAST_SIZE (80 * SIZE_SZ / 4)
/* Since the lowest 2 bits in max_fast don't matter in size comparisons, they are used as flags. */
/* FASTCHUNKS_BIT held in max_fast indicates that there are probably some fastbin chunks. It is set true on entering a chunk into any fastbin, and cleared only in malloc_consolidate. The truth value is inverted so that have_fastchunks will be true upon startup (since statics are zero-filled), simplifying initialization checks. */ // Tells whether the arena contains fast bin chunks; 1 means there are none. #define FASTCHUNKS_BIT (1U)
/* NONCONTIGUOUS_BIT indicates that MORECORE does not return contiguous regions. Otherwise, contiguity is exploited in merging together, when possible, results from consecutive MORECORE calls. The initial value comes from MORECORE_CONTIGUOUS, but is changed dynamically if mmap is ever used as an sbrk substitute. */
// Whether MORECORE returns contiguous memory regions.
// In the main arena MORECORE is actually sbrk(), which by default returns contiguous virtual address space.
// A non-main arena uses mmap() to allocate a large block of virtual memory and then carves it up to mimic the main arena.
// Since mmap'd regions are by default not guaranteed to be contiguous in virtual address space, non-main arenas allocate non-contiguous virtual address space by default.
#define NONCONTIGUOUS_BIT (2U)
/* ARENA_CORRUPTION_BIT is set if a memory corruption was detected on the arena. Such an arena is no longer used to allocate chunks. Chunks allocated in that arena before detecting corruption are not freed. */
/* Set value of max_fast. Use impossibly small value if 0. Precondition: there are no existing fastbin chunks. Setting the value clears fastchunk bit but preserves noncontiguous bit. */
Consider a malloc request whose size falls in the fast chunk range (note: in the author's rule of thumb, the requested size plus 16 bytes gives the actual chunk size). At initialization time the maximum size served by the fast bins, as well as all the fast bin lists, are empty, so the very first malloc call will not be handled by the fast bins even if the requested size falls in the fast chunk range (16 to 80 bytes); instead it is passed down to the small bins, and if those are empty as well, to the unsorted bin. So where do the fast bins live, and how do they get initialized?
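For the size conversion mentioned above, here is a simplified, runnable sketch of how a user request is turned into a chunk size (modelled on glibc's request2size; the real macro additionally traps requests that would wrap around zero when padded and aligned):

#include <stdio.h>
#include <stddef.h>

#define SIZE_SZ           (sizeof(size_t))
#define MALLOC_ALIGNMENT  (2 * SIZE_SZ)
#define MALLOC_ALIGN_MASK (MALLOC_ALIGNMENT - 1)
#define MINSIZE           (4 * SIZE_SZ)  /* simplified; glibc derives it from the chunk layout */

/* Simplified request2size: add room for the size field, round up to the
   malloc alignment, and never go below MINSIZE. */
static size_t request2size(size_t req) {
    size_t sz = req + SIZE_SZ + MALLOC_ALIGN_MASK;
    return (sz < MINSIZE) ? MINSIZE : (sz & ~MALLOC_ALIGN_MASK);
}

int main(void) {
    size_t reqs[] = {0, 8, 24, 25, 64, 80};
    for (size_t i = 0; i < sizeof(reqs) / sizeof(reqs[0]); i++)
        printf("request %3zu -> chunk size %3zu\n", reqs[i], request2size(reqs[i]));
    return 0;
}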
The first time we call malloc, _int_malloc runs; it first finds that the fast bins are empty and falls through to the small bins, then finds those empty as well and calls malloc_consolidate to initialize the malloc_state structure. malloc_consolidate mainly does the following:
It first checks whether the fast bins of the current malloc_state are empty; if so, the whole malloc_state (arena) has not been initialized yet and needs to be initialized.
The initialization of malloc_state is done by malloc_init_state(av), which first initializes all bins except the fast bins (building the doubly linked lists; see the small bins section below), and then initializes the fast bins.
The next time malloc runs, the fast bin data is no longer empty and the fast bins are used. The relevant code is shown below:
static void *_int_malloc (mstate av, size_t bytes) {
    // …
    /*
       If the size qualifies as a fastbin, first check corresponding bin.
       This code is safe to execute even if av is not yet initialized, so we
       can try it without checking, which saves some time on this fast path.
     */
/* FASTBIN_CONSOLIDATION_THRESHOLD is the size of a chunk in free() that triggers automatic consolidation of possibly-surrounding fastbin chunks. This is a heuristic, so the exact value should not matter too much. It is defined at half the default trim threshold as a compromise heuristic to only attempt consolidation if it is likely to lead to trimming. However, it is not dynamically tunable, since consolidation reduces fragmentation surrounding large chunks even if trimming is not used. */
/* Chunks in fastbins keep their inuse bit set, so they cannot be consolidated with other free chunks. malloc_consolidate releases all chunks in fastbins and consolidates them with other free chunks. */
Small Bin
In the small bins, the chunk size served by a bin is related to that bin's index by chunk_size = 2 * SIZE_SZ * index. Concretely, on a 64-bit platform (SIZE_SZ = 8) bin 2 holds 32-byte chunks, bin 3 holds 48-byte chunks, and so on.
You may wonder: the chunk sizes covered by the fast bins overlap heavily with those of the small bins, so are the small bins of those sizes useless? Not at all: chunks from the fast bins can end up being moved into the small bins, as we will see clearly when we analyse the source code later.
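A quick sanity check of the chunk_size = 2 * SIZE_SZ * index relation, hard-coded here for a 64-bit platform (SIZE_SZ = 8), where it matches glibc's smallbin_index computed as size >> 4:

#include <stdio.h>

#define SIZE_SZ 8  /* 64-bit assumption */

/* Same relation as glibc's smallbin_index on 64-bit: index = size >> 4. */
static unsigned smallbin_index(unsigned size) { return size >> 4; }

int main(void) {
    /* Small bins start at index 2 (chunk size 32). */
    for (unsigned idx = 2; idx <= 5; idx++)
        printf("bin %u holds chunks of size %u (= 2*SIZE_SZ*%u)\n",
               idx, 2 * SIZE_SZ * idx, idx);
    printf("smallbin_index(0x40) = %u\n", smallbin_index(0x40));
    return 0;
}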
Large Bin
There are 63 large bins in total. The chunks inside a single large bin are not all the same size; each bin covers a size range. These 63 bins are divided into 6 groups, and within each group the size step between consecutive bins is constant:

Group | Number of bins | Size step
  1   |      32        | 64 B
  2   |      16        | 512 B
  3   |       8        | 4096 B
  4   |       4        | 32768 B
  5   |       2        | 262144 B
  6   |       1        | unlimited
Take the large bins on a 32-bit platform as an example: the first large bin starts at a chunk size of 512 bytes and belongs to the first group, so it holds chunks whose size lies in the range [512, 512+64).
The large bin macros are given below. Again using the first large bin on a 32-bit platform as the example, its starting chunk size is 512 bytes, so 512 >> 6 = 8 and its index is 56 + 8 = 64.
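For reference, the computation just described can be checked with a runnable transcription of the 32-bit largebin_index macro (the constants are the ones from the glibc sources this article follows; only the function wrapper and the example values are ours):

#include <stdio.h>

/* Transcription of glibc's largebin_index_32: each arm corresponds to one of
   the six groups in the table above (the shift is log2 of the group's step). */
static unsigned largebin_index_32(unsigned long sz) {
    return ((sz >> 6)  <= 38) ?  56 + (unsigned)(sz >> 6)  :
           ((sz >> 9)  <= 20) ?  91 + (unsigned)(sz >> 9)  :
           ((sz >> 12) <= 10) ? 110 + (unsigned)(sz >> 12) :
           ((sz >> 15) <=  4) ? 119 + (unsigned)(sz >> 15) :
           ((sz >> 18) <=  2) ? 124 + (unsigned)(sz >> 18) :
                                126;
}

int main(void) {
    /* 512 >> 6 = 8, so the first large bin on 32-bit has index 56 + 8 = 64. */
    printf("largebin_index_32(512)  = %u\n", largebin_index_32(512));
    printf("largebin_index_32(576)  = %u\n", largebin_index_32(576));  /* next bin */
    printf("largebin_index_32(2560) = %u\n", largebin_index_32(2560));
    return 0;
}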
/* Unsorted chunks All remainders from chunk splits, as well as all returned chunks, are first placed in the "unsorted" bin. They are then placed in regular bins after malloc gives them ONE chance to be used before binning. So, basically, the unsorted_chunks list acts as a queue, with chunks being placed on it in free (and malloc_consolidate), and taken off (to be either used or placed in bins) in malloc. The NON_MAIN_ARENA flag is never set for unsorted chunks, so it does not have to be taken into account in size comparisons. */
From the macro below we can see that
/* The otherwise unindexable 1-bin is used to hold unsorted chunks. */ #define unsorted_chunks(M) (bin_at(M, 1))
the unsorted bin sits at index 1 of the bin array mentioned earlier, so there is only one unsorted bin list. The free chunks inside it are kept in no particular order, and they come mainly from two sources:
When a larger chunk is split in two, if the leftover part is larger than MINSIZE, it is put into the unsorted bin.
When a chunk that does not belong to a fast bin is freed and it is not adjacent to the top chunk, the chunk is first put into the unsorted bin. For the top chunk, see the description above.
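A minimal sketch of the second source, assuming a glibc without tcache (e.g. 2.23): freeing a non-fast-bin chunk that does not border the top chunk parks it in the unsorted bin, from which a following request of the same size takes it back:

#include <stdio.h>
#include <stdlib.h>

int main(void) {
    void *a = malloc(0x100);    /* too big for a fast bin */
    void *guard = malloc(0x20); /* keeps a from bordering the top chunk */
    (void)guard;

    free(a);                    /* a goes into the unsorted bin */

    void *b = malloc(0x100);    /* expected to come back out of the unsorted bin: b == a */
    printf("a=%p b=%p\n", a, b);
    return 0;
}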
/* Take a chunk off a bin list */
// unlink P
#define unlink(AV, P, BK, FD) {
    // P is already on a doubly linked list, so its size is recorded in two
    // places; check that the two records agree.
    if (__builtin_expect (chunksize(P) != prev_size (next_chunk(P)), 0))      \
      malloc_printerr ("corrupted size vs. prev_size");                       \
    FD = P->fd;                                                               \
    BK = P->bk;                                                               \
    // Prevent an attacker from getting an arbitrary write simply by
    // tampering with the fd and bk of a free chunk.
    if (__builtin_expect (FD->bk != P || BK->fd != P, 0))                     \
      malloc_printerr (check_action, "corrupted double-linked list", P, AV);  \
    else {                                                                    \
        FD->bk = BK;                                                          \
        BK->fd = FD;                                                          \
        // The code below handles the updates to the nextsize doubly linked
        // list that P may be on.
        if (!in_smallbin_range (chunksize_nomask (P))                         \
            // If P->fd_nextsize is NULL, P was never inserted into the nextsize
            // list, so there is no need to touch the nextsize fields at all.
            // Note that bk_nextsize is not checked here, which could be a problem.
            && __builtin_expect (P->fd_nextsize != NULL, 0)) {                \
            // The same kind of check as for the small-chunk case.
            if (__builtin_expect (P->fd_nextsize->bk_nextsize != P, 0)        \
                || __builtin_expect (P->bk_nextsize->fd_nextsize != P, 0))    \
              malloc_printerr (check_action,                                  \
                               "corrupted double-linked list (not small)",    \
                               P, AV);                                        \
            // At this point P is known to be on the nextsize list.
            // If FD is not on the nextsize list:
            if (FD->fd_nextsize == NULL) {                                    \
                // If P is the only chunk on the nextsize list, just take P out
                // and make FD the sole element of the nextsize list.
                if (P->fd_nextsize == P)                                      \
                  FD->fd_nextsize = FD->bk_nextsize = FD;                     \
                else {                                                        \
                    // Otherwise insert FD into the nextsize doubly linked list
                    // in P's place.
                    FD->fd_nextsize = P->fd_nextsize;                         \
                    FD->bk_nextsize = P->bk_nextsize;                         \
                    P->fd_nextsize->bk_nextsize = FD;                         \
                    P->bk_nextsize->fd_nextsize = FD;                         \
                }                                                             \
            } else {                                                          \
                // If FD is already on the nextsize list, just take P off it.
                P->fd_nextsize->bk_nextsize = P->bk_nextsize;                 \
                P->bk_nextsize->fd_nextsize = P->fd_nextsize;                 \
            }                                                                 \
        }                                                                     \
    }                                                                         \
}
Here we take the small bin case of unlink as the example; unlink on a large bin is similar, with the extra nextsize handling.
Note that P's own fd and bk pointers are left unchanged, yet when we walk the doubly linked list we can no longer reach P. The fact that they are unchanged is actually very useful, because it sometimes lets us leak addresses (see the sketch after the note below):
libc addresses
If P is at the head of the doubly linked list, bk leaks a libc address.
If P is at the tail of the doubly linked list, fd leaks a libc address.
If the doubly linked list contains only one free chunk, P is both head and tail, and both fd and bk leak libc addresses.
Leaking heap addresses, when the doubly linked list contains several free chunks
If P is at the head of the doubly linked list, fd leaks a heap address.
If P is in the middle of the doubly linked list, both fd and bk leak heap addresses.
If P is at the tail of the doubly linked list, bk leaks a heap address.
Note
Here "head" means the chunk pointed to by the bin's fd, i.e. the chunk most recently added to the doubly linked list.
Here "tail" means the chunk pointed to by the bin's bk, i.e. the chunk that was added to the doubly linked list first.
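As a concrete illustration of the libc-address case: when a freed chunk is the only entry of the unsorted bin, both its fd and bk point back into main_arena, which lives inside libc. A minimal sketch, assuming a glibc without tcache (e.g. 2.23) and deliberately reading freed memory:

#include <stdio.h>
#include <stdlib.h>

int main(void) {
    size_t *a = malloc(0x100);  /* non-fast-bin size */
    void *guard = malloc(0x20); /* prevent consolidation with the top chunk */
    (void)guard;

    free(a);                    /* a is now the only chunk in the unsorted bin */

    /* Reading the freed chunk (a use-after-free) shows fd and bk both pointing
       at the unsorted bin "fake chunk" inside main_arena, i.e. libc addresses. */
    printf("fd = %p, bk = %p\n", (void *)a[0], (void *)a[1]);
    return 0;
}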
At the same time, for fd and bk as well as for fd_nextsize and bk_nextsize, the program checks that the pointers satisfy the corresponding requirements:
// fd bk if (__builtin_expect (FD->bk != P || BK->fd != P, 0)) \ malloc_printerr (check_action, "corrupted double-linked list", P, AV); \
// next_size related if (__builtin_expect (P->fd_nextsize->bk_nextsize != P, 0) \ || __builtin_expect (P->bk_nextsize->fd_nextsize != P, 0)) \ malloc_printerr (check_action, \ "corrupted double-linked list (not small)", \ P, AV);
This looks perfectly natural: taking fd and bk as the example, the bk of P's forward chunk is of course P, and likewise the fd of P's backward chunk is of course P. If these checks were not performed, we could modify P's fd and bk and easily achieve a write to an arbitrary address. For a more detailed example, see the unlink section of the exploitation part.
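To see concretely what the checks prevent, here is a small self-contained sketch (not glibc code; fake_chunk and fake_unlink_nocheck are made-up stand-ins for malloc_chunk and the unlink macro) of the raw pointer writes unlink performs when the checks are absent:

#include <stdio.h>

struct fake_chunk { struct fake_chunk *fd, *bk; };

/* The core pointer writes performed by unlink, without any integrity check. */
static void fake_unlink_nocheck(struct fake_chunk *p) {
    struct fake_chunk *fd = p->fd, *bk = p->bk;
    fd->bk = bk;   /* writes bk into the memory fd points at */
    bk->fd = fd;   /* writes fd into the memory bk points at */
}

int main(void) {
    /* Pretend an attacker controls the fd/bk of a "free" chunk P and points
       them at a target array. Unlinking P then plants attacker-chosen
       pointers inside that target. */
    void *target[4] = {0};
    struct fake_chunk p;
    p.fd = (struct fake_chunk *)&target[0];  /* fd->bk lands in target[1] */
    p.bk = (struct fake_chunk *)&target[0];  /* bk->fd lands in target[0] */
    fake_unlink_nocheck(&p);
    printf("target[0]=%p target[1]=%p\n", target[0], target[1]);
    /* glibc's check FD->bk == P && BK->fd == P rejects exactly this case,
       because &target[0] does not point back to P. */
    return 0;
}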
if ((action & do_abort)) { if ((action & do_backtrace)) BEFORE_ABORT(do_abort, written, fd);
/* Kill the application. */ abort(); }
Inside the abort function, back when glibc was at version 2.23, all streams are flushed with fflush:
/* Flush all streams. We cannot close them now because the user might have registered a handler for SIGABRT. */ if (stage == 1) { ++stage; fflush (NULL); }
/* Retry with another arena only if we were able to find a usable arena before. */ if (!victim && ar_ptr != NULL) { LIBC_PROBE(memory_malloc_retry, 1, bytes); ar_ptr = arena_get_retry(ar_ptr, bytes); victim = _int_malloc(ar_ptr, bytes); }
If an arena was obtained, it still has to be unlocked before returning.
if (ar_ptr != NULL) __libc_lock_unlock(ar_ptr->mutex);
static void *_int_malloc(mstate av, size_t bytes) { INTERNAL_SIZE_T nb; /* normalized request size */ unsigned int idx; /* associated bin index */ mbinptr bin; /* associated bin */
mchunkptr victim; /* inspected/selected chunk */ INTERNAL_SIZE_T size; /* its size */ int victim_index; /* its bin index */
mchunkptr remainder; /* remainder from a split */ unsigned long remainder_size; /* its size */
unsigned int block; /* bit map traverser */ unsigned int bit; /* bit map traverser */ unsigned int map; /* current word of binmap */
mchunkptr fwd; /* misc temp for linking */ mchunkptr bck; /* misc temp for linking */
const char *errstr = NULL;
/* Convert request size to internal form by adding SIZE_SZ bytes overhead plus possibly more to obtain necessary alignment and/or to obtain a size of at least MINSIZE, the smallest allocatable size. Also, checked_request2size traps (returning 0) request sizes that are so large that they wrap around zero when padded and aligned. */
checked_request2size(bytes, nb);
arena
/* There are no usable arenas. Fall back to sysmalloc to get a chunk from mmap. */ if (__glibc_unlikely(av == NULL)) { void *p = sysmalloc(nb, av); if (p != NULL) alloc_perturb(p, bytes); return p; }
/* If the size qualifies as a fastbin, first check corresponding bin. This code is safe to execute even if av is not yet initialized, so we can try it without checking, which saves some time on this fast path. */
/* If a small request, check regular bin. Since these "smallbins" hold one size each, no searching within bins is necessary. (For a large request, we need to wait until unsorted chunks are processed to find best fit. But for small ones, fits are exact anyway, so we can check now, which is faster.) */
if (in_smallbin_range(nb)) {
    // Get the small bin index.
    idx = smallbin_index(nb);
    // Get the pointer to the corresponding small bin.
    bin = bin_at(av, idx);
    // First execute victim = last(bin) to fetch the last chunk of the small bin.
    // If victim == bin, the bin is empty.
    // If they differ, there are two possible cases.
    if ((victim = last(bin)) != bin) {
        // Case 1: the small bins have not been initialized yet.
        if (victim == 0) /* initialization check */
            // Perform initialization, consolidating the chunks in the fast bins.
            malloc_consolidate(av);
        // Case 2: the small bin contains a free chunk.
        else {
            // Get the second-to-last chunk of the small bin.
            bck = victim->bk;
            // Check that bck->fd is victim, to guard against forgery.
            if (__glibc_unlikely(bck->fd != victim)) {
                errstr = "malloc(): smallbin double linked list corrupted";
                goto errout;
            }
            // Set the inuse bit for victim.
            set_inuse_bit_at_offset(victim, nb);
            // Fix up the small bin list, taking the last chunk out of the bin.
            bin->bk = bck;
            bck->fd = bin;
            // If this is not the main_arena, set the corresponding flag.
            if (av != &main_arena) set_non_main_arena(victim);
            // Thorough check; a no-op when not debugging.
            check_malloced_chunk(av, victim, nb);
            // Convert the allocated chunk to its mem form.
            void *p = chunk2mem(victim);
            // If perturb_type is set, initialize the chunk to perturb_type ^ 0xff.
            alloc_perturb(p, bytes);
            return p;
        }
    }
}
When neither the fast bins nor the small bins can satisfy the requested chunk size, the large bins come into consideration. However, the allocator does not immediately scan the chunks of the corresponding large bin. Instead it first uses malloc_consolidate (see the malloc_state related functions) to process the chunks in the fast bins: chunks that can be merged are merged and placed into the unsorted bin, those that cannot be merged go into the unsorted bin as they are, and everything is then handled in the big loop below. Why not simply take a large chunk out of the corresponding bin? This is how ptmalloc works: before allocating a large chunk it consolidates the fragmented chunks in the heap, in order to reduce heap fragmentation.
/* If this is a large request, consolidate fastbins before continuing. While it might look excessive to kill all fastbins before even seeing if there is space available, this avoids fragmentation problems normally associated with fastbins. Also, in practice, programs tend to have runs of either small or large requests, but less often mixtures, so consolidation is not invoked all that often in most programs. And the programs that it is called frequently in otherwise tend to fragment. */
If execution reaches this point, no bin holding exactly the requested chunk size (fast bin, small bin) could satisfy the request directly; large chunks, however, are handled inside this big loop.
In the loop that follows, the allocator mainly does the following:
It takes the chunks in the unsorted bin out one by one, in FIFO order.
If the request is a small request and a chunk fits exactly, that chunk is returned immediately.
Otherwise, the chunk is placed into its proper bin.
It then tries to allocate the requested memory from the large bins.
This whole part is one big loop whose purpose is to retry the allocation of a small bin chunk: although the large bins and the top chunk are tried first to satisfy the request, if that fails, then, because the earlier small bin attempt did not succeed and the fast bin chunks were never consolidated at that point, the fast bins are consolidated here and the big loop retries the small bin allocation.
/* Process recently freed or remaindered chunks, taking one only if it is exact fit, or, if this a small request, the chunk is remainder from the most recent non-exact fit. Place other traversed chunks in bins. Note that this step is the only place in any routine where chunks are placed in bins. The outer loop here is needed because we might not realize until near the end of malloc that we should have consolidated, so must do so and retry. This happens at most once, and only when we would otherwise need to expand memory to service a "small" request. */
for (;;) { int iters = 0;
Traversing the unsorted bin
The unsorted bin is considered first, then the last remainder, but requests for small bin chunks are treated as an exception.
Note that the unsorted bin is traversed via bk, i.e. from the tail.
// If the unsorted bin is not empty
// First In First Out
while ((victim = unsorted_chunks(av)->bk) != unsorted_chunks(av)) {
    // victim is the last chunk of the unsorted bin
    // bck is the second-to-last chunk of the unsorted bin
    bck = victim->bk;
    // Check that the chunk is acceptable: neither too small nor too large
    // (system_mem is generally 132K)
    if (__builtin_expect(chunksize_nomask(victim) <= 2 * SIZE_SZ, 0) ||
        __builtin_expect(chunksize_nomask(victim) > av->system_mem, 0))
        malloc_printerr(check_action, "malloc(): memory corruption",
                        chunk2mem(victim), av);
    // Get victim's chunk size.
    size = chunksize(victim);
SMALL REQUEST
If the user requests a small bin chunk, we first consider the last remainder: it is used if it is the only chunk in the unsorted bin and it is large enough that, after splitting off the request, the rest can still form a chunk of its own. Why is the comparison strict, with no equals sign?
/* If a small request, try to use last remainder if it is the only chunk in unsorted bin. This helps promote locality for runs of consecutive small requests. This is the only exception to best-fit, and applies only when there is no exact fit for a small chunk. */ //是否是smallbin范围;bck是否是链首;remainder是(分配完)剩余部分 if (in_smallbin_range(nb) && bck == unsorted_chunks(av) && victim == av->last_remainder && (unsignedlong) (size) > (unsignedlong) (nb + MINSIZE)) { /* split and reattach remainder */ // 获取新的 remainder 的大小 remainder_size = size - nb; // 获取新的 remainder 的位置 remainder = chunk_at_offset(victim, nb); // 更新 unsorted bin 的情况 // av是被取出chunk的下一个chunk(fd) unsorted_chunks(av)->bk = unsorted_chunks(av)->fd = remainder; // 更新 av 中记录的 last_remainder av->last_remainder = remainder; // 更新last remainder的指针 remainder->bk = remainder->fd = unsorted_chunks(av); if (!in_smallbin_range(remainder_size)) { remainder->fd_nextsize = NULL; remainder->bk_nextsize = NULL; } // 设置victim的头部,inuse set_head(victim, nb | PREV_INUSE | (av != &main_arena ? NON_MAIN_ARENA : 0)); // 设置 remainder 的头部 set_head(remainder, remainder_size | PREV_INUSE); // 设置记录 remainder 大小的 prev_size 字段,因为此时 remainder 处于空闲状态。 set_foot(remainder, remainder_size); // 细致的检查,非调试状态下没有作用 check_malloced_chunk(av, victim, nb); // 将 victim 从 chunk 模式转化为mem模式 void *p = chunk2mem(victim); // 如果设置了perturb_type, 则将获取到的chunk初始化为 perturb_type ^ 0xff alloc_perturb(p, bytes); return p; }
Removing the chunk from the unsorted bin
/* remove from unsorted list */
// Fix up the unsorted bin list.
unsorted_chunks(av)->bk = bck;
bck->fd = unsorted_chunks(av);
The chunk taken out fits exactly (EXACT FIT)
If the chunk taken from the unsorted bin is exactly the right size, it is used directly. This is also where a chunk that became an exact fit after consolidation gets handed out.
/* Take now instead of binning if exact fit */
if (size == nb) { // the size fits exactly
    set_inuse_bit_at_offset(victim, size);
    // If this is not the main_arena, set the corresponding flag.
    if (av != &main_arena) set_non_main_arena(victim);
    // Thorough check; a no-op when not debugging.
    check_malloced_chunk(av, victim, nb);
    // Convert victim from chunk form to mem form.
    void *p = chunk2mem(victim);
    // If perturb_type is set, initialize the chunk to perturb_type ^ 0xff.
    alloc_perturb(p, bytes);
    // Return the chunk pointer directly.
    return p;
}
Placing the removed chunk into a small bin (PLACE CHUNK IN SMALL BIN)
The chunk that was taken out is placed into the corresponding small bin.
/* place chunk in bin */
// Check whether size falls in the small bin range.
if (in_smallbin_range(size)) {
    // Get the small bin index.
    victim_index = smallbin_index(size);
    // Adjust the small bin list.
    bck = bin_at(av, victim_index);
    fwd = bck->fd;
// #define MAX_ITERS 10000 if (++iters >= MAX_ITERS) break; }
large chunk
Note: it may seem odd that the code does not first check whether a small chunk can now satisfy the request. The reason is that the small bins were already checked before the loop; if a suitable small chunk exists at this point, it can only be one that appeared after consolidation. For large chunks, on the other hand, only the bin index was computed outside the big loop, so checking the large bins directly here is reasonable, and it also lets the code below go on looking for an even larger chunk.
If the requested chunk falls in the large chunk range, the corresponding bin is scanned from smallest to largest to find the first chunk that fits.
/* If a large request, scan through the chunks of current bin in sorted order to find smallest that fits. Use the skip list for this. */ //判断是否smallbin if (!in_smallbin_range(nb)) { bin = bin_at(av, idx); /* skip scan if empty or largest chunk is too small */ // 如果对应的 bin 为空或者其中的chunk最大的也很小,那就跳过 // first(bin)=bin->fd 表示当前链表中最大的chunk if ((victim = first(bin)) != bin && (unsignedlong) chunksize_nomask(victim) >= (unsignedlong) (nb)) { // 反向遍历链表,直到找到第一个不小于所需chunk大小的chunk victim = victim->bk_nextsize; while (((unsignedlong) (size = chunksize(victim)) < (unsignedlong) (nb))) victim = victim->bk_nextsize;
/* Avoid removing the first entry for a size so that the skip list does not have to be rerouted. */ // 如果最终取到的chunk不是该bin中的最后一个chunk,并且该chunk与其前面的chunk // 的大小相同,那么我们就取其前面的chunk,这样可以避免调整bk_nextsize,fd_nextsize // 链表。因为大小相同的chunk只有一个会被串在nextsize链上。 if (victim != last(bin) && chunksize_nomask(victim) == chunksize_nomask(victim->fd)) victim = victim->fd; // 计算分配后剩余的大小 remainder_size = size - nb; // 进行unlink(宏操作) unlink(av, victim, bck, fwd);
/* Exhaust */ // 剩下的大小不足以当做一个块 // 很好奇接下来会怎么办? if (remainder_size < MINSIZE) { set_inuse_bit_at_offset(victim, size); if (av != &main_arena) set_non_main_arena(victim); } /* Split */ // 剩下的大小还可以作为一个chunk,进行分割。 else { // 获取剩下那部分chunk的指针,称为remainder remainder = chunk_at_offset(victim, nb); /* We cannot assume the unsorted list is empty and therefore have to perform a complete insert here. */ // 插入unsorted bin中 bck = unsorted_chunks(av); fwd = bck->fd; // 判断 unsorted bin 是否被破坏。 if (__glibc_unlikely(fwd->bk != bck)) { errstr = "malloc(): corrupted unsorted chunks"; goto errout; } remainder->bk = bck; remainder->fd = fwd; bck->fd = remainder; fwd->bk = remainder; // 如果不处于small bin范围内,就设置对应的字段 if (!in_smallbin_range(remainder_size)) { remainder->fd_nextsize = NULL; remainder->bk_nextsize = NULL; } // 设置分配的chunk的标记 set_head(victim, nb | PREV_INUSE | (av != &main_arena ? NON_MAIN_ARENA : 0));
/* Search for a chunk by scanning bins, starting with next largest bin. This search is strictly by best-fit; i.e., the smallest (with ties going to approximately the least recently used) chunk that fits is selected. The bitmap avoids needing to check that most blocks are nonempty. The particular case of skipping all bins during warm-up phases when no chunks have been returned yet is faster than it might look. */
/* Skip rest of block if there are no more set bits in this block. */
// If bit > map, no bin in this map holds a free chunk larger than what we need.
// If bit is 0, the argument passed to idx2bit above was 0.
if (bit > map || bit == 0) {
    do {
        // Move on to the next block until its map is non-zero.
        // If no such block exists, only the top chunk is left to use.
        if (++block >= BINMAPSIZE) /* out of bins */
            goto use_top;
    } while ((map = av->binmap[ block ]) == 0);
    // Get the corresponding bin. Every chunk covered by this map is larger than
    // what we need, and the map itself is non-zero, so a suitable chunk must exist.
    bin = bin_at(av, (block << BINMAPSHIFT));
    bit = 1;
}
Finding a suitable bin
/* Advance to bin with set bit. There must be one. */
// Walk from the smallest bin of the current map until a suitable bin is found.
// One is guaranteed to exist here.
while ((bit & map) == 0) {
    bin = next_bin(bin);
    bit <<= 1;
    assert(bit != 0);
}
A quick check of the chunk
/* Inspect the bin. It is likely to be non-empty */
// Get the last chunk of the bin.
victim = last(bin);
/* If a false alarm (empty bin), clear the bit. */
// If victim == bin, clear the corresponding bit in the map and move on to the
// next bin. This case should be rare.
if (victim == bin) {
    av->binmap[ block ] = map &= ~bit; /* Write through */
    bin = next_bin(bin);
    bit <<= 1;
}
/* We know the first chunk in this bin is big enough to use. */
assert((unsigned long) (size) >= (unsigned long) (nb));
// Compute the size left over after the split.
remainder_size = size - nb;
/* unlink */ unlink(av, victim, bck, fwd);
/* Exhaust */
// What if the leftover after the split is not big enough for a chunk of its own?
if (remainder_size < MINSIZE) {
    set_inuse_bit_at_offset(victim, size);
    if (av != &main_arena) set_non_main_arena(victim);
}
use_top:
    /*
       If large enough, split off the chunk bordering the end of memory (held in av->top). Note that this is in accord with the best-fit search rule. In effect, av->top is treated as larger (and thus less well fitting) than any other available chunk since it can be extended to be as large as necessary (up to system limitations).

       We require that av->top always exists (i.e., has size >= MINSIZE) after initialization, so if it would otherwise be exhausted by current request, it is replenished. (The main reason for ensuring it exists is that we may need MINSIZE space to put in fenceposts in sysmalloc.)
     */
    // Get the current top chunk and compute its size.
    victim = av->top;
    size = chunksize(victim);
    // If, after the split, the top chunk is still at least the minimum chunk size,
    // the split can be performed directly.
    if ((unsigned long) (size) >= (unsigned long) (nb + MINSIZE)) {
        remainder_size = size - nb;
        remainder = chunk_at_offset(victim, nb);
        av->top = remainder;
        // PREV_INUSE is set here because any chunk in front of the top chunk that
        // is not a fast bin chunk would necessarily have been merged into the top
        // chunk, so the previous chunk must be in use.
        set_head(victim, nb | PREV_INUSE |
                 (av != &main_arena ? NON_MAIN_ARENA : 0));
        set_head(remainder, remainder_size | PREV_INUSE);
        check_malloced_chunk(av, victim, nb);
        void *p = chunk2mem(victim);
        alloc_perturb(p, bytes);
        return p;
    }
    // Otherwise, check whether there are fast chunks.
    /* When we are using atomic ops to free fast chunks we can get
       here for all block sizes. */
    else if (have_fastchunks(av)) {
        // Consolidate the fast bins once.
        malloc_consolidate(av);
        /* restore original bin index */
        // Determine whether the requested chunk falls in the small bin or the
        // large bin range and recompute the corresponding index,
        // so the next pass of the loop can try again.
        if (in_smallbin_range(nb))
            idx = smallbin_index(nb);
        else
            idx = largebin_index(nb);
    }
/* calloc(size_t n_elements, size_t element_size); Returns a pointer to n_elements * element_size bytes, with all locations set to zero. */ void* __libc_calloc(size_t, size_t);
2020.07.08: the material below has only been skimmed, not yet studied carefully.
sysmalloc
As the comment at the head of the function says, sysmalloc is used when the current heap does not have enough memory and more has to be requested from the system.
/* sysmalloc handles malloc cases requiring more memory from the system. On entry, it is assumed that av->top does not have enough space to service request for nb bytes, thus requiring that av->top be extended or replaced. */
staticvoid *sysmalloc(INTERNAL_SIZE_T nb, mstate av){ mchunkptr old_top; /* incoming value of av->top */ INTERNAL_SIZE_T old_size; /* its size */ char *old_end; /* its end address */
long size; /* arg to first MORECORE or mmap call */ char *brk; /* return value from MORECORE */
long correction; /* arg to 2nd MORECORE call */ char *snd_brk; /* 2nd return val */
INTERNAL_SIZE_T front_misalign; /* unusable bytes at front of new space */ INTERNAL_SIZE_T end_misalign; /* partial page left at end of new space */ char *aligned_brk; /* aligned offset into brk */
mchunkptr p; /* the allocated/returned chunk */ mchunkptr remainder; /* remainder frOm allocation */ unsignedlong remainder_size; /* its size */
#ifndef DEFAULT_MMAP_THRESHOLD #define DEFAULT_MMAP_THRESHOLD DEFAULT_MMAP_THRESHOLD_MIN #endif /* MMAP_THRESHOLD_MAX and _MIN are the bounds on the dynamically adjusted MMAP_THRESHOLD. */
#ifndef DEFAULT_MMAP_THRESHOLD_MAX /* For 32-bit platforms we cannot increase the maximum mmap threshold much because it is also the minimum value for the maximum heap size and its alignment. Going above 512k (i.e., 1M for new heaps) wastes too much address space. */ #if __WORDSIZE == 32 #define DEFAULT_MMAP_THRESHOLD_MAX (512 * 1024) #else #define DEFAULT_MMAP_THRESHOLD_MAX (4 * 1024 * 1024 * sizeof(long)) #endif #endif
/* If have mmap, and the request size meets the mmap threshold, and the system supports mmap, and there are few enough currently allocated mmapped regions, try to directly map this request rather than expanding top. */
if (av == NULL || ((unsignedlong)(nb) >= (unsignedlong)(mp_.mmap_threshold) && (mp_.n_mmaps < mp_.n_mmaps_max))) { char *mm; /* return value from mmap call*/
try_mmap: /* Round up size to nearest page. For mmapped chunks, the overhead is one SIZE_SZ unit larger than for normal chunks, because there is no following chunk whose prev_size field could be used. See the front_misalign handling below, for glibc there is no need for further alignments unless we have have high alignment. */ if (MALLOC_ALIGNMENT == 2 * SIZE_SZ) size = ALIGN_UP(nb + SIZE_SZ, pagesize); else size = ALIGN_UP(nb + SIZE_SZ + MALLOC_ALIGN_MASK, pagesize); tried_mmap = true;
/* Don't try if size wraps around 0 */ if ((unsignedlong)(size) > (unsignedlong)(nb)) { mm = (char *)(MMAP(0, size, PROT_READ | PROT_WRITE, 0));
if (mm != MAP_FAILED) { /* The offset to the start of the mmapped region is stored in the prev_size field of the chunk. This allows us to adjust returned start address to meet alignment requirements here and in memalign(), and still be able to compute proper address argument for later munmap in free() and realloc(). */
if (MALLOC_ALIGNMENT == 2 * SIZE_SZ) { /* For glibc, chunk2mem increases the address by 2*SIZE_SZ and MALLOC_ALIGN_MASK is 2*SIZE_SZ-1. Each mmap'ed area is page aligned and therefore definitely MALLOC_ALIGN_MASK-aligned. */ assert(((INTERNAL_SIZE_T)chunk2mem(mm) & MALLOC_ALIGN_MASK) == 0); front_misalign = 0; } else front_misalign = (INTERNAL_SIZE_T)chunk2mem(mm) & MALLOC_ALIGN_MASK; if (front_misalign > 0) { correction = MALLOC_ALIGNMENT - front_misalign; p = (mchunkptr)(mm + correction); set_prev_size(p, correction); set_head(p, (size - correction) | IS_MMAPPED); } else { p = (mchunkptr)mm; set_prev_size(p, 0); set_head(p, size | IS_MMAPPED); }
if (av != &main_arena) { heap_info *old_heap, *heap; size_t old_heap_size;
/* First try to extend the current heap. */ old_heap = heap_for_ptr(old_top); old_heap_size = old_heap->size; if ((long)(MINSIZE + nb - old_size) > 0 && grow_heap(old_heap, MINSIZE + nb - old_size) == 0) { av->system_mem += old_heap->size - old_heap_size; set_head(old_top, (((char *)old_heap + old_heap->size) - (char *)old_top) | PREV_INUSE); } elseif ((heap = new_heap(nb + (MINSIZE + sizeof(*heap)), mp_.top_pad))) { /* Use a newly allocated heap. */ heap->ar_ptr = av; heap->prev = old_heap; av->system_mem += heap->size; /* Set up the new top. */ top(av) = chunk_at_offset(heap, sizeof(*heap)); set_head(top(av), (heap->size - sizeof(*heap)) | PREV_INUSE);
/* Setup fencepost and free the old top chunk with a multiple of MALLOC_ALIGNMENT in size. */ /* The fencepost takes at least MINSIZE bytes, because it might become the top chunk again later. Note that a footer is set up, too, although the chunk is marked in use. */ old_size = (old_size - MINSIZE) & ~MALLOC_ALIGN_MASK; set_head(chunk_at_offset(old_top, old_size + 2 * SIZE_SZ), 0 | PREV_INUSE); if (old_size >= MINSIZE) { set_head(chunk_at_offset(old_top, old_size), (2 * SIZE_SZ) | PREV_INUSE); set_foot(chunk_at_offset(old_top, old_size), (2 * SIZE_SZ)); set_head(old_top, old_size | PREV_INUSE | NON_MAIN_ARENA); _int_free(av, old_top, 1); } else { set_head(old_top, (old_size + 2 * SIZE_SZ) | PREV_INUSE); set_foot(old_top, (old_size + 2 * SIZE_SZ)); } } elseif (!tried_mmap) /* We can at least try to use to mmap memory. */ goto try_mmap; }
Main_arena handling
Computing the required memory
Compute an amount of memory that can satisfy the request.
else { /* av == main_arena */
/* Request enough space for nb + pad + overhead */ size = nb + mp_.top_pad + MINSIZE;
/* If contiguous, we can subtract out existing space that we hope to combine with new space. We add it back later only if we don't actually get contiguous space. */
if (contiguous(av)) size -= old_size;
Rounding to the page size
/* Round to a multiple of page size. If MORECORE is not contiguous, this ensures that we only call it with whole-page arguments. And if MORECORE is contiguous and this is not first time through, this preserves page-alignment of previous calls. Otherwise, we correct to page-align below. */
size = ALIGN_UP(size, pagesize);
Requesting the memory
/* Don't try to call MORECORE if argument is so big as to appear negative. Note that since mmap takes size_t arg, it may succeed below even if we cannot call MORECORE. */
else { /* If have mmap, try using it as a backup when MORECORE fails or cannot be used. This is worth doing on systems that have "holes" in address space, so sbrk cannot extend to give contiguous space, but space is available elsewhere. Note that we ignore mmap max count and threshold limits, since the space will not be used as a segregated mmap region. */
/* Cannot merge with old top, so add its size back in */ if (contiguous(av)) size = ALIGN_UP(size + old_size, pagesize);
/* If we are relying on mmap as backup, then use larger units */ if ((unsignedlong)(size) < (unsignedlong)(MMAP_AS_MORECORE_SIZE)) size = MMAP_AS_MORECORE_SIZE;
/* Don't try if size wraps around 0 */ if ((unsignedlong)(size) > (unsignedlong)(nb)) { char *mbrk = (char *)(MMAP(0, size, PROT_READ | PROT_WRITE, 0));
if (mbrk != MAP_FAILED) { /* We do not need, and cannot use, another sbrk call to find end */ brk = mbrk; snd_brk = brk + size;
/* Record that we no longer have a contiguous sbrk region. After the first time mmap is used as backup, we do not ever rely on contiguous space since this could incorrectly bridge regions. */ set_noncontiguous(av); } } }
The memory request may have succeeded
if (brk != (char *)(MORECORE_FAILURE)) { if (mp_.sbrk_base == 0) mp_.sbrk_base = brk; av->system_mem += size;
Case 1
/* If MORECORE extends previous space, we can likewise extend top size. */
/* Otherwise, make adjustments: * If the first time through or noncontiguous, we need to call sbrk just to find out where the end of memory lies. * We need to ensure that all returned chunks from malloc will meet MALLOC_ALIGNMENT * If there was an intervening foreign sbrk, we need to adjust sbrk request size to account for fact that we will not be able to combine new space with existing space in old_top. * Almost all systems internally allocate whole pages at a time, in which case we might as well use the whole last page of request. So we allocate enough more memory to hit a page boundary now, which in turn causes future contiguous calls to page-align. */
/* handle contiguous cases */ if (contiguous(av)) { /* Count foreign sbrk as system_mem. */ if (old_size) av->system_mem += brk - old_end;
/* Guarantee alignment of first new chunk made from this space */
front_misalign = (INTERNAL_SIZE_T)chunk2mem(brk) & MALLOC_ALIGN_MASK; if (front_misalign > 0) { /* Skip over some bytes to arrive at an aligned position. We don't need to specially mark these wasted front bytes. They will never be accessed anyway because prev_inuse of av->top (and any chunk created from its start) is always true after initialization. */
/* If this isn't adjacent to existing space, then we will not be able to merge with old_top space, so must add to 2nd request. */
correction += old_size;
/* Extend the end address to hit a page boundary */ end_misalign = (INTERNAL_SIZE_T)(brk + size + correction); correction += (ALIGN_UP(end_misalign, pagesize)) - end_misalign;
/* If can't allocate correction, try to at least find out current brk. It might be enough to proceed without failing. Note that if second sbrk did NOT fail, we assume that space is contiguous with first sbrk. This is a safe assumption unless program is multithreaded but doesn't use locks and a foreign sbrk occurred between our first and second calls. */
if (snd_brk == (char *)(MORECORE_FAILURE)) { correction = 0; snd_brk = (char *)(MORECORE(0)); } else { /* Call the `morecore' hook if necessary. */ void (*hook)(void) = atomic_forced_read(__after_morecore_hook); if (__builtin_expect(hook != NULL, 0)) (*hook)(); } }
/* handle non-contiguous cases */ else { if (MALLOC_ALIGNMENT == 2 * SIZE_SZ) /* MORECORE/mmap must correctly align */ assert(((unsignedlong)chunk2mem(brk) & MALLOC_ALIGN_MASK) == 0); else { front_misalign = (INTERNAL_SIZE_T)chunk2mem(brk) & MALLOC_ALIGN_MASK; if (front_misalign > 0) { /* Skip over some bytes to arrive at an aligned position. We don't need to specially mark these wasted front bytes. They will never be accessed anyway because prev_inuse of av->top (and any chunk created from its start) is always true after initialization. */
/* Adjust top based on results of second sbrk */ if (snd_brk != (char *)(MORECORE_FAILURE)) { av->top = (mchunkptr)aligned_brk; set_head(av->top, (snd_brk - aligned_brk + correction) | PREV_INUSE); av->system_mem += correction;
/* If not the first time through, we either have a gap due to foreign sbrk or a non-contiguous region. Insert a double fencepost at old_top to prevent consolidation with space we don't own. These fenceposts are artificial chunks that are marked as inuse and are in any case too small to use. We need two to make sizes and alignments work out. */
if (old_size != 0) { /* Shrink old_top to insert fenceposts, keeping size a multiple of MALLOC_ALIGNMENT. We know there is at least enough space in old_top to do this. */ old_size = (old_size - 4 * SIZE_SZ) & ~MALLOC_ALIGN_MASK; set_head(old_top, old_size | PREV_INUSE);
/* Note that the following assignments completely overwrite old_top when old_size was previously MINSIZE. This is intentional. We need the fencepost, even if old_top otherwise gets lost. */ set_head(chunk_at_offset(old_top, old_size), (2 * SIZE_SZ) | PREV_INUSE); set_head(chunk_at_offset(old_top, old_size + 2 * SIZE_SZ), (2 * SIZE_SZ) | PREV_INUSE);
/* If possible, release the rest. */ if (old_size >= MINSIZE) { _int_free(av, old_top, 1); } } } }
Note that here the program frees the old top chunk, so depending on its size it will go into one of the bins or into the tcache.
Updating the maximum amount of memory
if ((unsignedlong)av->system_mem > (unsignedlong)(av->max_system_mem)) av->max_system_mem = av->system_mem; check_malloc_state(av);
Carving out the chunk
Getting the size
/* finally, do the allocation */ p = av->top; size = chunksize(p);
Splitting the top chunk
/* check that one of the above allocation paths succeeded */ if ((unsignedlong)(size) >= (unsignedlong)(nb + MINSIZE)) { remainder_size = size - nb; remainder = chunk_at_offset(p, nb); av->top = remainder; set_head(p, nb | PREV_INUSE | (av != &main_arena ? NON_MAIN_ARENA : 0)); set_head(remainder, remainder_size | PREV_INUSE); check_malloced_chunk(av, p, nb); return chunk2mem(p); }
Catching all failure paths
/* catch all failure paths */
__set_errno(ENOMEM);
return 0;
/* Little security check which won't hurt performance: the allocator never wrapps around at the end of the address space. Therefore we can exclude some size values which might appear here by accident or by "design" from some intruder. */
// The pointer must not point to an illegal address: it has to be <= -size. Why???
// The pointer must also be aligned; the 2*SIZE_SZ alignment deserves some careful thought.
if (__builtin_expect((uintptr_t) p > (uintptr_t) -size, 0) ||
    __builtin_expect(misaligned_chunk(p), 0)) {
    errstr = "free(): invalid pointer";
errout:
    if (!have_lock && locked) __libc_lock_unlock(av->mutex);
    malloc_printerr(check_action, errstr, chunk2mem(p), av);
    return;
}
/* We know that each chunk is at least MINSIZE bytes in size or a multiple of MALLOC_ALIGNMENT. */
// The size is smaller than the minimum chunk, or is not a multiple of MALLOC_ALIGNMENT.
if (__glibc_unlikely(size < MINSIZE || !aligned_OK(size))) {
    errstr = "free(): invalid size";
    goto errout;
}
// Check that the chunk is in use; a no-op when not debugging.
check_inuse_chunk(av, p);
/* If eligible, place chunk on a fastbin so it can be found and used quickly in malloc. */
if ((unsignedlong) (size) <= (unsignedlong) (get_max_fast())
#if TRIM_FASTBINS /* If TRIM_FASTBINS set, don't place chunks bordering top into fastbins */ //默认 #define TRIM_FASTBINS 0,因此默认情况下下面的语句不会执行 // 如果当前chunk是fast chunk,并且下一个chunk是top chunk,则不能插入 // 因为下一个chunk是topchunk 直接与 topchunk 合并 && (chunk_at_offset(p, size) != av->top) #endif ) { // 下一个chunk的大小不能小于两倍的SIZE_SZ,并且 // 下一个chunk的大小不能大于system_mem, 一般为132k // 如果出现这样的情况,就报错。 if (__builtin_expect( chunksize_nomask(chunk_at_offset(p, size)) <= 2 * SIZE_SZ, 0) || __builtin_expect( chunksize(chunk_at_offset(p, size)) >= av->system_mem, 0)) { /* We might not have a lock at this point and concurrent modifications of system_mem might have let to a false positive. Redo the test after getting the lock. */ if (have_lock || ({ assert(locked == 0); __libc_lock_lock(av->mutex); locked = 1; chunksize_nomask(chunk_at_offset(p, size)) <= 2 * SIZE_SZ || chunksize(chunk_at_offset(p, size)) >= av->system_mem; })) { errstr = "free(): invalid next size (fast)"; goto errout; } if (!have_lock) { __libc_lock_unlock(av->mutex); locked = 0; } } // 将chunk的mem部分全部设置为perturb_byte free_perturb(chunk2mem(p), size - 2 * SIZE_SZ); // 设置fast chunk的标记位 set_fastchunks(av); // 根据大小获取fast bin的索引 unsignedint idx = fastbin_index(size); // 获取对应fastbin的头指针,被初始化后为NULL。 fb = &fastbin(av, idx);
/* Atomically link P to its fastbin: P->FD = *FB; *FB = P; */
// Insert P into the list with an atomic operation.
mchunkptr old = *fb, old2;
unsigned int old_idx = ~0u;
do {
    /* Check that the top of the bin is not the record we are going to
       add (i.e., double free). */
    // So we cannot double free the same fastbin chunk back to back.
    // The defence simply checks whether the previously freed chunk is the same
    // as the chunk being freed now.
    // The bypass is to squeeze another chunk in between: to double free A,
    // free in the order free A, free B, free A.
    if (__builtin_expect(old == p, 0)) {
        errstr = "double free or corruption (fasttop)";
        goto errout;
    }
    /* Check that size of fastbin chunk at the top is the same as
       size of the chunk that we are adding. We can dereference OLD
       only if we have the lock, otherwise it might have already been
       deallocated. See use of OLD_IDX below for the actual check. */
    if (have_lock && old != NULL)
        old_idx = fastbin_index(chunksize(old));
    p->fd = old2 = old;
} while ((old = catomic_compare_and_exchange_val_rel(fb, p, old2)) != old2);
// Make sure the fast bin index is the same before and after the insertion.
if (have_lock && old != NULL && __builtin_expect(old_idx != idx, 0)) {
    errstr = "invalid fastbin entry (free)";
    goto errout;
}
}
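A minimal sketch of the "fastbin dup" pattern that the comment above hints at (free A, free B, free A), assuming a glibc without tcache such as 2.23; the top-of-bin check only compares against the most recent entry, so interleaving another free slips the same chunk in twice:

#include <stdio.h>
#include <stdlib.h>

int main(void) {
    void *a = malloc(0x18);
    void *b = malloc(0x18);

    free(a);   /* fastbin: a */
    free(b);   /* fastbin: b -> a */
    free(a);   /* top of bin is b, not a, so the check passes: a -> b -> a */

    /* The same chunk can now be handed out twice. */
    printf("%p %p %p\n", malloc(0x18), malloc(0x18), malloc(0x18));
    return 0;
}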
// If the next chunk is not the top chunk
if (nextchunk != av->top) {
    /* get and clear inuse bit */
    // Get the in-use status of the next chunk.
    nextinuse = inuse_bit_at_offset(nextchunk, nextsize);
    // If it is not in use, consolidate forward; otherwise clear the in-use bit
    // recorded for the current chunk.
    /* consolidate forward */
    if (!nextinuse) {
        unlink(av, nextchunk, bck, fwd);
        size += nextsize;
    } else
        clear_inuse_bit_at_offset(nextchunk, 0);
    /* Place the chunk in unsorted chunk list. Chunks are not placed into regular bins until after they have been given one chance to be used in malloc. */
    // Put the chunk at the head of the unsorted bin list.
    // bck is the head of the unsorted bin.
    bck = unsorted_chunks(av);
    // fwd is the first (most recently inserted) chunk of the unsorted bin.
    fwd = bck->fd;
    // A simple sanity check.
    if (__glibc_unlikely(fwd->bk != bck)) {
        errstr = "free(): corrupted unsorted chunks";
        goto errout;
    }
    p->fd = fwd;
    p->bk = bck;
    // If it is a large chunk, set the nextsize pointer fields to NULL.
    if (!in_smallbin_range(size)) {
        p->fd_nextsize = NULL;
        p->bk_nextsize = NULL;
    }
    bck->fd = p;
    fwd->bk = p;
/* If the chunk borders the current high end of memory, consolidate into top */
// If the next chunk of the chunk being freed is the top chunk, merge into the top chunk.
else {
    size += nextsize;
    set_head(p, size | PREV_INUSE);
    av->top = p;
    check_chunk(av, p);
}
/* If freeing a large space, consolidate possibly-surrounding chunks. Then, if the total unused topmost memory exceeds trim threshold, ask malloc_trim to reduce top. Unless max_fast is 0, we don't know if there are fastbins bordering top, so we cannot tell for sure whether threshold has been reached unless fastbins are consolidated. But we don't want to consolidate on each free. As a compromise, consolidation is performed if FASTBIN_CONSOLIDATION_THRESHOLD is reached. */
// If the size of the merged chunk is larger than FASTBIN_CONSOLIDATION_THRESHOLD
// (this is generally the case after merging into the top chunk),
// return memory to the system.
if ((unsigned long) (size) >= FASTBIN_CONSOLIDATION_THRESHOLD) {
    // If there are fast chunks, consolidate them first.
    if (have_fastchunks(av)) malloc_consolidate(av);
    // Main arena
    if (av == &main_arena) {
#ifndef MORECORE_CANNOT_TRIM
        // If the top chunk is larger than the current trim threshold, shrink it.
        if ((unsigned long) (chunksize(av->top)) >=
            (unsigned long) (mp_.trim_threshold))
            systrim(mp_.top_pad, av);
#endif
    // For a non-main arena, shrink the heap directly.
    } else {
        /* Always try heap_trim(), even if the top chunk is not large,
           because the corresponding heap might go away. */
        heap_info *heap = heap_for_ptr(top(av));