Linux memory management -- the actual allocation function buffered_rmqueue
Whether the allocation takes the fast path or the slow path, the function that actually hands out memory is buffered_rmqueue(); everything around it merely decides where it is appropriate to allocate from.
First, the parameters:
struct zone *preferred_zone: the preferred zone, i.e. the highest zone type this allocation is allowed to use.
struct zone *zone: the zone the memory is actually taken from.
int order: the order of the allocation, i.e. 2^order contiguous pages.
gfp_t gfp_flags: the allocation flags.
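For orientation, here is a hedged sketch (not taken from the sources walked through below) of where such a request typically originates: a caller asks for 2^order contiguous pages with a set of GFP flags, and those two values are what eventually reach buffered_rmqueue(). The flag and the order used here are purely illustrative.
#include <linux/gfp.h>
#include <linux/mm.h>

/* Illustrative only: ask for 2^3 = 8 contiguous pages. GFP_KERNEL stands in
 * for whatever flags the real caller would need. */
static struct page *grab_eight_pages(void)
{
    struct page *page = alloc_pages(GFP_KERNEL, 3);

    if (!page)
        return NULL;    /* order-3 requests can and do fail */
    return page;
}
/* ... and eventually: __free_pages(page, 3); */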
page = buffered_rmqueue(preferred_zone, zone, order,
gfp_mask, migratetype);
/*
* Really, prep_compound_page() should be called from __rmqueue_bulk(). But
* we cheat by calling it from here, in the order > 0 path. Saves a branch
* or two.
*/
static inline
struct page *buffered_rmqueue(struct zone *preferred_zone,
struct zone *zone, int order, gfp_t gfp_flags,
int migratetype)
{
unsigned long flags;
struct page *page;
int cold = !!(gfp_flags & __GFP_COLD);// does the caller want a cache-cold page?
again:
if (likely(order == 0)) {// single page: serve it from the per-CPU page cache
struct per_cpu_pages *pcp;
struct list_head *list;
local_irq_save(flags);// the pcp lists are per-CPU, so disabling local IRQs is enough
pcp = &this_cpu_ptr(zone->pageset)->pcp;// this CPU's page cache for this zone
list = &pcp->lists[migratetype];// freelist of the requested migrate type
if (list_empty(list)) {// cache empty: refill it from the buddy system
pcp->count += rmqueue_bulk(zone, 0,
pcp->batch, list,
migratetype, cold);// pull pcp->batch order-0 pages in one go
if (unlikely(list_empty(list)))
goto failed;
}
if (cold)// cold request: take from the tail, the least recently freed page
page = list_entry(list->prev, struct page, lru);
else// hot request: take from the head, the most recently freed page
page = list_entry(list->next, struct page, lru);
list_del(&page->lru);
pcp->count--;
} else {
if (unlikely(gfp_flags & __GFP_NOFAIL)) {
/*
* __GFP_NOFAIL is not to be used in new code.
*
* All __GFP_NOFAIL callers should be fixed so that they
* properly detect and handle allocation failures.
*
* We most definitely don't want callers attempting to
* allocate greater than order-1 page units with
* __GFP_NOFAIL.
*/
WARN_ON_ONCE(order > 1);
}
spin_lock_irqsave(&zone->lock, flags);// higher orders bypass the pcp cache and go straight to the buddy system
page = __rmqueue(zone, order, migratetype);
spin_unlock(&zone->lock);
if (!page)
goto failed;
__mod_zone_freepage_state(zone, -(1 << order),
get_pageblock_migratetype(page));
}
__count_zone_vm_events(PGALLOC, zone, 1 << order);
zone_statistics(preferred_zone, zone, gfp_flags);
local_irq_restore(flags);
VM_BUG_ON(bad_range(zone, page));
if (prep_new_page(page, order, gfp_flags))
goto again;
return page;
failed:
local_irq_restore(flags);
return NULL;
}
struct zone has a member struct per_cpu_pageset __percpu *pageset; it backs the hot/cold page allocator: a hot page is one that is probably still resident in the CPU's cache. (A small userspace sketch of the head/tail discipline follows the two structures below.)
struct per_cpu_pageset {
struct per_cpu_pages pcp;
#ifdef CONFIG_NUMA
s8 expire;
#endif
#ifdef CONFIG_SMP
s8 stat_threshold;
s8 vm_stat_diff[NR_VM_ZONE_STAT_ITEMS];
#endif
};
The per-CPU page cache itself:
struct per_cpu_pages {
int count; /* number of pages in the list */
int high; /* high watermark, emptying needed */
int batch; /* chunk size for buddy add/remove; the cache is refilled and drained in batches of this size */
/* Lists of pages, one per migrate type stored on the pcp-lists */
struct list_head lists[MIGRATE_PCPTYPES];// one list per pcp migrate type
};
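A minimal userspace sketch (plain C, not kernel code) of the discipline these lists implement, assuming a toy fixed-size cache: freed hot pages are pushed at the head, hot allocations pop the head (most recently freed, likely cache-warm), and cold allocations take the tail (least recently freed).
#include <stdio.h>

#define PCP_MAX 8

static int pcp[PCP_MAX];   /* index 0 = head (hottest), count-1 = tail (coldest) */
static int count;

static void pcp_free_hot(int page)      /* a freed page goes to the head */
{
    for (int i = count; i > 0; i--)
        pcp[i] = pcp[i - 1];
    pcp[0] = page;
    count++;
}

static int pcp_alloc(int cold)          /* cold != 0: take from the tail */
{
    int idx = cold ? count - 1 : 0;
    int page = pcp[idx];

    if (!cold)                          /* hot: close the gap at the head */
        for (int i = 0; i < count - 1; i++)
            pcp[i] = pcp[i + 1];
    count--;
    return page;
}

int main(void)
{
    for (int p = 1; p <= 4; p++)        /* pages freed in the order 1,2,3,4 */
        pcp_free_hot(p);
    printf("hot alloc  -> page %d\n", pcp_alloc(0));   /* 4, most recently freed */
    printf("cold alloc -> page %d\n", pcp_alloc(1));   /* 1, least recently freed */
    return 0;
}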
Pages are pulled from the buddy system to refill the CPU's cache:
/*
* Obtain a specified number of elements from the buddy allocator, all under
* a single hold of the lock, for efficiency. Add them to the supplied list.
* Returns the number of new pages which were placed at *list.
*/
static int rmqueue_bulk(struct zone *zone, unsigned int order,
unsigned long count, struct list_head *list,
int migratetype, int cold)
{
int mt = migratetype, i;
spin_lock(&zone->lock);
for (i = 0; i < count; ++i) {// try to pull 'count' blocks of the requested order
struct page *page = __rmqueue(zone, order, migratetype);// take one block from the buddy system
if (unlikely(page == NULL))
break;
/*
* Split buddy pages returned by expand() are received here
* in physical page order. The page is added to the callers and
* list and the list head then moves forward. From the callers
* perspective, the linked list is ordered by page number in
* some conditions. This is useful for IO devices that can
* merge IO requests if the physical pages are ordered
* properly.
*/
if (likely(cold == 0))
list_add(&page->lru, list);// hot page: add at the head of the list
else
list_add_tail(&page->lru, list);// cold page: add at the tail
if (IS_ENABLED(CONFIG_CMA)) {// only relevant when CONFIG_CMA is enabled
mt = get_pageblock_migratetype(page);// migrate type of the pageblock the page sits in
if (!is_migrate_cma(mt) && !is_migrate_isolate(mt))// neither MIGRATE_CMA nor MIGRATE_ISOLATE:
mt = migratetype;// account the page under the requested type
}
set_freepage_migratetype(page, mt); // remember on the page which freelist type it came from
list = &page->lru;// advance so the next page is linked behind this one (keeps physical order)
if (is_migrate_cma(mt))// a MIGRATE_CMA page was handed out
__mod_zone_page_state(zone, NR_FREE_CMA_PAGES,
-(1 << order));// so the free-CMA counter must drop as well
}
__mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order));// i blocks of 2^order pages left the buddy freelists
spin_unlock(&zone->lock);
return i;// number of blocks actually placed on the per-CPU list
}
Updating the page counters for the affected type (these counters are also visible from userspace; see the quick check after the two helpers below):
static inline void __mod_zone_page_state(struct zone *zone,
enum zone_stat_item item, int delta)
{
zone_page_state_add(delta, zone, item);
}
static inline void zone_page_state_add(long x, struct zone *zone,
enum zone_stat_item item)
{
atomic_long_add(x, &zone->vm_stat[item]);
atomic_long_add(x, &vm_stat[item]);
}
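These zone/global counters are exported to userspace. As a quick sanity check (plain userspace C, not kernel code), the NR_FREE_PAGES counter adjusted above shows up as nr_free_pages in /proc/vmstat:
#include <stdio.h>
#include <string.h>

int main(void)
{
    char line[256];
    FILE *f = fopen("/proc/vmstat", "r");

    if (!f)
        return 1;
    while (fgets(line, sizeof(line), f))
        if (strncmp(line, "nr_free_pages", 13) == 0)
            fputs(line, stdout);        /* e.g. "nr_free_pages 123456" */
    fclose(f);
    return 0;
}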
/*
* Do the hard work of removing an element from the buddy allocator.
* Call me with the zone->lock already held.
*/
static struct page *__rmqueue(struct zone *zone, unsigned int order,
int migratetype)
{
struct page *page;
retry_reserve:
page = __rmqueue_smallest(zone, order, migratetype);// common case: take the smallest suitable block from this zone's lists of the requested migrate type
if (unlikely(!page) && migratetype != MIGRATE_RESERVE) {// nothing there: try the fallback migrate types
page = __rmqueue_fallback(zone, order, migratetype);// steal a block from another migrate type
/*
* Use MIGRATE_RESERVE rather than fail an allocation. goto
* is used because __rmqueue_smallest is an inline function
* and we want just one call site
*/
if (!page) {// even the fallbacks failed: retry with the MIGRATE_RESERVE pool
migratetype = MIGRATE_RESERVE;
goto retry_reserve;// back to __rmqueue_smallest(), now with MIGRATE_RESERVE
}
}
trace_mm_page_alloc_zone_locked(page, order, migratetype);
return page;
}
/*
* Go through the free lists for the given migratetype and remove
* the smallest available page from the freelists
*/
static inline
struct page *__rmqueue_smallest(struct zone *zone, unsigned int order,
int migratetype)
{
unsigned int current_order;
struct free_area * area;
struct page *page;
/* Find a page of the appropriate size in the preferred list */
for (current_order = order; current_order < MAX_ORDER; ++current_order) {// start at the requested order and climb until a free block turns up
area = &(zone->free_area[current_order]);
if (list_empty(&area->free_list[migratetype]))// nothing free at this order for this migrate type
continue;
// take the first block off this freelist
page = list_entry(area->free_list[migratetype].next,
struct page, lru);
list_del(&page->lru);
rmv_page_order(page);// the block leaves the buddy system: clear PageBuddy (page->_mapcount back to -1) and reset page->private
area->nr_free--;// one fewer free block at this order (nr_free counts blocks, not pages)
expand(zone, page, order, current_order, area, migratetype);// if the block came from a higher order, split it and give the unused halves back to the lower-order freelists
return page;
}
return NULL;
}
expand() is a key buddy-system function. When the block we obtained comes from a higher order than we asked for, say we found a block at order 8 but only need order 6, this function splits it: half of the order-8 block is hung on the order-7 free list, half of the remaining order-7 piece is hung on the order-6 free list, and so on; as soon as the current order equals the order we actually need, it returns and the remaining piece is handed to the caller. (A small worked example follows the function below.)
Its parameters:
struct zone *zone: all operations take place within this zone.
struct page *page: the page block obtained at the higher order.
int low: the order we actually need.
int high: the order at which the block was found.
struct free_area *area: the zone's free_area entry for that higher order.
int migratetype: the migrate type.
/*
* The order of subdivision here is critical for the IO subsystem.
* Please do not alter this order without good reasons and regression
* testing. Specifically, as large blocks of memory are subdivided,
* the order in which smaller blocks are delivered depends on the order
* they're subdivided in this function. This is the primary factor
* influencing the order in which pages are delivered to the IO
* subsystem according to empirical testing, and this is also justified
* by considering the behavior of a buddy system containing a single
* large block of memory acted on by a series of small allocations.
* This behavior is a critical factor in sglist merging's success.
*
* -- nyc
*/
static inline void expand(struct zone *zone, struct page *page,
int low, int high, struct free_area *area,
int migratetype)
{
unsigned long size = 1 << high;
while (high > low) {// keep splitting until we are down at the requested order
area--;// step down to the free_area of the next lower order
high--;// now working at that lower order
size >>= 1;// the piece to give back is half of the previous block
VM_BUG_ON(bad_range(zone, &page[size]));
#ifdef CONFIG_DEBUG_PAGEALLOC
if (high < debug_guardpage_minorder()) {
/*
* Mark as guard pages (or page), that will allow to
* merge back to allocator when buddy will be freed.
* Corresponding page table entries will not be touched,
* pages will stay not present in virtual address space
*/
INIT_LIST_HEAD(&page[size].lru);
set_page_guard_flag(&page[size]);
set_page_private(&page[size], high);
/* Guard pages are not available for any usage */
__mod_zone_freepage_state(zone, -(1 << high),
migratetype);
continue;
}
#endif
list_add(&page[size].lru, &area->free_list[migratetype]);// hang the upper half on this order's freelist
area->nr_free++;// one more free block at this order
set_page_order(&page[size], high);// mark it as a free buddy block of order 'high' (order kept in page->private, PageBuddy set)
}
}
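A standalone sketch (plain C, not kernel code) of the splitting arithmetic expand() performs: a block found at order high is halved repeatedly, each unused upper half is handed to the free list of the next lower order, and the loop stops once we reach the requested order low. The values low = 0 and high = 3 are just an example.
#include <stdio.h>

int main(void)
{
    int low = 0, high = 3;              /* need order 0, found a block at order 3 */
    unsigned long size = 1UL << high;   /* 8 pages in the block */

    while (high > low) {
        high--;
        size >>= 1;
        /* pages [size .. 2*size-1] of the block go back to free_area[high] */
        printf("put pages %lu..%lu on the order-%d free list\n",
               size, 2 * size - 1, high);
    }
    printf("pages 0..%lu (order %d) are handed to the caller\n",
           (1UL << low) - 1, low);
    return 0;
}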
We only get here because the requested migrate type could not be satisfied from the buddy system, so the allocator falls back to alternative migrate types, in the order given by the table below (a small sketch of how the table is walked follows it).
/*
* This array describes the order lists are fallen back to when
* the free lists for the desirable migrate type are depleted
*/
static int fallbacks[MIGRATE_TYPES][4] = {
[MIGRATE_UNMOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE },
[MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE },
#ifdef CONFIG_CMA
[MIGRATE_MOVABLE] = { MIGRATE_CMA, MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE },
[MIGRATE_CMA] = { MIGRATE_RESERVE }, /* Never used */
#else
[MIGRATE_MOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE },
#endif
[MIGRATE_RESERVE] = { MIGRATE_RESERVE }, /* Never used */
#ifdef CONFIG_MEMORY_ISOLATION
[MIGRATE_ISOLATE] = { MIGRATE_RESERVE }, /* Never used */
#endif
};
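A minimal sketch of how __rmqueue_fallback() (shown next) walks this table, assuming the CONFIG_CMA=n layout above; the walk stops at MIGRATE_RESERVE, which is left for __rmqueue() to retry explicitly:
#include <stdio.h>

enum { MIGRATE_UNMOVABLE, MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE,
       MIGRATE_RESERVE, MIGRATE_TYPES };

static const char *name[] = { "UNMOVABLE", "RECLAIMABLE", "MOVABLE", "RESERVE" };

static const int fb[MIGRATE_TYPES][4] = {      /* CONFIG_CMA=n variant */
    [MIGRATE_UNMOVABLE]   = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE },
    [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE },
    [MIGRATE_MOVABLE]     = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE },
    [MIGRATE_RESERVE]     = { MIGRATE_RESERVE },
};

int main(void)
{
    int start = MIGRATE_MOVABLE;                /* the type that just failed */

    for (int i = 0; ; i++) {
        int mt = fb[start][i];
        if (mt == MIGRATE_RESERVE)              /* handled later by __rmqueue() */
            break;
        printf("try the %s free lists\n", name[mt]);
    }
    return 0;
}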
The fallback migrate types above are then walked:
/* Remove an element from the buddy allocator from the fallback list */
static inline struct page *
__rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
{
struct free_area * area;
int current_order;
struct page *page;
int migratetype, i;
/* Find the largest possible block of pages in the other list */
for (current_order = MAX_ORDER-1; current_order >= order;
--current_order) {// unlike __rmqueue_smallest(), start from the LARGEST order: when stealing from another type it causes less fragmentation to take one big block than many small ones
for (i = 0;; i++) {
migratetype = fallbacks[start_migratetype][i];
/* MIGRATE_RESERVE handled later if necessary */
if (migratetype == MIGRATE_RESERVE)// end of the fallback list; __rmqueue() will deal with MIGRATE_RESERVE
break;
area = &(zone->free_area[current_order]);// free_area for this order
if (list_empty(&area->free_list[migratetype]))// nothing free here, try the next fallback type
continue;
page = list_entry(area->free_list[migratetype].next,
struct page, lru);// take the first block on the fallback freelist
area->nr_free--;
/*
* If breaking a large block of pages, move all free
* pages to the preferred allocation list. If falling
* back for a reclaimable kernel allocation, be more
* aggressive about taking ownership of free pages
*
* On the other hand, never change migration
* type of MIGRATE_CMA pageblocks nor move CMA
* pages on different free lists. We don't
* want unmovable pages to be allocated from
* MIGRATE_CMA areas.
*/// i.e. when we steal, prefer to take over whole pageblocks
// pageblock_order is the order of one pageblock (the huge-page order where available, at most MAX_ORDER - 1); pageblock_nr_pages is the number of pages it contains
if (!is_migrate_cma(migratetype) &&// never convert CMA pageblocks
(unlikely(current_order >= pageblock_order / 2) || // stealing a fairly large chunk: claim the pageblock's free pages for start_migratetype
start_migratetype == MIGRATE_RECLAIMABLE || // reclaimable allocations take ownership aggressively so they do not pollute many blocks
page_group_by_mobility_disabled)) {
int pages;
pages = move_freepages_block(zone, page,
start_migratetype);// move every free page of this pageblock onto the start_migratetype freelists
/* Claim the whole block if over half of it is free */
if (pages >= (1 << (pageblock_order-1)) ||
page_group_by_mobility_disabled)
set_pageblock_migratetype(page,
start_migratetype);// move_freepages_block() returned how many pages were free: if more than half of the pageblock was free, retag the whole pageblock
migratetype = start_migratetype;
}
/* Remove the page from the freelists */
list_del(&page->lru);
rmv_page_order(page);// clear the buddy metadata; this block is leaving the buddy system
/* Take ownership for orders >= pageblock_order */
if (current_order >= pageblock_order &&
!is_migrate_cma(migratetype))
change_pageblock_range(page, current_order,
start_migratetype);// the stolen block covers one or more whole pageblocks: retag them all as start_migratetype
expand(zone, page, order, current_order, area,
is_migrate_cma(migratetype)
? migratetype : start_migratetype);// split down to the requested order; the unused halves go to the freelists of the type we stole for (CMA pages stay CMA)
trace_mm_page_alloc_extfrag(page, order, current_order,
start_migratetype, migratetype);
return page;
}
}
return NULL;
}
int move_freepages_block(struct zone *zone, struct page *page,
int migratetype)
{
unsigned long start_pfn, end_pfn;
struct page *start_page, *end_page;
start_pfn = page_to_pfn(page);// pfn of the page we start from
start_pfn = start_pfn & ~(pageblock_nr_pages-1);// round down to the first pfn of the pageblock (pageblock_nr_pages is a power of two)
start_page = pfn_to_page(start_pfn);
end_page = start_page + pageblock_nr_pages - 1;// the pageblock spans pageblock_nr_pages pages from the aligned start, so this is its last page
end_pfn = start_pfn + pageblock_nr_pages - 1;
/* Do not cross zone boundaries */
if (!zone_spans_pfn(zone, start_pfn))
start_page = page;
if (!zone_spans_pfn(zone, end_pfn))// the pageblock runs past the end of the zone: move nothing
return 0;
return move_freepages(zone, start_page, end_page, migratetype);// the real work happens in move_freepages()
}
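A small worked example (plain C) of the pageblock rounding above, assuming pageblock_order = 9, i.e. pageblock_nr_pages = 512, which is a common x86-64 value; an arbitrary pfn of 1000 then falls into the pageblock spanning pfns 512..1023:
#include <stdio.h>

int main(void)
{
    unsigned long pageblock_nr_pages = 1UL << 9;    /* assumed: order 9 -> 512 pages */
    unsigned long pfn = 1000;                       /* some page inside the block */
    unsigned long start_pfn = pfn & ~(pageblock_nr_pages - 1);
    unsigned long end_pfn = start_pfn + pageblock_nr_pages - 1;

    printf("pfn %lu lies in the pageblock %lu..%lu\n", pfn, start_pfn, end_pfn);
    return 0;
}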
/*
* Move the free pages in a range to the free lists of the requested type.
* Note that start_page and end_pages are not aligned on a pageblock
* boundary. If alignment is required, use move_freepages_block()
*/
int move_freepages(struct zone *zone,
struct page *start_page, struct page *end_page,
int migratetype)
{
struct page *page;
unsigned long order;
int pages_moved = 0;
#ifndef CONFIG_HOLES_IN_ZONE
/*
* page_zone is not safe to call in this context when
* CONFIG_HOLES_IN_ZONE is set. This bug check is probably redundant
* anyway as we check zone boundaries in move_freepages_block().
* Remove at a later date when no bug reports exist related to
* grouping pages by mobility
*/
BUG_ON(page_zone(start_page) != page_zone(end_page));
#endif
for (page = start_page; page <= end_page;) {
/* Make sure we are not inadvertently changing nodes */
VM_BUG_ON(page_to_nid(page) != zone_to_nid(zone));
if (!pfn_valid_within(page_to_pfn(page))) {
page++;
continue;
}
if (!PageBuddy(page)) {// only free (buddy) pages can be moved; skip pages that are in use
page++;
continue;
}
order = page_order(page);// order of this free block
list_move(&page->lru,
&zone->free_area[order].free_list[migratetype]);// move the whole block onto the freelist of the new migrate type
set_freepage_migratetype(page, migratetype);// record the new type on the page (page->index = migratetype)
page += 1 << order;// jump over the 2^order pages of this block
pages_moved += 1 << order;
}
return pages_moved;// number of pages (not blocks) that changed migrate type
}
static void change_pageblock_range(struct page *pageblock_page,
int start_order, int migratetype)
{
int nr_pageblocks = 1 << (start_order - pageblock_order);// how many pageblocks a block of start_order covers (start_order >= pageblock_order here)
while (nr_pageblocks--) {// retag each pageblock in turn
set_pageblock_migratetype(pageblock_page, migratetype);// set the new migrate type for this pageblock
pageblock_page += pageblock_nr_pages;// advance to the first page of the next pageblock
}
}
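And a quick check of the arithmetic in change_pageblock_range(), again assuming pageblock_order = 9: a stolen block of start_order = 10 covers 1 << (10 - 9) = 2 pageblocks, so two pageblocks get retagged.
#include <stdio.h>

int main(void)
{
    int pageblock_order = 9;            /* assumed typical value */
    int start_order = 10;               /* order of the stolen block */
    int nr_pageblocks = 1 << (start_order - pageblock_order);

    printf("%d pageblock(s) to retag\n", nr_pageblocks);    /* prints 2 */
    return 0;
}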