linuxメモリ管理-paging_init関数
17009 ワード
ソースコードは2.6.37カーネル、x 86_64アーキテクチャ、メモリモデルSparse Memory
paging_init関数はsetup_arch関数から呼び出され、すべてのノードのpg_data_t構造、およびノードに対応する管理領域zone構造、page構造を初期化します.
呼び出しの大まかな手順は、次のとおりです.
start_kernel() --> setup_arch() --> paging_init() --> free_area_init_nodes() --> free_area_init_node() --> free_area_init_core() --> memmap_init()
具体的には以下の通りです.
paging_init()はsetup_arch()から呼び出され、次のように定義されます.
paging_init()はfree_area_init_nodes関数を呼び出し、すべてのノードのpg_data_tとzone、pageのデータを初期化し、管理領域情報を印刷します.
そしてfree_area_init_nodes関数では各ノードをループし、ループ内でfree_area_init_node関数が呼び出され、ノードに対応するpg_data_tとzone、pageのデータを初期化します.
続いてfree_area_init_core関数を呼び出し、ノードのpg_data_t構造、zoneおよびpage構造の初期化を続行します.
free_area_init_core関数はzoneを初期化するコアです.
free_area_init_core関数はmemmap_init関数を呼び出してpage構造を初期化します.
こうしてpaging_init関数により、pg_data_t、zone、pageなどの構造の初期化が完了します.
paging_init関数はsetup_arch関数から呼び出され、すべてのノードのpg_data_t構造、およびノードに対応する管理領域zone構造、page構造を初期化します.
呼び出しの大まかな手順は、次のとおりです.
start_kernel() --> setup_arch() --> paging_init() --> free_area_init_nodes() --> free_area_init_node() --> free_area_init_core() --> memmap_init()
具体的には以下の通りです.
paging_init()はsetup_arch()から呼び出され、次のように定義されます.
/*
 * paging_init - set up node, zone and page data for x86_64 (2.6.37).
 *
 * Called from setup_arch().  Records the maximum PFN of each zone,
 * registers the active memory regions with the sparse memory model,
 * then hands off to free_area_init_nodes() to initialise every node's
 * pg_data_t, its zones and its struct pages.
 */
void __init paging_init(void)
{
	unsigned long max_zone_pfns[MAX_NR_ZONES]; /* MAX_NR_ZONES = 4 */

	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
	max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;		/* up to 16M */
	max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;	/* up to 4G */
	max_zone_pfns[ZONE_NORMAL] = max_pfn;		/* rest of RAM */

	/*
	 * Mark which mem_sections contain present pages: walk the
	 * early_node_map[] memory regions and flag the corresponding
	 * mem_section entries as present, so that sparse_init() below
	 * knows which sections need a mem_map (struct page array)
	 * allocated for them.
	 */
	sparse_memory_present_with_active_regions(MAX_NUMNODES);
	sparse_init();

	/*
	 * clear the default setting with node 0
	 * note: don't use nodes_clear here, that is really clearing when
	 * numa support is not compiled in, and later node_set_state
	 * will not set it back.
	 */
	node_clear_state(0, N_NORMAL_MEMORY);

	/* initialise every node's pg_data_t, zones and struct pages */
	free_area_init_nodes(max_zone_pfns);
}
paging_init()はfree_area_init_nodes関数を呼び出し、すべてのノードのpg_data_tとzone、pageのデータを初期化し、管理領域情報を印刷します.
/**
 * free_area_init_nodes - Initialise all pg_data_t and zone data
 * @max_zone_pfn: an array of max PFNs for each zone
 *
 * This will call free_area_init_node() for each active node in the system.
 * Using the page ranges provided by add_active_range(), the size of each
 * zone in each node and their holes is calculated. If the maximum PFN
 * between two adjacent zones match, it is assumed that the zone is empty.
 * For example, if arch_max_dma_pfn == arch_max_dma32_pfn, it is assumed
 * that arch_max_dma32_pfn has no pages. It is also assumed that a zone
 * starts where the previous one ended. For example, ZONE_DMA32 starts
 * at arch_max_dma_pfn.
 */
void __init free_area_init_nodes(unsigned long *max_zone_pfn)
{
	unsigned long nid;
	int i;

	/* Sort early_node_map as initialisation assumes it is sorted */
	sort_node_map();

	/*
	 * Compute the [low, high) PFN boundary of every zone.  On x86_64
	 * with max_zone_pfn = { 16M, 4G, max_pfn } this yields:
	 *   arch_zone_lowest_possible_pfn  = { lowest pfn, 16M, 4G, 0 }
	 *   arch_zone_highest_possible_pfn = { 16M, 4G, max_pfn, 0 }
	 * for { ZONE_DMA, ZONE_DMA32, ZONE_NORMAL, ZONE_MOVABLE }.
	 */
	/* Record where the zone boundaries are */
	memset(arch_zone_lowest_possible_pfn, 0,
				sizeof(arch_zone_lowest_possible_pfn));
	memset(arch_zone_highest_possible_pfn, 0,
				sizeof(arch_zone_highest_possible_pfn));
	/* lowest pfn recorded in early_node_map[] */
	arch_zone_lowest_possible_pfn[0] = find_min_pfn_with_active_regions();
	arch_zone_highest_possible_pfn[0] = max_zone_pfn[0];
	for (i = 1; i < MAX_NR_ZONES; i++) {
		if (i == ZONE_MOVABLE)
			continue;
		/* each zone starts where the previous one ended */
		arch_zone_lowest_possible_pfn[i] =
			arch_zone_highest_possible_pfn[i-1];
		arch_zone_highest_possible_pfn[i] =
			max(max_zone_pfn[i], arch_zone_lowest_possible_pfn[i]);
	}
	/* ZONE_MOVABLE has no fixed arch boundaries; it is computed per
	 * node by find_zone_movable_pfns_for_nodes() below */
	arch_zone_lowest_possible_pfn[ZONE_MOVABLE] = 0;
	arch_zone_highest_possible_pfn[ZONE_MOVABLE] = 0;

	/* Find the PFNs that ZONE_MOVABLE begins at in each node */
	memset(zone_movable_pfn, 0, sizeof(zone_movable_pfn));
	find_zone_movable_pfns_for_nodes(zone_movable_pfn);

	/* Print out the zone ranges */
	printk("Zone PFN ranges:\n");
	for (i = 0; i < MAX_NR_ZONES; i++) {
		if (i == ZONE_MOVABLE)
			continue;
		printk(" %-8s ", zone_names[i]);
		if (arch_zone_lowest_possible_pfn[i] ==
				arch_zone_highest_possible_pfn[i])
			printk("empty\n");
		else
			printk("%0#10lx -> %0#10lx\n",
				arch_zone_lowest_possible_pfn[i],
				arch_zone_highest_possible_pfn[i]);
	}

	/* Print out the PFNs ZONE_MOVABLE begins at in each node */
	printk("Movable zone start PFN for each node\n");
	for (i = 0; i < MAX_NUMNODES; i++) {
		if (zone_movable_pfn[i])
			printk(" Node %d: %lu\n", i, zone_movable_pfn[i]);
	}

	/* Print out the early_node_map[] */
	printk("early_node_map[%d] active PFN ranges\n", nr_nodemap_entries);
	for (i = 0; i < nr_nodemap_entries; i++)
		printk(" %3d: %0#10lx -> %0#10lx\n", early_node_map[i].nid,
			early_node_map[i].start_pfn,
			early_node_map[i].end_pfn);

	/* Initialise every node */
	mminit_verify_pageflags_layout();
	setup_nr_node_ids();
	for_each_online_node(nid) {
		pg_data_t *pgdat = NODE_DATA(nid); /* this node's pg_data_t */

		/*
		 * Initialise node nid's pg_data_t, zones and pages.
		 * find_min_pfn_for_node() returns the lowest pfn recorded
		 * for this node in early_node_map[].
		 */
		free_area_init_node(nid, NULL,
				find_min_pfn_for_node(nid), NULL);

		/* Any memory on that node */
		if (pgdat->node_present_pages)
			node_set_state(nid, N_HIGH_MEMORY); /* regular memory */
		check_for_regular_memory(pgdat); /* relevant with CONFIG_HIGHMEM */
	}
}
そしてfree_area_init_nodes関数では各ノードをループし、ループ内でfree_area_init_node関数が呼び出され、ノードに対応するpg_data_tとzone、pageのデータを初期化します:
/*
 * free_area_init_node - initialise one node's pg_data_t, zones and pages
 * @nid:            node id
 * @zones_size:     per-zone size array; NULL here, sizes are derived
 *                  from early_node_map[]
 * @node_start_pfn: first pfn belonging to node @nid
 * @zholes_size:    per-zone hole array; NULL here, same reason
 */
void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
		unsigned long node_start_pfn, unsigned long *zholes_size)
{
	pg_data_t *pgdat = NODE_DATA(nid); /* this node's pg_data_t */

	pgdat->node_id = nid;
	pgdat->node_start_pfn = node_start_pfn;
	/*
	 * Count this node's pages: fills pgdat->node_spanned_pages
	 * (pfn range including holes) and pgdat->node_present_pages
	 * (pages actually present).
	 */
	calculate_node_totalpages(pgdat, zones_size, zholes_size);
	/* with CONFIG_FLAT_NODE_MEM_MAP, allocates the node's mem_map here */
	alloc_node_mem_map(pgdat);
#ifdef CONFIG_FLAT_NODE_MEM_MAP
	printk(KERN_DEBUG "free_area_init_node: node %d, pgdat %08lx, node_mem_map %08lx\n",
		nid, (unsigned long)pgdat,
		(unsigned long)pgdat->node_mem_map);
#endif
	/* continue in free_area_init_core: fill in the pg_data_t and
	 * initialise the node's zones and struct pages */
	free_area_init_core(pgdat, zones_size, zholes_size);
}
続いてfree_area_init_core関数を呼び出し、ノードのpg_data_t構造、zoneおよびpage構造の初期化を続行します.
free_area_init_core関数はzoneを初期化するコアです.
/*
 * Set up the zone data structures:
 * - mark all pages reserved
 * - mark all memory queues empty
 * - clear the memory bitmaps
 */
/* Core of node initialisation: fill in the pg_data_t, then initialise
 * every zone of the node and the zone's struct pages. */
static void __paginginit free_area_init_core(struct pglist_data *pgdat,
		unsigned long *zones_size, unsigned long *zholes_size)
{
	enum zone_type j;
	int nid = pgdat->node_id;
	unsigned long zone_start_pfn = pgdat->node_start_pfn;
	int ret;

	pgdat_resize_init(pgdat); /* initialise pgdat->node_size_lock */
	pgdat->nr_zones = 0;
	init_waitqueue_head(&pgdat->kswapd_wait); /* kswapd's wait queue */
	pgdat->kswapd_max_order = 0; /* kswapd reclaims up to order 2^kswapd_max_order */
	pgdat_page_cgroup_init(pgdat); /* page cgroup bookkeeping */

	/* initialise each zone of this node in turn */
	for (j = 0; j < MAX_NR_ZONES; j++) {
		struct zone *zone = pgdat->node_zones + j;
		unsigned long size, realsize, memmap_pages;
		enum lru_list l;

		/* size: pages spanned by the zone, holes included */
		size = zone_spanned_pages_in_node(nid, j, zones_size);
		/* realsize: pages actually present (size minus holes) */
		realsize = size - zone_absent_pages_in_node(nid, j,
								zholes_size);
		/*
		 * Adjust realsize so that it accounts for how much memory
		 * is used by this zone for memmap. This affects the watermark
		 * and per-cpu initialisations
		 */
		memmap_pages = /* pages consumed by the zone's struct page array */
			PAGE_ALIGN(size * sizeof(struct page)) >> PAGE_SHIFT;
		if (realsize >= memmap_pages) {
			realsize -= memmap_pages;
			if (memmap_pages)
				printk(KERN_DEBUG
				       " %s zone: %lu pages used for memmap\n",
				       zone_names[j], memmap_pages);
		} else /* memmap would need more pages than are present */
			printk(KERN_WARNING
			       " %s zone: %lu pages exceeds realsize %lu\n",
			       zone_names[j], memmap_pages, realsize);

		/* Account for reserved pages (DMA zone only, j == 0) */
		if (j == 0 && realsize > dma_reserve) {
			realsize -= dma_reserve;
			printk(KERN_DEBUG " %s zone: %lu pages reserved\n",
			       zone_names[0], dma_reserve);
		}

		if (!is_highmem_idx(j))
			nr_kernel_pages += realsize;
		nr_all_pages += realsize;

		zone->spanned_pages = size;	/* range including holes */
		zone->present_pages = realsize;	/* usable pages */
#ifdef CONFIG_NUMA
		zone->node = nid; /* node this zone belongs to */
		/* zone_reclaim threshold: unmapped pagecache pages */
		zone->min_unmapped_pages = (realsize*sysctl_min_unmapped_ratio)
						/ 100;
		/* zone_reclaim threshold: reclaimable slab pages */
		zone->min_slab_pages = (realsize * sysctl_min_slab_ratio) / 100;
#endif
		zone->name = zone_names[j];
		/* locks protecting the zone and its LRU lists */
		spin_lock_init(&zone->lock);
		spin_lock_init(&zone->lru_lock);
		zone_seqlock_init(zone);
		zone->zone_pgdat = pgdat; /* back-pointer to the pg_data_t */
		zone_pcp_init(zone); /* per-cpu pageset setup */

		/* empty LRU lists and clear the reclaim statistics */
		for_each_lru(l) {
			INIT_LIST_HEAD(&zone->lru[l].list);
			zone->reclaim_stat.nr_saved_scan[l] = 0;
		}
		zone->reclaim_stat.recent_rotated[0] = 0;
		zone->reclaim_stat.recent_rotated[1] = 0;
		zone->reclaim_stat.recent_scanned[0] = 0;
		zone->reclaim_stat.recent_scanned[1] = 0;
		zap_zone_vm_stats(zone); /* zero zone->vm_stat counters */
		zone->flags = 0;
		if (!size) /* empty zone: nothing more to set up */
			continue;

		set_pageblock_order(pageblock_default_order()); /* order 9 here */
		setup_usemap(pgdat, zone, size); /* no-op under CONFIG_SPARSEMEM */
		/*
		 * Sets pgdat->nr_zones and zone->zone_start_pfn,
		 * initialises the zone->free_area free lists and
		 * allocates zone->wait_table.
		 */
		ret = init_currently_empty_zone(zone, zone_start_pfn,
						size, MEMMAP_EARLY);
		BUG_ON(ret);
		memmap_init(size, nid, j, zone_start_pfn); /* init the zone's struct pages */
		zone_start_pfn += size; /* next zone starts where this one ends */
	}
}
free_area_init_core関数はmemmap_init関数を呼び出してpage構造を初期化します.
/* boot-time wrapper: initialise a zone's struct pages in MEMMAP_EARLY mode */
#define memmap_init(size, nid, zone, start_pfn) \
	memmap_init_zone((size), (nid), (zone), (start_pfn), MEMMAP_EARLY)

/*
 * Initially all pages are reserved - free ones are freed
 * up by free_all_bootmem() once the early boot process is
 * done. Non-atomic initialization, single-pass.
 */
/* Initialise every struct page in [start_pfn, start_pfn + size). */
void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
		unsigned long start_pfn, enum memmap_context context)
{
	struct page *page;
	unsigned long end_pfn = start_pfn + size;
	unsigned long pfn;
	struct zone *z;

	/* track the highest pfn covered by any mem_map */
	if (highest_memmap_pfn < end_pfn - 1)
		highest_memmap_pfn = end_pfn - 1;
	z = &NODE_DATA(nid)->node_zones[zone]; /* the zone being initialised */
	for (pfn = start_pfn; pfn < end_pfn; pfn++) {
		/*
		 * There can be holes in boot-time mem_map[]s
		 * handed to this function. They do not
		 * exist on hotplugged memory.
		 */
		if (context == MEMMAP_EARLY) {
			if (!early_pfn_valid(pfn))
				continue;
			if (!early_pfn_in_nid(pfn, nid))
				continue;
		}
		page = pfn_to_page(pfn); /* the struct page for this pfn */
		/* encode zone, node and section numbers into page->flags */
		set_page_links(page, zone, nid, pfn);
		mminit_verify_page_links(page, zone, nid, pfn); /* mminit debug check */
		init_page_count(page);     /* page->_count = 1 */
		reset_page_mapcount(page); /* page->_mapcount = -1 */
		SetPageReserved(page);     /* freed later by free_all_bootmem() */
		/*
		 * Mark the block movable so that blocks are reserved for
		 * movable at startup. This will force kernel allocations
		 * to reserve their blocks rather than leaking throughout
		 * the address space during boot when many long-lived
		 * kernel allocations are made. Later some blocks near
		 * the start are marked MIGRATE_RESERVE by
		 * setup_zone_migrate_reserve()
		 *
		 * bitmap is created for zone's valid pfn range. but memmap
		 * can be created for invalid pages (for alignment)
		 * check here not to call set_pageblock_migratetype() against
		 * pfn out of zone.
		 */
		if ((z->zone_start_pfn <= pfn)
		    && (pfn < z->zone_start_pfn + z->spanned_pages)
		    && !(pfn & (pageblock_nr_pages - 1)))
			set_pageblock_migratetype(page, MIGRATE_MOVABLE);
		INIT_LIST_HEAD(&page->lru); /* page starts on no LRU list */
#ifdef WANT_PAGE_VIRTUAL
		/* The shift won't overflow because ZONE_NORMAL is below 4G. */
		if (!is_highmem_idx(zone))
			/* record the page's kernel virtual address */
			set_page_address(page, __va(pfn << PAGE_SHIFT));
#endif
	}
}
こうしてpaging_init関数により、pg_data_t、zone、pageなどの構造の初期化が完了しました.