Linux memory management: the paging_init function

The source is the 2.6.37 kernel, x86_64 architecture, with the Sparse Memory model.
The paging_init function is called from setup_arch and initialises every node's pg_data_t structure, along with each node's zone and page structures.
The rough call sequence is:
start_kernel()
 --> setup_arch()
  --> paging_init()
   --> free_area_init_nodes()
    --> free_area_init_node()
     --> free_area_init_core()
      --> memmap_init()
Concretely, paging_init() is defined as follows:
void __init paging_init(void)
{
        unsigned long max_zone_pfns[MAX_NR_ZONES]; /* MAX_NR_ZONES = 4 */

        memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
        max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN; /* 16M */
        max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN; /* 4G */
        max_zone_pfns[ZONE_NORMAL] = max_pfn; /* highest pfn on the machine */

        /* Mark each node's mem_sections as present.
         * Based on the memory regions recorded in early_node_map, the
         * mem_section entries backing each node's pages are marked
         * present. The kernel later locates page structures through
         * mem_section, so only sections marked present here will get
         * a mem_map allocated by sparse_init() below.
         */
        sparse_memory_present_with_active_regions(MAX_NUMNODES);
        sparse_init();

        /*
         * clear the default setting with node 0
         * note: don't use nodes_clear here, that is really clearing when
         *       numa support is not compiled in, and later node_set_state
         *       will not set it back.
         */
        node_clear_state(0, N_NORMAL_MEMORY);

        /* initialise pg_data_t, zone and page structures for every node */
        free_area_init_nodes(max_zone_pfns);
}
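
As a quick sanity check on those boundaries: with 4 KiB pages (PAGE_SHIFT = 12), MAX_DMA_PFN works out to 0x1000 and MAX_DMA32_PFN to 0x100000. The stand-alone sketch below reproduces the arithmetic; it is ordinary user-space C, and the lowercase variables are stand-ins for the kernel macros, not kernel code:
#include <stdio.h>

#define PAGE_SHIFT 12 /* 4 KiB pages, as on x86_64 */

int main(void)
{
        /* stand-ins for the kernel's MAX_DMA_PFN / MAX_DMA32_PFN */
        unsigned long max_dma_pfn   = (16UL << 20) >> PAGE_SHIFT; /* 16M -> 0x1000 */
        unsigned long max_dma32_pfn = (4UL << 30) >> PAGE_SHIFT;  /* 4G -> 0x100000 */

        printf("ZONE_DMA   ends at pfn %#lx\n", max_dma_pfn);
        printf("ZONE_DMA32 ends at pfn %#lx\n", max_dma32_pfn);
        return 0;
}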

paging_init() calls free_area_init_nodes(), which initialises the pg_data_t, zone and page data of every node and prints the zone range information:
/**
 * free_area_init_nodes - Initialise all pg_data_t and zone data
 * @max_zone_pfn: an array of max PFNs for each zone
 *
 * This will call free_area_init_node() for each active node in the system.
 * Using the page ranges provided by add_active_range(), the size of each
 * zone in each node and their holes is calculated. If the maximum PFN
 * between two adjacent zones match, it is assumed that the zone is empty.
 * For example, if arch_max_dma_pfn == arch_max_dma32_pfn, it is assumed
 * that arch_max_dma32_pfn has no pages. It is also assumed that a zone
 * starts where the previous one ended. For example, ZONE_DMA32 starts
 * at arch_max_dma_pfn.
 */
/* Initialise the pg_data_t, zone and page data of every node */
void __init free_area_init_nodes(unsigned long *max_zone_pfn)
{
        unsigned long nid; 
        int i;

        /* Sort early_node_map as initialisation assumes it is sorted */
        sort_node_map();

        /*
         * arch_zone_lowest_possible_pfn and arch_zone_highest_possible_pfn
         * record the boundary pfns of every zone:
         *   arch_zone_lowest_possible_pfn  = { lowest pfn, 16M,   4G,      0 }
         *   arch_zone_highest_possible_pfn = { 16M,        4G,    max_pfn, 0 }
         *                                      DMA         DMA32  NORMAL   MOVABLE
         */
        /* Record where the zone boundaries are */
	memset(arch_zone_lowest_possible_pfn, 0,
                                sizeof(arch_zone_lowest_possible_pfn));
        memset(arch_zone_highest_possible_pfn, 0,
                                sizeof(arch_zone_highest_possible_pfn));
        /* the lowest pfn recorded in early_node_map */
        arch_zone_lowest_possible_pfn[0] = find_min_pfn_with_active_regions();
        arch_zone_highest_possible_pfn[0] = max_zone_pfn[0]; /* max_zone_pfn = {16M, 4G, max_pfn} */
        for (i = 1; i < MAX_NR_ZONES; i++) {
                if (i == ZONE_MOVABLE)
                        continue;
                arch_zone_lowest_possible_pfn[i] =
                        arch_zone_highest_possible_pfn[i-1];
                arch_zone_highest_possible_pfn[i] =
                        max(max_zone_pfn[i], arch_zone_lowest_possible_pfn[i]);
        }
        arch_zone_lowest_possible_pfn[ZONE_MOVABLE] = 0; /* ZONE_MOVABLE has no fixed arch boundaries; it is computed per node below */
        arch_zone_highest_possible_pfn[ZONE_MOVABLE] = 0;

        /* Find the PFNs that ZONE_MOVABLE begins at in each node */
        /* Compute zone_movable_pfn for every node.
         * ZONE_MOVABLE is a virtual zone carved out of the highest real
         * zone: pages placed in it are meant to stay migratable or
         * reclaimable, unlike core kernel allocations, which cannot be
         * moved. find_zone_movable_pfns_for_nodes walks early_node_map
         * and decides, for each node, the pfn at which its movable
         * pages begin.
         */
        memset(zone_movable_pfn, 0, sizeof(zone_movable_pfn));
        find_zone_movable_pfns_for_nodes(zone_movable_pfn);

        /* print the DMA, DMA32 and NORMAL zone ranges */
        /* Print out the zone ranges */
        printk("Zone PFN ranges:
"); for (i = 0; i < MAX_NR_ZONES; i++) { if (i == ZONE_MOVABLE) continue; printk(" %-8s ", zone_names[i]); if (arch_zone_lowest_possible_pfn[i] == arch_zone_highest_possible_pfn[i]) printk("empty
"); else printk("%0#10lx -> %0#10lx
", arch_zone_lowest_possible_pfn[i], arch_zone_highest_possible_pfn[i]); } /* ZONE_MOVABLE */ /* Print out the PFNs ZONE_MOVABLE begins at in each node */ printk("Movable zone start PFN for each node
"); for (i = 0; i < MAX_NUMNODES; i++) { if (zone_movable_pfn[i]) printk(" Node %d: %lu
", i, zone_movable_pfn[i]); } /* early_node_map */ /* Print out the early_node_map[] */ printk("early_node_map[%d] active PFN ranges
", nr_nodemap_entries); for (i = 0; i < nr_nodemap_entries; i++) printk(" %3d: %0#10lx -> %0#10lx
", early_node_map[i].nid, early_node_map[i].start_pfn, early_node_map[i].end_pfn); /* Initialise every node */ mminit_verify_pageflags_layout(); setup_nr_node_ids(); /* */ for_each_online_node(nid) { pg_data_t *pgdat = NODE_DATA(nid); /* pg_data_t */ /* free_area_init_node nid pg_data_t zone、page */ free_area_init_node(nid, NULL, find_min_pfn_for_node(nid), NULL); /* find_min_pfn_for_node early_node_map pfn */ /* Any memory on that node */ if (pgdat->node_present_pages) node_set_state(nid, N_HIGH_MEMORY); /* regular memory */ check_for_regular_memory(pgdat); /* CONFIG_HIGHMEM */ } }

free_area_init_nodes() loops over every node, and for each one calls free_area_init_node() to initialise that node's pg_data_t, zone and page data:
/* Initialise the pg_data_t, zone and page data of node nid
 * @nid             the node's id
 * @zones_size      NULL here
 * @node_start_pfn  the first pfn of node nid
 * @zholes_size     NULL here
 */
void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
                unsigned long node_start_pfn, unsigned long *zholes_size)
{       
        pg_data_t *pgdat = NODE_DATA(nid); /* the node's pg_data_t */

        pgdat->node_id = nid; /* record the node id */
        pgdat->node_start_pfn = node_start_pfn; /* record the node's first page frame number */

        /* Compute the node's total page counts:
         * pg_data_t.node_spanned_pages gets the full spanned range,
         * holes included; node_present_pages gets the number of pages
         * actually present, holes excluded.
         */
        calculate_node_totalpages(pgdat, zones_size, zholes_size);

        /* only allocates a flat mem_map when CONFIG_FLAT_NODE_MEM_MAP is set; a no-op with sparse memory */
        alloc_node_mem_map(pgdat);
#ifdef CONFIG_FLAT_NODE_MEM_MAP
        printk(KERN_DEBUG "free_area_init_node: node %d, pgdat %08lx, node_mem_map %08lx\n",
                nid, (unsigned long)pgdat, (unsigned long)pgdat->node_mem_map);
#endif

        /* free_area_init_core finishes initialising the pg_data_t and
         * sets up the node's zone and page structures */
        free_area_init_core(pgdat, zones_size, zholes_size);
}

free_area_init_node() goes on to call free_area_init_core(), which finishes initialising the node's pg_data_t and sets up its zone and page structures. free_area_init_core() is the core of zone initialisation:
/*
 * Set up the zone data structures:
 *   - mark all pages reserved
 *   - mark all memory queues empty
 *   - clear the memory bitmaps
 */
/* finish initialising the pg_data_t and set up its zone and page structures */
static void __paginginit free_area_init_core(struct pglist_data *pgdat,
                unsigned long *zones_size, unsigned long *zholes_size)
{
        enum zone_type j;
        int nid = pgdat->node_id;
        unsigned long zone_start_pfn = pgdat->node_start_pfn;
        int ret;

        pgdat_resize_init(pgdat); /* initialise the pgdat->node_size_lock spinlock */
        pgdat->nr_zones = 0;
        init_waitqueue_head(&pgdat->kswapd_wait); /* initialise the pgdat->kswapd_wait wait queue */
        pgdat->kswapd_max_order = 0; /* largest order kswapd has been asked to free, i.e. 2^kswapd_max_order pages */
        pgdat_page_cgroup_init(pgdat); /* page cgroup initialisation */

        /* initialise each zone of this node */
        for (j = 0; j < MAX_NR_ZONES; j++) {
                struct zone *zone = pgdat->node_zones + j;
                unsigned long size, realsize, memmap_pages;
                enum lru_list l;

                /* size: pages spanned by the zone, holes included */
                size = zone_spanned_pages_in_node(nid, j, zones_size);
                /* realsize: pages actually present, holes excluded */
                realsize = size - zone_absent_pages_in_node(nid, j,
                                                                zholes_size);

		/*
                 * Adjust realsize so that it accounts for how much memory
                 * is used by this zone for memmap. This affects the watermark
                 * and per-cpu initialisations
                 */
                /* subtract the pages that hold this zone's struct page array */
                memmap_pages =
                        PAGE_ALIGN(size * sizeof(struct page)) >> PAGE_SHIFT;
                if (realsize >= memmap_pages) {
                        realsize -= memmap_pages;
                        if (memmap_pages)
                                printk(KERN_DEBUG
                                       "  %s zone: %lu pages used for memmap
", zone_names[j], memmap_pages); } else /* page */ printk(KERN_WARNING " %s zone: %lu pages exceeds realsize %lu
", zone_names[j], memmap_pages, realsize); /* realsize , DMA */ /* Account for reserved pages */ if (j == 0 && realsize > dma_reserve) { realsize -= dma_reserve; printk(KERN_DEBUG " %s zone: %lu pages reserved
", zone_names[0], dma_reserve); } if (!is_highmem_idx(j)) nr_kernel_pages += realsize; nr_all_pages += realsize; zone->spanned_pages = size; /* zone->spanned_pages */ zone->present_pages = realsize; /* zone->present+pages */ #ifdef CONFIG_NUMA zone->node = nid; /* zone */ /* */ zone->min_unmapped_pages = (realsize*sysctl_min_unmapped_ratio) / 100; /* slab */ zone->min_slab_pages = (realsize * sysctl_min_slab_ratio) / 100; #endif zone->name = zone_names[j]; /* zone */ /* */ spin_lock_init(&zone->lock); spin_lock_init(&zone->lru_lock); zone_seqlock_init(zone); zone->zone_pgdat = pgdat; /* pg_data_t */ zone_pcp_init(zone); /* cpu */ /* lru */ for_each_lru(l) { INIT_LIST_HEAD(&zone->lru[l].list); zone->reclaim_stat.nr_saved_scan[l] = 0; } zone->reclaim_stat.recent_rotated[0] = 0; zone->reclaim_stat.recent_rotated[1] = 0; zone->reclaim_stat.recent_scanned[0] = 0; zone->reclaim_stat.recent_scanned[1] = 0; zap_zone_vm_stats(zone); /* zone->vm_stat 0 */ zone->flags = 0; if (!size) continue; set_pageblock_order(pageblock_default_order()); /* pageblock_default_order() 9*/ setup_usemap(pgdat, zone, size); /* CONFIG_SPARSEMEM */ /* pgdat->nr_zones zone->zone_start_pfn * zone->free_area * zone->wait_table */ ret = init_currently_empty_zone(zone, zone_start_pfn, size, MEMMAP_EARLY); BUG_ON(ret); memmap_init(size, nid, j, zone_start_pfn); /* zone page */ zone_start_pfn += size; /* zone_start_pfn zone */ } }

free_area_init_core() calls the memmap_init() macro to initialise the page structures:
#define memmap_init(size, nid, zone, start_pfn) \
        memmap_init_zone((size), (nid), (zone), (start_pfn), MEMMAP_EARLY)
/*
 * Initially all pages are reserved - free ones are freed
 * up by free_all_bootmem() once the early boot process is
 * done. Non-atomic initialization, single-pass.
 */
/* initialise all the page structures of the given zone */
void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
                unsigned long start_pfn, enum memmap_context context)
{
        struct page *page;
        unsigned long end_pfn = start_pfn + size;
        unsigned long pfn;
        struct zone *z;

        if (highest_memmap_pfn < end_pfn - 1)  /* track the highest pfn covered by any memmap */
                highest_memmap_pfn = end_pfn - 1;

        z = &NODE_DATA(nid)->node_zones[zone]; /* the zone being initialised */
        for (pfn = start_pfn; pfn < end_pfn; pfn++) {
                /*
                 * There can be holes in boot-time mem_map[]s
                 * handed to this function.  They do not
                 * exist on hotplugged memory.
                 */
                if (context == MEMMAP_EARLY) {
                        if (!early_pfn_valid(pfn))
                                continue;
                        if (!early_pfn_in_nid(pfn, nid))
                                continue;
                }
                page = pfn_to_page(pfn); /* get the page for this pfn and start initialising it */
                set_page_links(page, zone, nid, pfn); /* encode the zone, node and section numbers into page->flags */

                mminit_verify_page_links(page, zone, nid, pfn); /* debug-only check (CONFIG_DEBUG_MEMORY_INIT) */
                init_page_count(page); /* set page->_count to 1 */
                reset_page_mapcount(page); /* set page->_mapcount to -1 */
                SetPageReserved(page); /* mark the page reserved */

                /*
                 * Mark the block movable so that blocks are reserved for
                 * movable at startup. This will force kernel allocations
                 * to reserve their blocks rather than leaking throughout
                 * the address space during boot when many long-lived
                 * kernel allocations are made. Later some blocks near
                 * the start are marked MIGRATE_RESERVE by
                 * setup_zone_migrate_reserve()
                 *
                 * bitmap is created for zone's valid pfn range. but memmap
                 * can be created for invalid pages (for alignment)
                 * check here not to call set_pageblock_migratetype() against
                 * pfn out of zone.
                 */
                if ((z->zone_start_pfn <= pfn)
                    && (pfn < z->zone_start_pfn + z->spanned_pages)
                    && !(pfn & (pageblock_nr_pages - 1)))
                        set_pageblock_migratetype(page, MIGRATE_MOVABLE);

                INIT_LIST_HEAD(&page->lru); /* initialise the page's lru list head */
#ifdef WANT_PAGE_VIRTUAL
                /* The shift won't overflow because ZONE_NORMAL is below 4G. */
                if (!is_highmem_idx(zone))
                        /* cache the page's kernel virtual address in page->virtual */
                        set_page_address(page, __va(pfn << PAGE_SHIFT));
#endif
        }
}
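
The !(pfn & (pageblock_nr_pages - 1)) test above is true only for the first pfn of each pageblock, so set_pageblock_migratetype() runs once per block rather than once per page. A small sketch with pageblock_order = 9 (512 pages per block, matching the 2 MiB huge-page size noted earlier):
#include <stdio.h>

#define PAGEBLOCK_ORDER    9
#define PAGEBLOCK_NR_PAGES (1UL << PAGEBLOCK_ORDER) /* 512 pages = 2 MiB */

int main(void)
{
        unsigned long pfn;

        /* only pageblock-aligned pfns pass the check in memmap_init_zone() */
        for (pfn = 0x1000; pfn < 0x1000 + 4 * PAGEBLOCK_NR_PAGES; pfn++)
                if (!(pfn & (PAGEBLOCK_NR_PAGES - 1)))
                        printf("pfn %#lx starts a pageblock\n", pfn);
        return 0;
}

It prints 0x1000, 0x1200, 0x1400 and 0x1600: one hit per 512-page block.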

With that, paging_init() is complete: the pg_data_t, zone and page structures have all been initialised.