Merge branch 'drm-patches' of master.kernel.org:/pub/scm/linux/kernel/git/airlied...
[linux-drm-fsl-dcu.git] / arch/i386/mm/init.c
index ae436882af7a11dce76b34ee711fa6ce80c0502e..1a7197e89eb4ce5e39cbf4eb9fa8ac72dfd7697f 100644
@@ -22,6 +22,7 @@
 #include <linux/init.h>
 #include <linux/highmem.h>
 #include <linux/pagemap.h>
+#include <linux/pfn.h>
 #include <linux/poison.h>
 #include <linux/bootmem.h>
 #include <linux/slab.h>
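
Both new includes serve code added later in this patch: <linux/pfn.h>
supplies PFN_ALIGN(), now used by mark_rodata_ro(), and <asm/paravirt.h>
(next hunk) is presumably what declares the paravirt_pagetable_setup_start()/
_done() calls introduced in pagetable_init() below.
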
@@ -42,6 +43,7 @@
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
 #include <asm/sections.h>
+#include <asm/paravirt.h>
 
 unsigned int __VMALLOC_RESERVE = 128 << 20;
 
@@ -61,17 +63,18 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
        pmd_t *pmd_table;
                
 #ifdef CONFIG_X86_PAE
-       pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
-       paravirt_alloc_pd(__pa(pmd_table) >> PAGE_SHIFT);
-       set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
-       pud = pud_offset(pgd, 0);
-       if (pmd_table != pmd_offset(pud, 0)) 
-               BUG();
-#else
+       if (!(pgd_val(*pgd) & _PAGE_PRESENT)) {
+               pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+
+               paravirt_alloc_pd(__pa(pmd_table) >> PAGE_SHIFT);
+               set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
+               pud = pud_offset(pgd, 0);
+               if (pmd_table != pmd_offset(pud, 0))
+                       BUG();
+       }
+#endif
        pud = pud_offset(pgd, 0);
        pmd_table = pmd_offset(pud, 0);
-#endif
-
        return pmd_table;
 }
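
With the #else branch folded away, one_md_table_init() is now idempotent:
under PAE it only allocates and installs a pmd page when the pgd entry
lacks the Present bit, and every path returns the pmd through the same
pud_offset()/pmd_offset() walk. A minimal caller sketch (hypothetical
boot-time context, using only names already present in this file):

	pgd_t *pgd = swapper_pg_dir + pgd_index(vaddr);
	pmd_t *first = one_md_table_init(pgd);	/* may allocate from bootmem */
	pmd_t *again = one_md_table_init(pgd);	/* entry now present: pure lookup */
	BUG_ON(first != again);
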
 
@@ -81,14 +84,12 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
  */
 static pte_t * __init one_page_table_init(pmd_t *pmd)
 {
-       if (pmd_none(*pmd)) {
+       if (!(pmd_val(*pmd) & _PAGE_PRESENT)) {
                pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+
                paravirt_alloc_pt(__pa(page_table) >> PAGE_SHIFT);
                set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
-               if (page_table != pte_offset_kernel(pmd, 0))
-                       BUG();  
-
-               return page_table;
+               BUG_ON(page_table != pte_offset_kernel(pmd, 0));
        }
        
        return pte_offset_kernel(pmd, 0);
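
The allocation test changes from pmd_none() (entry is all zeroes) to a
Present-bit check. The two agree for empty and for present entries; they
differ only for a non-zero entry whose Present bit is clear, which
pmd_none() would have left alone and which is now overwritten with a
fresh page table. The new criterion as a standalone predicate (an
illustrative helper, not part of the patch):

	static inline int pmd_needs_page_table(pmd_t pmd)
	{
		/* Only the Present bit decides; stale non-present
		 * bits no longer suppress the allocation. */
		return !(pmd_val(pmd) & _PAGE_PRESENT);
	}
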
@@ -108,7 +109,6 @@ static pte_t * __init one_page_table_init(pmd_t *pmd)
 static void __init page_table_range_init (unsigned long start, unsigned long end, pgd_t *pgd_base)
 {
        pgd_t *pgd;
-       pud_t *pud;
        pmd_t *pmd;
        int pgd_idx, pmd_idx;
        unsigned long vaddr;
@@ -119,13 +119,10 @@ static void __init page_table_range_init (unsigned long start, unsigned long end
        pgd = pgd_base + pgd_idx;
 
        for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) {
-               if (pgd_none(*pgd)) 
-                       one_md_table_init(pgd);
-               pud = pud_offset(pgd, vaddr);
-               pmd = pmd_offset(pud, vaddr);
+               pmd = one_md_table_init(pgd);
+               pmd = pmd + pmd_index(vaddr);
                for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_idx++) {
-                       if (pmd_none(*pmd)) 
-                               one_page_table_init(pmd);
+                       one_page_table_init(pmd);
 
                        vaddr += PMD_SIZE;
                }
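
Since one_md_table_init() can now be called on an already-populated pgd
entry and hands back the pmd directly, the pgd_none() test and the pud
local (dropped in the previous hunk) are no longer needed; the returned
table is indexed with pmd_index(). For reference, a sketch of the
standard i386 definition assumed here (from <asm/pgtable.h> of this era):

	#define pmd_index(address) \
		(((address) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
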
@@ -167,20 +164,22 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
                        /* Map with big pages if possible, otherwise create normal page tables. */
                        if (cpu_has_pse) {
                                unsigned int address2 = (pfn + PTRS_PER_PTE - 1) * PAGE_SIZE + PAGE_OFFSET + PAGE_SIZE-1;
-
                                if (is_kernel_text(address) || is_kernel_text(address2))
                                        set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC));
                                else
                                        set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE));
+
                                pfn += PTRS_PER_PTE;
                        } else {
                                pte = one_page_table_init(pmd);
 
-                               for (pte_ofs = 0; pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; pte++, pfn++, pte_ofs++) {
-                                               if (is_kernel_text(address))
-                                                       set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
-                                               else
-                                                       set_pte(pte, pfn_pte(pfn, PAGE_KERNEL));
+                               for (pte_ofs = 0;
+                                    pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn;
+                                    pte++, pfn++, pte_ofs++, address += PAGE_SIZE) {
+                                       if (is_kernel_text(address))
+                                               set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
+                                       else
+                                               set_pte(pte, pfn_pte(pfn, PAGE_KERNEL));
                                }
                        }
                }
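
Beyond the reindentation, this hunk fixes a real bug: address was never
advanced inside the PTE loop, so is_kernel_text() classified every page
of a pmd block by the block's first address, and whole blocks got the
wrong protection whenever the text boundary fell mid-block. A worked
example with hypothetical addresses: if a block starts at 0xc0000000 and
the kernel text begins at 0xc0100000, the old loop tested
is_kernel_text(0xc0000000) for all PTRS_PER_PTE pages and mapped the
text pages without PAGE_KERNEL_EXEC; with address += PAGE_SIZE in the
loop increment, the protection switches exactly at 0xc0100000.
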
@@ -337,24 +336,78 @@ extern void __init remap_numa_kva(void);
 #define remap_numa_kva() do {} while (0)
 #endif
 
-static void __init pagetable_init (void)
+void __init native_pagetable_setup_start(pgd_t *base)
 {
-       unsigned long vaddr;
-       pgd_t *pgd_base = swapper_pg_dir;
-
 #ifdef CONFIG_X86_PAE
        int i;
-       /* Init entries of the first-level page table to the zero page */
-       for (i = 0; i < PTRS_PER_PGD; i++)
-               set_pgd(pgd_base + i, __pgd(__pa(empty_zero_page) | _PAGE_PRESENT));
+
+       /*
+        * Init entries of the first-level page table to the
+        * zero page, if they haven't already been set up.
+        *
+        * In a normal native boot, we'll be running on a
+        * pagetable rooted in swapper_pg_dir, but not in PAE
+        * mode, so this will end up clobbering the mappings
+        * for the lower 24Mbytes of the address space,
+        * without affecting the kernel address space.
+        */
+       for (i = 0; i < USER_PTRS_PER_PGD; i++)
+               set_pgd(&base[i],
+                       __pgd(__pa(empty_zero_page) | _PAGE_PRESENT));
+
+       /*
+        * Make sure kernel address space is empty so that a
+        * pagetable will be allocated for it.
+        */
+       memset(&base[USER_PTRS_PER_PGD], 0,
+              KERNEL_PGD_PTRS * sizeof(pgd_t));
 #else
        paravirt_alloc_pd(__pa(swapper_pg_dir) >> PAGE_SHIFT);
 #endif
+}
+
+void __init native_pagetable_setup_done(pgd_t *base)
+{
+#ifdef CONFIG_X86_PAE
+       /*
+        * Add low memory identity-mappings - SMP needs it when
+        * starting up on an AP from real-mode. In the non-PAE
+        * case we already have these mappings through head.S.
+        * All user-space mappings are explicitly cleared after
+        * SMP startup.
+        */
+       set_pgd(&base[0], base[USER_PTRS_PER_PGD]);
+#endif
+}
+
+/*
+ * Build a proper pagetable for the kernel mappings.  Up until this
+ * point, we've been running on some set of pagetables constructed by
+ * the boot process.
+ *
+ * If we're booting on native hardware, this will be a pagetable
+ * constructed in arch/i386/kernel/head.S, and not running in PAE mode
+ * (even if we'll end up running in PAE).  The root of the pagetable
+ * will be swapper_pg_dir.
+ *
+ * If we're booting paravirtualized under a hypervisor, then there are
+ * more options: we may already be running PAE, and the pagetable may
+ * or may not be based in swapper_pg_dir.  In any case,
+ * paravirt_pagetable_setup_start() will set up swapper_pg_dir
+ * appropriately for the rest of the initialization to work.
+ *
+ * In general, pagetable_init() assumes that the pagetable may already
+ * be partially populated, and so it avoids stomping on any existing
+ * mappings.
+ */
+static void __init pagetable_init (void)
+{
+       unsigned long vaddr, end;
+       pgd_t *pgd_base = swapper_pg_dir;
+
+       paravirt_pagetable_setup_start(pgd_base);
 
        /* Enable PSE if available */
-       if (cpu_has_pse) {
+       if (cpu_has_pse)
                set_in_cr4(X86_CR4_PSE);
-       }
 
        /* Enable PGE if available */
        if (cpu_has_pge) {
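
pagetable_init() now brackets its work with
paravirt_pagetable_setup_start()/_done(). Their definitions are not part
of this file; presumably, as elsewhere in the i386 paravirt series, they
are thin <asm/paravirt.h> wrappers that dispatch through paravirt_ops,
with the native_ functions above as the default backend. A hedged sketch
of that glue (assumed shape; the exact names and fields are not shown in
this patch):

	static inline void paravirt_pagetable_setup_start(pgd_t *base)
	{
		paravirt_ops.pagetable_setup_start(base);
	}

	static inline void paravirt_pagetable_setup_done(pgd_t *base)
	{
		paravirt_ops.pagetable_setup_done(base);
	}
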
@@ -371,20 +424,12 @@ static void __init pagetable_init (void)
         * created - mappings will be set by set_fixmap():
         */
        vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
-       page_table_range_init(vaddr, 0, pgd_base);
+       end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK;
+       page_table_range_init(vaddr, end, pgd_base);
 
        permanent_kmaps_init(pgd_base);
 
-#ifdef CONFIG_X86_PAE
-       /*
-        * Add low memory identity-mappings - SMP needs it when
-        * starting up on an AP from real-mode. In the non-PAE
-        * case we already have these mappings through head.S.
-        * All user-space mappings are explicitly cleared after
-        * SMP startup.
-        */
-       set_pgd(&pgd_base[0], pgd_base[USER_PTRS_PER_PGD]);
-#endif
+       paravirt_pagetable_setup_done(pgd_base);
 }
 
 #if defined(CONFIG_SOFTWARE_SUSPEND) || defined(CONFIG_ACPI_SLEEP)
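
The old call passed end == 0 and relied on the loop's vaddr != end test
terminating only once vaddr wrapped past the top of the address space;
the new code states the intent explicitly by rounding FIXADDR_TOP up to
the next pmd boundary. The rounding idiom in generic form (illustrative
macro, not in the patch):

	#define ROUND_UP(x, a)	(((x) + (a) - 1) & ~((a) - 1))
	/* end = ROUND_UP(FIXADDR_TOP, PMD_SIZE), as PMD_MASK == ~(PMD_SIZE - 1) */
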
@@ -700,6 +745,8 @@ struct kmem_cache *pmd_cache;
 
 void __init pgtable_cache_init(void)
 {
+       size_t pgd_size = PTRS_PER_PGD*sizeof(pgd_t);
+
        if (PTRS_PER_PMD > 1) {
                pmd_cache = kmem_cache_create("pmd",
                                        PTRS_PER_PMD*sizeof(pmd_t),
@@ -709,13 +756,23 @@ void __init pgtable_cache_init(void)
                                        NULL);
                if (!pmd_cache)
                        panic("pgtable_cache_init(): cannot create pmd cache");
+
+               if (!SHARED_KERNEL_PMD) {
+                       /*
+                        * If we're in PAE mode and have a non-shared
+                        * kernel pmd, then the pgd size must be a
+                        * page size.  This is because the pgd_list
+                        * links through the page structure, so there
+                        * can only be one pgd per page for this to
+                        * work.
+                        */
+                       pgd_size = PAGE_SIZE;
+               }
        }
        pgd_cache = kmem_cache_create("pgd",
-                               PTRS_PER_PGD*sizeof(pgd_t),
-                               PTRS_PER_PGD*sizeof(pgd_t),
+                               pgd_size,
+                               pgd_size,
                                0,
                                pgd_ctor,
-                               PTRS_PER_PMD == 1 ? pgd_dtor : NULL);
+                               (!SHARED_KERNEL_PMD) ? pgd_dtor : NULL);
        if (!pgd_cache)
                panic("pgtable_cache_init(): Cannot create pgd cache");
 }
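
SHARED_KERNEL_PMD is new with this series. When the kernel pmd is shared
between pgds, the pgd cache keeps its natural size; when a paravirt
backend opts out of sharing, each pgd must occupy a full page, because
pgd_list chains pgds through their struct page and that only works with
one pgd per page (as the comment above explains). A hedged sketch of
where the flag presumably comes from (companion patch, not shown here):

	/* pgtable-2level-defs.h: no PAE, kernel pmd never shared */
	#define SHARED_KERNEL_PMD	0

	/* pgtable-3level-defs.h: PAE, backend's choice under paravirt */
	#ifdef CONFIG_PARAVIRT
	#define SHARED_KERNEL_PMD	(paravirt_ops.shared_kernel_pmd)
	#else
	#define SHARED_KERNEL_PMD	1
	#endif

Under those assumed definitions, the destructor choice below preserves
the old non-PAE behaviour (PTRS_PER_PMD == 1) and extends it to
non-shared PAE pgds.
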
@@ -751,13 +808,25 @@ static int noinline do_test_wp_bit(void)
 
 void mark_rodata_ro(void)
 {
-       unsigned long addr = (unsigned long)__start_rodata;
+       unsigned long start = PFN_ALIGN(_text);
+       unsigned long size = PFN_ALIGN(_etext) - start;
 
-       for (; addr < (unsigned long)__end_rodata; addr += PAGE_SIZE)
-               change_page_attr(virt_to_page(addr), 1, PAGE_KERNEL_RO);
+#ifdef CONFIG_HOTPLUG_CPU
+       /* It must still be possible to apply SMP alternatives. */
+       if (num_possible_cpus() <= 1)
+#endif
+       {
+               change_page_attr(virt_to_page(start),
+                                size >> PAGE_SHIFT, PAGE_KERNEL_RX);
+               printk("Write protecting the kernel text: %luk\n", size >> 10);
+       }
 
-       printk("Write protecting the kernel read-only data: %uk\n",
-                       (__end_rodata - __start_rodata) >> 10);
+       start += size;
+       size = (unsigned long)__end_rodata - start;
+       change_page_attr(virt_to_page(start),
+                        size >> PAGE_SHIFT, PAGE_KERNEL_RO);
+       printk("Write protecting the kernel read-only data: %luk\n",
+              size >> 10);
 
        /*
         * change_page_attr() requires a global_flush_tlb() call after it.
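
mark_rodata_ro() now write-protects the kernel text as well
(PAGE_KERNEL_RX), unless CPU hotplug means SMP alternatives may still
need to patch it later. PFN_ALIGN() comes from the <linux/pfn.h> include
added at the top of this patch; its standard definition rounds an
address up to the next page boundary:

	#define PFN_ALIGN(x)	(((unsigned long)(x) + (PAGE_SIZE - 1)) & PAGE_MASK)
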
@@ -780,7 +849,7 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
                free_page(addr);
                totalram_pages++;
        }
-       printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10);
+       printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
 }
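
A small correctness fix to close: (end - begin) is an unsigned long, so
%lu, not %ld, is the matching printk conversion.
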
 
 void free_initmem(void)