Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/rzhang/linux
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index bcb38718e174f1af77f362f584e049762ba552f7..d06cae2de783acf462162e27f8770ff7bf60f4ad 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -366,13 +366,6 @@ mem_cgroup_zone_zoneinfo(struct mem_cgroup *memcg, struct zone *zone)
  *
  * If memcg is bound to a traditional hierarchy, the css of root_mem_cgroup
  * is returned.
- *
- * XXX: The above description of behavior on the default hierarchy isn't
- * strictly true yet as replace_page_cache_page() can modify the
- * association before @page is released even on the default hierarchy;
- * however, the current and planned usages don't mix the two functions
- * and replace_page_cache_page() will soon be updated to make the invariant
- * actually true.
  */
 struct cgroup_subsys_state *mem_cgroup_css_from_page(struct page *page)
 {
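
The hunk above only trims the mem_cgroup_css_from_page() comment; the helper itself resolves the css of the memcg a page is charged to, falling back to root_mem_cgroup's css on a traditional hierarchy as the remaining comment states. As a rough usage sketch, assuming the caller holds a reference on the page; report_page_cgroup() and its debug message are invented for illustration and are not part of this patch:

static void report_page_cgroup(struct page *page)
{
	struct cgroup_subsys_state *css;

	/* resolve the cgroup this page is attributed to */
	css = mem_cgroup_css_from_page(page);
	pr_debug("page %p -> cgroup inode %lu\n",
		 page, (unsigned long)cgroup_ino(css->cgroup));
}
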
@@ -2774,6 +2767,18 @@ static unsigned long tree_stat(struct mem_cgroup *memcg,
        return val;
 }
 
+static unsigned long tree_events(struct mem_cgroup *memcg,
+                                enum mem_cgroup_events_index idx)
+{
+       struct mem_cgroup *iter;
+       unsigned long val = 0;
+
+       for_each_mem_cgroup_tree(iter, memcg)
+               val += mem_cgroup_read_events(iter, idx);
+
+       return val;
+}
+
 static unsigned long mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)
 {
        unsigned long val;
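
tree_events() follows the same shape as tree_stat() directly above it: walk @memcg and all of its descendants with for_each_mem_cgroup_tree() and sum the requested counter. The same pattern could in principle be factored out for the per-LRU totals that memory_stat_show() below still open-codes; tree_lru_pages() here is a made-up name used only to illustrate that, not something this patch adds:

static unsigned long tree_lru_pages(struct mem_cgroup *memcg, enum lru_list lru)
{
	struct mem_cgroup *iter;
	unsigned long val = 0;

	/* pre-order walk over memcg and all of its descendants */
	for_each_mem_cgroup_tree(iter, memcg)
		val += mem_cgroup_nr_lru_pages(iter, BIT(lru));

	return val;
}
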
@@ -4633,7 +4638,8 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd,
        pte_t *pte;
        spinlock_t *ptl;
 
-       if (pmd_trans_huge_lock(pmd, vma, &ptl)) {
+       ptl = pmd_trans_huge_lock(pmd, vma);
+       if (ptl) {
                if (get_mctgt_type_thp(vma, addr, *pmd, NULL) == MC_TARGET_PAGE)
                        mc.precharge += HPAGE_PMD_NR;
                spin_unlock(ptl);
@@ -4821,7 +4827,8 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
        union mc_target target;
        struct page *page;
 
-       if (pmd_trans_huge_lock(pmd, vma, &ptl)) {
+       ptl = pmd_trans_huge_lock(pmd, vma);
+       if (ptl) {
                if (mc.precharge < HPAGE_PMD_NR) {
                        spin_unlock(ptl);
                        return 0;
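
Both conversions above stem from the same interface change: pmd_trans_huge_lock() now returns the acquired page-table lock pointer, or NULL when the pmd is not a stable transparent huge page, instead of reporting success through a spinlock_t ** output argument. A sketch of the resulting caller pattern (walk_one_pmd() is an invented name; only the locking idiom matters):

static int walk_one_pmd(pmd_t *pmd, unsigned long addr,
			struct vm_area_struct *vma)
{
	spinlock_t *ptl;

	ptl = pmd_trans_huge_lock(pmd, vma);
	if (ptl) {
		/* *pmd is a stable huge entry; inspect it under ptl */
		spin_unlock(ptl);
		return 1;
	}

	/* not a THP mapping: fall back to the pte-level walk */
	return 0;
}
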
@@ -5103,6 +5110,59 @@ static int memory_events_show(struct seq_file *m, void *v)
        return 0;
 }
 
+static int memory_stat_show(struct seq_file *m, void *v)
+{
+       struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
+       int i;
+
+       /*
+        * Provide statistics on the state of the memory subsystem as
+        * well as cumulative event counters that show past behavior.
+        *
+        * This list is ordered following a combination of these gradients:
+        * 1) generic big picture -> specifics and details
+        * 2) reflecting userspace activity -> reflecting kernel heuristics
+        *
+        * Current memory state:
+        */
+
+       seq_printf(m, "anon %llu\n",
+                  (u64)tree_stat(memcg, MEM_CGROUP_STAT_RSS) * PAGE_SIZE);
+       seq_printf(m, "file %llu\n",
+                  (u64)tree_stat(memcg, MEM_CGROUP_STAT_CACHE) * PAGE_SIZE);
+       seq_printf(m, "sock %llu\n",
+                  (u64)tree_stat(memcg, MEMCG_SOCK) * PAGE_SIZE);
+
+       seq_printf(m, "file_mapped %llu\n",
+                  (u64)tree_stat(memcg, MEM_CGROUP_STAT_FILE_MAPPED) *
+                  PAGE_SIZE);
+       seq_printf(m, "file_dirty %llu\n",
+                  (u64)tree_stat(memcg, MEM_CGROUP_STAT_DIRTY) *
+                  PAGE_SIZE);
+       seq_printf(m, "file_writeback %llu\n",
+                  (u64)tree_stat(memcg, MEM_CGROUP_STAT_WRITEBACK) *
+                  PAGE_SIZE);
+
+       for (i = 0; i < NR_LRU_LISTS; i++) {
+               struct mem_cgroup *mi;
+               unsigned long val = 0;
+
+               for_each_mem_cgroup_tree(mi, memcg)
+                       val += mem_cgroup_nr_lru_pages(mi, BIT(i));
+               seq_printf(m, "%s %llu\n",
+                          mem_cgroup_lru_names[i], (u64)val * PAGE_SIZE);
+       }
+
+       /* Accumulated memory events */
+
+       seq_printf(m, "pgfault %lu\n",
+                  tree_events(memcg, MEM_CGROUP_EVENTS_PGFAULT));
+       seq_printf(m, "pgmajfault %lu\n",
+                  tree_events(memcg, MEM_CGROUP_EVENTS_PGMAJFAULT));
+
+       return 0;
+}
+
 static struct cftype memory_files[] = {
        {
                .name = "current",
@@ -5133,6 +5193,11 @@ static struct cftype memory_files[] = {
                .file_offset = offsetof(struct mem_cgroup, events_file),
                .seq_show = memory_events_show,
        },
+       {
+               .name = "stat",
+               .flags = CFTYPE_NOT_ON_ROOT,
+               .seq_show = memory_stat_show,
+       },
        { }     /* terminate */
 };
 
@@ -5464,7 +5529,8 @@ void mem_cgroup_uncharge_list(struct list_head *page_list)
 void mem_cgroup_replace_page(struct page *oldpage, struct page *newpage)
 {
        struct mem_cgroup *memcg;
-       int isolated;
+       unsigned int nr_pages;
+       bool compound;
 
        VM_BUG_ON_PAGE(!PageLocked(oldpage), oldpage);
        VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);
@@ -5484,11 +5550,21 @@ void mem_cgroup_replace_page(struct page *oldpage, struct page *newpage)
        if (!memcg)
                return;
 
-       lock_page_lru(oldpage, &isolated);
-       oldpage->mem_cgroup = NULL;
-       unlock_page_lru(oldpage, isolated);
+       /* Force-charge the new page. The old one will be freed soon */
+       compound = PageTransHuge(newpage);
+       nr_pages = compound ? hpage_nr_pages(newpage) : 1;
+
+       page_counter_charge(&memcg->memory, nr_pages);
+       if (do_memsw_account())
+               page_counter_charge(&memcg->memsw, nr_pages);
+       css_get_many(&memcg->css, nr_pages);
 
        commit_charge(newpage, memcg, true);
+
+       local_irq_disable();
+       mem_cgroup_charge_statistics(memcg, newpage, compound, nr_pages);
+       memcg_check_events(memcg, newpage);
+       local_irq_enable();
 }
 
 DEFINE_STATIC_KEY_FALSE(memcg_sockets_enabled_key);
@@ -5559,6 +5635,8 @@ bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
        if (in_softirq())
                gfp_mask = GFP_NOWAIT;
 
+       this_cpu_add(memcg->stat->count[MEMCG_SOCK], nr_pages);
+
        if (try_charge(memcg, gfp_mask, nr_pages) == 0)
                return true;
 
@@ -5578,6 +5656,8 @@ void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
                return;
        }
 
+       this_cpu_sub(memcg->stat->count[MEMCG_SOCK], nr_pages);
+
        page_counter_uncharge(&memcg->memory, nr_pages);
        css_put_many(&memcg->css, nr_pages);
 }
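
The charge and uncharge hunks above keep a per-cpu MEMCG_SOCK counter in step with socket buffer memory; that counter is what the "sock" line printed by memory_stat_show() reads back through tree_stat(). Because updates land in per-cpu slots and are only summed at read time, the reported value is an approximation at any instant. A sketch of the read-side folding, assuming the __percpu stat array layout this file already uses (fold_sock_pages() is a made-up name):

static unsigned long fold_sock_pages(struct mem_cgroup *memcg)
{
	long val = 0;
	int cpu;

	/* individual per-cpu slots (and a racy sum) may read negative; clamp */
	for_each_possible_cpu(cpu)
		val += per_cpu_ptr(memcg->stat, cpu)->count[MEMCG_SOCK];

	return val < 0 ? 0 : val;
}
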
@@ -5761,6 +5841,28 @@ long mem_cgroup_get_nr_swap_pages(struct mem_cgroup *memcg)
        return nr_swap_pages;
 }
 
+bool mem_cgroup_swap_full(struct page *page)
+{
+       struct mem_cgroup *memcg;
+
+       VM_BUG_ON_PAGE(!PageLocked(page), page);
+
+       if (vm_swap_full())
+               return true;
+       if (!do_swap_account || !cgroup_subsys_on_dfl(memory_cgrp_subsys))
+               return false;
+
+       memcg = page->mem_cgroup;
+       if (!memcg)
+               return false;
+
+       for (; memcg != root_mem_cgroup; memcg = parent_mem_cgroup(memcg))
+               if (page_counter_read(&memcg->swap) * 2 >= memcg->swap.limit)
+                       return true;
+
+       return false;
+}
+
 /* for remembering the boot option */
 #ifdef CONFIG_MEMCG_SWAP_ENABLED
 static int really_do_swap_account __initdata = 1;
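
mem_cgroup_swap_full() extends the global vm_swap_full() heuristic to the cgroup level on the default hierarchy: swap is treated as full once charged swap reaches half of the configured limit at any level walked up from the page's memcg. For instance, with a swap limit of 1 GiB (262144 pages of 4 KiB), the check trips once 131072 pages, i.e. 512 MiB, are charged. The predicate in isolation (swap_half_full() is an invented helper, shown only to spell out the arithmetic):

static inline bool swap_half_full(unsigned long swap_pages,
				  unsigned long limit_pages)
{
	/* mirrors the "usage * 2 >= limit" test in the hierarchy walk above */
	return swap_pages * 2 >= limit_pages;
}
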