diff --git a/mm/vmscan.c b/mm/vmscan.c
index dcd90c891d8e53895d117f219001329e8cdeab46..5e8eadd71bac71bee1dd9a121a3d44f3a4373c56 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -91,6 +91,9 @@ struct scan_control {
        /* Can pages be swapped as part of reclaim? */
        unsigned int may_swap:1;
 
+       /* Can cgroups be reclaimed below their normal consumption range? */
+       unsigned int may_thrash:1;
+
        unsigned int hibernation_mode:1;
 
        /* One of the zones is ready for compaction */
@@ -229,10 +232,10 @@ EXPORT_SYMBOL(unregister_shrinker);
 
 #define SHRINK_BATCH 128
 
-static unsigned long shrink_slabs(struct shrink_control *shrinkctl,
-                                 struct shrinker *shrinker,
-                                 unsigned long nr_scanned,
-                                 unsigned long nr_eligible)
+static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
+                                   struct shrinker *shrinker,
+                                   unsigned long nr_scanned,
+                                   unsigned long nr_eligible)
 {
        unsigned long freed = 0;
        unsigned long long delta;
@@ -341,9 +344,10 @@ static unsigned long shrink_slabs(struct shrink_control *shrinkctl,
 }
 
 /**
- * shrink_node_slabs - shrink slab caches of a given node
+ * shrink_slab - shrink slab caches
  * @gfp_mask: allocation context
  * @nid: node whose slab caches to target
+ * @memcg: memory cgroup whose slab caches to target
  * @nr_scanned: pressure numerator
  * @nr_eligible: pressure denominator
  *
@@ -352,6 +356,12 @@ static unsigned long shrink_slabs(struct shrink_control *shrinkctl,
  * @nid is passed along to shrinkers with SHRINKER_NUMA_AWARE set,
  * unaware shrinkers will receive a node id of 0 instead.
  *
+ * @memcg specifies the memory cgroup to target. If it is not NULL,
+ * only shrinkers with SHRINKER_MEMCG_AWARE set will be called to scan
+ * objects from the memory cgroup specified. Otherwise all shrinkers
+ * are called, and memcg aware shrinkers are supposed to scan the
+ * global list then.
+ *
  * @nr_scanned and @nr_eligible form a ratio that indicate how much of
  * the available objects should be scanned.  Page reclaim for example
  * passes the number of pages scanned and the number of pages on the
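
Not part of the patch, but a hedged illustration of the @memcg contract described above (all demo_* names are hypothetical): a shrinker opts in to per-cgroup invocation by setting SHRINKER_MEMCG_AWARE and consulting sc->memcg in its callbacks, which shrink_slab() fills in for each cgroup under pressure (NULL on the global pass).

static unsigned long demo_count_objects(struct shrinker *shrink,
					struct shrink_control *sc)
{
	/* Count only objects charged to sc->memcg on node sc->nid. */
	return demo_nr_cached(sc->nid, sc->memcg);
}

static unsigned long demo_scan_objects(struct shrinker *shrink,
				       struct shrink_control *sc)
{
	/* Try to free up to sc->nr_to_scan of those objects. */
	return demo_evict(sc->nid, sc->memcg, sc->nr_to_scan);
}

static struct shrinker demo_shrinker = {
	.count_objects	= demo_count_objects,
	.scan_objects	= demo_scan_objects,
	.seeks		= DEFAULT_SEEKS,
	.flags		= SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE,
};

After register_shrinker(&demo_shrinker), the per-memcg loop added below only invokes it when the flag is set; shrinkers without SHRINKER_MEMCG_AWARE keep seeing only the memcg == NULL pass.
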
@@ -362,13 +372,17 @@ static unsigned long shrink_slabs(struct shrink_control *shrinkctl,
  *
  * Returns the number of reclaimed slab objects.
  */
-unsigned long shrink_node_slabs(gfp_t gfp_mask, int nid,
-                               unsigned long nr_scanned,
-                               unsigned long nr_eligible)
+static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
+                                struct mem_cgroup *memcg,
+                                unsigned long nr_scanned,
+                                unsigned long nr_eligible)
 {
        struct shrinker *shrinker;
        unsigned long freed = 0;
 
+       if (memcg && !memcg_kmem_is_active(memcg))
+               return 0;
+
        if (nr_scanned == 0)
                nr_scanned = SWAP_CLUSTER_MAX;
 
@@ -387,12 +401,16 @@ unsigned long shrink_node_slabs(gfp_t gfp_mask, int nid,
                struct shrink_control sc = {
                        .gfp_mask = gfp_mask,
                        .nid = nid,
+                       .memcg = memcg,
                };
 
+               if (memcg && !(shrinker->flags & SHRINKER_MEMCG_AWARE))
+                       continue;
+
                if (!(shrinker->flags & SHRINKER_NUMA_AWARE))
                        sc.nid = 0;
 
-               freed += shrink_slabs(&sc, shrinker, nr_scanned, nr_eligible);
+               freed += do_shrink_slab(&sc, shrinker, nr_scanned, nr_eligible);
        }
 
        up_read(&shrinker_rwsem);
@@ -401,6 +419,29 @@ out:
        return freed;
 }
 
+void drop_slab_node(int nid)
+{
+       unsigned long freed;
+
+       do {
+               struct mem_cgroup *memcg = NULL;
+
+               freed = 0;
+               do {
+                       freed += shrink_slab(GFP_KERNEL, nid, memcg,
+                                            1000, 1000);
+               } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)) != NULL);
+       } while (freed > 10);
+}
+
+void drop_slab(void)
+{
+       int nid;
+
+       for_each_online_node(nid)
+               drop_slab_node(nid);
+}
+
 static inline int is_page_cache_freeable(struct page *page)
 {
        /*
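
For context, not introduced by this hunk: drop_slab() above is the slab half of the drop_caches sysctl. Paraphrased (not verbatim) from drop_caches_sysctl_handler() in fs/drop_caches.c, writing 2 or 3 to /proc/sys/vm/drop_caches ends up doing:

	if (sysctl_drop_caches & 1)
		iterate_supers(drop_pagecache_sb, NULL);	/* page cache */
	if (sysctl_drop_caches & 2)
		drop_slab();		/* per node, per memcg, via shrink_slab() */
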
@@ -497,7 +538,7 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
        }
        if (mapping->a_ops->writepage == NULL)
                return PAGE_ACTIVATE;
-       if (!may_write_to_queue(mapping->backing_dev_info, sc))
+       if (!may_write_to_queue(inode_to_bdi(mapping->host), sc))
                return PAGE_KEEP;
 
        if (clear_page_dirty_for_io(page)) {
@@ -876,7 +917,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                 */
                mapping = page_mapping(page);
                if (((dirty || writeback) && mapping &&
-                    bdi_write_congested(mapping->backing_dev_info)) ||
+                    bdi_write_congested(inode_to_bdi(mapping->host))) ||
                    (writeback && PageReclaim(page)))
                        nr_congested++;
 
@@ -1903,8 +1944,12 @@ static void get_scan_count(struct lruvec *lruvec, int swappiness,
         * latencies, so it's better to scan a minimum amount there as
         * well.
         */
-       if (current_is_kswapd() && !zone_reclaimable(zone))
-               force_scan = true;
+       if (current_is_kswapd()) {
+               if (!zone_reclaimable(zone))
+                       force_scan = true;
+               if (!mem_cgroup_lruvec_online(lruvec))
+                       force_scan = true;
+       }
        if (!global_reclaim(sc))
                force_scan = true;
 
@@ -2269,6 +2314,7 @@ static inline bool should_continue_reclaim(struct zone *zone,
 static bool shrink_zone(struct zone *zone, struct scan_control *sc,
                        bool is_classzone)
 {
+       struct reclaim_state *reclaim_state = current->reclaim_state;
        unsigned long nr_reclaimed, nr_scanned;
        bool reclaimable = false;
 
@@ -2287,15 +2333,28 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc,
                memcg = mem_cgroup_iter(root, NULL, &reclaim);
                do {
                        unsigned long lru_pages;
+                       unsigned long scanned;
                        struct lruvec *lruvec;
                        int swappiness;
 
+                       if (mem_cgroup_low(root, memcg)) {
+                               if (!sc->may_thrash)
+                                       continue;
+                               mem_cgroup_events(memcg, MEMCG_LOW, 1);
+                       }
+
                        lruvec = mem_cgroup_zone_lruvec(zone, memcg);
                        swappiness = mem_cgroup_swappiness(memcg);
+                       scanned = sc->nr_scanned;
 
                        shrink_lruvec(lruvec, swappiness, sc, &lru_pages);
                        zone_lru_pages += lru_pages;
 
+                       if (memcg && is_classzone)
+                               shrink_slab(sc->gfp_mask, zone_to_nid(zone),
+                                           memcg, sc->nr_scanned - scanned,
+                                           lru_pages);
+
                        /*
                         * Direct reclaim and kswapd have to scan all memory
                         * cgroups to fulfill the overall scan target for the
@@ -2311,26 +2370,20 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc,
                                mem_cgroup_iter_break(root, memcg);
                                break;
                        }
-                       memcg = mem_cgroup_iter(root, memcg, &reclaim);
-               } while (memcg);
+               } while ((memcg = mem_cgroup_iter(root, memcg, &reclaim)));
 
                /*
                 * Shrink the slab caches in the same proportion that
                 * the eligible LRU pages were scanned.
                 */
-               if (global_reclaim(sc) && is_classzone) {
-                       struct reclaim_state *reclaim_state;
-
-                       shrink_node_slabs(sc->gfp_mask, zone_to_nid(zone),
-                                         sc->nr_scanned - nr_scanned,
-                                         zone_lru_pages);
-
-                       reclaim_state = current->reclaim_state;
-                       if (reclaim_state) {
-                               sc->nr_reclaimed +=
-                                       reclaim_state->reclaimed_slab;
-                               reclaim_state->reclaimed_slab = 0;
-                       }
+               if (global_reclaim(sc) && is_classzone)
+                       shrink_slab(sc->gfp_mask, zone_to_nid(zone), NULL,
+                                   sc->nr_scanned - nr_scanned,
+                                   zone_lru_pages);
+
+               if (reclaim_state) {
+                       sc->nr_reclaimed += reclaim_state->reclaimed_slab;
+                       reclaim_state->reclaimed_slab = 0;
                }
 
                vmpressure(sc->gfp_mask, sc->target_mem_cgroup,
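
The "same proportion" mentioned in the comment above is computed inside do_shrink_slab(); a simplified, non-verbatim sketch of that arithmetic:

	/*
	 * With nr_scanned LRU pages scanned out of nr_eligible, a shrinker
	 * reporting 'freeable' objects is asked to scan roughly the same
	 * fraction of them, biased by its ->seeks cost estimate.
	 */
	delta = (4 * nr_scanned) / shrinker->seeks;	/* DEFAULT_SEEKS == 2 */
	delta *= freeable;
	do_div(delta, nr_eligible + 1);
	total_scan += delta;

With DEFAULT_SEEKS this comes to roughly 2 * freeable * nr_scanned / nr_eligible objects per pass; the per-memcg call added above passes the cgroup's own lru_pages as the denominator, while the global call keeps using zone_lru_pages.
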
@@ -2515,10 +2568,11 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
 static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
                                          struct scan_control *sc)
 {
+       int initial_priority = sc->priority;
        unsigned long total_scanned = 0;
        unsigned long writeback_threshold;
        bool zones_reclaimable;
-
+retry:
        delayacct_freepages_start();
 
        if (global_reclaim(sc))
@@ -2568,6 +2622,13 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
        if (sc->compaction_ready)
                return 1;
 
+       /* Untapped cgroup reserves?  Don't OOM, retry. */
+       if (!sc->may_thrash) {
+               sc->priority = initial_priority;
+               sc->may_thrash = 1;
+               goto retry;
+       }
+
        /* Any of the zones still reclaimable?  Don't OOM. */
        if (zones_reclaimable)
                return 1;
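
A worked example of the retry above, with illustrative numbers not taken from the patch: a cgroup with memory.low = 512M and 400M of usage makes mem_cgroup_low() return true, so the first pass through shrink_zone() skips it entirely while sc->may_thrash is 0. Only if the whole priority sweep reclaims nothing and compaction is not ready does the priority reset and the scan repeat with sc->may_thrash set, at which point the group is scanned and a MEMCG_LOW event is accounted against it.
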
@@ -3175,7 +3236,7 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
                 */
                if (waitqueue_active(&pgdat->pfmemalloc_wait) &&
                                pfmemalloc_watermark_ok(pgdat))
-                       wake_up(&pgdat->pfmemalloc_wait);
+                       wake_up_all(&pgdat->pfmemalloc_wait);
 
                /*
                 * Fragmentation may mean that the system cannot be rebalanced