mm: add overcommit_kbytes sysctl variable
author Jerome Marchand <jmarchan@redhat.com>
Tue, 21 Jan 2014 23:49:14 +0000 (15:49 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 22 Jan 2014 00:19:44 +0000 (16:19 -0800)
Some applications that run on HPC clusters are designed around the
availability of RAM and the overcommit ratio is fine tuned to get the
maximum usage of memory without swapping.  With growing memory, the
1%-of-all-RAM grain provided by overcommit_ratio has become too coarse
for these workloads (on a 2TB machine it represents no less than 20GB).

This patch adds the new overcommit_kbytes sysctl variable that allows a
much finer grain.

[akpm@linux-foundation.org: coding-style fixes]
[akpm@linux-foundation.org: fix nommu build]
Signed-off-by: Jerome Marchand <jmarchan@redhat.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Documentation/sysctl/vm.txt
Documentation/vm/overcommit-accounting
include/linux/mm.h
include/linux/mman.h
kernel/sysctl.c
mm/mmap.c
mm/nommu.c
mm/util.c

index 1fbd4eb7b64aff335bed40a3d9ebbde95a26bcc1..9f5481bdc5a43f942fb833f3131901249d2d5c50 100644 (file)
@@ -47,6 +47,7 @@ Currently, these files are in /proc/sys/vm:
 - numa_zonelist_order
 - oom_dump_tasks
 - oom_kill_allocating_task
+- overcommit_kbytes
 - overcommit_memory
 - overcommit_ratio
 - page-cluster
@@ -574,6 +575,17 @@ The default value is 0.
 
 ==============================================================
 
+overcommit_kbytes:
+
+When overcommit_memory is set to 2, the committed address space is not
+permitted to exceed swap plus this amount of physical RAM. See below.
+
+Note: overcommit_kbytes is the counterpart of overcommit_ratio. Only one
+of them may be specified at a time. Setting one disables the other (which
+then appears as 0 when read).
+
+==============================================================
+
 overcommit_memory:
 
 This value contains a flag that enables memory overcommitment.
index 8eaa2fc4b8fae253930a798f38394438198dbf5a..cbfaaa674118daaf46e6467cf865d4cd06aae23e 100644 (file)
@@ -14,8 +14,8 @@ The Linux kernel supports the following overcommit handling modes
 
 2      -       Don't overcommit. The total address space commit
                for the system is not permitted to exceed swap + a
-               configurable percentage (default is 50) of physical RAM.
-               Depending on the percentage you use, in most situations
+               configurable amount (default is 50%) of physical RAM.
+               Depending on the amount you use, in most situations
                this means a process will not be killed while accessing
                pages but will receive errors on memory allocation as
                appropriate.
@@ -26,7 +26,8 @@ The Linux kernel supports the following overcommit handling modes
 
 The overcommit policy is set via the sysctl `vm.overcommit_memory'.
 
-The overcommit percentage is set via `vm.overcommit_ratio'.
+The overcommit amount can be set via `vm.overcommit_ratio' (percentage)
+or `vm.overcommit_kbytes' (absolute value).
 
 The current overcommit limit and amount committed are viewable in
 /proc/meminfo as CommitLimit and Committed_AS respectively.
index 4c0c01afc19b8fdf535de0dfc2a5ff22d07db22f..a512dd836931c1f8fc336e6a61c0175c8e270dbc 100644 (file)
@@ -57,6 +57,15 @@ extern int sysctl_legacy_va_layout;
 extern unsigned long sysctl_user_reserve_kbytes;
 extern unsigned long sysctl_admin_reserve_kbytes;
 
+extern int sysctl_overcommit_memory;
+extern int sysctl_overcommit_ratio;
+extern unsigned long sysctl_overcommit_kbytes;
+
+extern int overcommit_ratio_handler(struct ctl_table *, int, void __user *,
+                                   size_t *, loff_t *);
+extern int overcommit_kbytes_handler(struct ctl_table *, int, void __user *,
+                                   size_t *, loff_t *);
+
 #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n))
 
 /* to align the pointer to the (next) page boundary */
index 7f7f8dae4b1deec32eb1079428f4b13b87dd25c7..16373c8f5f5788c12a2b404d3793f0dd67d979c5 100644 (file)
@@ -9,6 +9,7 @@
 
 extern int sysctl_overcommit_memory;
 extern int sysctl_overcommit_ratio;
+extern unsigned long sysctl_overcommit_kbytes;
 extern struct percpu_counter vm_committed_as;
 
 #ifdef CONFIG_SMP
index c8da99f905cf522a34dd7ff059bde584e4d8c90a..332cefcdb04b4a004b280aebdd4d7de61a036dea 100644 (file)
@@ -95,8 +95,6 @@
 #if defined(CONFIG_SYSCTL)
 
 /* External variables not in a header file. */
-extern int sysctl_overcommit_memory;
-extern int sysctl_overcommit_ratio;
 extern int max_threads;
 extern int suid_dumpable;
 #ifdef CONFIG_COREDUMP
@@ -1121,7 +1119,14 @@ static struct ctl_table vm_table[] = {
                .data           = &sysctl_overcommit_ratio,
                .maxlen         = sizeof(sysctl_overcommit_ratio),
                .mode           = 0644,
-               .proc_handler   = proc_dointvec,
+               .proc_handler   = overcommit_ratio_handler,
+       },
+       {
+               .procname       = "overcommit_kbytes",
+               .data           = &sysctl_overcommit_kbytes,
+               .maxlen         = sizeof(sysctl_overcommit_kbytes),
+               .mode           = 0644,
+               .proc_handler   = overcommit_kbytes_handler,
        },
        {
                .procname       = "page-cluster", 
index 834b2d785f1e2f2fdce59a608f28a94b02b5d82d..39552de6e1db11c9c60f4a53df9b1739afec3320 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -86,6 +86,7 @@ EXPORT_SYMBOL(vm_get_page_prot);
 
 int sysctl_overcommit_memory __read_mostly = OVERCOMMIT_GUESS;  /* heuristic overcommit */
 int sysctl_overcommit_ratio __read_mostly = 50;        /* default is 50% */
+unsigned long sysctl_overcommit_kbytes __read_mostly;
 int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
 unsigned long sysctl_user_reserve_kbytes __read_mostly = 1UL << 17; /* 128MB */
 unsigned long sysctl_admin_reserve_kbytes __read_mostly = 1UL << 13; /* 8MB */
index fec093adad9c1d65799206805f3d95fe19a1da88..8740213b1647019c9fdbaaf355da3f2e5e632d99 100644 (file)
@@ -60,6 +60,7 @@ unsigned long highest_memmap_pfn;
 struct percpu_counter vm_committed_as;
 int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */
 int sysctl_overcommit_ratio = 50; /* default is 50% */
+unsigned long sysctl_overcommit_kbytes __read_mostly;
 int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT;
 int sysctl_nr_trim_pages = CONFIG_NOMMU_INITIAL_TRIM_EXCESS;
 unsigned long sysctl_user_reserve_kbytes __read_mostly = 1UL << 17; /* 128MB */
index 808f375648e77b6c1057aeee2c6d19ea26af981e..a24aa22f2473690c1e2fa95514f778c6c0c616a7 100644 (file)
--- a/mm/util.c
+++ b/mm/util.c
@@ -404,13 +404,45 @@ struct address_space *page_mapping(struct page *page)
        return mapping;
 }
 
+int overcommit_ratio_handler(struct ctl_table *table, int write,
+                            void __user *buffer, size_t *lenp,
+                            loff_t *ppos)
+{
+       int ret;
+       /*
+        * sysctl handler for vm.overcommit_ratio.  The read or write itself
+        * is delegated to proc_dointvec(), whose return value is passed back
+        * to the caller unchanged.
+        *
+        * After a successful write, sysctl_overcommit_kbytes is reset to 0.
+        * vm_commit_limit() only uses the ratio when the kbytes value is
+        * zero, so this is what makes the newly written ratio take effect.
+        */
+       ret = proc_dointvec(table, write, buffer, lenp, ppos);
+       if (ret == 0 && write)
+               sysctl_overcommit_kbytes = 0;
+       return ret;
+}
+
+int overcommit_kbytes_handler(struct ctl_table *table, int write,
+                            void __user *buffer, size_t *lenp,
+                            loff_t *ppos)
+{
+       int ret;
+       /*
+        * sysctl handler for vm.overcommit_kbytes.  The read or write itself
+        * is delegated to proc_doulongvec_minmax(), whose return value is
+        * passed back to the caller unchanged.
+        *
+        * After a successful write, sysctl_overcommit_ratio is reset to 0 so
+        * that only one of the two settings appears as set when read back.
+        *
+        * NOTE(review): writing 0 here leaves both settings at 0; in that
+        * case vm_commit_limit() takes the ratio branch with a ratio of 0
+        * and the limit is swap only -- confirm this is intended.
+        */
+       ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
+       if (ret == 0 && write)
+               sysctl_overcommit_ratio = 0;
+       return ret;
+}
+
 /*
  * Committed memory limit enforced when OVERCOMMIT_NEVER policy is used
  */
 unsigned long vm_commit_limit(void)
 {
-       return ((totalram_pages - hugetlb_total_pages())
-               * sysctl_overcommit_ratio / 100) + total_swap_pages;
+       unsigned long allowed;
+       /*
+        * Returns the commit limit in pages.
+        *
+        * A non-zero sysctl_overcommit_kbytes takes precedence.  It is
+        * shifted right by (PAGE_SHIFT - 10), which converts kilobytes to
+        * pages assuming PAGE_SHIFT is log2 of the page size in bytes.
+        *
+        * Otherwise the allowance is sysctl_overcommit_ratio percent of
+        * (totalram_pages - hugetlb_total_pages()), as before this change.
+        */
+       if (sysctl_overcommit_kbytes)
+               allowed = sysctl_overcommit_kbytes >> (PAGE_SHIFT - 10);
+       else
+               allowed = ((totalram_pages - hugetlb_total_pages())
+                          * sysctl_overcommit_ratio / 100);
+       allowed += total_swap_pages;    /* swap is added on top in both cases */
+
+       return allowed;
 }