memcg: do not allow to disable tcp accounting after limit is set
net/ipv4/tcp_memcontrol.c
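/*
 * tcp_memcontrol.c - accounting and limiting of TCP socket buffer
 * memory on behalf of a memory cgroup, exposed through the
 * memory.kmem.tcp.* control files.
 */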
#include <net/tcp.h>
#include <net/tcp_memcontrol.h>
#include <net/sock.h>
#include <net/ip.h>
#include <linux/nsproxy.h>
#include <linux/memcontrol.h>
#include <linux/module.h>

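/*
 * Set up TCP accounting state for a new memory cgroup: inherit the
 * global tcp_mem[] pressure thresholds and chain the page counter to
 * the parent cgroup's counter for hierarchical charging.
 */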
int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
{
        /*
         * The root cgroup does not use page_counters, but rather
         * relies on the data already collected by the network
         * subsystem.
         */
        struct mem_cgroup *parent = parent_mem_cgroup(memcg);
        struct page_counter *counter_parent = NULL;
        struct cg_proto *cg_proto, *parent_cg;

        cg_proto = tcp_prot.proto_cgroup(memcg);
        if (!cg_proto)
                return 0;

        cg_proto->sysctl_mem[0] = sysctl_tcp_mem[0];
        cg_proto->sysctl_mem[1] = sysctl_tcp_mem[1];
        cg_proto->sysctl_mem[2] = sysctl_tcp_mem[2];
        cg_proto->memory_pressure = 0;
        cg_proto->memcg = memcg;

        parent_cg = tcp_prot.proto_cgroup(parent);
        if (parent_cg)
                counter_parent = &parent_cg->memory_allocated;

        page_counter_init(&cg_proto->memory_allocated, counter_parent);
        percpu_counter_init(&cg_proto->sockets_allocated, 0, GFP_KERNEL);

        return 0;
}
EXPORT_SYMBOL(tcp_init_cgroup);

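/*
 * Tear down the TCP accounting state when a memory cgroup is
 * destroyed. If the cgroup had a limit set, drop the reference on the
 * static key that was taken in tcp_update_limit().
 */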
void tcp_destroy_cgroup(struct mem_cgroup *memcg)
{
        struct cg_proto *cg_proto;

        cg_proto = tcp_prot.proto_cgroup(memcg);
        if (!cg_proto)
                return;

        percpu_counter_destroy(&cg_proto->sockets_allocated);

        if (cg_proto->active)
                static_key_slow_dec(&memcg_socket_limit_enabled);
}
EXPORT_SYMBOL(tcp_destroy_cgroup);

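/*
 * Set a new limit on the cgroup's TCP page counter and clamp the
 * per-cgroup pressure thresholds to it. The first time a limit is
 * set, accounting is enabled via a static key; once enabled, it is
 * never disabled again for the lifetime of the cgroup.
 */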
static int tcp_update_limit(struct mem_cgroup *memcg, unsigned long nr_pages)
{
        struct cg_proto *cg_proto;
        int i;
        int ret;

        cg_proto = tcp_prot.proto_cgroup(memcg);
        if (!cg_proto)
                return -EINVAL;

        ret = page_counter_limit(&cg_proto->memory_allocated, nr_pages);
        if (ret)
                return ret;

        for (i = 0; i < 3; i++)
                cg_proto->sysctl_mem[i] = min_t(long, nr_pages,
                                                sysctl_tcp_mem[i]);

        if (!cg_proto->active) {
                /*
                 * The active flag needs to be written after the static_key
                 * update. This is what guarantees that the socket activation
                 * function is the last one to run. See sock_update_memcg() for
                 * details, and note that we don't mark any socket as belonging
                 * to this memcg until that flag is up.
                 *
                 * We need to do this because static_keys will span multiple
                 * sites, but we can't control their order. If we mark a socket
                 * as accounted, but the accounting functions are not patched in
                 * yet, we'll lose accounting.
                 *
                 * We never race with the readers in sock_update_memcg(),
                 * because when this value changes, the code to process it is
                 * not patched in yet.
                 */
                static_key_slow_inc(&memcg_socket_limit_enabled);
                cg_proto->active = true;
        }

        return 0;
}

enum {
        RES_USAGE,
        RES_LIMIT,
        RES_MAX_USAGE,
        RES_FAILCNT,
};

static DEFINE_MUTEX(tcp_limit_mutex);

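/*
 * Handler for writes to memory.kmem.tcp.limit_in_bytes. Parses the
 * value ("-1" means unlimited) and applies it under tcp_limit_mutex
 * so that concurrent limit updates are serialized; writes to any
 * other file through this handler fail with -EINVAL.
 */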
static ssize_t tcp_cgroup_write(struct kernfs_open_file *of,
                                char *buf, size_t nbytes, loff_t off)
{
        struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
        unsigned long nr_pages;
        int ret = 0;

        buf = strstrip(buf);

        switch (of_cft(of)->private) {
        case RES_LIMIT:
                /* see memcontrol.c */
                ret = page_counter_memparse(buf, "-1", &nr_pages);
                if (ret)
                        break;
                mutex_lock(&tcp_limit_mutex);
                ret = tcp_update_limit(memcg, nr_pages);
                mutex_unlock(&tcp_limit_mutex);
                break;
        default:
                ret = -EINVAL;
                break;
        }
        return ret ?: nbytes;
}

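/*
 * Report the limit, usage, watermark, or failure count for a cgroup.
 * Page-based counters are converted to bytes. A cgroup without TCP
 * accounting state falls back to defaults: the global TCP usage for
 * RES_USAGE, "unlimited" for RES_LIMIT, and zero otherwise.
 */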
static u64 tcp_cgroup_read(struct cgroup_subsys_state *css, struct cftype *cft)
{
        struct mem_cgroup *memcg = mem_cgroup_from_css(css);
        struct cg_proto *cg_proto = tcp_prot.proto_cgroup(memcg);
        u64 val;

        switch (cft->private) {
        case RES_LIMIT:
                if (!cg_proto)
                        return PAGE_COUNTER_MAX;
                val = cg_proto->memory_allocated.limit;
                val *= PAGE_SIZE;
                break;
        case RES_USAGE:
                if (!cg_proto)
                        val = atomic_long_read(&tcp_memory_allocated);
                else
                        val = page_counter_read(&cg_proto->memory_allocated);
                val *= PAGE_SIZE;
                break;
        case RES_FAILCNT:
                if (!cg_proto)
                        return 0;
                val = cg_proto->memory_allocated.failcnt;
                break;
        case RES_MAX_USAGE:
                if (!cg_proto)
                        return 0;
                val = cg_proto->memory_allocated.watermark;
                val *= PAGE_SIZE;
                break;
        default:
                BUG();
        }
        return val;
}

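/*
 * Handler for writes to the failcnt and max_usage files: any write
 * resets the corresponding counter, mirroring the behaviour of the
 * memory.failcnt and memory.max_usage_in_bytes files.
 */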
static ssize_t tcp_cgroup_reset(struct kernfs_open_file *of,
                                char *buf, size_t nbytes, loff_t off)
{
        struct mem_cgroup *memcg;
        struct cg_proto *cg_proto;

        memcg = mem_cgroup_from_css(of_css(of));
        cg_proto = tcp_prot.proto_cgroup(memcg);
        if (!cg_proto)
                return nbytes;

        switch (of_cft(of)->private) {
        case RES_MAX_USAGE:
                page_counter_reset_watermark(&cg_proto->memory_allocated);
                break;
        case RES_FAILCNT:
                cg_proto->memory_allocated.failcnt = 0;
                break;
        }

        return nbytes;
}

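/*
 * Control files added to the memory controller's legacy (cgroup v1)
 * hierarchy. Illustrative usage from a shell, assuming a v1 memory
 * hierarchy mounted at /sys/fs/cgroup/memory and a cgroup "foo":
 *
 *   # cap TCP buffer memory for "foo" at 64M
 *   echo 64M > /sys/fs/cgroup/memory/foo/memory.kmem.tcp.limit_in_bytes
 *   # read back the current usage in bytes
 *   cat /sys/fs/cgroup/memory/foo/memory.kmem.tcp.usage_in_bytes
 */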
static struct cftype tcp_files[] = {
        {
                .name = "kmem.tcp.limit_in_bytes",
                .write = tcp_cgroup_write,
                .read_u64 = tcp_cgroup_read,
                .private = RES_LIMIT,
        },
        {
                .name = "kmem.tcp.usage_in_bytes",
                .read_u64 = tcp_cgroup_read,
                .private = RES_USAGE,
        },
        {
                .name = "kmem.tcp.failcnt",
                .private = RES_FAILCNT,
                .write = tcp_cgroup_reset,
                .read_u64 = tcp_cgroup_read,
        },
        {
                .name = "kmem.tcp.max_usage_in_bytes",
                .private = RES_MAX_USAGE,
                .write = tcp_cgroup_reset,
                .read_u64 = tcp_cgroup_read,
        },
        { }     /* terminate */
};

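/* Register the kmem.tcp.* files with the memory controller at boot. */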
static int __init tcp_memcontrol_init(void)
{
        WARN_ON(cgroup_add_legacy_cftypes(&memory_cgrp_subsys, tcp_files));
        return 0;
}
__initcall(tcp_memcontrol_init);