initramfs: fix initramfs size calculation
[linux-drm-fsl-dcu.git] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  *      Derived from the IP parts of dev.c 1.0.19
10  *              Authors:        Ross Biro
11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *      Additional Authors:
15  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *      Changes:
19  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
20  *                                      lists.
21  *              Cyrus Durgin:           updated for kmod
22  *              Matthias Andree:        in devinet_ioctl, compare label and
23  *                                      address (4.4BSD alias style support),
24  *                                      fall back to comparing just the label
25  *                                      if no match found.
26  */
27
28
29 #include <asm/uaccess.h>
30 #include <asm/system.h>
31 #include <linux/bitops.h>
32 #include <linux/capability.h>
33 #include <linux/module.h>
34 #include <linux/types.h>
35 #include <linux/kernel.h>
36 #include <linux/string.h>
37 #include <linux/mm.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
40 #include <linux/in.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #include <linux/slab.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58
59 #include <net/arp.h>
60 #include <net/ip.h>
61 #include <net/route.h>
62 #include <net/ip_fib.h>
63 #include <net/rtnetlink.h>
64 #include <net/net_namespace.h>
65
66 static struct ipv4_devconf ipv4_devconf = {
67         .data = {
68                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
69                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
70                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
71                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
72         },
73 };
74
75 static struct ipv4_devconf ipv4_devconf_dflt = {
76         .data = {
77                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
78                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
79                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
80                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
81                 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
82         },
83 };
84
/*
 * Read attribute @attr from the devconf_dflt block that @net points to
 * through net->ipv4.devconf_dflt.  @net is parenthesized so that callers
 * may pass an arbitrary pointer expression (e.g. "p + 1") without the
 * "->" operator binding to only part of it.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
87
88 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
89         [IFA_LOCAL]             = { .type = NLA_U32 },
90         [IFA_ADDRESS]           = { .type = NLA_U32 },
91         [IFA_BROADCAST]         = { .type = NLA_U32 },
92         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
93 };
94
95 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
96
97 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
98 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
99                          int destroy);
100 #ifdef CONFIG_SYSCTL
101 static void devinet_sysctl_register(struct in_device *idev);
102 static void devinet_sysctl_unregister(struct in_device *idev);
103 #else
104 static inline void devinet_sysctl_register(struct in_device *idev)
105 {
106 }
107 static inline void devinet_sysctl_unregister(struct in_device *idev)
108 {
109 }
110 #endif
111
112 /* Locks all the inet devices. */
113
114 static struct in_ifaddr *inet_alloc_ifa(void)
115 {
116         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
117 }
118
119 static void inet_rcu_free_ifa(struct rcu_head *head)
120 {
121         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
122         if (ifa->ifa_dev)
123                 in_dev_put(ifa->ifa_dev);
124         kfree(ifa);
125 }
126
127 static inline void inet_free_ifa(struct in_ifaddr *ifa)
128 {
129         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
130 }
131
132 void in_dev_finish_destroy(struct in_device *idev)
133 {
134         struct net_device *dev = idev->dev;
135
136         WARN_ON(idev->ifa_list);
137         WARN_ON(idev->mc_list);
138 #ifdef NET_REFCNT_DEBUG
139         printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
140                idev, dev ? dev->name : "NIL");
141 #endif
142         dev_put(dev);
143         if (!idev->dead)
144                 pr_err("Freeing alive in_device %p\n", idev);
145         else
146                 kfree(idev);
147 }
148 EXPORT_SYMBOL(in_dev_finish_destroy);
149
150 static struct in_device *inetdev_init(struct net_device *dev)
151 {
152         struct in_device *in_dev;
153
154         ASSERT_RTNL();
155
156         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
157         if (!in_dev)
158                 goto out;
159         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
160                         sizeof(in_dev->cnf));
161         in_dev->cnf.sysctl = NULL;
162         in_dev->dev = dev;
163         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
164         if (!in_dev->arp_parms)
165                 goto out_kfree;
166         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
167                 dev_disable_lro(dev);
168         /* Reference in_dev->dev */
169         dev_hold(dev);
170         /* Account for reference dev->ip_ptr (below) */
171         in_dev_hold(in_dev);
172
173         devinet_sysctl_register(in_dev);
174         ip_mc_init_dev(in_dev);
175         if (dev->flags & IFF_UP)
176                 ip_mc_up(in_dev);
177
178         /* we can receive as soon as ip_ptr is set -- do this last */
179         rcu_assign_pointer(dev->ip_ptr, in_dev);
180 out:
181         return in_dev;
182 out_kfree:
183         kfree(in_dev);
184         in_dev = NULL;
185         goto out;
186 }
187
188 static void in_dev_rcu_put(struct rcu_head *head)
189 {
190         struct in_device *idev = container_of(head, struct in_device, rcu_head);
191         in_dev_put(idev);
192 }
193
194 static void inetdev_destroy(struct in_device *in_dev)
195 {
196         struct in_ifaddr *ifa;
197         struct net_device *dev;
198
199         ASSERT_RTNL();
200
201         dev = in_dev->dev;
202
203         in_dev->dead = 1;
204
205         ip_mc_destroy_dev(in_dev);
206
207         while ((ifa = in_dev->ifa_list) != NULL) {
208                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
209                 inet_free_ifa(ifa);
210         }
211
212         dev->ip_ptr = NULL;
213
214         devinet_sysctl_unregister(in_dev);
215         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
216         arp_ifdown(dev);
217
218         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
219 }
220
221 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
222 {
223         rcu_read_lock();
224         for_primary_ifa(in_dev) {
225                 if (inet_ifa_match(a, ifa)) {
226                         if (!b || inet_ifa_match(b, ifa)) {
227                                 rcu_read_unlock();
228                                 return 1;
229                         }
230                 }
231         } endfor_ifa(in_dev);
232         rcu_read_unlock();
233         return 0;
234 }
235
/*
 * Remove the address that *@ifap points at from @in_dev's address list.
 *
 * @in_dev:  device whose list is modified
 * @ifap:    link (either &in_dev->ifa_list or some &ifa->ifa_next) that
 *           currently points at the address to remove
 * @destroy: when non-zero the removed address is also released with
 *           inet_free_ifa(); otherwise the caller keeps ownership
 * @nlh/@pid: passed through unchanged to rtmsg_ifa()
 *
 * Must be called with RTNL held.  When the removed address is a primary,
 * its matching secondaries are either deleted as well or, if
 * IN_DEV_PROMOTE_SECONDARIES() is set, the first one is promoted to
 * primary.
 */
static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy, struct nlmsghdr *nlh, u32 pid)
{
	struct in_ifaddr *promote = NULL;
	struct in_ifaddr *ifa, *ifa1 = *ifap;
	struct in_ifaddr *last_prim = in_dev->ifa_list;
	struct in_ifaddr *prev_prom = NULL;
	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);

	ASSERT_RTNL();

	/* 1. Deleting a primary ifaddr forces deletion of all its
	 * secondaries unless alias promotion is set.
	 **/

	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
		struct in_ifaddr **ifap1 = &ifa1->ifa_next;

		while ((ifa = *ifap1) != NULL) {
			/* Remember the last primary whose scope is not
			 * smaller than ifa1's; a promoted address is
			 * re-linked right behind it below.
			 */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
			    ifa1->ifa_scope <= ifa->ifa_scope)
				last_prim = ifa;

			/* Skip entries that are not secondaries of ifa1
			 * (primary, different mask, or different subnet).
			 */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
			    ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
				ifap1 = &ifa->ifa_next;
				prev_prom = ifa;
				continue;
			}

			if (!do_promote) {
				/* Unlink, announce and free the secondary. */
				*ifap1 = ifa->ifa_next;

				rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
				blocking_notifier_call_chain(&inetaddr_chain,
						NETDEV_DOWN, ifa);
				inet_free_ifa(ifa);
			} else {
				/* First matching secondary gets promoted. */
				promote = ifa;
				break;
			}
		}
	}

	/* 2. Unlink it */

	*ifap = ifa1->ifa_next;

	/* 3. Announce address deletion */

	/* Send message first, then call notifier.
	   At first sight, FIB update triggered by notifier
	   will refer to already deleted ifaddr, that could confuse
	   netlink listeners. It is not true: look, gated sees
	   that route deleted and if it still thinks that ifaddr
	   is valid, it will try to restore deleted routes... Grr.
	   So that, this order is correct.
	 */
	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);

	if (promote) {

		/* prev_prom is only set when at least one entry was
		 * skipped before the promoted one; in that case move the
		 * promoted entry so it directly follows last_prim.
		 */
		if (prev_prom) {
			prev_prom->ifa_next = promote->ifa_next;
			promote->ifa_next = last_prim->ifa_next;
			last_prim->ifa_next = promote;
		}

		promote->ifa_flags &= ~IFA_F_SECONDARY;
		rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
		blocking_notifier_call_chain(&inetaddr_chain,
				NETDEV_UP, promote);
		/* Call fib_add_ifaddr() for the remaining entries that share
		 * ifa1's mask and subnet.
		 */
		for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
			if (ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa))
					continue;
			fib_add_ifaddr(ifa);
		}

	}
	if (destroy)
		inet_free_ifa(ifa1);
}
321
/*
 * Convenience wrapper around __inet_del_ifa() for callers that have no
 * netlink request at hand: passes a NULL nlmsghdr and pid 0.
 */
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy)
{
	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
}
327
/*
 * Link @ifa into the address list of ifa->ifa_dev and announce it.
 *
 * @nlh/@pid are passed through unchanged to rtmsg_ifa().
 * Must be called with RTNL held.
 *
 * Returns 0 on success -- and also when ifa->ifa_local is zero, in which
 * case @ifa is simply freed.  Returns -EEXIST if an entry with the same
 * mask, subnet and local address already exists, and -EINVAL if an entry
 * in the same subnet has a different scope.  On both errors @ifa has
 * been freed.
 */
static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
			     u32 pid)
{
	struct in_device *in_dev = ifa->ifa_dev;
	struct in_ifaddr *ifa1, **ifap, **last_primary;

	ASSERT_RTNL();

	if (!ifa->ifa_local) {
		inet_free_ifa(ifa);
		return 0;
	}

	/* Start out as primary; demoted below if the subnet is taken. */
	ifa->ifa_flags &= ~IFA_F_SECONDARY;
	last_primary = &in_dev->ifa_list;

	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
	     ifap = &ifa1->ifa_next) {
		/* Track the link after the last primary whose scope is
		 * not smaller than the new address's scope.
		 */
		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
		    ifa->ifa_scope <= ifa1->ifa_scope)
			last_primary = &ifa1->ifa_next;
		if (ifa1->ifa_mask == ifa->ifa_mask &&
		    inet_ifa_match(ifa1->ifa_address, ifa)) {
			if (ifa1->ifa_local == ifa->ifa_local) {
				inet_free_ifa(ifa);
				return -EEXIST;
			}
			if (ifa1->ifa_scope != ifa->ifa_scope) {
				inet_free_ifa(ifa);
				return -EINVAL;
			}
			ifa->ifa_flags |= IFA_F_SECONDARY;
		}
	}

	/* After the loop ifap is the tail link, which is where a
	 * secondary is appended.  A primary goes to last_primary instead.
	 */
	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
		net_srandom(ifa->ifa_local);
		ifap = last_primary;
	}

	ifa->ifa_next = *ifap;
	*ifap = ifa;

	/* Send message first, then call notifier.
	   Notifier will trigger FIB update, so that
	   listeners of netlink will know about new ifaddr */
	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);

	return 0;
}
379
/*
 * Convenience wrapper around __inet_insert_ifa() for callers that have
 * no netlink request at hand: passes a NULL nlmsghdr and pid 0.
 * Returns whatever __inet_insert_ifa() returns.
 */
static int inet_insert_ifa(struct in_ifaddr *ifa)
{
	return __inet_insert_ifa(ifa, NULL, 0);
}
384
385 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
386 {
387         struct in_device *in_dev = __in_dev_get_rtnl(dev);
388
389         ASSERT_RTNL();
390
391         if (!in_dev) {
392                 inet_free_ifa(ifa);
393                 return -ENOBUFS;
394         }
395         ipv4_devconf_setall(in_dev);
396         if (ifa->ifa_dev != in_dev) {
397                 WARN_ON(ifa->ifa_dev);
398                 in_dev_hold(in_dev);
399                 ifa->ifa_dev = in_dev;
400         }
401         if (ipv4_is_loopback(ifa->ifa_local))
402                 ifa->ifa_scope = RT_SCOPE_HOST;
403         return inet_insert_ifa(ifa);
404 }
405
406 struct in_device *inetdev_by_index(struct net *net, int ifindex)
407 {
408         struct net_device *dev;
409         struct in_device *in_dev = NULL;
410
411         rcu_read_lock();
412         dev = dev_get_by_index_rcu(net, ifindex);
413         if (dev)
414                 in_dev = in_dev_get(dev);
415         rcu_read_unlock();
416         return in_dev;
417 }
418 EXPORT_SYMBOL(inetdev_by_index);
419
420 /* Called only from RTNL semaphored context. No locks. */
421
422 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
423                                     __be32 mask)
424 {
425         ASSERT_RTNL();
426
427         for_primary_ifa(in_dev) {
428                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
429                         return ifa;
430         } endfor_ifa(in_dev);
431         return NULL;
432 }
433
434 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
435 {
436         struct net *net = sock_net(skb->sk);
437         struct nlattr *tb[IFA_MAX+1];
438         struct in_device *in_dev;
439         struct ifaddrmsg *ifm;
440         struct in_ifaddr *ifa, **ifap;
441         int err = -EINVAL;
442
443         ASSERT_RTNL();
444
445         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
446         if (err < 0)
447                 goto errout;
448
449         ifm = nlmsg_data(nlh);
450         in_dev = inetdev_by_index(net, ifm->ifa_index);
451         if (in_dev == NULL) {
452                 err = -ENODEV;
453                 goto errout;
454         }
455
456         __in_dev_put(in_dev);
457
458         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
459              ifap = &ifa->ifa_next) {
460                 if (tb[IFA_LOCAL] &&
461                     ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
462                         continue;
463
464                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
465                         continue;
466
467                 if (tb[IFA_ADDRESS] &&
468                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
469                     !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
470                         continue;
471
472                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
473                 return 0;
474         }
475
476         err = -EADDRNOTAVAIL;
477 errout:
478         return err;
479 }
480
481 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
482 {
483         struct nlattr *tb[IFA_MAX+1];
484         struct in_ifaddr *ifa;
485         struct ifaddrmsg *ifm;
486         struct net_device *dev;
487         struct in_device *in_dev;
488         int err;
489
490         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
491         if (err < 0)
492                 goto errout;
493
494         ifm = nlmsg_data(nlh);
495         err = -EINVAL;
496         if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
497                 goto errout;
498
499         dev = __dev_get_by_index(net, ifm->ifa_index);
500         err = -ENODEV;
501         if (dev == NULL)
502                 goto errout;
503
504         in_dev = __in_dev_get_rtnl(dev);
505         err = -ENOBUFS;
506         if (in_dev == NULL)
507                 goto errout;
508
509         ifa = inet_alloc_ifa();
510         if (ifa == NULL)
511                 /*
512                  * A potential indev allocation can be left alive, it stays
513                  * assigned to its device and is destroy with it.
514                  */
515                 goto errout;
516
517         ipv4_devconf_setall(in_dev);
518         in_dev_hold(in_dev);
519
520         if (tb[IFA_ADDRESS] == NULL)
521                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
522
523         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
524         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
525         ifa->ifa_flags = ifm->ifa_flags;
526         ifa->ifa_scope = ifm->ifa_scope;
527         ifa->ifa_dev = in_dev;
528
529         ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
530         ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
531
532         if (tb[IFA_BROADCAST])
533                 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
534
535         if (tb[IFA_LABEL])
536                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
537         else
538                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
539
540         return ifa;
541
542 errout:
543         return ERR_PTR(err);
544 }
545
546 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
547 {
548         struct net *net = sock_net(skb->sk);
549         struct in_ifaddr *ifa;
550
551         ASSERT_RTNL();
552
553         ifa = rtm_to_ifaddr(net, nlh);
554         if (IS_ERR(ifa))
555                 return PTR_ERR(ifa);
556
557         return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
558 }
559
560 /*
561  *      Determine a default network mask, based on the IP address.
562  */
563
564 static inline int inet_abc_len(__be32 addr)
565 {
566         int rc = -1;    /* Something else, probably a multicast. */
567
568         if (ipv4_is_zeronet(addr))
569                 rc = 0;
570         else {
571                 __u32 haddr = ntohl(addr);
572
573                 if (IN_CLASSA(haddr))
574                         rc = 8;
575                 else if (IN_CLASSB(haddr))
576                         rc = 16;
577                 else if (IN_CLASSC(haddr))
578                         rc = 24;
579         }
580
581         return rc;
582 }
583
584
/*
 * Handle the IPv4 interface-address ioctls (SIOC[GS]IFADDR,
 * SIOC[GS]IFBRDADDR, SIOC[GS]IFDSTADDR, SIOC[GS]IFNETMASK, SIOCSIFFLAGS).
 *
 * @net: namespace in which the interface name is resolved
 * @cmd: ioctl number; any other value yields -EINVAL
 * @arg: user pointer to a struct ifreq; read on entry and, for the
 *       "get" commands, written back on success
 *
 * The "set" commands and SIOCSIFFLAGS require CAP_NET_ADMIN (-EACCES
 * otherwise).  An interface name of the form "dev:alias" is looked up as
 * "dev"; the full name is then compared against address labels.
 *
 * Returns 0 on success or a negative errno: -EFAULT on user-copy
 * failure, -ENODEV for an unknown interface, -EADDRNOTAVAIL when no
 * matching address exists (except for SIOCSIFADDR and non-alias
 * SIOCSIFFLAGS), -EINVAL for bad arguments, and -ENOBUFS when a new
 * address cannot be allocated or attached.
 */
int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
	struct ifreq ifr;
	struct sockaddr_in sin_orig;
	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
	struct in_device *in_dev;
	struct in_ifaddr **ifap = NULL;
	struct in_ifaddr *ifa = NULL;
	struct net_device *dev;
	char *colon;
	int ret = -EFAULT;
	int tryaddrmatch = 0;

	/*
	 *	Fetch the caller's info block into kernel space
	 */

	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
		goto out;
	/* force NUL termination of the user-supplied name */
	ifr.ifr_name[IFNAMSIZ - 1] = 0;

	/* save original address for comparison */
	memcpy(&sin_orig, sin, sizeof(*sin));

	/* Temporarily cut the name at ':' so only the device part is
	 * used for dev_load() and the device lookup below.
	 */
	colon = strchr(ifr.ifr_name, ':');
	if (colon)
		*colon = 0;

	dev_load(net, ifr.ifr_name);

	switch (cmd) {
	case SIOCGIFADDR:	/* Get interface address */
	case SIOCGIFBRDADDR:	/* Get the broadcast address */
	case SIOCGIFDSTADDR:	/* Get the destination address */
	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
		/* Note that these ioctls will not sleep,
		   so that we do not impose a lock.
		   One day we will be forced to put shlock here (I mean SMP)
		 */
		tryaddrmatch = (sin_orig.sin_family == AF_INET);
		/* clear the reply area; the answer is filled in below */
		memset(sin, 0, sizeof(*sin));
		sin->sin_family = AF_INET;
		break;

	case SIOCSIFFLAGS:
		ret = -EACCES;
		if (!capable(CAP_NET_ADMIN))
			goto out;
		break;
	case SIOCSIFADDR:	/* Set interface address (and family) */
	case SIOCSIFBRDADDR:	/* Set the broadcast address */
	case SIOCSIFDSTADDR:	/* Set the destination address */
	case SIOCSIFNETMASK:	/* Set the netmask for the interface */
		ret = -EACCES;
		if (!capable(CAP_NET_ADMIN))
			goto out;
		ret = -EINVAL;
		if (sin->sin_family != AF_INET)
			goto out;
		break;
	default:
		ret = -EINVAL;
		goto out;
	}

	rtnl_lock();

	ret = -ENODEV;
	dev = __dev_get_by_name(net, ifr.ifr_name);
	if (!dev)
		goto done;

	/* restore the full "dev:alias" name for the label comparisons */
	if (colon)
		*colon = ':';

	in_dev = __in_dev_get_rtnl(dev);
	if (in_dev) {
		if (tryaddrmatch) {
			/* Matthias Andree */
			/* compare label and address (4.4BSD style) */
			/* note: we only do this for a limited set of ioctls
			   and only if the original address family was AF_INET.
			   This is checked above. */
			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
			     ifap = &ifa->ifa_next) {
				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
				    sin_orig.sin_addr.s_addr ==
							ifa->ifa_address) {
					break; /* found */
				}
			}
		}
		/* we didn't get a match, maybe the application is
		   4.3BSD-style and passed in junk so we fall back to
		   comparing just the label */
		if (!ifa) {
			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
			     ifap = &ifa->ifa_next)
				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
					break;
		}
	}

	/* Only SIOCSIFADDR and SIOCSIFFLAGS may proceed without an
	 * existing address; every other command dereferences ifa below.
	 */
	ret = -EADDRNOTAVAIL;
	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
		goto done;

	switch (cmd) {
	case SIOCGIFADDR:	/* Get interface address */
		sin->sin_addr.s_addr = ifa->ifa_local;
		goto rarok;

	case SIOCGIFBRDADDR:	/* Get the broadcast address */
		sin->sin_addr.s_addr = ifa->ifa_broadcast;
		goto rarok;

	case SIOCGIFDSTADDR:	/* Get the destination address */
		sin->sin_addr.s_addr = ifa->ifa_address;
		goto rarok;

	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
		sin->sin_addr.s_addr = ifa->ifa_mask;
		goto rarok;

	case SIOCSIFFLAGS:
		if (colon) {
			/* Alias name: clearing IFF_UP deletes the alias
			 * address; the device flags are left untouched.
			 */
			ret = -EADDRNOTAVAIL;
			if (!ifa)
				break;
			ret = 0;
			if (!(ifr.ifr_flags & IFF_UP))
				inet_del_ifa(in_dev, ifap, 1);
			break;
		}
		ret = dev_change_flags(dev, ifr.ifr_flags);
		break;

	case SIOCSIFADDR:	/* Set interface address (and family) */
		ret = -EINVAL;
		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
			break;

		if (!ifa) {
			ret = -ENOBUFS;
			ifa = inet_alloc_ifa();
			if (!ifa)
				break;
			if (colon)
				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
			else
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
		} else {
			ret = 0;
			if (ifa->ifa_local == sin->sin_addr.s_addr)
				break;
			/* unlink without freeing; re-inserted below */
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_broadcast = 0;
			ifa->ifa_scope = 0;
		}

		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;

		if (!(dev->flags & IFF_POINTOPOINT)) {
			/* derive classful prefix, mask and broadcast */
			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
			if ((dev->flags & IFF_BROADCAST) &&
			    ifa->ifa_prefixlen < 31)
				ifa->ifa_broadcast = ifa->ifa_address |
						     ~ifa->ifa_mask;
		} else {
			ifa->ifa_prefixlen = 32;
			ifa->ifa_mask = inet_make_mask(32);
		}
		ret = inet_set_ifa(dev, ifa);
		break;

	case SIOCSIFBRDADDR:	/* Set the broadcast address */
		ret = 0;
		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
			/* unlink, modify, re-insert; the result of
			 * inet_insert_ifa() is not checked here
			 */
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_broadcast = sin->sin_addr.s_addr;
			inet_insert_ifa(ifa);
		}
		break;

	case SIOCSIFDSTADDR:	/* Set the destination address */
		ret = 0;
		if (ifa->ifa_address == sin->sin_addr.s_addr)
			break;
		ret = -EINVAL;
		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
			break;
		ret = 0;
		inet_del_ifa(in_dev, ifap, 0);
		ifa->ifa_address = sin->sin_addr.s_addr;
		inet_insert_ifa(ifa);
		break;

	case SIOCSIFNETMASK:	/* Set the netmask for the interface */

		/*
		 *	The mask we set must be legal.
		 */
		ret = -EINVAL;
		if (bad_mask(sin->sin_addr.s_addr, 0))
			break;
		ret = 0;
		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
			__be32 old_mask = ifa->ifa_mask;
			inet_del_ifa(in_dev, ifap, 0);
			ifa->ifa_mask = sin->sin_addr.s_addr;
			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);

			/* See if current broadcast address matches
			 * with current netmask, then recalculate
			 * the broadcast address. Otherwise it's a
			 * funny address, so don't touch it since
			 * the user seems to know what (s)he's doing...
			 */
			if ((dev->flags & IFF_BROADCAST) &&
			    (ifa->ifa_prefixlen < 31) &&
			    (ifa->ifa_broadcast ==
			     (ifa->ifa_local|~old_mask))) {
				ifa->ifa_broadcast = (ifa->ifa_local |
						      ~sin->sin_addr.s_addr);
			}
			inet_insert_ifa(ifa);
		}
		break;
	}
done:
	rtnl_unlock();
out:
	return ret;
rarok:
	/* "get" commands: drop RTNL, then copy the filled ifreq back */
	rtnl_unlock();
	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
	goto out;
}
824
825 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
826 {
827         struct in_device *in_dev = __in_dev_get_rtnl(dev);
828         struct in_ifaddr *ifa;
829         struct ifreq ifr;
830         int done = 0;
831
832         if (!in_dev)
833                 goto out;
834
835         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
836                 if (!buf) {
837                         done += sizeof(ifr);
838                         continue;
839                 }
840                 if (len < (int) sizeof(ifr))
841                         break;
842                 memset(&ifr, 0, sizeof(struct ifreq));
843                 if (ifa->ifa_label)
844                         strcpy(ifr.ifr_name, ifa->ifa_label);
845                 else
846                         strcpy(ifr.ifr_name, dev->name);
847
848                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
849                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
850                                                                 ifa->ifa_local;
851
852                 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
853                         done = -EFAULT;
854                         break;
855                 }
856                 buf  += sizeof(struct ifreq);
857                 len  -= sizeof(struct ifreq);
858                 done += sizeof(struct ifreq);
859         }
860 out:
861         return done;
862 }
863
864 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
865 {
866         __be32 addr = 0;
867         struct in_device *in_dev;
868         struct net *net = dev_net(dev);
869
870         rcu_read_lock();
871         in_dev = __in_dev_get_rcu(dev);
872         if (!in_dev)
873                 goto no_in_dev;
874
875         for_primary_ifa(in_dev) {
876                 if (ifa->ifa_scope > scope)
877                         continue;
878                 if (!dst || inet_ifa_match(dst, ifa)) {
879                         addr = ifa->ifa_local;
880                         break;
881                 }
882                 if (!addr)
883                         addr = ifa->ifa_local;
884         } endfor_ifa(in_dev);
885
886         if (addr)
887                 goto out_unlock;
888 no_in_dev:
889
890         /* Not loopback addresses on loopback should be preferred
891            in this case. It is importnat that lo is the first interface
892            in dev_base list.
893          */
894         for_each_netdev_rcu(net, dev) {
895                 in_dev = __in_dev_get_rcu(dev);
896                 if (!in_dev)
897                         continue;
898
899                 for_primary_ifa(in_dev) {
900                         if (ifa->ifa_scope != RT_SCOPE_LINK &&
901                             ifa->ifa_scope <= scope) {
902                                 addr = ifa->ifa_local;
903                                 goto out_unlock;
904                         }
905                 } endfor_ifa(in_dev);
906         }
907 out_unlock:
908         rcu_read_unlock();
909         return addr;
910 }
911 EXPORT_SYMBOL(inet_select_addr);
912
913 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
914                               __be32 local, int scope)
915 {
916         int same = 0;
917         __be32 addr = 0;
918
919         for_ifa(in_dev) {
920                 if (!addr &&
921                     (local == ifa->ifa_local || !local) &&
922                     ifa->ifa_scope <= scope) {
923                         addr = ifa->ifa_local;
924                         if (same)
925                                 break;
926                 }
927                 if (!same) {
928                         same = (!local || inet_ifa_match(local, ifa)) &&
929                                 (!dst || inet_ifa_match(dst, ifa));
930                         if (same && addr) {
931                                 if (local || !dst)
932                                         break;
933                                 /* Is the selected addr into dst subnet? */
934                                 if (inet_ifa_match(addr, ifa))
935                                         break;
936                                 /* No, then can we use new local src? */
937                                 if (ifa->ifa_scope <= scope) {
938                                         addr = ifa->ifa_local;
939                                         break;
940                                 }
941                                 /* search for large dst subnet for addr */
942                                 same = 0;
943                         }
944                 }
945         } endfor_ifa(in_dev);
946
947         return same ? addr : 0;
948 }
949
950 /*
951  * Confirm that local IP address exists using wildcards:
952  * - in_dev: only on this interface, 0=any interface
953  * - dst: only in the same subnet as dst, 0=any dst
954  * - local: address, 0=autoselect the local address
955  * - scope: maximum allowed scope value for the local address
956  */
957 __be32 inet_confirm_addr(struct in_device *in_dev,
958                          __be32 dst, __be32 local, int scope)
959 {
960         __be32 addr = 0;
961         struct net_device *dev;
962         struct net *net;
963
964         if (scope != RT_SCOPE_LINK)
965                 return confirm_addr_indev(in_dev, dst, local, scope);
966
967         net = dev_net(in_dev->dev);
968         rcu_read_lock();
969         for_each_netdev_rcu(net, dev) {
970                 in_dev = __in_dev_get_rcu(dev);
971                 if (in_dev) {
972                         addr = confirm_addr_indev(in_dev, dst, local, scope);
973                         if (addr)
974                                 break;
975                 }
976         }
977         rcu_read_unlock();
978
979         return addr;
980 }
981
982 /*
983  *      Device notifier
984  */
985
986 int register_inetaddr_notifier(struct notifier_block *nb)
987 {
988         return blocking_notifier_chain_register(&inetaddr_chain, nb);
989 }
990 EXPORT_SYMBOL(register_inetaddr_notifier);
991
992 int unregister_inetaddr_notifier(struct notifier_block *nb)
993 {
994         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
995 }
996 EXPORT_SYMBOL(unregister_inetaddr_notifier);
997
998 /* Rename ifa_labels for a device name change. Make some effort to preserve
999  * existing alias numbering and to create unique labels if possible.
1000 */
1001 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1002 {
1003         struct in_ifaddr *ifa;
1004         int named = 0;
1005
1006         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1007                 char old[IFNAMSIZ], *dot;
1008
1009                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1010                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1011                 if (named++ == 0)
1012                         goto skip;
1013                 dot = strchr(old, ':');
1014                 if (dot == NULL) {
1015                         sprintf(old, ":%d", named);
1016                         dot = old;
1017                 }
1018                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1019                         strcat(ifa->ifa_label, dot);
1020                 else
1021                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1022 skip:
1023                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1024         }
1025 }
1026
/*
 * Tell whether @mtu is large enough for IPv4 to stay enabled on a device.
 * 68 bytes is the smallest MTU accepted (the IPv4 minimum per RFC 791).
 */
static inline bool inetdev_valid_mtu(unsigned mtu)
{
        if (mtu < 68)
                return false;
        return true;
}
1031
1032 /* Called only under RTNL semaphore */
1033
1034 static int inetdev_event(struct notifier_block *this, unsigned long event,
1035                          void *ptr)
1036 {
1037         struct net_device *dev = ptr;
1038         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1039
1040         ASSERT_RTNL();
1041
1042         if (!in_dev) {
1043                 if (event == NETDEV_REGISTER) {
1044                         in_dev = inetdev_init(dev);
1045                         if (!in_dev)
1046                                 return notifier_from_errno(-ENOMEM);
1047                         if (dev->flags & IFF_LOOPBACK) {
1048                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1049                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1050                         }
1051                 } else if (event == NETDEV_CHANGEMTU) {
1052                         /* Re-enabling IP */
1053                         if (inetdev_valid_mtu(dev->mtu))
1054                                 in_dev = inetdev_init(dev);
1055                 }
1056                 goto out;
1057         }
1058
1059         switch (event) {
1060         case NETDEV_REGISTER:
1061                 printk(KERN_DEBUG "inetdev_event: bug\n");
1062                 dev->ip_ptr = NULL;
1063                 break;
1064         case NETDEV_UP:
1065                 if (!inetdev_valid_mtu(dev->mtu))
1066                         break;
1067                 if (dev->flags & IFF_LOOPBACK) {
1068                         struct in_ifaddr *ifa = inet_alloc_ifa();
1069
1070                         if (ifa) {
1071                                 ifa->ifa_local =
1072                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1073                                 ifa->ifa_prefixlen = 8;
1074                                 ifa->ifa_mask = inet_make_mask(8);
1075                                 in_dev_hold(in_dev);
1076                                 ifa->ifa_dev = in_dev;
1077                                 ifa->ifa_scope = RT_SCOPE_HOST;
1078                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1079                                 inet_insert_ifa(ifa);
1080                         }
1081                 }
1082                 ip_mc_up(in_dev);
1083                 /* fall through */
1084         case NETDEV_CHANGEADDR:
1085                 /* Send gratuitous ARP to notify of link change */
1086                 if (IN_DEV_ARP_NOTIFY(in_dev)) {
1087                         struct in_ifaddr *ifa = in_dev->ifa_list;
1088
1089                         if (ifa)
1090                                 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1091                                          ifa->ifa_address, dev,
1092                                          ifa->ifa_address, NULL,
1093                                          dev->dev_addr, NULL);
1094                 }
1095                 break;
1096         case NETDEV_DOWN:
1097                 ip_mc_down(in_dev);
1098                 break;
1099         case NETDEV_PRE_TYPE_CHANGE:
1100                 ip_mc_unmap(in_dev);
1101                 break;
1102         case NETDEV_POST_TYPE_CHANGE:
1103                 ip_mc_remap(in_dev);
1104                 break;
1105         case NETDEV_CHANGEMTU:
1106                 if (inetdev_valid_mtu(dev->mtu))
1107                         break;
1108                 /* disable IP when MTU is not enough */
1109         case NETDEV_UNREGISTER:
1110                 inetdev_destroy(in_dev);
1111                 break;
1112         case NETDEV_CHANGENAME:
1113                 /* Do not notify about label change, this event is
1114                  * not interesting to applications using netlink.
1115                  */
1116                 inetdev_changename(dev, in_dev);
1117
1118                 devinet_sysctl_unregister(in_dev);
1119                 devinet_sysctl_register(in_dev);
1120                 break;
1121         }
1122 out:
1123         return NOTIFY_DONE;
1124 }
1125
/*
 * Notifier block that routes netdevice events to inetdev_event(); it is
 * registered from devinet_init().
 */
1126 static struct notifier_block ip_netdev_notifier = {
1127         .notifier_call = inetdev_event,
1128 };
1129
1130 static inline size_t inet_nlmsg_size(void)
1131 {
1132         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1133                + nla_total_size(4) /* IFA_ADDRESS */
1134                + nla_total_size(4) /* IFA_LOCAL */
1135                + nla_total_size(4) /* IFA_BROADCAST */
1136                + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1137 }
1138
1139 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1140                             u32 pid, u32 seq, int event, unsigned int flags)
1141 {
1142         struct ifaddrmsg *ifm;
1143         struct nlmsghdr  *nlh;
1144
1145         nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1146         if (nlh == NULL)
1147                 return -EMSGSIZE;
1148
1149         ifm = nlmsg_data(nlh);
1150         ifm->ifa_family = AF_INET;
1151         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1152         ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1153         ifm->ifa_scope = ifa->ifa_scope;
1154         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1155
1156         if (ifa->ifa_address)
1157                 NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1158
1159         if (ifa->ifa_local)
1160                 NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1161
1162         if (ifa->ifa_broadcast)
1163                 NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1164
1165         if (ifa->ifa_label[0])
1166                 NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1167
1168         return nlmsg_end(skb, nlh);
1169
1170 nla_put_failure:
1171         nlmsg_cancel(skb, nlh);
1172         return -EMSGSIZE;
1173 }
1174
1175 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1176 {
1177         struct net *net = sock_net(skb->sk);
1178         int h, s_h;
1179         int idx, s_idx;
1180         int ip_idx, s_ip_idx;
1181         struct net_device *dev;
1182         struct in_device *in_dev;
1183         struct in_ifaddr *ifa;
1184         struct hlist_head *head;
1185         struct hlist_node *node;
1186
1187         s_h = cb->args[0];
1188         s_idx = idx = cb->args[1];
1189         s_ip_idx = ip_idx = cb->args[2];
1190
1191         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1192                 idx = 0;
1193                 head = &net->dev_index_head[h];
1194                 rcu_read_lock();
1195                 hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
1196                         if (idx < s_idx)
1197                                 goto cont;
1198                         if (h > s_h || idx > s_idx)
1199                                 s_ip_idx = 0;
1200                         in_dev = __in_dev_get_rcu(dev);
1201                         if (!in_dev)
1202                                 goto cont;
1203
1204                         for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1205                              ifa = ifa->ifa_next, ip_idx++) {
1206                                 if (ip_idx < s_ip_idx)
1207                                         continue;
1208                                 if (inet_fill_ifaddr(skb, ifa,
1209                                              NETLINK_CB(cb->skb).pid,
1210                                              cb->nlh->nlmsg_seq,
1211                                              RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1212                                         rcu_read_unlock();
1213                                         goto done;
1214                                 }
1215                         }
1216 cont:
1217                         idx++;
1218                 }
1219                 rcu_read_unlock();
1220         }
1221
1222 done:
1223         cb->args[0] = h;
1224         cb->args[1] = idx;
1225         cb->args[2] = ip_idx;
1226
1227         return skb->len;
1228 }
1229
1230 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1231                       u32 pid)
1232 {
1233         struct sk_buff *skb;
1234         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1235         int err = -ENOBUFS;
1236         struct net *net;
1237
1238         net = dev_net(ifa->ifa_dev->dev);
1239         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1240         if (skb == NULL)
1241                 goto errout;
1242
1243         err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1244         if (err < 0) {
1245                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1246                 WARN_ON(err == -EMSGSIZE);
1247                 kfree_skb(skb);
1248                 goto errout;
1249         }
1250         rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1251         return;
1252 errout:
1253         if (err < 0)
1254                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1255 }
1256
1257 #ifdef CONFIG_SYSCTL
1258
1259 static void devinet_copy_dflt_conf(struct net *net, int i)
1260 {
1261         struct net_device *dev;
1262
1263         rcu_read_lock();
1264         for_each_netdev_rcu(net, dev) {
1265                 struct in_device *in_dev;
1266
1267                 in_dev = __in_dev_get_rcu(dev);
1268                 if (in_dev && !test_bit(i, in_dev->cnf.state))
1269                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1270         }
1271         rcu_read_unlock();
1272 }
1273
1274 /* called with RTNL locked */
1275 static void inet_forward_change(struct net *net)
1276 {
1277         struct net_device *dev;
1278         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1279
1280         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1281         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1282
1283         for_each_netdev(net, dev) {
1284                 struct in_device *in_dev;
1285                 if (on)
1286                         dev_disable_lro(dev);
1287                 rcu_read_lock();
1288                 in_dev = __in_dev_get_rcu(dev);
1289                 if (in_dev)
1290                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1291                 rcu_read_unlock();
1292         }
1293 }
1294
1295 static int devinet_conf_proc(ctl_table *ctl, int write,
1296                              void __user *buffer,
1297                              size_t *lenp, loff_t *ppos)
1298 {
1299         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1300
1301         if (write) {
1302                 struct ipv4_devconf *cnf = ctl->extra1;
1303                 struct net *net = ctl->extra2;
1304                 int i = (int *)ctl->data - cnf->data;
1305
1306                 set_bit(i, cnf->state);
1307
1308                 if (cnf == net->ipv4.devconf_dflt)
1309                         devinet_copy_dflt_conf(net, i);
1310         }
1311
1312         return ret;
1313 }
1314
1315 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1316                                   void __user *buffer,
1317                                   size_t *lenp, loff_t *ppos)
1318 {
1319         int *valp = ctl->data;
1320         int val = *valp;
1321         loff_t pos = *ppos;
1322         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1323
1324         if (write && *valp != val) {
1325                 struct net *net = ctl->extra2;
1326
1327                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1328                         if (!rtnl_trylock()) {
1329                                 /* Restore the original values before restarting */
1330                                 *valp = val;
1331                                 *ppos = pos;
1332                                 return restart_syscall();
1333                         }
1334                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1335                                 inet_forward_change(net);
1336                         } else if (*valp) {
1337                                 struct ipv4_devconf *cnf = ctl->extra1;
1338                                 struct in_device *idev =
1339                                         container_of(cnf, struct in_device, cnf);
1340                                 dev_disable_lro(idev->dev);
1341                         }
1342                         rtnl_unlock();
1343                         rt_cache_flush(net, 0);
1344                 }
1345         }
1346
1347         return ret;
1348 }
1349
1350 int ipv4_doint_and_flush(ctl_table *ctl, int write,
1351                          void __user *buffer,
1352                          size_t *lenp, loff_t *ppos)
1353 {
1354         int *valp = ctl->data;
1355         int val = *valp;
1356         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1357         struct net *net = ctl->extra2;
1358
1359         if (write && *valp != val)
1360                 rt_cache_flush(net, 0);
1361
1362         return ret;
1363 }
1364
/*
 * Initializer helpers for the devinet sysctl template table.
 *
 * DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) expands to one ctl_table
 * initializer: procname is @name, data points at slot
 * IPV4_DEVCONF_<attr> - 1 of ipv4_devconf.data, the mode is @mval and the
 * handler is @proc.  extra1 refers to ipv4_devconf.
 *
 * The wrappers below fix the mode and/or the handler:
 *   RW_ENTRY       - 0644, devinet_conf_proc
 *   RO_ENTRY       - 0444, devinet_conf_proc
 *   COMPLEX_ENTRY  - 0644, caller-supplied handler
 *   FLUSHING_ENTRY - 0644, ipv4_doint_and_flush
 */
1365 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
1366         { \
1367                 .procname       = name, \
1368                 .data           = ipv4_devconf.data + \
1369                                   IPV4_DEVCONF_ ## attr - 1, \
1370                 .maxlen         = sizeof(int), \
1371                 .mode           = mval, \
1372                 .proc_handler   = proc, \
1373                 .extra1         = &ipv4_devconf, \
1374         }
1375
1376 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
1377         DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
1378
1379 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
1380         DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
1381
1382 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
1383         DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
1384
1385 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
1386         DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
1387
/*
 * Template for a devconf sysctl directory.
 *
 * __devinet_sysctl_register() duplicates this object with kmemdup() and
 * then rebases each entry's data pointer onto the target ipv4_devconf and
 * fills in extra1/extra2, so the pointers below (which refer to the global
 * ipv4_devconf) only serve as a starting point.
 *
 *   sysctl_header - header returned when the copy is registered
 *   devinet_vars  - the entries, sized by __IPV4_DEVCONF_MAX
 *   dev_name      - private copy of the directory name
 */
1388 static struct devinet_sysctl_table {
1389         struct ctl_table_header *sysctl_header;
1390         struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
1391         char *dev_name;
1392 } devinet_sysctl = {
1393         .devinet_vars = {
1394                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1395                                              devinet_sysctl_forward),
1396                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1397
1398                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1399                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1400                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1401                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1402                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1403                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1404                                         "accept_source_route"),
1405                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
1406                 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
1407                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1408                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1409                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1410                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1411                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1412                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1413                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1414                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1415                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1416                 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
1417                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
1418
                /* Writes to the entries below also flush the route cache. */
1419                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1420                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1421                 DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1422                                               "force_igmp_version"),
1423                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1424                                               "promote_secondaries"),
1425         },
1426 };
1427
1428 static int __devinet_sysctl_register(struct net *net, char *dev_name,
1429                                         struct ipv4_devconf *p)
1430 {
1431         int i;
1432         struct devinet_sysctl_table *t;
1433
1434 #define DEVINET_CTL_PATH_DEV    3
1435
1436         struct ctl_path devinet_ctl_path[] = {
1437                 { .procname = "net",  },
1438                 { .procname = "ipv4", },
1439                 { .procname = "conf", },
1440                 { /* to be set */ },
1441                 { },
1442         };
1443
1444         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1445         if (!t)
1446                 goto out;
1447
1448         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1449                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1450                 t->devinet_vars[i].extra1 = p;
1451                 t->devinet_vars[i].extra2 = net;
1452         }
1453
1454         /*
1455          * Make a copy of dev_name, because '.procname' is regarded as const
1456          * by sysctl and we wouldn't want anyone to change it under our feet
1457          * (see SIOCSIFNAME).
1458          */
1459         t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1460         if (!t->dev_name)
1461                 goto free;
1462
1463         devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1464
1465         t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1466                         t->devinet_vars);
1467         if (!t->sysctl_header)
1468                 goto free_procname;
1469
1470         p->sysctl = t;
1471         return 0;
1472
1473 free_procname:
1474         kfree(t->dev_name);
1475 free:
1476         kfree(t);
1477 out:
1478         return -ENOBUFS;
1479 }
1480
1481 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1482 {
1483         struct devinet_sysctl_table *t = cnf->sysctl;
1484
1485         if (t == NULL)
1486                 return;
1487
1488         cnf->sysctl = NULL;
1489         unregister_sysctl_table(t->sysctl_header);
1490         kfree(t->dev_name);
1491         kfree(t);
1492 }
1493
1494 static void devinet_sysctl_register(struct in_device *idev)
1495 {
1496         neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
1497         __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1498                                         &idev->cnf);
1499 }
1500
1501 static void devinet_sysctl_unregister(struct in_device *idev)
1502 {
1503         __devinet_sysctl_unregister(&idev->cnf);
1504         neigh_sysctl_unregister(idev->arp_parms);
1505 }
1506
/*
 * The "ip_forward" sysctl entry, registered under net_ipv4_path by
 * devinet_init_net().  As written it refers to the FORWARDING slot of the
 * global ipv4_devconf and to init_net; for any other namespace
 * devinet_init_net() duplicates the table and repoints data, extra1 and
 * extra2 at that namespace's own objects.
 */
1507 static struct ctl_table ctl_forward_entry[] = {
1508         {
1509                 .procname       = "ip_forward",
1510                 .data           = &ipv4_devconf.data[
1511                                         IPV4_DEVCONF_FORWARDING - 1],
1512                 .maxlen         = sizeof(int),
1513                 .mode           = 0644,
1514                 .proc_handler   = devinet_sysctl_forward,
1515                 .extra1         = &ipv4_devconf,
1516                 .extra2         = &init_net,
1517         },
1518         { },
1519 };
1520
/* sysctl path "net/ipv4" under which ctl_forward_entry is registered. */
1521 static __net_initdata struct ctl_path net_ipv4_path[] = {
1522         { .procname = "net", },
1523         { .procname = "ipv4", },
1524         { },
1525 };
1526 #endif
1527
1528 static __net_init int devinet_init_net(struct net *net)
1529 {
1530         int err;
1531         struct ipv4_devconf *all, *dflt;
1532 #ifdef CONFIG_SYSCTL
1533         struct ctl_table *tbl = ctl_forward_entry;
1534         struct ctl_table_header *forw_hdr;
1535 #endif
1536
1537         err = -ENOMEM;
1538         all = &ipv4_devconf;
1539         dflt = &ipv4_devconf_dflt;
1540
1541         if (!net_eq(net, &init_net)) {
1542                 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1543                 if (all == NULL)
1544                         goto err_alloc_all;
1545
1546                 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1547                 if (dflt == NULL)
1548                         goto err_alloc_dflt;
1549
1550 #ifdef CONFIG_SYSCTL
1551                 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1552                 if (tbl == NULL)
1553                         goto err_alloc_ctl;
1554
1555                 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
1556                 tbl[0].extra1 = all;
1557                 tbl[0].extra2 = net;
1558 #endif
1559         }
1560
1561 #ifdef CONFIG_SYSCTL
1562         err = __devinet_sysctl_register(net, "all", all);
1563         if (err < 0)
1564                 goto err_reg_all;
1565
1566         err = __devinet_sysctl_register(net, "default", dflt);
1567         if (err < 0)
1568                 goto err_reg_dflt;
1569
1570         err = -ENOMEM;
1571         forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
1572         if (forw_hdr == NULL)
1573                 goto err_reg_ctl;
1574         net->ipv4.forw_hdr = forw_hdr;
1575 #endif
1576
1577         net->ipv4.devconf_all = all;
1578         net->ipv4.devconf_dflt = dflt;
1579         return 0;
1580
1581 #ifdef CONFIG_SYSCTL
1582 err_reg_ctl:
1583         __devinet_sysctl_unregister(dflt);
1584 err_reg_dflt:
1585         __devinet_sysctl_unregister(all);
1586 err_reg_all:
1587         if (tbl != ctl_forward_entry)
1588                 kfree(tbl);
1589 err_alloc_ctl:
1590 #endif
1591         if (dflt != &ipv4_devconf_dflt)
1592                 kfree(dflt);
1593 err_alloc_dflt:
1594         if (all != &ipv4_devconf)
1595                 kfree(all);
1596 err_alloc_all:
1597         return err;
1598 }
1599
1600 static __net_exit void devinet_exit_net(struct net *net)
1601 {
1602 #ifdef CONFIG_SYSCTL
1603         struct ctl_table *tbl;
1604
1605         tbl = net->ipv4.forw_hdr->ctl_table_arg;
1606         unregister_net_sysctl_table(net->ipv4.forw_hdr);
1607         __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1608         __devinet_sysctl_unregister(net->ipv4.devconf_all);
1609         kfree(tbl);
1610 #endif
1611         kfree(net->ipv4.devconf_dflt);
1612         kfree(net->ipv4.devconf_all);
1613 }
1614
/*
 * Per-namespace hooks: devinet_init_net() runs when a namespace is set up,
 * devinet_exit_net() when it is torn down.  Registered from devinet_init().
 */
1615 static __net_initdata struct pernet_operations devinet_ops = {
1616         .init = devinet_init_net,
1617         .exit = devinet_exit_net,
1618 };
1619
/*
 * One-time initialisation of the IPv4 device layer: registers the
 * per-namespace operations, the PF_INET gifconf handler, the netdevice
 * notifier and the PF_INET rtnetlink address handlers.
 */
1620 void __init devinet_init(void)
1621 {
        /* Per-namespace devconf setup/teardown (devinet_ops). */
1622         register_pernet_subsys(&devinet_ops);
1623
        /* inet_gifconf() reports PF_INET addresses; inetdev_event() tracks devices. */
1624         register_gifconf(PF_INET, inet_gifconf);
1625         register_netdevice_notifier(&ip_netdev_notifier);
1626
        /* NEWADDR/DELADDR get request handlers, GETADDR gets inet_dump_ifaddr(). */
1627         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
1628         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
1629         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
1630 }
1631