net: remove delay at device dismantle
[linux-drm-fsl-dcu.git] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  *      Derived from the IP parts of dev.c 1.0.19
10  *              Authors:        Ross Biro
11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *      Additional Authors:
15  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *      Changes:
19  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
20  *                                      lists.
21  *              Cyrus Durgin:           updated for kmod
22  *              Matthias Andree:        in devinet_ioctl, compare label and
23  *                                      address (4.4BSD alias style support),
24  *                                      fall back to comparing just the label
25  *                                      if no match found.
26  */
27
28
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
36 #include <linux/mm.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <linux/in.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58
59 #include <net/arp.h>
60 #include <net/ip.h>
61 #include <net/route.h>
62 #include <net/ip_fib.h>
63 #include <net/rtnetlink.h>
64 #include <net/net_namespace.h>
65
66 #include "fib_lookup.h"
67
68 static struct ipv4_devconf ipv4_devconf = {
69         .data = {
70                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
71                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
72                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
73                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
74         },
75 };
76
77 static struct ipv4_devconf ipv4_devconf_dflt = {
78         .data = {
79                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
80                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
81                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
82                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
83                 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
84         },
85 };
86
/*
 * Access attribute @attr in the devconf_dflt block that @net points to.
 * @net is parenthesised so that any pointer-valued expression may be
 * passed; @attr is handed on unchanged to IPV4_DEVCONF().
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
89
90 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
91         [IFA_LOCAL]             = { .type = NLA_U32 },
92         [IFA_ADDRESS]           = { .type = NLA_U32 },
93         [IFA_BROADCAST]         = { .type = NLA_U32 },
94         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
95 };
96
97 #define IN4_ADDR_HSIZE_SHIFT    8
98 #define IN4_ADDR_HSIZE          (1U << IN4_ADDR_HSIZE_SHIFT)
99
100 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
101 static DEFINE_SPINLOCK(inet_addr_hash_lock);
102
103 static u32 inet_addr_hash(struct net *net, __be32 addr)
104 {
105         u32 val = (__force u32) addr ^ net_hash_mix(net);
106
107         return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
108 }
109
110 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
111 {
112         u32 hash = inet_addr_hash(net, ifa->ifa_local);
113
114         spin_lock(&inet_addr_hash_lock);
115         hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
116         spin_unlock(&inet_addr_hash_lock);
117 }
118
119 static void inet_hash_remove(struct in_ifaddr *ifa)
120 {
121         spin_lock(&inet_addr_hash_lock);
122         hlist_del_init_rcu(&ifa->hash);
123         spin_unlock(&inet_addr_hash_lock);
124 }
125
126 /**
127  * __ip_dev_find - find the first device with a given source address.
128  * @net: the net namespace
129  * @addr: the source address
130  * @devref: if true, take a reference on the found device
131  *
132  * If a caller uses devref=false, it should be protected by RCU, or RTNL
133  */
134 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
135 {
136         u32 hash = inet_addr_hash(net, addr);
137         struct net_device *result = NULL;
138         struct in_ifaddr *ifa;
139         struct hlist_node *node;
140
141         rcu_read_lock();
142         hlist_for_each_entry_rcu(ifa, node, &inet_addr_lst[hash], hash) {
143                 if (ifa->ifa_local == addr) {
144                         struct net_device *dev = ifa->ifa_dev->dev;
145
146                         if (!net_eq(dev_net(dev), net))
147                                 continue;
148                         result = dev;
149                         break;
150                 }
151         }
152         if (!result) {
153                 struct flowi4 fl4 = { .daddr = addr };
154                 struct fib_result res = { 0 };
155                 struct fib_table *local;
156
157                 /* Fallback to FIB local table so that communication
158                  * over loopback subnets work.
159                  */
160                 local = fib_get_table(net, RT_TABLE_LOCAL);
161                 if (local &&
162                     !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
163                     res.type == RTN_LOCAL)
164                         result = FIB_RES_DEV(res);
165         }
166         if (result && devref)
167                 dev_hold(result);
168         rcu_read_unlock();
169         return result;
170 }
171 EXPORT_SYMBOL(__ip_dev_find);
172
173 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
174
175 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
176 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
177                          int destroy);
178 #ifdef CONFIG_SYSCTL
179 static void devinet_sysctl_register(struct in_device *idev);
180 static void devinet_sysctl_unregister(struct in_device *idev);
181 #else
182 static void devinet_sysctl_register(struct in_device *idev)
183 {
184 }
185 static void devinet_sysctl_unregister(struct in_device *idev)
186 {
187 }
188 #endif
189
190 /* Locks all the inet devices. */
191
192 static struct in_ifaddr *inet_alloc_ifa(void)
193 {
194         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
195 }
196
197 static void inet_rcu_free_ifa(struct rcu_head *head)
198 {
199         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
200         if (ifa->ifa_dev)
201                 in_dev_put(ifa->ifa_dev);
202         kfree(ifa);
203 }
204
205 static void inet_free_ifa(struct in_ifaddr *ifa)
206 {
207         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
208 }
209
210 void in_dev_finish_destroy(struct in_device *idev)
211 {
212         struct net_device *dev = idev->dev;
213
214         WARN_ON(idev->ifa_list);
215         WARN_ON(idev->mc_list);
216 #ifdef NET_REFCNT_DEBUG
217         pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
218 #endif
219         dev_put(dev);
220         if (!idev->dead)
221                 pr_err("Freeing alive in_device %p\n", idev);
222         else
223                 kfree(idev);
224 }
225 EXPORT_SYMBOL(in_dev_finish_destroy);
226
227 static struct in_device *inetdev_init(struct net_device *dev)
228 {
229         struct in_device *in_dev;
230
231         ASSERT_RTNL();
232
233         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
234         if (!in_dev)
235                 goto out;
236         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
237                         sizeof(in_dev->cnf));
238         in_dev->cnf.sysctl = NULL;
239         in_dev->dev = dev;
240         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
241         if (!in_dev->arp_parms)
242                 goto out_kfree;
243         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
244                 dev_disable_lro(dev);
245         /* Reference in_dev->dev */
246         dev_hold(dev);
247         /* Account for reference dev->ip_ptr (below) */
248         in_dev_hold(in_dev);
249
250         devinet_sysctl_register(in_dev);
251         ip_mc_init_dev(in_dev);
252         if (dev->flags & IFF_UP)
253                 ip_mc_up(in_dev);
254
255         /* we can receive as soon as ip_ptr is set -- do this last */
256         rcu_assign_pointer(dev->ip_ptr, in_dev);
257 out:
258         return in_dev;
259 out_kfree:
260         kfree(in_dev);
261         in_dev = NULL;
262         goto out;
263 }
264
265 static void in_dev_rcu_put(struct rcu_head *head)
266 {
267         struct in_device *idev = container_of(head, struct in_device, rcu_head);
268         in_dev_put(idev);
269 }
270
271 static void inetdev_destroy(struct in_device *in_dev)
272 {
273         struct in_ifaddr *ifa;
274         struct net_device *dev;
275
276         ASSERT_RTNL();
277
278         dev = in_dev->dev;
279
280         in_dev->dead = 1;
281
282         ip_mc_destroy_dev(in_dev);
283
284         while ((ifa = in_dev->ifa_list) != NULL) {
285                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
286                 inet_free_ifa(ifa);
287         }
288
289         RCU_INIT_POINTER(dev->ip_ptr, NULL);
290
291         devinet_sysctl_unregister(in_dev);
292         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
293         arp_ifdown(dev);
294
295         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
296 }
297
298 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
299 {
300         rcu_read_lock();
301         for_primary_ifa(in_dev) {
302                 if (inet_ifa_match(a, ifa)) {
303                         if (!b || inet_ifa_match(b, ifa)) {
304                                 rcu_read_unlock();
305                                 return 1;
306                         }
307                 }
308         } endfor_ifa(in_dev);
309         rcu_read_unlock();
310         return 0;
311 }
312
313 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
314                          int destroy, struct nlmsghdr *nlh, u32 pid)
315 {
316         struct in_ifaddr *promote = NULL;
317         struct in_ifaddr *ifa, *ifa1 = *ifap;
318         struct in_ifaddr *last_prim = in_dev->ifa_list;
319         struct in_ifaddr *prev_prom = NULL;
320         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
321
322         ASSERT_RTNL();
323
324         /* 1. Deleting primary ifaddr forces deletion all secondaries
325          * unless alias promotion is set
326          **/
327
328         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
329                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
330
331                 while ((ifa = *ifap1) != NULL) {
332                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
333                             ifa1->ifa_scope <= ifa->ifa_scope)
334                                 last_prim = ifa;
335
336                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
337                             ifa1->ifa_mask != ifa->ifa_mask ||
338                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
339                                 ifap1 = &ifa->ifa_next;
340                                 prev_prom = ifa;
341                                 continue;
342                         }
343
344                         if (!do_promote) {
345                                 inet_hash_remove(ifa);
346                                 *ifap1 = ifa->ifa_next;
347
348                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
349                                 blocking_notifier_call_chain(&inetaddr_chain,
350                                                 NETDEV_DOWN, ifa);
351                                 inet_free_ifa(ifa);
352                         } else {
353                                 promote = ifa;
354                                 break;
355                         }
356                 }
357         }
358
359         /* On promotion all secondaries from subnet are changing
360          * the primary IP, we must remove all their routes silently
361          * and later to add them back with new prefsrc. Do this
362          * while all addresses are on the device list.
363          */
364         for (ifa = promote; ifa; ifa = ifa->ifa_next) {
365                 if (ifa1->ifa_mask == ifa->ifa_mask &&
366                     inet_ifa_match(ifa1->ifa_address, ifa))
367                         fib_del_ifaddr(ifa, ifa1);
368         }
369
370         /* 2. Unlink it */
371
372         *ifap = ifa1->ifa_next;
373         inet_hash_remove(ifa1);
374
375         /* 3. Announce address deletion */
376
377         /* Send message first, then call notifier.
378            At first sight, FIB update triggered by notifier
379            will refer to already deleted ifaddr, that could confuse
380            netlink listeners. It is not true: look, gated sees
381            that route deleted and if it still thinks that ifaddr
382            is valid, it will try to restore deleted routes... Grr.
383            So that, this order is correct.
384          */
385         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
386         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
387
388         if (promote) {
389                 struct in_ifaddr *next_sec = promote->ifa_next;
390
391                 if (prev_prom) {
392                         prev_prom->ifa_next = promote->ifa_next;
393                         promote->ifa_next = last_prim->ifa_next;
394                         last_prim->ifa_next = promote;
395                 }
396
397                 promote->ifa_flags &= ~IFA_F_SECONDARY;
398                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
399                 blocking_notifier_call_chain(&inetaddr_chain,
400                                 NETDEV_UP, promote);
401                 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
402                         if (ifa1->ifa_mask != ifa->ifa_mask ||
403                             !inet_ifa_match(ifa1->ifa_address, ifa))
404                                         continue;
405                         fib_add_ifaddr(ifa);
406                 }
407
408         }
409         if (destroy)
410                 inet_free_ifa(ifa1);
411 }
412
413 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
414                          int destroy)
415 {
416         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
417 }
418
419 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
420                              u32 pid)
421 {
422         struct in_device *in_dev = ifa->ifa_dev;
423         struct in_ifaddr *ifa1, **ifap, **last_primary;
424
425         ASSERT_RTNL();
426
427         if (!ifa->ifa_local) {
428                 inet_free_ifa(ifa);
429                 return 0;
430         }
431
432         ifa->ifa_flags &= ~IFA_F_SECONDARY;
433         last_primary = &in_dev->ifa_list;
434
435         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
436              ifap = &ifa1->ifa_next) {
437                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
438                     ifa->ifa_scope <= ifa1->ifa_scope)
439                         last_primary = &ifa1->ifa_next;
440                 if (ifa1->ifa_mask == ifa->ifa_mask &&
441                     inet_ifa_match(ifa1->ifa_address, ifa)) {
442                         if (ifa1->ifa_local == ifa->ifa_local) {
443                                 inet_free_ifa(ifa);
444                                 return -EEXIST;
445                         }
446                         if (ifa1->ifa_scope != ifa->ifa_scope) {
447                                 inet_free_ifa(ifa);
448                                 return -EINVAL;
449                         }
450                         ifa->ifa_flags |= IFA_F_SECONDARY;
451                 }
452         }
453
454         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
455                 net_srandom(ifa->ifa_local);
456                 ifap = last_primary;
457         }
458
459         ifa->ifa_next = *ifap;
460         *ifap = ifa;
461
462         inet_hash_insert(dev_net(in_dev->dev), ifa);
463
464         /* Send message first, then call notifier.
465            Notifier will trigger FIB update, so that
466            listeners of netlink will know about new ifaddr */
467         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
468         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
469
470         return 0;
471 }
472
473 static int inet_insert_ifa(struct in_ifaddr *ifa)
474 {
475         return __inet_insert_ifa(ifa, NULL, 0);
476 }
477
478 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
479 {
480         struct in_device *in_dev = __in_dev_get_rtnl(dev);
481
482         ASSERT_RTNL();
483
484         if (!in_dev) {
485                 inet_free_ifa(ifa);
486                 return -ENOBUFS;
487         }
488         ipv4_devconf_setall(in_dev);
489         if (ifa->ifa_dev != in_dev) {
490                 WARN_ON(ifa->ifa_dev);
491                 in_dev_hold(in_dev);
492                 ifa->ifa_dev = in_dev;
493         }
494         if (ipv4_is_loopback(ifa->ifa_local))
495                 ifa->ifa_scope = RT_SCOPE_HOST;
496         return inet_insert_ifa(ifa);
497 }
498
499 /* Caller must hold RCU or RTNL :
500  * We dont take a reference on found in_device
501  */
502 struct in_device *inetdev_by_index(struct net *net, int ifindex)
503 {
504         struct net_device *dev;
505         struct in_device *in_dev = NULL;
506
507         rcu_read_lock();
508         dev = dev_get_by_index_rcu(net, ifindex);
509         if (dev)
510                 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
511         rcu_read_unlock();
512         return in_dev;
513 }
514 EXPORT_SYMBOL(inetdev_by_index);
515
516 /* Called only from RTNL semaphored context. No locks. */
517
518 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
519                                     __be32 mask)
520 {
521         ASSERT_RTNL();
522
523         for_primary_ifa(in_dev) {
524                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
525                         return ifa;
526         } endfor_ifa(in_dev);
527         return NULL;
528 }
529
530 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
531 {
532         struct net *net = sock_net(skb->sk);
533         struct nlattr *tb[IFA_MAX+1];
534         struct in_device *in_dev;
535         struct ifaddrmsg *ifm;
536         struct in_ifaddr *ifa, **ifap;
537         int err = -EINVAL;
538
539         ASSERT_RTNL();
540
541         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
542         if (err < 0)
543                 goto errout;
544
545         ifm = nlmsg_data(nlh);
546         in_dev = inetdev_by_index(net, ifm->ifa_index);
547         if (in_dev == NULL) {
548                 err = -ENODEV;
549                 goto errout;
550         }
551
552         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
553              ifap = &ifa->ifa_next) {
554                 if (tb[IFA_LOCAL] &&
555                     ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
556                         continue;
557
558                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
559                         continue;
560
561                 if (tb[IFA_ADDRESS] &&
562                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
563                     !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
564                         continue;
565
566                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
567                 return 0;
568         }
569
570         err = -EADDRNOTAVAIL;
571 errout:
572         return err;
573 }
574
575 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
576 {
577         struct nlattr *tb[IFA_MAX+1];
578         struct in_ifaddr *ifa;
579         struct ifaddrmsg *ifm;
580         struct net_device *dev;
581         struct in_device *in_dev;
582         int err;
583
584         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
585         if (err < 0)
586                 goto errout;
587
588         ifm = nlmsg_data(nlh);
589         err = -EINVAL;
590         if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
591                 goto errout;
592
593         dev = __dev_get_by_index(net, ifm->ifa_index);
594         err = -ENODEV;
595         if (dev == NULL)
596                 goto errout;
597
598         in_dev = __in_dev_get_rtnl(dev);
599         err = -ENOBUFS;
600         if (in_dev == NULL)
601                 goto errout;
602
603         ifa = inet_alloc_ifa();
604         if (ifa == NULL)
605                 /*
606                  * A potential indev allocation can be left alive, it stays
607                  * assigned to its device and is destroy with it.
608                  */
609                 goto errout;
610
611         ipv4_devconf_setall(in_dev);
612         in_dev_hold(in_dev);
613
614         if (tb[IFA_ADDRESS] == NULL)
615                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
616
617         INIT_HLIST_NODE(&ifa->hash);
618         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
619         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
620         ifa->ifa_flags = ifm->ifa_flags;
621         ifa->ifa_scope = ifm->ifa_scope;
622         ifa->ifa_dev = in_dev;
623
624         ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
625         ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
626
627         if (tb[IFA_BROADCAST])
628                 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
629
630         if (tb[IFA_LABEL])
631                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
632         else
633                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
634
635         return ifa;
636
637 errout:
638         return ERR_PTR(err);
639 }
640
641 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
642 {
643         struct net *net = sock_net(skb->sk);
644         struct in_ifaddr *ifa;
645
646         ASSERT_RTNL();
647
648         ifa = rtm_to_ifaddr(net, nlh);
649         if (IS_ERR(ifa))
650                 return PTR_ERR(ifa);
651
652         return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
653 }
654
655 /*
656  *      Determine a default network mask, based on the IP address.
657  */
658
659 static int inet_abc_len(__be32 addr)
660 {
661         int rc = -1;    /* Something else, probably a multicast. */
662
663         if (ipv4_is_zeronet(addr))
664                 rc = 0;
665         else {
666                 __u32 haddr = ntohl(addr);
667
668                 if (IN_CLASSA(haddr))
669                         rc = 8;
670                 else if (IN_CLASSB(haddr))
671                         rc = 16;
672                 else if (IN_CLASSC(haddr))
673                         rc = 24;
674         }
675
676         return rc;
677 }
678
679
680 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
681 {
682         struct ifreq ifr;
683         struct sockaddr_in sin_orig;
684         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
685         struct in_device *in_dev;
686         struct in_ifaddr **ifap = NULL;
687         struct in_ifaddr *ifa = NULL;
688         struct net_device *dev;
689         char *colon;
690         int ret = -EFAULT;
691         int tryaddrmatch = 0;
692
693         /*
694          *      Fetch the caller's info block into kernel space
695          */
696
697         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
698                 goto out;
699         ifr.ifr_name[IFNAMSIZ - 1] = 0;
700
701         /* save original address for comparison */
702         memcpy(&sin_orig, sin, sizeof(*sin));
703
704         colon = strchr(ifr.ifr_name, ':');
705         if (colon)
706                 *colon = 0;
707
708         dev_load(net, ifr.ifr_name);
709
710         switch (cmd) {
711         case SIOCGIFADDR:       /* Get interface address */
712         case SIOCGIFBRDADDR:    /* Get the broadcast address */
713         case SIOCGIFDSTADDR:    /* Get the destination address */
714         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
715                 /* Note that these ioctls will not sleep,
716                    so that we do not impose a lock.
717                    One day we will be forced to put shlock here (I mean SMP)
718                  */
719                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
720                 memset(sin, 0, sizeof(*sin));
721                 sin->sin_family = AF_INET;
722                 break;
723
724         case SIOCSIFFLAGS:
725                 ret = -EACCES;
726                 if (!capable(CAP_NET_ADMIN))
727                         goto out;
728                 break;
729         case SIOCSIFADDR:       /* Set interface address (and family) */
730         case SIOCSIFBRDADDR:    /* Set the broadcast address */
731         case SIOCSIFDSTADDR:    /* Set the destination address */
732         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
733                 ret = -EACCES;
734                 if (!capable(CAP_NET_ADMIN))
735                         goto out;
736                 ret = -EINVAL;
737                 if (sin->sin_family != AF_INET)
738                         goto out;
739                 break;
740         default:
741                 ret = -EINVAL;
742                 goto out;
743         }
744
745         rtnl_lock();
746
747         ret = -ENODEV;
748         dev = __dev_get_by_name(net, ifr.ifr_name);
749         if (!dev)
750                 goto done;
751
752         if (colon)
753                 *colon = ':';
754
755         in_dev = __in_dev_get_rtnl(dev);
756         if (in_dev) {
757                 if (tryaddrmatch) {
758                         /* Matthias Andree */
759                         /* compare label and address (4.4BSD style) */
760                         /* note: we only do this for a limited set of ioctls
761                            and only if the original address family was AF_INET.
762                            This is checked above. */
763                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
764                              ifap = &ifa->ifa_next) {
765                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
766                                     sin_orig.sin_addr.s_addr ==
767                                                         ifa->ifa_local) {
768                                         break; /* found */
769                                 }
770                         }
771                 }
772                 /* we didn't get a match, maybe the application is
773                    4.3BSD-style and passed in junk so we fall back to
774                    comparing just the label */
775                 if (!ifa) {
776                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
777                              ifap = &ifa->ifa_next)
778                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
779                                         break;
780                 }
781         }
782
783         ret = -EADDRNOTAVAIL;
784         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
785                 goto done;
786
787         switch (cmd) {
788         case SIOCGIFADDR:       /* Get interface address */
789                 sin->sin_addr.s_addr = ifa->ifa_local;
790                 goto rarok;
791
792         case SIOCGIFBRDADDR:    /* Get the broadcast address */
793                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
794                 goto rarok;
795
796         case SIOCGIFDSTADDR:    /* Get the destination address */
797                 sin->sin_addr.s_addr = ifa->ifa_address;
798                 goto rarok;
799
800         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
801                 sin->sin_addr.s_addr = ifa->ifa_mask;
802                 goto rarok;
803
804         case SIOCSIFFLAGS:
805                 if (colon) {
806                         ret = -EADDRNOTAVAIL;
807                         if (!ifa)
808                                 break;
809                         ret = 0;
810                         if (!(ifr.ifr_flags & IFF_UP))
811                                 inet_del_ifa(in_dev, ifap, 1);
812                         break;
813                 }
814                 ret = dev_change_flags(dev, ifr.ifr_flags);
815                 break;
816
817         case SIOCSIFADDR:       /* Set interface address (and family) */
818                 ret = -EINVAL;
819                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
820                         break;
821
822                 if (!ifa) {
823                         ret = -ENOBUFS;
824                         ifa = inet_alloc_ifa();
825                         INIT_HLIST_NODE(&ifa->hash);
826                         if (!ifa)
827                                 break;
828                         if (colon)
829                                 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
830                         else
831                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
832                 } else {
833                         ret = 0;
834                         if (ifa->ifa_local == sin->sin_addr.s_addr)
835                                 break;
836                         inet_del_ifa(in_dev, ifap, 0);
837                         ifa->ifa_broadcast = 0;
838                         ifa->ifa_scope = 0;
839                 }
840
841                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
842
843                 if (!(dev->flags & IFF_POINTOPOINT)) {
844                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
845                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
846                         if ((dev->flags & IFF_BROADCAST) &&
847                             ifa->ifa_prefixlen < 31)
848                                 ifa->ifa_broadcast = ifa->ifa_address |
849                                                      ~ifa->ifa_mask;
850                 } else {
851                         ifa->ifa_prefixlen = 32;
852                         ifa->ifa_mask = inet_make_mask(32);
853                 }
854                 ret = inet_set_ifa(dev, ifa);
855                 break;
856
857         case SIOCSIFBRDADDR:    /* Set the broadcast address */
858                 ret = 0;
859                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
860                         inet_del_ifa(in_dev, ifap, 0);
861                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
862                         inet_insert_ifa(ifa);
863                 }
864                 break;
865
866         case SIOCSIFDSTADDR:    /* Set the destination address */
867                 ret = 0;
868                 if (ifa->ifa_address == sin->sin_addr.s_addr)
869                         break;
870                 ret = -EINVAL;
871                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
872                         break;
873                 ret = 0;
874                 inet_del_ifa(in_dev, ifap, 0);
875                 ifa->ifa_address = sin->sin_addr.s_addr;
876                 inet_insert_ifa(ifa);
877                 break;
878
879         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
880
881                 /*
882                  *      The mask we set must be legal.
883                  */
884                 ret = -EINVAL;
885                 if (bad_mask(sin->sin_addr.s_addr, 0))
886                         break;
887                 ret = 0;
888                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
889                         __be32 old_mask = ifa->ifa_mask;
890                         inet_del_ifa(in_dev, ifap, 0);
891                         ifa->ifa_mask = sin->sin_addr.s_addr;
892                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
893
894                         /* See if current broadcast address matches
895                          * with current netmask, then recalculate
896                          * the broadcast address. Otherwise it's a
897                          * funny address, so don't touch it since
898                          * the user seems to know what (s)he's doing...
899                          */
900                         if ((dev->flags & IFF_BROADCAST) &&
901                             (ifa->ifa_prefixlen < 31) &&
902                             (ifa->ifa_broadcast ==
903                              (ifa->ifa_local|~old_mask))) {
904                                 ifa->ifa_broadcast = (ifa->ifa_local |
905                                                       ~sin->sin_addr.s_addr);
906                         }
907                         inet_insert_ifa(ifa);
908                 }
909                 break;
910         }
911 done:
912         rtnl_unlock();
913 out:
914         return ret;
915 rarok:
916         rtnl_unlock();
917         ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
918         goto out;
919 }
920
921 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
922 {
923         struct in_device *in_dev = __in_dev_get_rtnl(dev);
924         struct in_ifaddr *ifa;
925         struct ifreq ifr;
926         int done = 0;
927
928         if (!in_dev)
929                 goto out;
930
931         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
932                 if (!buf) {
933                         done += sizeof(ifr);
934                         continue;
935                 }
936                 if (len < (int) sizeof(ifr))
937                         break;
938                 memset(&ifr, 0, sizeof(struct ifreq));
939                 if (ifa->ifa_label)
940                         strcpy(ifr.ifr_name, ifa->ifa_label);
941                 else
942                         strcpy(ifr.ifr_name, dev->name);
943
944                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
945                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
946                                                                 ifa->ifa_local;
947
948                 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
949                         done = -EFAULT;
950                         break;
951                 }
952                 buf  += sizeof(struct ifreq);
953                 len  -= sizeof(struct ifreq);
954                 done += sizeof(struct ifreq);
955         }
956 out:
957         return done;
958 }
959
960 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
961 {
962         __be32 addr = 0;
963         struct in_device *in_dev;
964         struct net *net = dev_net(dev);
965
966         rcu_read_lock();
967         in_dev = __in_dev_get_rcu(dev);
968         if (!in_dev)
969                 goto no_in_dev;
970
971         for_primary_ifa(in_dev) {
972                 if (ifa->ifa_scope > scope)
973                         continue;
974                 if (!dst || inet_ifa_match(dst, ifa)) {
975                         addr = ifa->ifa_local;
976                         break;
977                 }
978                 if (!addr)
979                         addr = ifa->ifa_local;
980         } endfor_ifa(in_dev);
981
982         if (addr)
983                 goto out_unlock;
984 no_in_dev:
985
986         /* Not loopback addresses on loopback should be preferred
987            in this case. It is importnat that lo is the first interface
988            in dev_base list.
989          */
990         for_each_netdev_rcu(net, dev) {
991                 in_dev = __in_dev_get_rcu(dev);
992                 if (!in_dev)
993                         continue;
994
995                 for_primary_ifa(in_dev) {
996                         if (ifa->ifa_scope != RT_SCOPE_LINK &&
997                             ifa->ifa_scope <= scope) {
998                                 addr = ifa->ifa_local;
999                                 goto out_unlock;
1000                         }
1001                 } endfor_ifa(in_dev);
1002         }
1003 out_unlock:
1004         rcu_read_unlock();
1005         return addr;
1006 }
1007 EXPORT_SYMBOL(inet_select_addr);
1008
1009 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1010                               __be32 local, int scope)
1011 {
1012         int same = 0;
1013         __be32 addr = 0;
1014
1015         for_ifa(in_dev) {
1016                 if (!addr &&
1017                     (local == ifa->ifa_local || !local) &&
1018                     ifa->ifa_scope <= scope) {
1019                         addr = ifa->ifa_local;
1020                         if (same)
1021                                 break;
1022                 }
1023                 if (!same) {
1024                         same = (!local || inet_ifa_match(local, ifa)) &&
1025                                 (!dst || inet_ifa_match(dst, ifa));
1026                         if (same && addr) {
1027                                 if (local || !dst)
1028                                         break;
1029                                 /* Is the selected addr into dst subnet? */
1030                                 if (inet_ifa_match(addr, ifa))
1031                                         break;
1032                                 /* No, then can we use new local src? */
1033                                 if (ifa->ifa_scope <= scope) {
1034                                         addr = ifa->ifa_local;
1035                                         break;
1036                                 }
1037                                 /* search for large dst subnet for addr */
1038                                 same = 0;
1039                         }
1040                 }
1041         } endfor_ifa(in_dev);
1042
1043         return same ? addr : 0;
1044 }
1045
1046 /*
1047  * Confirm that local IP address exists using wildcards:
1048  * - in_dev: only on this interface, 0=any interface
1049  * - dst: only in the same subnet as dst, 0=any dst
1050  * - local: address, 0=autoselect the local address
1051  * - scope: maximum allowed scope value for the local address
1052  */
1053 __be32 inet_confirm_addr(struct in_device *in_dev,
1054                          __be32 dst, __be32 local, int scope)
1055 {
1056         __be32 addr = 0;
1057         struct net_device *dev;
1058         struct net *net;
1059
1060         if (scope != RT_SCOPE_LINK)
1061                 return confirm_addr_indev(in_dev, dst, local, scope);
1062
1063         net = dev_net(in_dev->dev);
1064         rcu_read_lock();
1065         for_each_netdev_rcu(net, dev) {
1066                 in_dev = __in_dev_get_rcu(dev);
1067                 if (in_dev) {
1068                         addr = confirm_addr_indev(in_dev, dst, local, scope);
1069                         if (addr)
1070                                 break;
1071                 }
1072         }
1073         rcu_read_unlock();
1074
1075         return addr;
1076 }
1077 EXPORT_SYMBOL(inet_confirm_addr);
1078
1079 /*
1080  *      Device notifier
1081  */
1082
1083 int register_inetaddr_notifier(struct notifier_block *nb)
1084 {
1085         return blocking_notifier_chain_register(&inetaddr_chain, nb);
1086 }
1087 EXPORT_SYMBOL(register_inetaddr_notifier);
1088
1089 int unregister_inetaddr_notifier(struct notifier_block *nb)
1090 {
1091         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1092 }
1093 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1094
1095 /* Rename ifa_labels for a device name change. Make some effort to preserve
1096  * existing alias numbering and to create unique labels if possible.
1097 */
1098 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1099 {
1100         struct in_ifaddr *ifa;
1101         int named = 0;
1102
1103         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1104                 char old[IFNAMSIZ], *dot;
1105
1106                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1107                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1108                 if (named++ == 0)
1109                         goto skip;
1110                 dot = strchr(old, ':');
1111                 if (dot == NULL) {
1112                         sprintf(old, ":%d", named);
1113                         dot = old;
1114                 }
1115                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1116                         strcat(ifa->ifa_label, dot);
1117                 else
1118                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1119 skip:
1120                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1121         }
1122 }
1123
/* Tell whether @mtu is large enough for IPv4, i.e. at least 68 bytes. */
static bool inetdev_valid_mtu(unsigned int mtu)
{
        if (mtu < 68)
                return false;

        return true;
}
1128
1129 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1130                                         struct in_device *in_dev)
1131
1132 {
1133         struct in_ifaddr *ifa;
1134
1135         for (ifa = in_dev->ifa_list; ifa;
1136              ifa = ifa->ifa_next) {
1137                 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1138                          ifa->ifa_local, dev,
1139                          ifa->ifa_local, NULL,
1140                          dev->dev_addr, NULL);
1141         }
1142 }
1143
1144 /* Called only under RTNL semaphore */
1145
1146 static int inetdev_event(struct notifier_block *this, unsigned long event,
1147                          void *ptr)
1148 {
1149         struct net_device *dev = ptr;
1150         struct in_device *in_dev;
1151
1152         if (event == NETDEV_UNREGISTER_FINAL)
1153                 goto out;
1154
1155         in_dev = __in_dev_get_rtnl(dev);
1156         ASSERT_RTNL();
1157
1158         if (!in_dev) {
1159                 if (event == NETDEV_REGISTER) {
1160                         in_dev = inetdev_init(dev);
1161                         if (!in_dev)
1162                                 return notifier_from_errno(-ENOMEM);
1163                         if (dev->flags & IFF_LOOPBACK) {
1164                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1165                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1166                         }
1167                 } else if (event == NETDEV_CHANGEMTU) {
1168                         /* Re-enabling IP */
1169                         if (inetdev_valid_mtu(dev->mtu))
1170                                 in_dev = inetdev_init(dev);
1171                 }
1172                 goto out;
1173         }
1174
1175         switch (event) {
1176         case NETDEV_REGISTER:
1177                 pr_debug("%s: bug\n", __func__);
1178                 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1179                 break;
1180         case NETDEV_UP:
1181                 if (!inetdev_valid_mtu(dev->mtu))
1182                         break;
1183                 if (dev->flags & IFF_LOOPBACK) {
1184                         struct in_ifaddr *ifa = inet_alloc_ifa();
1185
1186                         if (ifa) {
1187                                 INIT_HLIST_NODE(&ifa->hash);
1188                                 ifa->ifa_local =
1189                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1190                                 ifa->ifa_prefixlen = 8;
1191                                 ifa->ifa_mask = inet_make_mask(8);
1192                                 in_dev_hold(in_dev);
1193                                 ifa->ifa_dev = in_dev;
1194                                 ifa->ifa_scope = RT_SCOPE_HOST;
1195                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1196                                 inet_insert_ifa(ifa);
1197                         }
1198                 }
1199                 ip_mc_up(in_dev);
1200                 /* fall through */
1201         case NETDEV_CHANGEADDR:
1202                 if (!IN_DEV_ARP_NOTIFY(in_dev))
1203                         break;
1204                 /* fall through */
1205         case NETDEV_NOTIFY_PEERS:
1206                 /* Send gratuitous ARP to notify of link change */
1207                 inetdev_send_gratuitous_arp(dev, in_dev);
1208                 break;
1209         case NETDEV_DOWN:
1210                 ip_mc_down(in_dev);
1211                 break;
1212         case NETDEV_PRE_TYPE_CHANGE:
1213                 ip_mc_unmap(in_dev);
1214                 break;
1215         case NETDEV_POST_TYPE_CHANGE:
1216                 ip_mc_remap(in_dev);
1217                 break;
1218         case NETDEV_CHANGEMTU:
1219                 if (inetdev_valid_mtu(dev->mtu))
1220                         break;
1221                 /* disable IP when MTU is not enough */
1222         case NETDEV_UNREGISTER:
1223                 inetdev_destroy(in_dev);
1224                 break;
1225         case NETDEV_CHANGENAME:
1226                 /* Do not notify about label change, this event is
1227                  * not interesting to applications using netlink.
1228                  */
1229                 inetdev_changename(dev, in_dev);
1230
1231                 devinet_sysctl_unregister(in_dev);
1232                 devinet_sysctl_register(in_dev);
1233                 break;
1234         }
1235 out:
1236         return NOTIFY_DONE;
1237 }
1238
1239 static struct notifier_block ip_netdev_notifier = {
1240         .notifier_call = inetdev_event,
1241 };
1242
1243 static size_t inet_nlmsg_size(void)
1244 {
1245         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1246                + nla_total_size(4) /* IFA_ADDRESS */
1247                + nla_total_size(4) /* IFA_LOCAL */
1248                + nla_total_size(4) /* IFA_BROADCAST */
1249                + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1250 }
1251
1252 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1253                             u32 pid, u32 seq, int event, unsigned int flags)
1254 {
1255         struct ifaddrmsg *ifm;
1256         struct nlmsghdr  *nlh;
1257
1258         nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1259         if (nlh == NULL)
1260                 return -EMSGSIZE;
1261
1262         ifm = nlmsg_data(nlh);
1263         ifm->ifa_family = AF_INET;
1264         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1265         ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1266         ifm->ifa_scope = ifa->ifa_scope;
1267         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1268
1269         if ((ifa->ifa_address &&
1270              nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1271             (ifa->ifa_local &&
1272              nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
1273             (ifa->ifa_broadcast &&
1274              nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1275             (ifa->ifa_label[0] &&
1276              nla_put_string(skb, IFA_LABEL, ifa->ifa_label)))
1277                 goto nla_put_failure;
1278
1279         return nlmsg_end(skb, nlh);
1280
1281 nla_put_failure:
1282         nlmsg_cancel(skb, nlh);
1283         return -EMSGSIZE;
1284 }
1285
1286 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1287 {
1288         struct net *net = sock_net(skb->sk);
1289         int h, s_h;
1290         int idx, s_idx;
1291         int ip_idx, s_ip_idx;
1292         struct net_device *dev;
1293         struct in_device *in_dev;
1294         struct in_ifaddr *ifa;
1295         struct hlist_head *head;
1296         struct hlist_node *node;
1297
1298         s_h = cb->args[0];
1299         s_idx = idx = cb->args[1];
1300         s_ip_idx = ip_idx = cb->args[2];
1301
1302         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1303                 idx = 0;
1304                 head = &net->dev_index_head[h];
1305                 rcu_read_lock();
1306                 hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
1307                         if (idx < s_idx)
1308                                 goto cont;
1309                         if (h > s_h || idx > s_idx)
1310                                 s_ip_idx = 0;
1311                         in_dev = __in_dev_get_rcu(dev);
1312                         if (!in_dev)
1313                                 goto cont;
1314
1315                         for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1316                              ifa = ifa->ifa_next, ip_idx++) {
1317                                 if (ip_idx < s_ip_idx)
1318                                         continue;
1319                                 if (inet_fill_ifaddr(skb, ifa,
1320                                              NETLINK_CB(cb->skb).pid,
1321                                              cb->nlh->nlmsg_seq,
1322                                              RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1323                                         rcu_read_unlock();
1324                                         goto done;
1325                                 }
1326                         }
1327 cont:
1328                         idx++;
1329                 }
1330                 rcu_read_unlock();
1331         }
1332
1333 done:
1334         cb->args[0] = h;
1335         cb->args[1] = idx;
1336         cb->args[2] = ip_idx;
1337
1338         return skb->len;
1339 }
1340
1341 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1342                       u32 pid)
1343 {
1344         struct sk_buff *skb;
1345         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1346         int err = -ENOBUFS;
1347         struct net *net;
1348
1349         net = dev_net(ifa->ifa_dev->dev);
1350         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1351         if (skb == NULL)
1352                 goto errout;
1353
1354         err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1355         if (err < 0) {
1356                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1357                 WARN_ON(err == -EMSGSIZE);
1358                 kfree_skb(skb);
1359                 goto errout;
1360         }
1361         rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1362         return;
1363 errout:
1364         if (err < 0)
1365                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1366 }
1367
1368 static size_t inet_get_link_af_size(const struct net_device *dev)
1369 {
1370         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1371
1372         if (!in_dev)
1373                 return 0;
1374
1375         return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1376 }
1377
1378 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1379 {
1380         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1381         struct nlattr *nla;
1382         int i;
1383
1384         if (!in_dev)
1385                 return -ENODATA;
1386
1387         nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1388         if (nla == NULL)
1389                 return -EMSGSIZE;
1390
1391         for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1392                 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1393
1394         return 0;
1395 }
1396
1397 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1398         [IFLA_INET_CONF]        = { .type = NLA_NESTED },
1399 };
1400
1401 static int inet_validate_link_af(const struct net_device *dev,
1402                                  const struct nlattr *nla)
1403 {
1404         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1405         int err, rem;
1406
1407         if (dev && !__in_dev_get_rtnl(dev))
1408                 return -EAFNOSUPPORT;
1409
1410         err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1411         if (err < 0)
1412                 return err;
1413
1414         if (tb[IFLA_INET_CONF]) {
1415                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1416                         int cfgid = nla_type(a);
1417
1418                         if (nla_len(a) < 4)
1419                                 return -EINVAL;
1420
1421                         if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1422                                 return -EINVAL;
1423                 }
1424         }
1425
1426         return 0;
1427 }
1428
1429 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1430 {
1431         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1432         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1433         int rem;
1434
1435         if (!in_dev)
1436                 return -EAFNOSUPPORT;
1437
1438         if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1439                 BUG();
1440
1441         if (tb[IFLA_INET_CONF]) {
1442                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1443                         ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1444         }
1445
1446         return 0;
1447 }
1448
1449 #ifdef CONFIG_SYSCTL
1450
1451 static void devinet_copy_dflt_conf(struct net *net, int i)
1452 {
1453         struct net_device *dev;
1454
1455         rcu_read_lock();
1456         for_each_netdev_rcu(net, dev) {
1457                 struct in_device *in_dev;
1458
1459                 in_dev = __in_dev_get_rcu(dev);
1460                 if (in_dev && !test_bit(i, in_dev->cnf.state))
1461                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1462         }
1463         rcu_read_unlock();
1464 }
1465
1466 /* called with RTNL locked */
1467 static void inet_forward_change(struct net *net)
1468 {
1469         struct net_device *dev;
1470         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1471
1472         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1473         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1474
1475         for_each_netdev(net, dev) {
1476                 struct in_device *in_dev;
1477                 if (on)
1478                         dev_disable_lro(dev);
1479                 rcu_read_lock();
1480                 in_dev = __in_dev_get_rcu(dev);
1481                 if (in_dev)
1482                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1483                 rcu_read_unlock();
1484         }
1485 }
1486
1487 static int devinet_conf_proc(ctl_table *ctl, int write,
1488                              void __user *buffer,
1489                              size_t *lenp, loff_t *ppos)
1490 {
1491         int old_value = *(int *)ctl->data;
1492         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1493         int new_value = *(int *)ctl->data;
1494
1495         if (write) {
1496                 struct ipv4_devconf *cnf = ctl->extra1;
1497                 struct net *net = ctl->extra2;
1498                 int i = (int *)ctl->data - cnf->data;
1499
1500                 set_bit(i, cnf->state);
1501
1502                 if (cnf == net->ipv4.devconf_dflt)
1503                         devinet_copy_dflt_conf(net, i);
1504                 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
1505                     i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
1506                         if ((new_value == 0) && (old_value != 0))
1507                                 rt_cache_flush(net, 0);
1508         }
1509
1510         return ret;
1511 }
1512
1513 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1514                                   void __user *buffer,
1515                                   size_t *lenp, loff_t *ppos)
1516 {
1517         int *valp = ctl->data;
1518         int val = *valp;
1519         loff_t pos = *ppos;
1520         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1521
1522         if (write && *valp != val) {
1523                 struct net *net = ctl->extra2;
1524
1525                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1526                         if (!rtnl_trylock()) {
1527                                 /* Restore the original values before restarting */
1528                                 *valp = val;
1529                                 *ppos = pos;
1530                                 return restart_syscall();
1531                         }
1532                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1533                                 inet_forward_change(net);
1534                         } else if (*valp) {
1535                                 struct ipv4_devconf *cnf = ctl->extra1;
1536                                 struct in_device *idev =
1537                                         container_of(cnf, struct in_device, cnf);
1538                                 dev_disable_lro(idev->dev);
1539                         }
1540                         rtnl_unlock();
1541                         rt_cache_flush(net, 0);
1542                 }
1543         }
1544
1545         return ret;
1546 }
1547
1548 static int ipv4_doint_and_flush(ctl_table *ctl, int write,
1549                                 void __user *buffer,
1550                                 size_t *lenp, loff_t *ppos)
1551 {
1552         int *valp = ctl->data;
1553         int val = *valp;
1554         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1555         struct net *net = ctl->extra2;
1556
1557         if (write && *valp != val)
1558                 rt_cache_flush(net, 0);
1559
1560         return ret;
1561 }
1562
/*
 * Initializer for one ctl_table entry bound to setting IPV4_DEVCONF_<attr>
 * of the global ipv4_devconf (setting ids are 1-based, the data array is
 * 0-based).
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
        { \
                .procname       = name, \
                .data           = &ipv4_devconf.data[ \
                                        IPV4_DEVCONF_ ## attr - 1], \
                .maxlen         = sizeof(int), \
                .mode           = mval, \
                .proc_handler   = proc, \
                .extra1         = &ipv4_devconf, \
        }

/* Mode 0644 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Mode 0444 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Mode 0644 entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Mode 0644 entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
        DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
1585
1586 static struct devinet_sysctl_table {
1587         struct ctl_table_header *sysctl_header;
1588         struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
1589 } devinet_sysctl = {
1590         .devinet_vars = {
1591                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1592                                              devinet_sysctl_forward),
1593                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1594
1595                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1596                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1597                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1598                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1599                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1600                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1601                                         "accept_source_route"),
1602                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
1603                 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
1604                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1605                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1606                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1607                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1608                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1609                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1610                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1611                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1612                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1613                 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
1614                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
1615
1616                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1617                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1618                 DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1619                                               "force_igmp_version"),
1620                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1621                                               "promote_secondaries"),
1622                 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
1623                                               "route_localnet"),
1624         },
1625 };
1626
1627 static int __devinet_sysctl_register(struct net *net, char *dev_name,
1628                                         struct ipv4_devconf *p)
1629 {
1630         int i;
1631         struct devinet_sysctl_table *t;
1632         char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
1633
1634         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1635         if (!t)
1636                 goto out;
1637
1638         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1639                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1640                 t->devinet_vars[i].extra1 = p;
1641                 t->devinet_vars[i].extra2 = net;
1642         }
1643
1644         snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
1645
1646         t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
1647         if (!t->sysctl_header)
1648                 goto free;
1649
1650         p->sysctl = t;
1651         return 0;
1652
1653 free:
1654         kfree(t);
1655 out:
1656         return -ENOBUFS;
1657 }
1658
1659 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1660 {
1661         struct devinet_sysctl_table *t = cnf->sysctl;
1662
1663         if (t == NULL)
1664                 return;
1665
1666         cnf->sysctl = NULL;
1667         unregister_net_sysctl_table(t->sysctl_header);
1668         kfree(t);
1669 }
1670
1671 static void devinet_sysctl_register(struct in_device *idev)
1672 {
1673         neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
1674         __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1675                                         &idev->cnf);
1676 }
1677
1678 static void devinet_sysctl_unregister(struct in_device *idev)
1679 {
1680         __devinet_sysctl_unregister(&idev->cnf);
1681         neigh_sysctl_unregister(idev->arp_parms);
1682 }
1683
1684 static struct ctl_table ctl_forward_entry[] = {
1685         {
1686                 .procname       = "ip_forward",
1687                 .data           = &ipv4_devconf.data[
1688                                         IPV4_DEVCONF_FORWARDING - 1],
1689                 .maxlen         = sizeof(int),
1690                 .mode           = 0644,
1691                 .proc_handler   = devinet_sysctl_forward,
1692                 .extra1         = &ipv4_devconf,
1693                 .extra2         = &init_net,
1694         },
1695         { },
1696 };
1697 #endif
1698
1699 static __net_init int devinet_init_net(struct net *net)
1700 {
1701         int err;
1702         struct ipv4_devconf *all, *dflt;
1703 #ifdef CONFIG_SYSCTL
1704         struct ctl_table *tbl = ctl_forward_entry;
1705         struct ctl_table_header *forw_hdr;
1706 #endif
1707
1708         err = -ENOMEM;
1709         all = &ipv4_devconf;
1710         dflt = &ipv4_devconf_dflt;
1711
1712         if (!net_eq(net, &init_net)) {
1713                 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1714                 if (all == NULL)
1715                         goto err_alloc_all;
1716
1717                 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1718                 if (dflt == NULL)
1719                         goto err_alloc_dflt;
1720
1721 #ifdef CONFIG_SYSCTL
1722                 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1723                 if (tbl == NULL)
1724                         goto err_alloc_ctl;
1725
1726                 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
1727                 tbl[0].extra1 = all;
1728                 tbl[0].extra2 = net;
1729 #endif
1730         }
1731
1732 #ifdef CONFIG_SYSCTL
1733         err = __devinet_sysctl_register(net, "all", all);
1734         if (err < 0)
1735                 goto err_reg_all;
1736
1737         err = __devinet_sysctl_register(net, "default", dflt);
1738         if (err < 0)
1739                 goto err_reg_dflt;
1740
1741         err = -ENOMEM;
1742         forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
1743         if (forw_hdr == NULL)
1744                 goto err_reg_ctl;
1745         net->ipv4.forw_hdr = forw_hdr;
1746 #endif
1747
1748         net->ipv4.devconf_all = all;
1749         net->ipv4.devconf_dflt = dflt;
1750         return 0;
1751
1752 #ifdef CONFIG_SYSCTL
1753 err_reg_ctl:
1754         __devinet_sysctl_unregister(dflt);
1755 err_reg_dflt:
1756         __devinet_sysctl_unregister(all);
1757 err_reg_all:
1758         if (tbl != ctl_forward_entry)
1759                 kfree(tbl);
1760 err_alloc_ctl:
1761 #endif
1762         if (dflt != &ipv4_devconf_dflt)
1763                 kfree(dflt);
1764 err_alloc_dflt:
1765         if (all != &ipv4_devconf)
1766                 kfree(all);
1767 err_alloc_all:
1768         return err;
1769 }
1770
1771 static __net_exit void devinet_exit_net(struct net *net)
1772 {
1773 #ifdef CONFIG_SYSCTL
1774         struct ctl_table *tbl;
1775
1776         tbl = net->ipv4.forw_hdr->ctl_table_arg;
1777         unregister_net_sysctl_table(net->ipv4.forw_hdr);
1778         __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1779         __devinet_sysctl_unregister(net->ipv4.devconf_all);
1780         kfree(tbl);
1781 #endif
1782         kfree(net->ipv4.devconf_dflt);
1783         kfree(net->ipv4.devconf_all);
1784 }
1785
1786 static __net_initdata struct pernet_operations devinet_ops = {
1787         .init = devinet_init_net,
1788         .exit = devinet_exit_net,
1789 };
1790
1791 static struct rtnl_af_ops inet_af_ops = {
1792         .family           = AF_INET,
1793         .fill_link_af     = inet_fill_link_af,
1794         .get_link_af_size = inet_get_link_af_size,
1795         .validate_link_af = inet_validate_link_af,
1796         .set_link_af      = inet_set_link_af,
1797 };
1798
1799 void __init devinet_init(void)
1800 {
1801         int i;
1802
1803         for (i = 0; i < IN4_ADDR_HSIZE; i++)
1804                 INIT_HLIST_HEAD(&inet_addr_lst[i]);
1805
1806         register_pernet_subsys(&devinet_ops);
1807
1808         register_gifconf(PF_INET, inet_gifconf);
1809         register_netdevice_notifier(&ip_netdev_notifier);
1810
1811         rtnl_af_register(&inet_af_ops);
1812
1813         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
1814         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
1815         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
1816 }
1817