net/ipv4/fib_semantics.c
1 /*
2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
3  *              operating system.  INET is implemented using the  BSD Socket
4  *              interface as the means of communication with the user level.
5  *
6  *              IPv4 Forwarding Information Base: semantics.
7  *
8  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9  *
10  *              This program is free software; you can redistribute it and/or
11  *              modify it under the terms of the GNU General Public License
12  *              as published by the Free Software Foundation; either version
13  *              2 of the License, or (at your option) any later version.
14  */
15
16 #include <asm/uaccess.h>
17 #include <asm/system.h>
18 #include <linux/bitops.h>
19 #include <linux/types.h>
20 #include <linux/kernel.h>
21 #include <linux/jiffies.h>
22 #include <linux/mm.h>
23 #include <linux/string.h>
24 #include <linux/socket.h>
25 #include <linux/sockios.h>
26 #include <linux/errno.h>
27 #include <linux/in.h>
28 #include <linux/inet.h>
29 #include <linux/inetdevice.h>
30 #include <linux/netdevice.h>
31 #include <linux/if_arp.h>
32 #include <linux/proc_fs.h>
33 #include <linux/skbuff.h>
34 #include <linux/init.h>
35 #include <linux/slab.h>
36
37 #include <net/arp.h>
38 #include <net/ip.h>
39 #include <net/protocol.h>
40 #include <net/route.h>
41 #include <net/tcp.h>
42 #include <net/sock.h>
43 #include <net/ip_fib.h>
44 #include <net/netlink.h>
45 #include <net/nexthop.h>
46
47 #include "fib_lookup.h"
48
49 static DEFINE_SPINLOCK(fib_info_lock);
50 static struct hlist_head *fib_info_hash;
51 static struct hlist_head *fib_info_laddrhash;
52 static unsigned int fib_hash_size;
53 static unsigned int fib_info_cnt;
54
55 #define DEVINDEX_HASHBITS 8
56 #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
57 static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
58
59 #ifdef CONFIG_IP_ROUTE_MULTIPATH
60
61 static DEFINE_SPINLOCK(fib_multipath_lock);
62
63 #define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \
64 for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)
65
66 #define change_nexthops(fi) { int nhsel; struct fib_nh *nexthop_nh; \
67 for (nhsel=0, nexthop_nh = (struct fib_nh *)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nexthop_nh++, nhsel++)
68
69 #else /* CONFIG_IP_ROUTE_MULTIPATH */
70
71 /* Hope that gcc will optimize it to get rid of the dummy loop */
72
73 #define for_nexthops(fi) { int nhsel = 0; const struct fib_nh * nh = (fi)->fib_nh; \
74 for (nhsel=0; nhsel < 1; nhsel++)
75
76 #define change_nexthops(fi) { int nhsel = 0; struct fib_nh *nexthop_nh = (struct fib_nh *)((fi)->fib_nh); \
77 for (nhsel=0; nhsel < 1; nhsel++)
78
79 #endif /* CONFIG_IP_ROUTE_MULTIPATH */
80
81 #define endfor_nexthops(fi) }
82
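/*
 * Illustrative sketch (not from the original file): the iterator
 * macros above open a block that endfor_nexthops() closes, so a
 * typical walk over the nexthops of a fib_info looks like this.
 * fib_dump_oifs() is a hypothetical helper, shown only to
 * demonstrate the macro pairing.
 */
#if 0
static void fib_dump_oifs(const struct fib_info *fi)
{
	for_nexthops(fi) {
		/* nhsel and nh are provided by for_nexthops() */
		printk(KERN_DEBUG "nh %d: oif %d\n", nhsel, nh->nh_oif);
	} endfor_nexthops(fi);
}
#endif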
83
84 static const struct
85 {
86         int     error;
87         u8      scope;
88 } fib_props[RTN_MAX + 1] = {
89         {
90                 .error  = 0,
91                 .scope  = RT_SCOPE_NOWHERE,
92         },      /* RTN_UNSPEC */
93         {
94                 .error  = 0,
95                 .scope  = RT_SCOPE_UNIVERSE,
96         },      /* RTN_UNICAST */
97         {
98                 .error  = 0,
99                 .scope  = RT_SCOPE_HOST,
100         },      /* RTN_LOCAL */
101         {
102                 .error  = 0,
103                 .scope  = RT_SCOPE_LINK,
104         },      /* RTN_BROADCAST */
105         {
106                 .error  = 0,
107                 .scope  = RT_SCOPE_LINK,
108         },      /* RTN_ANYCAST */
109         {
110                 .error  = 0,
111                 .scope  = RT_SCOPE_UNIVERSE,
112         },      /* RTN_MULTICAST */
113         {
114                 .error  = -EINVAL,
115                 .scope  = RT_SCOPE_UNIVERSE,
116         },      /* RTN_BLACKHOLE */
117         {
118                 .error  = -EHOSTUNREACH,
119                 .scope  = RT_SCOPE_UNIVERSE,
120         },      /* RTN_UNREACHABLE */
121         {
122                 .error  = -EACCES,
123                 .scope  = RT_SCOPE_UNIVERSE,
124         },      /* RTN_PROHIBIT */
125         {
126                 .error  = -EAGAIN,
127                 .scope  = RT_SCOPE_UNIVERSE,
128         },      /* RTN_THROW */
129         {
130                 .error  = -EINVAL,
131                 .scope  = RT_SCOPE_NOWHERE,
132         },      /* RTN_NAT */
133         {
134                 .error  = -EINVAL,
135                 .scope  = RT_SCOPE_NOWHERE,
136         },      /* RTN_XRESOLVE */
137 };
138
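/*
 * Illustrative sketch (not from the original file): how the table
 * above is consulted; compare fib_semantic_match() and
 * fib_create_info() below.
 */
#if 0
static void fib_props_example(void)
{
	/* A blackhole route resolves to an error, never to a nexthop. */
	int err = fib_props[RTN_BLACKHOLE].error;	/* -EINVAL */
	u8 scope = fib_props[RTN_BLACKHOLE].scope;	/* RT_SCOPE_UNIVERSE */
}
#endif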
139
140 /* Release a nexthop info record */
141
142 void free_fib_info(struct fib_info *fi)
143 {
144         if (fi->fib_dead == 0) {
145                 printk(KERN_WARNING "Freeing alive fib_info %p\n", fi);
146                 return;
147         }
148         change_nexthops(fi) {
149                 if (nexthop_nh->nh_dev)
150                         dev_put(nexthop_nh->nh_dev);
151                 nexthop_nh->nh_dev = NULL;
152         } endfor_nexthops(fi);
153         fib_info_cnt--;
154         release_net(fi->fib_net);
155         kfree(fi);
156 }
157
158 void fib_release_info(struct fib_info *fi)
159 {
160         spin_lock_bh(&fib_info_lock);
161         if (fi && --fi->fib_treeref == 0) {
162                 hlist_del(&fi->fib_hash);
163                 if (fi->fib_prefsrc)
164                         hlist_del(&fi->fib_lhash);
165                 change_nexthops(fi) {
166                         if (!nexthop_nh->nh_dev)
167                                 continue;
168                         hlist_del(&nexthop_nh->nh_hash);
169                 } endfor_nexthops(fi)
170                 fi->fib_dead = 1;
171                 fib_info_put(fi);
172         }
173         spin_unlock_bh(&fib_info_lock);
174 }
175
176 static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
177 {
178         const struct fib_nh *onh = ofi->fib_nh;
179
180         for_nexthops(fi) {
181                 if (nh->nh_oif != onh->nh_oif ||
182                     nh->nh_gw  != onh->nh_gw ||
183                     nh->nh_scope != onh->nh_scope ||
184 #ifdef CONFIG_IP_ROUTE_MULTIPATH
185                     nh->nh_weight != onh->nh_weight ||
186 #endif
187 #ifdef CONFIG_NET_CLS_ROUTE
188                     nh->nh_tclassid != onh->nh_tclassid ||
189 #endif
190                     ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
191                         return -1;
192                 onh++;
193         } endfor_nexthops(fi);
194         return 0;
195 }
196
197 static inline unsigned int fib_devindex_hashfn(unsigned int val)
198 {
199         unsigned int mask = DEVINDEX_HASHSIZE - 1;
200
201         return (val ^
202                 (val >> DEVINDEX_HASHBITS) ^
203                 (val >> (DEVINDEX_HASHBITS * 2))) & mask;
204 }
205
206 static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
207 {
208         unsigned int mask = (fib_hash_size - 1);
209         unsigned int val = fi->fib_nhs;
210
211         val ^= fi->fib_protocol;
212         val ^= (__force u32)fi->fib_prefsrc;
213         val ^= fi->fib_priority;
214         for_nexthops(fi) {
215                 val ^= fib_devindex_hashfn(nh->nh_oif);
216         } endfor_nexthops(fi)
217
218         return (val ^ (val >> 7) ^ (val >> 12)) & mask;
219 }
220
221 static struct fib_info *fib_find_info(const struct fib_info *nfi)
222 {
223         struct hlist_head *head;
224         struct hlist_node *node;
225         struct fib_info *fi;
226         unsigned int hash;
227
228         hash = fib_info_hashfn(nfi);
229         head = &fib_info_hash[hash];
230
231         hlist_for_each_entry(fi, node, head, fib_hash) {
232                 if (!net_eq(fi->fib_net, nfi->fib_net))
233                         continue;
234                 if (fi->fib_nhs != nfi->fib_nhs)
235                         continue;
236                 if (nfi->fib_protocol == fi->fib_protocol &&
237                     nfi->fib_prefsrc == fi->fib_prefsrc &&
238                     nfi->fib_priority == fi->fib_priority &&
239                     memcmp(nfi->fib_metrics, fi->fib_metrics,
240                            sizeof(fi->fib_metrics)) == 0 &&
241                     ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
242                     (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
243                         return fi;
244         }
245
246         return NULL;
247 }
248
249 /* Check that the gateway is already configured.
250    Used only by the redirect accept routine.
251  */
252
253 int ip_fib_check_default(__be32 gw, struct net_device *dev)
254 {
255         struct hlist_head *head;
256         struct hlist_node *node;
257         struct fib_nh *nh;
258         unsigned int hash;
259
260         spin_lock(&fib_info_lock);
261
262         hash = fib_devindex_hashfn(dev->ifindex);
263         head = &fib_info_devhash[hash];
264         hlist_for_each_entry(nh, node, head, nh_hash) {
265                 if (nh->nh_dev == dev &&
266                     nh->nh_gw == gw &&
267                     !(nh->nh_flags&RTNH_F_DEAD)) {
268                         spin_unlock(&fib_info_lock);
269                         return 0;
270                 }
271         }
272
273         spin_unlock(&fib_info_lock);
274
275         return -1;
276 }
277
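/*
 * Sketch of the intended use (illustration only; the real check sits
 * in the ICMP redirect handling path, not in this file): a redirect
 * to new_gw is only trusted if new_gw is already in use as an alive
 * gateway on dev.  redirect_gw_is_known() is a hypothetical helper.
 */
#if 0
static int redirect_gw_is_known(__be32 new_gw, struct net_device *dev)
{
	/* ip_fib_check_default() returns 0 when the pair is found */
	return ip_fib_check_default(new_gw, dev) == 0;
}
#endif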
278 static inline size_t fib_nlmsg_size(struct fib_info *fi)
279 {
280         size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg))
281                          + nla_total_size(4) /* RTA_TABLE */
282                          + nla_total_size(4) /* RTA_DST */
283                          + nla_total_size(4) /* RTA_PRIORITY */
284                          + nla_total_size(4); /* RTA_PREFSRC */
285
286         /* space for nested metrics */
287         payload += nla_total_size((RTAX_MAX * nla_total_size(4)));
288
289         if (fi->fib_nhs) {
290                 /* Also handles the special case fib_nhs == 1 */
291
292                 /* each nexthop is packed in an attribute */
293                 size_t nhsize = nla_total_size(sizeof(struct rtnexthop));
294
295                 /* may contain flow and gateway attribute */
296                 nhsize += 2 * nla_total_size(4);
297
298                 /* all nexthops are packed in a nested attribute */
299                 payload += nla_total_size(fi->fib_nhs * nhsize);
300         }
301
302         return payload;
303 }
304
305 void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
306                int dst_len, u32 tb_id, struct nl_info *info,
307                unsigned int nlm_flags)
308 {
309         struct sk_buff *skb;
310         u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
311         int err = -ENOBUFS;
312
313         skb = nlmsg_new(fib_nlmsg_size(fa->fa_info), GFP_KERNEL);
314         if (skb == NULL)
315                 goto errout;
316
317         err = fib_dump_info(skb, info->pid, seq, event, tb_id,
318                             fa->fa_type, fa->fa_scope, key, dst_len,
319                             fa->fa_tos, fa->fa_info, nlm_flags);
320         if (err < 0) {
321                 /* -EMSGSIZE implies BUG in fib_nlmsg_size() */
322                 WARN_ON(err == -EMSGSIZE);
323                 kfree_skb(skb);
324                 goto errout;
325         }
326         rtnl_notify(skb, info->nl_net, info->pid, RTNLGRP_IPV4_ROUTE,
327                     info->nlh, GFP_KERNEL);
328         return;
329 errout:
330         if (err < 0)
331                 rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err);
332 }
333
334 /* Return the first fib alias matching TOS with
335  * priority less than or equal to PRIO.
336  */
337 struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio)
338 {
339         if (fah) {
340                 struct fib_alias *fa;
341                 list_for_each_entry(fa, fah, fa_list) {
342                         if (fa->fa_tos > tos)
343                                 continue;
344                         if (fa->fa_info->fib_priority >= prio ||
345                             fa->fa_tos < tos)
346                                 return fa;
347                 }
348         }
349         return NULL;
350 }
351
352 int fib_detect_death(struct fib_info *fi, int order,
353                      struct fib_info **last_resort, int *last_idx, int dflt)
354 {
355         struct neighbour *n;
356         int state = NUD_NONE;
357
358         n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
359         if (n) {
360                 state = n->nud_state;
361                 neigh_release(n);
362         }
363         if (state == NUD_REACHABLE)
364                 return 0;
365         if ((state&NUD_VALID) && order != dflt)
366                 return 0;
367         if ((state&NUD_VALID) ||
368             (*last_idx<0 && order > dflt)) {
369                 *last_resort = fi;
370                 *last_idx = order;
371         }
372         return 1;
373 }
374
375 #ifdef CONFIG_IP_ROUTE_MULTIPATH
376
377 static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining)
378 {
379         int nhs = 0;
380
381         while (rtnh_ok(rtnh, remaining)) {
382                 nhs++;
383                 rtnh = rtnh_next(rtnh, &remaining);
384         }
385
386         /* leftover implies invalid nexthop configuration, discard it */
387         return remaining > 0 ? 0 : nhs;
388 }
389
390 static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
391                        int remaining, struct fib_config *cfg)
392 {
393         change_nexthops(fi) {
394                 int attrlen;
395
396                 if (!rtnh_ok(rtnh, remaining))
397                         return -EINVAL;
398
399                 nexthop_nh->nh_flags =
400                         (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags;
401                 nexthop_nh->nh_oif = rtnh->rtnh_ifindex;
402                 nexthop_nh->nh_weight = rtnh->rtnh_hops + 1;
403
404                 attrlen = rtnh_attrlen(rtnh);
405                 if (attrlen > 0) {
406                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
407
408                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
409                         nexthop_nh->nh_gw = nla ? nla_get_be32(nla) : 0;
410 #ifdef CONFIG_NET_CLS_ROUTE
411                         nla = nla_find(attrs, attrlen, RTA_FLOW);
412                         nexthop_nh->nh_tclassid = nla ? nla_get_u32(nla) : 0;
413 #endif
414                 }
415
416                 rtnh = rtnh_next(rtnh, &remaining);
417         } endfor_nexthops(fi);
418
419         return 0;
420 }
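/*
 * Layout parsed by the two helpers above (added illustration): the
 * RTA_MULTIPATH attribute carries a packed sequence of struct
 * rtnexthop entries, each optionally followed by its own attributes
 * (RTA_GATEWAY, RTA_FLOW, ...), with rtnh_len covering the header
 * plus those attributes:
 *
 *   [ rtnexthop | RTA_GATEWAY ... ] [ rtnexthop | ... ] ...
 */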
421
422 #endif
423
424 int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
425 {
426 #ifdef CONFIG_IP_ROUTE_MULTIPATH
427         struct rtnexthop *rtnh;
428         int remaining;
429 #endif
430
431         if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority)
432                 return 1;
433
434         if (cfg->fc_oif || cfg->fc_gw) {
435                 if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) &&
436                     (!cfg->fc_gw  || cfg->fc_gw == fi->fib_nh->nh_gw))
437                         return 0;
438                 return 1;
439         }
440
441 #ifdef CONFIG_IP_ROUTE_MULTIPATH
442         if (cfg->fc_mp == NULL)
443                 return 0;
444
445         rtnh = cfg->fc_mp;
446         remaining = cfg->fc_mp_len;
447
448         for_nexthops(fi) {
449                 int attrlen;
450
451                 if (!rtnh_ok(rtnh, remaining))
452                         return -EINVAL;
453
454                 if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->nh_oif)
455                         return 1;
456
457                 attrlen = rtnh_attrlen(rtnh);
458                 if (attrlen > 0) {
459                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
460
461                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
462                         if (nla && nla_get_be32(nla) != nh->nh_gw)
463                                 return 1;
464 #ifdef CONFIG_NET_CLS_ROUTE
465                         nla = nla_find(attrs, attrlen, RTA_FLOW);
466                         if (nla && nla_get_u32(nla) != nh->nh_tclassid)
467                                 return 1;
468 #endif
469                 }
470
471                 rtnh = rtnh_next(rtnh, &remaining);
472         } endfor_nexthops(fi);
473 #endif
474         return 0;
475 }
476
477
478 /*
479    Picture
480    -------
481
482    The semantics of nexthops are very messy for historical reasons.
483    We have to take into account that:
484    a) the gateway can actually be a local interface address,
485       so that a gatewayed route is direct.
486    b) the gateway must be an on-link address, possibly
487       described not by an ifaddr, but by a direct route.
488    c) if both gateway and interface are specified, they must not
489       contradict each other.
490    d) if we use tunnel routes, the gateway may not be on-link.
491
492    Attempts to reconcile all of these (alas, self-contradictory) conditions
493    result in pretty ugly and hairy code with obscure logic.
494
495    I chose to generalize it instead, so that the amount
496    of code barely grows, but it becomes
497    much more general.
498    Every prefix is assigned a "scope" value: "host" is a local address,
499    "link" is a direct route,
500    [ ... "site" ... "interior" ... ]
501    and "universe" is a true gateway route with global meaning.
502
503    Every prefix refers to a set of "nexthop"s (gw, oif),
504    where the gw must have a narrower scope. This recursion stops
505    when the gw has LOCAL scope or when the "nexthop" is declared ONLINK,
506    which means that the gw is forced to be on-link.
507
508    The code is still hairy, but now it is apparently logically
509    consistent and very flexible. E.g., as a by-product, it allows
510    independent exterior and interior routing processes
511    to coexist in peace.
512
513    Normally it looks like the following:
514
515    {universe prefix}  -> (gw, oif) [scope link]
516                           |
517                           |-> {link prefix} -> (gw, oif) [scope local]
518                                                 |
519                                                 |-> {local prefix} (terminal node)
520  */
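/*
 * A concrete instance of the scope narrowing described above (added
 * illustration): a default route "via 192.0.2.1 dev eth0" has
 * universe scope; its gateway must resolve through a link-scope
 * prefix such as 192.0.2.0/24 on eth0, which in turn terminates at
 * the host-scope local address of eth0.  fib_check_nh() below
 * enforces this by looking the gateway up with scope cfg->fc_scope + 1.
 */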
521
522 static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
523                         struct fib_nh *nh)
524 {
525         int err;
526         struct net *net;
527
528         net = cfg->fc_nlinfo.nl_net;
529         if (nh->nh_gw) {
530                 struct fib_result res;
531
532                 if (nh->nh_flags&RTNH_F_ONLINK) {
533                         struct net_device *dev;
534
535                         if (cfg->fc_scope >= RT_SCOPE_LINK)
536                                 return -EINVAL;
537                         if (inet_addr_type(net, nh->nh_gw) != RTN_UNICAST)
538                                 return -EINVAL;
539                         if ((dev = __dev_get_by_index(net, nh->nh_oif)) == NULL)
540                                 return -ENODEV;
541                         if (!(dev->flags&IFF_UP))
542                                 return -ENETDOWN;
543                         nh->nh_dev = dev;
544                         dev_hold(dev);
545                         nh->nh_scope = RT_SCOPE_LINK;
546                         return 0;
547                 }
548                 {
549                         struct flowi fl = {
550                                 .nl_u = {
551                                         .ip4_u = {
552                                                 .daddr = nh->nh_gw,
553                                                 .scope = cfg->fc_scope + 1,
554                                         },
555                                 },
556                                 .oif = nh->nh_oif,
557                         };
558
559                         /* It is not necessary, but requires a bit of thinking */
560                         if (fl.fl4_scope < RT_SCOPE_LINK)
561                                 fl.fl4_scope = RT_SCOPE_LINK;
562                         if ((err = fib_lookup(net, &fl, &res)) != 0)
563                                 return err;
564                 }
565                 err = -EINVAL;
566                 if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
567                         goto out;
568                 nh->nh_scope = res.scope;
569                 nh->nh_oif = FIB_RES_OIF(res);
570                 if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
571                         goto out;
572                 dev_hold(nh->nh_dev);
573                 err = -ENETDOWN;
574                 if (!(nh->nh_dev->flags & IFF_UP))
575                         goto out;
576                 err = 0;
577 out:
578                 fib_res_put(&res);
579                 return err;
580         } else {
581                 struct in_device *in_dev;
582
583                 if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
584                         return -EINVAL;
585
586                 in_dev = inetdev_by_index(net, nh->nh_oif);
587                 if (in_dev == NULL)
588                         return -ENODEV;
589                 if (!(in_dev->dev->flags&IFF_UP)) {
590                         in_dev_put(in_dev);
591                         return -ENETDOWN;
592                 }
593                 nh->nh_dev = in_dev->dev;
594                 dev_hold(nh->nh_dev);
595                 nh->nh_scope = RT_SCOPE_HOST;
596                 in_dev_put(in_dev);
597         }
598         return 0;
599 }
600
601 static inline unsigned int fib_laddr_hashfn(__be32 val)
602 {
603         unsigned int mask = (fib_hash_size - 1);
604
605         return ((__force u32)val ^ ((__force u32)val >> 7) ^ ((__force u32)val >> 14)) & mask;
606 }
607
608 static struct hlist_head *fib_hash_alloc(int bytes)
609 {
610         if (bytes <= PAGE_SIZE)
611                 return kzalloc(bytes, GFP_KERNEL);
612         else
613                 return (struct hlist_head *)
614                         __get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(bytes));
615 }
616
617 static void fib_hash_free(struct hlist_head *hash, int bytes)
618 {
619         if (!hash)
620                 return;
621
622         if (bytes <= PAGE_SIZE)
623                 kfree(hash);
624         else
625                 free_pages((unsigned long) hash, get_order(bytes));
626 }
627
628 static void fib_hash_move(struct hlist_head *new_info_hash,
629                           struct hlist_head *new_laddrhash,
630                           unsigned int new_size)
631 {
632         struct hlist_head *old_info_hash, *old_laddrhash;
633         unsigned int old_size = fib_hash_size;
634         unsigned int i, bytes;
635
636         spin_lock_bh(&fib_info_lock);
637         old_info_hash = fib_info_hash;
638         old_laddrhash = fib_info_laddrhash;
639         fib_hash_size = new_size;
640
641         for (i = 0; i < old_size; i++) {
642                 struct hlist_head *head = &fib_info_hash[i];
643                 struct hlist_node *node, *n;
644                 struct fib_info *fi;
645
646                 hlist_for_each_entry_safe(fi, node, n, head, fib_hash) {
647                         struct hlist_head *dest;
648                         unsigned int new_hash;
649
650                         hlist_del(&fi->fib_hash);
651
652                         new_hash = fib_info_hashfn(fi);
653                         dest = &new_info_hash[new_hash];
654                         hlist_add_head(&fi->fib_hash, dest);
655                 }
656         }
657         fib_info_hash = new_info_hash;
658
659         for (i = 0; i < old_size; i++) {
660                 struct hlist_head *lhead = &fib_info_laddrhash[i];
661                 struct hlist_node *node, *n;
662                 struct fib_info *fi;
663
664                 hlist_for_each_entry_safe(fi, node, n, lhead, fib_lhash) {
665                         struct hlist_head *ldest;
666                         unsigned int new_hash;
667
668                         hlist_del(&fi->fib_lhash);
669
670                         new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
671                         ldest = &new_laddrhash[new_hash];
672                         hlist_add_head(&fi->fib_lhash, ldest);
673                 }
674         }
675         fib_info_laddrhash = new_laddrhash;
676
677         spin_unlock_bh(&fib_info_lock);
678
679         bytes = old_size * sizeof(struct hlist_head *);
680         fib_hash_free(old_info_hash, bytes);
681         fib_hash_free(old_laddrhash, bytes);
682 }
683
684 struct fib_info *fib_create_info(struct fib_config *cfg)
685 {
686         int err;
687         struct fib_info *fi = NULL;
688         struct fib_info *ofi;
689         int nhs = 1;
690         struct net *net = cfg->fc_nlinfo.nl_net;
691
692 /* Fast check to catch the weirdest cases */
693         if (fib_props[cfg->fc_type].scope > cfg->fc_scope)
694                 goto err_inval;
695
696 #ifdef CONFIG_IP_ROUTE_MULTIPATH
697         if (cfg->fc_mp) {
698                 nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len);
699                 if (nhs == 0)
700                         goto err_inval;
701         }
702 #endif
703
704         err = -ENOBUFS;
705         if (fib_info_cnt >= fib_hash_size) {
706                 unsigned int new_size = fib_hash_size << 1;
707                 struct hlist_head *new_info_hash;
708                 struct hlist_head *new_laddrhash;
709                 unsigned int bytes;
710
711                 if (!new_size)
712                         new_size = 1;
713                 bytes = new_size * sizeof(struct hlist_head *);
714                 new_info_hash = fib_hash_alloc(bytes);
715                 new_laddrhash = fib_hash_alloc(bytes);
716                 if (!new_info_hash || !new_laddrhash) {
717                         fib_hash_free(new_info_hash, bytes);
718                         fib_hash_free(new_laddrhash, bytes);
719                 } else
720                         fib_hash_move(new_info_hash, new_laddrhash, new_size);
721
722                 if (!fib_hash_size)
723                         goto failure;
724         }
725
726         fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
727         if (fi == NULL)
728                 goto failure;
729         fib_info_cnt++;
730
731         fi->fib_net = hold_net(net);
732         fi->fib_protocol = cfg->fc_protocol;
733         fi->fib_flags = cfg->fc_flags;
734         fi->fib_priority = cfg->fc_priority;
735         fi->fib_prefsrc = cfg->fc_prefsrc;
736
737         fi->fib_nhs = nhs;
738         change_nexthops(fi) {
739                 nexthop_nh->nh_parent = fi;
740         } endfor_nexthops(fi)
741
742         if (cfg->fc_mx) {
743                 struct nlattr *nla;
744                 int remaining;
745
746                 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
747                         int type = nla_type(nla);
748
749                         if (type) {
750                                 if (type > RTAX_MAX)
751                                         goto err_inval;
752                                 fi->fib_metrics[type - 1] = nla_get_u32(nla);
753                         }
754                 }
755         }
756
757         if (cfg->fc_mp) {
758 #ifdef CONFIG_IP_ROUTE_MULTIPATH
759                 err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg);
760                 if (err != 0)
761                         goto failure;
762                 if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif)
763                         goto err_inval;
764                 if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw)
765                         goto err_inval;
766 #ifdef CONFIG_NET_CLS_ROUTE
767                 if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow)
768                         goto err_inval;
769 #endif
770 #else
771                 goto err_inval;
772 #endif
773         } else {
774                 struct fib_nh *nh = fi->fib_nh;
775
776                 nh->nh_oif = cfg->fc_oif;
777                 nh->nh_gw = cfg->fc_gw;
778                 nh->nh_flags = cfg->fc_flags;
779 #ifdef CONFIG_NET_CLS_ROUTE
780                 nh->nh_tclassid = cfg->fc_flow;
781 #endif
782 #ifdef CONFIG_IP_ROUTE_MULTIPATH
783                 nh->nh_weight = 1;
784 #endif
785         }
786
787         if (fib_props[cfg->fc_type].error) {
788                 if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp)
789                         goto err_inval;
790                 goto link_it;
791         }
792
793         if (cfg->fc_scope > RT_SCOPE_HOST)
794                 goto err_inval;
795
796         if (cfg->fc_scope == RT_SCOPE_HOST) {
797                 struct fib_nh *nh = fi->fib_nh;
798
799                 /* Local address is added. */
800                 if (nhs != 1 || nh->nh_gw)
801                         goto err_inval;
802                 nh->nh_scope = RT_SCOPE_NOWHERE;
803                 nh->nh_dev = dev_get_by_index(net, fi->fib_nh->nh_oif);
804                 err = -ENODEV;
805                 if (nh->nh_dev == NULL)
806                         goto failure;
807         } else {
808                 change_nexthops(fi) {
809                         if ((err = fib_check_nh(cfg, fi, nexthop_nh)) != 0)
810                                 goto failure;
811                 } endfor_nexthops(fi)
812         }
813
814         if (fi->fib_prefsrc) {
815                 if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
816                     fi->fib_prefsrc != cfg->fc_dst)
817                         if (inet_addr_type(net, fi->fib_prefsrc) != RTN_LOCAL)
818                                 goto err_inval;
819         }
820
821 link_it:
822         if ((ofi = fib_find_info(fi)) != NULL) {
823                 fi->fib_dead = 1;
824                 free_fib_info(fi);
825                 ofi->fib_treeref++;
826                 return ofi;
827         }
828
829         fi->fib_treeref++;
830         atomic_inc(&fi->fib_clntref);
831         spin_lock_bh(&fib_info_lock);
832         hlist_add_head(&fi->fib_hash,
833                        &fib_info_hash[fib_info_hashfn(fi)]);
834         if (fi->fib_prefsrc) {
835                 struct hlist_head *head;
836
837                 head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
838                 hlist_add_head(&fi->fib_lhash, head);
839         }
840         change_nexthops(fi) {
841                 struct hlist_head *head;
842                 unsigned int hash;
843
844                 if (!nexthop_nh->nh_dev)
845                         continue;
846                 hash = fib_devindex_hashfn(nexthop_nh->nh_dev->ifindex);
847                 head = &fib_info_devhash[hash];
848                 hlist_add_head(&nexthop_nh->nh_hash, head);
849         } endfor_nexthops(fi)
850         spin_unlock_bh(&fib_info_lock);
851         return fi;
852
853 err_inval:
854         err = -EINVAL;
855
856 failure:
857         if (fi) {
858                 fi->fib_dead = 1;
859                 free_fib_info(fi);
860         }
861
862         return ERR_PTR(err);
863 }
864
865 /* Note! fib_semantic_match intentionally uses RCU list functions. */
866 int fib_semantic_match(struct list_head *head, const struct flowi *flp,
867                        struct fib_result *res, int prefixlen)
868 {
869         struct fib_alias *fa;
870         int nh_sel = 0;
871
872         list_for_each_entry_rcu(fa, head, fa_list) {
873                 int err;
874
875                 if (fa->fa_tos &&
876                     fa->fa_tos != flp->fl4_tos)
877                         continue;
878
879                 if (fa->fa_scope < flp->fl4_scope)
880                         continue;
881
882                 fa->fa_state |= FA_S_ACCESSED;
883
884                 err = fib_props[fa->fa_type].error;
885                 if (err == 0) {
886                         struct fib_info *fi = fa->fa_info;
887
888                         if (fi->fib_flags & RTNH_F_DEAD)
889                                 continue;
890
891                         switch (fa->fa_type) {
892                         case RTN_UNICAST:
893                         case RTN_LOCAL:
894                         case RTN_BROADCAST:
895                         case RTN_ANYCAST:
896                         case RTN_MULTICAST:
897                                 for_nexthops(fi) {
898                                         if (nh->nh_flags&RTNH_F_DEAD)
899                                                 continue;
900                                         if (!flp->oif || flp->oif == nh->nh_oif)
901                                                 break;
902                                 }
903 #ifdef CONFIG_IP_ROUTE_MULTIPATH
904                                 if (nhsel < fi->fib_nhs) {
905                                         nh_sel = nhsel;
906                                         goto out_fill_res;
907                                 }
908 #else
909                                 if (nhsel < 1) {
910                                         goto out_fill_res;
911                                 }
912 #endif
913                                 endfor_nexthops(fi);
914                                 continue;
915
916                         default:
917                                 printk(KERN_WARNING "fib_semantic_match bad type %#x\n",
918                                         fa->fa_type);
919                                 return -EINVAL;
920                         }
921                 }
922                 return err;
923         }
924         return 1;
925
926 out_fill_res:
927         res->prefixlen = prefixlen;
928         res->nh_sel = nh_sel;
929         res->type = fa->fa_type;
930         res->scope = fa->fa_scope;
931         res->fi = fa->fa_info;
932         atomic_inc(&res->fi->fib_clntref);
933         return 0;
934 }
935
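/*
 * Sketch of the expected calling convention (illustration only,
 * assuming an RCU-protected caller such as the trie lookup path, as
 * the note above about RCU list functions suggests).
 */
#if 0
	rcu_read_lock();
	err = fib_semantic_match(head, flp, res, prefixlen);
	rcu_read_unlock();
#endif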
936 /* Find an appropriate source address for this destination */
937
938 __be32 __fib_res_prefsrc(struct fib_result *res)
939 {
940         return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
941 }
942
943 int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
944                   u32 tb_id, u8 type, u8 scope, __be32 dst, int dst_len, u8 tos,
945                   struct fib_info *fi, unsigned int flags)
946 {
947         struct nlmsghdr *nlh;
948         struct rtmsg *rtm;
949
950         nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags);
951         if (nlh == NULL)
952                 return -EMSGSIZE;
953
954         rtm = nlmsg_data(nlh);
955         rtm->rtm_family = AF_INET;
956         rtm->rtm_dst_len = dst_len;
957         rtm->rtm_src_len = 0;
958         rtm->rtm_tos = tos;
959         if (tb_id < 256)
960                 rtm->rtm_table = tb_id;
961         else
962                 rtm->rtm_table = RT_TABLE_COMPAT;
963         NLA_PUT_U32(skb, RTA_TABLE, tb_id);
964         rtm->rtm_type = type;
965         rtm->rtm_flags = fi->fib_flags;
966         rtm->rtm_scope = scope;
967         rtm->rtm_protocol = fi->fib_protocol;
968
969         if (rtm->rtm_dst_len)
970                 NLA_PUT_BE32(skb, RTA_DST, dst);
971
972         if (fi->fib_priority)
973                 NLA_PUT_U32(skb, RTA_PRIORITY, fi->fib_priority);
974
975         if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
976                 goto nla_put_failure;
977
978         if (fi->fib_prefsrc)
979                 NLA_PUT_BE32(skb, RTA_PREFSRC, fi->fib_prefsrc);
980
981         if (fi->fib_nhs == 1) {
982                 if (fi->fib_nh->nh_gw)
983                         NLA_PUT_BE32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw);
984
985                 if (fi->fib_nh->nh_oif)
986                         NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif);
987 #ifdef CONFIG_NET_CLS_ROUTE
988                 if (fi->fib_nh[0].nh_tclassid)
989                         NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid);
990 #endif
991         }
992 #ifdef CONFIG_IP_ROUTE_MULTIPATH
993         if (fi->fib_nhs > 1) {
994                 struct rtnexthop *rtnh;
995                 struct nlattr *mp;
996
997                 mp = nla_nest_start(skb, RTA_MULTIPATH);
998                 if (mp == NULL)
999                         goto nla_put_failure;
1000
1001                 for_nexthops(fi) {
1002                         rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
1003                         if (rtnh == NULL)
1004                                 goto nla_put_failure;
1005
1006                         rtnh->rtnh_flags = nh->nh_flags & 0xFF;
1007                         rtnh->rtnh_hops = nh->nh_weight - 1;
1008                         rtnh->rtnh_ifindex = nh->nh_oif;
1009
1010                         if (nh->nh_gw)
1011                                 NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw);
1012 #ifdef CONFIG_NET_CLS_ROUTE
1013                         if (nh->nh_tclassid)
1014                                 NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid);
1015 #endif
1016                         /* length of rtnetlink header + attributes */
1017                         rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh;
1018                 } endfor_nexthops(fi);
1019
1020                 nla_nest_end(skb, mp);
1021         }
1022 #endif
1023         return nlmsg_end(skb, nlh);
1024
1025 nla_put_failure:
1026         nlmsg_cancel(skb, nlh);
1027         return -EMSGSIZE;
1028 }
1029
1030 /*
1031    Update FIB if:
1032    - local address disappeared -> we must delete all the entries
1033      referring to it.
1034    - device went down -> we must shut down all nexthops going via it.
1035  */
1036 int fib_sync_down_addr(struct net *net, __be32 local)
1037 {
1038         int ret = 0;
1039         unsigned int hash = fib_laddr_hashfn(local);
1040         struct hlist_head *head = &fib_info_laddrhash[hash];
1041         struct hlist_node *node;
1042         struct fib_info *fi;
1043
1044         if (fib_info_laddrhash == NULL || local == 0)
1045                 return 0;
1046
1047         hlist_for_each_entry(fi, node, head, fib_lhash) {
1048                 if (!net_eq(fi->fib_net, net))
1049                         continue;
1050                 if (fi->fib_prefsrc == local) {
1051                         fi->fib_flags |= RTNH_F_DEAD;
1052                         ret++;
1053                 }
1054         }
1055         return ret;
1056 }
1057
1058 int fib_sync_down_dev(struct net_device *dev, int force)
1059 {
1060         int ret = 0;
1061         int scope = RT_SCOPE_NOWHERE;
1062         struct fib_info *prev_fi = NULL;
1063         unsigned int hash = fib_devindex_hashfn(dev->ifindex);
1064         struct hlist_head *head = &fib_info_devhash[hash];
1065         struct hlist_node *node;
1066         struct fib_nh *nh;
1067
1068         if (force)
1069                 scope = -1;
1070
1071         hlist_for_each_entry(nh, node, head, nh_hash) {
1072                 struct fib_info *fi = nh->nh_parent;
1073                 int dead;
1074
1075                 BUG_ON(!fi->fib_nhs);
1076                 if (nh->nh_dev != dev || fi == prev_fi)
1077                         continue;
1078                 prev_fi = fi;
1079                 dead = 0;
1080                 change_nexthops(fi) {
1081                         if (nexthop_nh->nh_flags&RTNH_F_DEAD)
1082                                 dead++;
1083                         else if (nexthop_nh->nh_dev == dev &&
1084                                  nexthop_nh->nh_scope != scope) {
1085                                 nexthop_nh->nh_flags |= RTNH_F_DEAD;
1086 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1087                                 spin_lock_bh(&fib_multipath_lock);
1088                                 fi->fib_power -= nexthop_nh->nh_power;
1089                                 nexthop_nh->nh_power = 0;
1090                                 spin_unlock_bh(&fib_multipath_lock);
1091 #endif
1092                                 dead++;
1093                         }
1094 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1095                         if (force > 1 && nexthop_nh->nh_dev == dev) {
1096                                 dead = fi->fib_nhs;
1097                                 break;
1098                         }
1099 #endif
1100                 } endfor_nexthops(fi)
1101                 if (dead == fi->fib_nhs) {
1102                         fi->fib_flags |= RTNH_F_DEAD;
1103                         ret++;
1104                 }
1105         }
1106
1107         return ret;
1108 }
1109
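/*
 * Simplified sketch of how the two sync helpers above are meant to
 * be driven (illustration only; the real notifier handlers live in
 * fib_frontend.c and do more bookkeeping than shown here).
 */
#if 0
static int example_netdev_event(struct notifier_block *nb,
				unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;

	if (event == NETDEV_DOWN)
		/* mark every nexthop routed via this device as dead */
		fib_sync_down_dev(dev, 0);
	return NOTIFY_DONE;
}
#endif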
1110 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1111
1112 /*
1113    A dead device comes back up. We wake up its dead nexthops.
1114    This makes sense only for multipath routes.
1115  */
1116
1117 int fib_sync_up(struct net_device *dev)
1118 {
1119         struct fib_info *prev_fi;
1120         unsigned int hash;
1121         struct hlist_head *head;
1122         struct hlist_node *node;
1123         struct fib_nh *nh;
1124         int ret;
1125
1126         if (!(dev->flags&IFF_UP))
1127                 return 0;
1128
1129         prev_fi = NULL;
1130         hash = fib_devindex_hashfn(dev->ifindex);
1131         head = &fib_info_devhash[hash];
1132         ret = 0;
1133
1134         hlist_for_each_entry(nh, node, head, nh_hash) {
1135                 struct fib_info *fi = nh->nh_parent;
1136                 int alive;
1137
1138                 BUG_ON(!fi->fib_nhs);
1139                 if (nh->nh_dev != dev || fi == prev_fi)
1140                         continue;
1141
1142                 prev_fi = fi;
1143                 alive = 0;
1144                 change_nexthops(fi) {
1145                         if (!(nexthop_nh->nh_flags&RTNH_F_DEAD)) {
1146                                 alive++;
1147                                 continue;
1148                         }
1149                         if (nexthop_nh->nh_dev == NULL ||
1150                             !(nexthop_nh->nh_dev->flags&IFF_UP))
1151                                 continue;
1152                         if (nexthop_nh->nh_dev != dev ||
1153                             !__in_dev_get_rtnl(dev))
1154                                 continue;
1155                         alive++;
1156                         spin_lock_bh(&fib_multipath_lock);
1157                         nexthop_nh->nh_power = 0;
1158                         nexthop_nh->nh_flags &= ~RTNH_F_DEAD;
1159                         spin_unlock_bh(&fib_multipath_lock);
1160                 } endfor_nexthops(fi)
1161
1162                 if (alive > 0) {
1163                         fi->fib_flags &= ~RTNH_F_DEAD;
1164                         ret++;
1165                 }
1166         }
1167
1168         return ret;
1169 }
1170
1171 /*
1172    The algorithm is suboptimal, but it provides really
1173    fair weighted route distribution.
1174  */
1175
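/*
 * Worked example of the weighting (added illustration): with two
 * alive nexthops of weight 3 and 1, fib_power is refilled to 4.
 * Each call draws w = jiffies % fi->fib_power and walks the
 * nexthops, subtracting nh_power until w drops to zero or below;
 * the chosen nexthop then has its nh_power and fi->fib_power
 * decremented.  Over one refill cycle of four selections the first
 * nexthop is picked exactly three times and the second exactly once.
 */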
1176 void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
1177 {
1178         struct fib_info *fi = res->fi;
1179         int w;
1180
1181         spin_lock_bh(&fib_multipath_lock);
1182         if (fi->fib_power <= 0) {
1183                 int power = 0;
1184                 change_nexthops(fi) {
1185                         if (!(nexthop_nh->nh_flags&RTNH_F_DEAD)) {
1186                                 power += nexthop_nh->nh_weight;
1187                                 nexthop_nh->nh_power = nexthop_nh->nh_weight;
1188                         }
1189                 } endfor_nexthops(fi);
1190                 fi->fib_power = power;
1191                 if (power <= 0) {
1192                         spin_unlock_bh(&fib_multipath_lock);
1193                         /* Race condition: route has just become dead. */
1194                         res->nh_sel = 0;
1195                         return;
1196                 }
1197         }
1198
1199
1200         /* w should be a random number in [0..fi->fib_power-1];
1201            jiffies provides a pretty bad approximation.
1202          */
1203
1204         w = jiffies % fi->fib_power;
1205
1206         change_nexthops(fi) {
1207                 if (!(nexthop_nh->nh_flags&RTNH_F_DEAD) &&
1208                     nexthop_nh->nh_power) {
1209                         if ((w -= nexthop_nh->nh_power) <= 0) {
1210                                 nexthop_nh->nh_power--;
1211                                 fi->fib_power--;
1212                                 res->nh_sel = nhsel;
1213                                 spin_unlock_bh(&fib_multipath_lock);
1214                                 return;
1215                         }
1216                 }
1217         } endfor_nexthops(fi);
1218
1219         /* Race condition: route has just become dead. */
1220         res->nh_sel = 0;
1221         spin_unlock_bh(&fib_multipath_lock);
1222 }
1223 #endif