Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
[linux-drm-fsl-dcu.git] / net / xfrm / xfrm_state.c
1 /*
2  * xfrm_state.c
3  *
4  * Changes:
5  *      Mitsuru KANDA @USAGI
6  *      Kazunori MIYAZAWA @USAGI
7  *      Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8  *              IPv6 support
9  *      YOSHIFUJI Hideaki @USAGI
10  *              Split up af-specific functions
11  *      Derek Atkins <derek@ihtfp.com>
12  *              Add UDP Encapsulation
13  *
14  */
15
16 #include <linux/workqueue.h>
17 #include <net/xfrm.h>
18 #include <linux/pfkeyv2.h>
19 #include <linux/ipsec.h>
20 #include <linux/module.h>
21 #include <linux/cache.h>
22 #include <asm/uaccess.h>
23
24 #include "xfrm_hash.h"
25
/* Exported socket pointer for the xfrm subsystem.  Nothing in this part of
 * the file assigns it.
 * NOTE(review): presumably the netlink socket set up by the xfrm user
 * interface -- confirm against the code that initialises it.
 */
26 struct sock *xfrm_nl;
27 EXPORT_SYMBOL(xfrm_nl);
28
/* Exported value initialised to XFRM_AE_ETIME.
 * NOTE(review): the sysctl_ prefix suggests it is runtime-tunable; confirm.
 */
29 u32 sysctl_xfrm_aevent_etime = XFRM_AE_ETIME;
30 EXPORT_SYMBOL(sysctl_xfrm_aevent_etime);
31
/* Exported value initialised to XFRM_AE_SEQT_SIZE.
 * NOTE(review): the sysctl_ prefix suggests it is runtime-tunable; confirm.
 */
32 u32 sysctl_xfrm_aevent_rseqth = XFRM_AE_SEQT_SIZE;
33 EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth);
34
35 /* Each xfrm_state may be linked to two tables:
36
37    1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
38    2. Hash table by (daddr,family,reqid) to find what SAs exist for given
39       destination/tunnel endpoint. (output)
40  */
41
/* Taken by the code in this file whenever it walks or modifies the state
 * hash tables declared below.
 */
42 static DEFINE_SPINLOCK(xfrm_state_lock);
43
44 /* Hash table to find appropriate SA towards given target (endpoint
45  * of tunnel or destination of transport mode) allowed by selector.
46  *
47  * Main use is finding SA after policy selected tunnel or transport mode.
48  * Also, it can be used by ah/esp icmp error handler to find offending SA.
49  */
/* Three bucket arrays that always share one size:
 *   bydst - indexed by xfrm_dst_hash() (daddr, saddr, reqid, family)
 *   bysrc - indexed by xfrm_src_hash() (daddr, saddr, family)
 *   byspi - indexed by xfrm_spi_hash() (daddr, spi, proto, family)
 */
50 static struct hlist_head *xfrm_state_bydst __read_mostly;
51 static struct hlist_head *xfrm_state_bysrc __read_mostly;
52 static struct hlist_head *xfrm_state_byspi __read_mostly;
/* Current number of buckets minus one; passed as the mask to the hash helpers. */
53 static unsigned int xfrm_state_hmask __read_mostly;
/* Upper bound on the bucket count; xfrm_hash_grow_check() stops growing here. */
54 static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
/* Number of states currently linked into the tables (bumped on insert,
 * dropped in __xfrm_state_delete()).
 */
55 static unsigned int xfrm_state_num;
/* Counter incremented for every __xfrm_state_insert(); copied into x->genid. */
56 static unsigned int xfrm_state_genid;
57
58 static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr,
59                                          xfrm_address_t *saddr,
60                                          u32 reqid,
61                                          unsigned short family)
62 {
63         return __xfrm_dst_hash(daddr, saddr, reqid, family, xfrm_state_hmask);
64 }
65
66 static inline unsigned int xfrm_src_hash(xfrm_address_t *daddr,
67                                          xfrm_address_t *saddr,
68                                          unsigned short family)
69 {
70         return __xfrm_src_hash(daddr, saddr, family, xfrm_state_hmask);
71 }
72
73 static inline unsigned int
74 xfrm_spi_hash(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family)
75 {
76         return __xfrm_spi_hash(daddr, spi, proto, family, xfrm_state_hmask);
77 }
78
79 static void xfrm_hash_transfer(struct hlist_head *list,
80                                struct hlist_head *ndsttable,
81                                struct hlist_head *nsrctable,
82                                struct hlist_head *nspitable,
83                                unsigned int nhashmask)
84 {
85         struct hlist_node *entry, *tmp;
86         struct xfrm_state *x;
87
88         hlist_for_each_entry_safe(x, entry, tmp, list, bydst) {
89                 unsigned int h;
90
91                 h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
92                                     x->props.reqid, x->props.family,
93                                     nhashmask);
94                 hlist_add_head(&x->bydst, ndsttable+h);
95
96                 h = __xfrm_src_hash(&x->id.daddr, &x->props.saddr,
97                                     x->props.family,
98                                     nhashmask);
99                 hlist_add_head(&x->bysrc, nsrctable+h);
100
101                 if (x->id.spi) {
102                         h = __xfrm_spi_hash(&x->id.daddr, x->id.spi,
103                                             x->id.proto, x->props.family,
104                                             nhashmask);
105                         hlist_add_head(&x->byspi, nspitable+h);
106                 }
107         }
108 }
109
110 static unsigned long xfrm_hash_new_size(void)
111 {
112         return ((xfrm_state_hmask + 1) << 1) *
113                 sizeof(struct hlist_head);
114 }
115
116 static DEFINE_MUTEX(hash_resize_mutex);
117
118 static void xfrm_hash_resize(struct work_struct *__unused)
119 {
120         struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi;
121         unsigned long nsize, osize;
122         unsigned int nhashmask, ohashmask;
123         int i;
124
125         mutex_lock(&hash_resize_mutex);
126
127         nsize = xfrm_hash_new_size();
128         ndst = xfrm_hash_alloc(nsize);
129         if (!ndst)
130                 goto out_unlock;
131         nsrc = xfrm_hash_alloc(nsize);
132         if (!nsrc) {
133                 xfrm_hash_free(ndst, nsize);
134                 goto out_unlock;
135         }
136         nspi = xfrm_hash_alloc(nsize);
137         if (!nspi) {
138                 xfrm_hash_free(ndst, nsize);
139                 xfrm_hash_free(nsrc, nsize);
140                 goto out_unlock;
141         }
142
143         spin_lock_bh(&xfrm_state_lock);
144
145         nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
146         for (i = xfrm_state_hmask; i >= 0; i--)
147                 xfrm_hash_transfer(xfrm_state_bydst+i, ndst, nsrc, nspi,
148                                    nhashmask);
149
150         odst = xfrm_state_bydst;
151         osrc = xfrm_state_bysrc;
152         ospi = xfrm_state_byspi;
153         ohashmask = xfrm_state_hmask;
154
155         xfrm_state_bydst = ndst;
156         xfrm_state_bysrc = nsrc;
157         xfrm_state_byspi = nspi;
158         xfrm_state_hmask = nhashmask;
159
160         spin_unlock_bh(&xfrm_state_lock);
161
162         osize = (ohashmask + 1) * sizeof(struct hlist_head);
163         xfrm_hash_free(odst, osize);
164         xfrm_hash_free(osrc, osize);
165         xfrm_hash_free(ospi, osize);
166
167 out_unlock:
168         mutex_unlock(&hash_resize_mutex);
169 }
170
171 static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize);
172
/* Exported wait queue; this file wakes it after state insertion, flush,
 * garbage collection and acquire expiry.
 */
173 DECLARE_WAIT_QUEUE_HEAD(km_waitq);
174 EXPORT_SYMBOL(km_waitq);
175
/* Per-family operations table and the lock declared alongside it.
 * NOTE(review): presumably indexed by address family and guarded by the
 * rwlock -- confirm in xfrm_state_get_afinfo().
 */
176 static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
177 static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];
178
/* Deferred destruction: __xfrm_state_destroy() queues dead states on
 * xfrm_state_gc_list (under xfrm_state_gc_lock) and schedules
 * xfrm_state_gc_work, whose handler frees them.
 */
179 static struct work_struct xfrm_state_gc_work;
180 static HLIST_HEAD(xfrm_state_gc_list);
181 static DEFINE_SPINLOCK(xfrm_state_gc_lock);
182
/* Forward declarations for functions used before their definition. */
183 int __xfrm_state_delete(struct xfrm_state *x);
184
185 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family);
186 static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
187
188 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
189 void km_state_expired(struct xfrm_state *x, int hard, u32 pid);
190
191 static void xfrm_state_gc_destroy(struct xfrm_state *x)
192 {
193         del_timer_sync(&x->timer);
194         del_timer_sync(&x->rtimer);
195         kfree(x->aalg);
196         kfree(x->ealg);
197         kfree(x->calg);
198         kfree(x->encap);
199         kfree(x->coaddr);
200         if (x->mode)
201                 xfrm_put_mode(x->mode);
202         if (x->type) {
203                 x->type->destructor(x);
204                 xfrm_put_type(x->type);
205         }
206         security_xfrm_state_free(x);
207         kfree(x);
208 }
209
210 static void xfrm_state_gc_task(struct work_struct *data)
211 {
212         struct xfrm_state *x;
213         struct hlist_node *entry, *tmp;
214         struct hlist_head gc_list;
215
216         spin_lock_bh(&xfrm_state_gc_lock);
217         gc_list.first = xfrm_state_gc_list.first;
218         INIT_HLIST_HEAD(&xfrm_state_gc_list);
219         spin_unlock_bh(&xfrm_state_gc_lock);
220
221         hlist_for_each_entry_safe(x, entry, tmp, &gc_list, bydst)
222                 xfrm_state_gc_destroy(x);
223
224         wake_up(&km_waitq);
225 }
226
227 static inline unsigned long make_jiffies(long secs)
228 {
229         if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
230                 return MAX_SCHEDULE_TIMEOUT-1;
231         else
232                 return secs*HZ;
233 }
234
/*
 * Lifetime timer callback for one state; @data is the xfrm_state pointer.
 *
 * Runs under x->lock.  A DEAD state is ignored.  Hard add/use lifetimes
 * that have run out send the state down the "expired" path; otherwise the
 * smallest remaining time (in seconds) is remembered in "next".  Soft
 * lifetimes that have run out set km.dying and send a soft-expire
 * notification.  The timer is re-armed whenever a finite "next" exists.
 */
235 static void xfrm_timer_handler(unsigned long data)
236 {
237         struct xfrm_state *x = (struct xfrm_state*)data;
238         unsigned long now = (unsigned long)xtime.tv_sec;
239         long next = LONG_MAX;
240         int warn = 0;
241
242         spin_lock(&x->lock);
243         if (x->km.state == XFRM_STATE_DEAD)
244                 goto out;
245         if (x->km.state == XFRM_STATE_EXPIRED)
246                 goto expired;
            /* Hard limit measured from the time the state was added. */
247         if (x->lft.hard_add_expires_seconds) {
248                 long tmo = x->lft.hard_add_expires_seconds +
249                         x->curlft.add_time - now;
250                 if (tmo <= 0)
251                         goto expired;
252                 if (tmo < next)
253                         next = tmo;
254         }
            /* Hard limit measured from first use; a state not yet used
             * (use_time == 0) is treated as if first used right now. */
255         if (x->lft.hard_use_expires_seconds) {
256                 long tmo = x->lft.hard_use_expires_seconds +
257                         (x->curlft.use_time ? : now) - now;
258                 if (tmo <= 0)
259                         goto expired;
260                 if (tmo < next)
261                         next = tmo;
262         }
            /* Soft expiry was already signalled: only the hard limits matter. */
263         if (x->km.dying)
264                 goto resched;
265         if (x->lft.soft_add_expires_seconds) {
266                 long tmo = x->lft.soft_add_expires_seconds +
267                         x->curlft.add_time - now;
268                 if (tmo <= 0)
269                         warn = 1;
270                 else if (tmo < next)
271                         next = tmo;
272         }
273         if (x->lft.soft_use_expires_seconds) {
274                 long tmo = x->lft.soft_use_expires_seconds +
275                         (x->curlft.use_time ? : now) - now;
276                 if (tmo <= 0)
277                         warn = 1;
278                 else if (tmo < next)
279                         next = tmo;
280         }
281
282         x->km.dying = warn;
283         if (warn)
                    /* hard == 0: soft-expire notification */
284                 km_state_expired(x, 0, 0);
285 resched:
286         if (next != LONG_MAX)
287                 mod_timer(&x->timer, jiffies + make_jiffies(next));
288
289         goto out;
290
291 expired:
            /* An acquire placeholder without an SPI is not deleted at once:
             * it is marked EXPIRED, waiters are woken, and the timer fires
             * again in 2 seconds, when the EXPIRED check above leads to the
             * deletion below. */
292         if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
293                 x->km.state = XFRM_STATE_EXPIRED;
294                 wake_up(&km_waitq);
295                 next = 2;
296                 goto resched;
297         }
            /* Notify hard expiry only if this call performed the delete and
             * the state has an SPI. */
298         if (!__xfrm_state_delete(x) && x->id.spi)
299                 km_state_expired(x, 1, 0);
300
301 out:
302         spin_unlock(&x->lock);
303 }
304
305 static void xfrm_replay_timer_handler(unsigned long data);
306
307 struct xfrm_state *xfrm_state_alloc(void)
308 {
309         struct xfrm_state *x;
310
311         x = kzalloc(sizeof(struct xfrm_state), GFP_ATOMIC);
312
313         if (x) {
314                 atomic_set(&x->refcnt, 1);
315                 atomic_set(&x->tunnel_users, 0);
316                 INIT_HLIST_NODE(&x->bydst);
317                 INIT_HLIST_NODE(&x->bysrc);
318                 INIT_HLIST_NODE(&x->byspi);
319                 init_timer(&x->timer);
320                 x->timer.function = xfrm_timer_handler;
321                 x->timer.data     = (unsigned long)x;
322                 init_timer(&x->rtimer);
323                 x->rtimer.function = xfrm_replay_timer_handler;
324                 x->rtimer.data     = (unsigned long)x;
325                 x->curlft.add_time = (unsigned long)xtime.tv_sec;
326                 x->lft.soft_byte_limit = XFRM_INF;
327                 x->lft.soft_packet_limit = XFRM_INF;
328                 x->lft.hard_byte_limit = XFRM_INF;
329                 x->lft.hard_packet_limit = XFRM_INF;
330                 x->replay_maxage = 0;
331                 x->replay_maxdiff = 0;
332                 spin_lock_init(&x->lock);
333         }
334         return x;
335 }
336 EXPORT_SYMBOL(xfrm_state_alloc);
337
338 void __xfrm_state_destroy(struct xfrm_state *x)
339 {
340         BUG_TRAP(x->km.state == XFRM_STATE_DEAD);
341
342         spin_lock_bh(&xfrm_state_gc_lock);
343         hlist_add_head(&x->bydst, &xfrm_state_gc_list);
344         spin_unlock_bh(&xfrm_state_gc_lock);
345         schedule_work(&xfrm_state_gc_work);
346 }
347 EXPORT_SYMBOL(__xfrm_state_destroy);
348
349 int __xfrm_state_delete(struct xfrm_state *x)
350 {
351         int err = -ESRCH;
352
353         if (x->km.state != XFRM_STATE_DEAD) {
354                 x->km.state = XFRM_STATE_DEAD;
355                 spin_lock(&xfrm_state_lock);
356                 hlist_del(&x->bydst);
357                 hlist_del(&x->bysrc);
358                 if (x->id.spi)
359                         hlist_del(&x->byspi);
360                 xfrm_state_num--;
361                 spin_unlock(&xfrm_state_lock);
362
363                 /* All xfrm_state objects are created by xfrm_state_alloc.
364                  * The xfrm_state_alloc call gives a reference, and that
365                  * is what we are dropping here.
366                  */
367                 __xfrm_state_put(x);
368                 err = 0;
369         }
370
371         return err;
372 }
373 EXPORT_SYMBOL(__xfrm_state_delete);
374
375 int xfrm_state_delete(struct xfrm_state *x)
376 {
377         int err;
378
379         spin_lock_bh(&x->lock);
380         err = __xfrm_state_delete(x);
381         spin_unlock_bh(&x->lock);
382
383         return err;
384 }
385 EXPORT_SYMBOL(xfrm_state_delete);
386
387 void xfrm_state_flush(u8 proto)
388 {
389         int i;
390
391         spin_lock_bh(&xfrm_state_lock);
392         for (i = 0; i <= xfrm_state_hmask; i++) {
393                 struct hlist_node *entry;
394                 struct xfrm_state *x;
395 restart:
396                 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
397                         if (!xfrm_state_kern(x) &&
398                             xfrm_id_proto_match(x->id.proto, proto)) {
399                                 xfrm_state_hold(x);
400                                 spin_unlock_bh(&xfrm_state_lock);
401
402                                 xfrm_state_delete(x);
403                                 xfrm_state_put(x);
404
405                                 spin_lock_bh(&xfrm_state_lock);
406                                 goto restart;
407                         }
408                 }
409         }
410         spin_unlock_bh(&xfrm_state_lock);
411         wake_up(&km_waitq);
412 }
413 EXPORT_SYMBOL(xfrm_state_flush);
414
415 static int
416 xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
417                   struct xfrm_tmpl *tmpl,
418                   xfrm_address_t *daddr, xfrm_address_t *saddr,
419                   unsigned short family)
420 {
421         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
422         if (!afinfo)
423                 return -1;
424         afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
425         xfrm_state_put_afinfo(afinfo);
426         return 0;
427 }
428
429 static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family)
430 {
431         unsigned int h = xfrm_spi_hash(daddr, spi, proto, family);
432         struct xfrm_state *x;
433         struct hlist_node *entry;
434
435         hlist_for_each_entry(x, entry, xfrm_state_byspi+h, byspi) {
436                 if (x->props.family != family ||
437                     x->id.spi       != spi ||
438                     x->id.proto     != proto)
439                         continue;
440
441                 switch (family) {
442                 case AF_INET:
443                         if (x->id.daddr.a4 != daddr->a4)
444                                 continue;
445                         break;
446                 case AF_INET6:
447                         if (!ipv6_addr_equal((struct in6_addr *)daddr,
448                                              (struct in6_addr *)
449                                              x->id.daddr.a6))
450                                 continue;
451                         break;
452                 };
453
454                 xfrm_state_hold(x);
455                 return x;
456         }
457
458         return NULL;
459 }
460
461 static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family)
462 {
463         unsigned int h = xfrm_src_hash(daddr, saddr, family);
464         struct xfrm_state *x;
465         struct hlist_node *entry;
466
467         hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) {
468                 if (x->props.family != family ||
469                     x->id.proto     != proto)
470                         continue;
471
472                 switch (family) {
473                 case AF_INET:
474                         if (x->id.daddr.a4 != daddr->a4 ||
475                             x->props.saddr.a4 != saddr->a4)
476                                 continue;
477                         break;
478                 case AF_INET6:
479                         if (!ipv6_addr_equal((struct in6_addr *)daddr,
480                                              (struct in6_addr *)
481                                              x->id.daddr.a6) ||
482                             !ipv6_addr_equal((struct in6_addr *)saddr,
483                                              (struct in6_addr *)
484                                              x->props.saddr.a6))
485                                 continue;
486                         break;
487                 };
488
489                 xfrm_state_hold(x);
490                 return x;
491         }
492
493         return NULL;
494 }
495
496 static inline struct xfrm_state *
497 __xfrm_state_locate(struct xfrm_state *x, int use_spi, int family)
498 {
499         if (use_spi)
500                 return __xfrm_state_lookup(&x->id.daddr, x->id.spi,
501                                            x->id.proto, family);
502         else
503                 return __xfrm_state_lookup_byaddr(&x->id.daddr,
504                                                   &x->props.saddr,
505                                                   x->id.proto, family);
506 }
507
508 static void xfrm_hash_grow_check(int have_hash_collision)
509 {
510         if (have_hash_collision &&
511             (xfrm_state_hmask + 1) < xfrm_state_hashmax &&
512             xfrm_state_num > xfrm_state_hmask)
513                 schedule_work(&xfrm_hash_work);
514 }
515
/*
 * Find, or start acquiring, a state suitable for a flow and template.
 *
 * The by-destination bucket for (daddr, saddr, tmpl->reqid, family) is
 * scanned under xfrm_state_lock.  Among VALID candidates whose selector and
 * security context match the flow, the preferred one is not dying and,
 * among equals, has the latest add_time.
 *
 * When no usable state exists, no error was recorded and no acquire is in
 * progress, a new state is allocated and handed to km_query(); on success
 * it is linked into the tables in ACQ state with its timer set to
 * XFRM_ACQ_EXPIRES seconds.
 *
 * Returns the chosen (or newly created ACQ) state with an extra reference
 * taken; *err is not written in that case.  Returns NULL with *err set
 * otherwise: -EAGAIN when an acquire is already in progress, else the
 * recorded error (-ESRCH, -EEXIST, -ENOMEM, or the security hook's error).
 */
516 struct xfrm_state *
517 xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, 
518                 struct flowi *fl, struct xfrm_tmpl *tmpl,
519                 struct xfrm_policy *pol, int *err,
520                 unsigned short family)
521 {
522         unsigned int h = xfrm_dst_hash(daddr, saddr, tmpl->reqid, family);
523         struct hlist_node *entry;
524         struct xfrm_state *x, *x0;
525         int acquire_in_progress = 0;
526         int error = 0;
527         struct xfrm_state *best = NULL;
528         
529         spin_lock_bh(&xfrm_state_lock);
530         hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
531                 if (x->props.family == family &&
532                     x->props.reqid == tmpl->reqid &&
533                     !(x->props.flags & XFRM_STATE_WILDRECV) &&
534                     xfrm_state_addr_check(x, daddr, saddr, family) &&
535                     tmpl->mode == x->props.mode &&
536                     tmpl->id.proto == x->id.proto &&
537                     (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) {
538                         /* Resolution logic:
539                            1. There is a valid state with matching selector.
540                               Done.
541                            2. Valid state with inappropriate selector. Skip.
542
543                            Entering area of "sysdeps".
544
545                            3. If state is not valid, selector is temporary,
546                               it selects only session which triggered
547                               previous resolution. Key manager will do
548                               something to install a state with proper
549                               selector.
550                          */
551                         if (x->km.state == XFRM_STATE_VALID) {
552                                 if (!xfrm_selector_match(&x->sel, fl, family) ||
553                                     !security_xfrm_state_pol_flow_match(x, pol, fl))
554                                         continue;
                                    /* Prefer a non-dying state; among equals,
                                     * the one added most recently. */
555                                 if (!best ||
556                                     best->km.dying > x->km.dying ||
557                                     (best->km.dying == x->km.dying &&
558                                      best->curlft.add_time < x->curlft.add_time))
559                                         best = x;
560                         } else if (x->km.state == XFRM_STATE_ACQ) {
561                                 acquire_in_progress = 1;
562                         } else if (x->km.state == XFRM_STATE_ERROR ||
563                                    x->km.state == XFRM_STATE_EXPIRED) {
564                                 if (xfrm_selector_match(&x->sel, fl, family) &&
565                                     security_xfrm_state_pol_flow_match(x, pol, fl))
566                                         error = -ESRCH;
567                         }
568                 }
569         }
570
571         x = best;
572         if (!x && !error && !acquire_in_progress) {
                    /* The template asks for a specific SPI that is already
                     * installed for this destination: refuse to acquire. */
573                 if (tmpl->id.spi &&
574                     (x0 = __xfrm_state_lookup(daddr, tmpl->id.spi,
575                                               tmpl->id.proto, family)) != NULL) {
576                         xfrm_state_put(x0);
577                         error = -EEXIST;
578                         goto out;
579                 }
580                 x = xfrm_state_alloc();
581                 if (x == NULL) {
582                         error = -ENOMEM;
583                         goto out;
584                 }
585                 /* Initialize temporary selector matching only
586                  * to current session. */
                    /* NOTE(review): the return value of xfrm_init_tempsel()
                     * (-1 when no afinfo exists for the family) is ignored. */
587                 xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);
588
589                 error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid);
590                 if (error) {
591                         x->km.state = XFRM_STATE_DEAD;
592                         xfrm_state_put(x);
593                         x = NULL;
594                         goto out;
595                 }
596
597                 if (km_query(x, tmpl, pol) == 0) {
598                         x->km.state = XFRM_STATE_ACQ;
599                         hlist_add_head(&x->bydst, xfrm_state_bydst+h);
600                         h = xfrm_src_hash(daddr, saddr, family);
601                         hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
602                         if (x->id.spi) {
603                                 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
604                                 hlist_add_head(&x->byspi, xfrm_state_byspi+h);
605                         }
606                         x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
607                         x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
608                         add_timer(&x->timer);
609                         xfrm_state_num++;
                            /* A non-NULL next pointer means the bucket was
                             * already occupied, i.e. a hash collision. */
610                         xfrm_hash_grow_check(x->bydst.next != NULL);
611                 } else {
612                         x->km.state = XFRM_STATE_DEAD;
613                         xfrm_state_put(x);
614                         x = NULL;
615                         error = -ESRCH;
616                 }
617         }
618 out:
            /* The caller gets its own reference on whatever is returned. */
619         if (x)
620                 xfrm_state_hold(x);
621         else
622                 *err = acquire_in_progress ? -EAGAIN : error;
623         spin_unlock_bh(&xfrm_state_lock);
624         return x;
625 }
626
627 static void __xfrm_state_insert(struct xfrm_state *x)
628 {
629         unsigned int h;
630
631         x->genid = ++xfrm_state_genid;
632
633         h = xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
634                           x->props.reqid, x->props.family);
635         hlist_add_head(&x->bydst, xfrm_state_bydst+h);
636
637         h = xfrm_src_hash(&x->id.daddr, &x->props.saddr, x->props.family);
638         hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
639
640         if (x->id.spi) {
641                 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
642                                   x->props.family);
643
644                 hlist_add_head(&x->byspi, xfrm_state_byspi+h);
645         }
646
647         mod_timer(&x->timer, jiffies + HZ);
648         if (x->replay_maxage)
649                 mod_timer(&x->rtimer, jiffies + x->replay_maxage);
650
651         wake_up(&km_waitq);
652
653         xfrm_state_num++;
654
655         xfrm_hash_grow_check(x->bydst.next != NULL);
656 }
657
658 /* xfrm_state_lock is held */
659 static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
660 {
661         unsigned short family = xnew->props.family;
662         u32 reqid = xnew->props.reqid;
663         struct xfrm_state *x;
664         struct hlist_node *entry;
665         unsigned int h;
666
667         h = xfrm_dst_hash(&xnew->id.daddr, &xnew->props.saddr, reqid, family);
668         hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
669                 if (x->props.family     == family &&
670                     x->props.reqid      == reqid &&
671                     !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family) &&
672                     !xfrm_addr_cmp(&x->props.saddr, &xnew->props.saddr, family))
673                         x->genid = xfrm_state_genid;
674         }
675 }
676
677 void xfrm_state_insert(struct xfrm_state *x)
678 {
679         spin_lock_bh(&xfrm_state_lock);
680         __xfrm_state_bump_genids(x);
681         __xfrm_state_insert(x);
682         spin_unlock_bh(&xfrm_state_lock);
683 }
684 EXPORT_SYMBOL(xfrm_state_insert);
685
686 /* xfrm_state_lock is held */
687 static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create)
688 {
689         unsigned int h = xfrm_dst_hash(daddr, saddr, reqid, family);
690         struct hlist_node *entry;
691         struct xfrm_state *x;
692
693         hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
694                 if (x->props.reqid  != reqid ||
695                     x->props.mode   != mode ||
696                     x->props.family != family ||
697                     x->km.state     != XFRM_STATE_ACQ ||
698                     x->id.spi       != 0)
699                         continue;
700
701                 switch (family) {
702                 case AF_INET:
703                         if (x->id.daddr.a4    != daddr->a4 ||
704                             x->props.saddr.a4 != saddr->a4)
705                                 continue;
706                         break;
707                 case AF_INET6:
708                         if (!ipv6_addr_equal((struct in6_addr *)x->id.daddr.a6,
709                                              (struct in6_addr *)daddr) ||
710                             !ipv6_addr_equal((struct in6_addr *)
711                                              x->props.saddr.a6,
712                                              (struct in6_addr *)saddr))
713                                 continue;
714                         break;
715                 };
716
717                 xfrm_state_hold(x);
718                 return x;
719         }
720
721         if (!create)
722                 return NULL;
723
724         x = xfrm_state_alloc();
725         if (likely(x)) {
726                 switch (family) {
727                 case AF_INET:
728                         x->sel.daddr.a4 = daddr->a4;
729                         x->sel.saddr.a4 = saddr->a4;
730                         x->sel.prefixlen_d = 32;
731                         x->sel.prefixlen_s = 32;
732                         x->props.saddr.a4 = saddr->a4;
733                         x->id.daddr.a4 = daddr->a4;
734                         break;
735
736                 case AF_INET6:
737                         ipv6_addr_copy((struct in6_addr *)x->sel.daddr.a6,
738                                        (struct in6_addr *)daddr);
739                         ipv6_addr_copy((struct in6_addr *)x->sel.saddr.a6,
740                                        (struct in6_addr *)saddr);
741                         x->sel.prefixlen_d = 128;
742                         x->sel.prefixlen_s = 128;
743                         ipv6_addr_copy((struct in6_addr *)x->props.saddr.a6,
744                                        (struct in6_addr *)saddr);
745                         ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6,
746                                        (struct in6_addr *)daddr);
747                         break;
748                 };
749
750                 x->km.state = XFRM_STATE_ACQ;
751                 x->id.proto = proto;
752                 x->props.family = family;
753                 x->props.mode = mode;
754                 x->props.reqid = reqid;
755                 x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
756                 xfrm_state_hold(x);
757                 x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
758                 add_timer(&x->timer);
759                 hlist_add_head(&x->bydst, xfrm_state_bydst+h);
760                 h = xfrm_src_hash(daddr, saddr, family);
761                 hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
762                 wake_up(&km_waitq);
763
764                 xfrm_state_num++;
765
766                 xfrm_hash_grow_check(x->bydst.next != NULL);
767         }
768
769         return x;
770 }
771
772 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);
773
774 int xfrm_state_add(struct xfrm_state *x)
775 {
776         struct xfrm_state *x1;
777         int family;
778         int err;
779         int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
780
781         family = x->props.family;
782
783         spin_lock_bh(&xfrm_state_lock);
784
785         x1 = __xfrm_state_locate(x, use_spi, family);
786         if (x1) {
787                 xfrm_state_put(x1);
788                 x1 = NULL;
789                 err = -EEXIST;
790                 goto out;
791         }
792
793         if (use_spi && x->km.seq) {
794                 x1 = __xfrm_find_acq_byseq(x->km.seq);
795                 if (x1 && xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family)) {
796                         xfrm_state_put(x1);
797                         x1 = NULL;
798                 }
799         }
800
801         if (use_spi && !x1)
802                 x1 = __find_acq_core(family, x->props.mode, x->props.reqid,
803                                      x->id.proto,
804                                      &x->id.daddr, &x->props.saddr, 0);
805
806         __xfrm_state_bump_genids(x);
807         __xfrm_state_insert(x);
808         err = 0;
809
810 out:
811         spin_unlock_bh(&xfrm_state_lock);
812
813         if (x1) {
814                 xfrm_state_delete(x1);
815                 xfrm_state_put(x1);
816         }
817
818         return err;
819 }
820 EXPORT_SYMBOL(xfrm_state_add);
821
/*
 * Update an installed state from the values carried by @x.
 *
 * The existing state x1 is located under xfrm_state_lock (by SPI or by
 * address pair, depending on the protocol).  If x1 is still an ACQ
 * placeholder, @x is inserted in its place and x1 is deleted.  Otherwise,
 * provided x1 is VALID, its encapsulation, care-of address, selector (only
 * for protocols looked up by address) and lifetimes are overwritten from
 * @x, its dying flag is cleared and its timer re-armed.
 *
 * Returns 0 on success, -ESRCH when no matching state exists, -EEXIST when
 * xfrm_state_kern() is true for the match, or -EINVAL when the match is not
 * in VALID state.
 */
822 int xfrm_state_update(struct xfrm_state *x)
823 {
824         struct xfrm_state *x1;
825         int err;
826         int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
827
828         spin_lock_bh(&xfrm_state_lock);
829         x1 = __xfrm_state_locate(x, use_spi, x->props.family);
830
831         err = -ESRCH;
832         if (!x1)
833                 goto out;
834
835         if (xfrm_state_kern(x1)) {
836                 xfrm_state_put(x1);
837                 err = -EEXIST;
838                 goto out;
839         }
840
            /* Replacing an acquire placeholder: install x itself.  x is set
             * to NULL to tell the code after the unlock that x1 must be
             * deleted rather than updated. */
841         if (x1->km.state == XFRM_STATE_ACQ) {
842                 __xfrm_state_insert(x);
843                 x = NULL;
844         }
845         err = 0;
846
847 out:
848         spin_unlock_bh(&xfrm_state_lock);
849
850         if (err)
851                 return err;
852
853         if (!x) {
854                 xfrm_state_delete(x1);
855                 xfrm_state_put(x1);
856                 return 0;
857         }
858
859         err = -EINVAL;
860         spin_lock_bh(&x1->lock);
861         if (likely(x1->km.state == XFRM_STATE_VALID)) {
                    /* Optional attachments are copied only when both states
                     * already have them allocated. */
862                 if (x->encap && x1->encap)
863                         memcpy(x1->encap, x->encap, sizeof(*x1->encap));
864                 if (x->coaddr && x1->coaddr) {
865                         memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr));
866                 }
867                 if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel)))
868                         memcpy(&x1->sel, &x->sel, sizeof(x1->sel));
869                 memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
870                 x1->km.dying = 0;
871
                    /* Re-evaluate the new lifetimes one second from now. */
872                 mod_timer(&x1->timer, jiffies + HZ);
873                 if (x1->curlft.use_time)
874                         xfrm_state_check_expire(x1);
875
876                 err = 0;
877         }
878         spin_unlock_bh(&x1->lock);
879
            /* Drop the reference taken by the lookup above. */
880         xfrm_state_put(x1);
881
882         return err;
883 }
884 EXPORT_SYMBOL(xfrm_state_update);
885
886 int xfrm_state_check_expire(struct xfrm_state *x)
887 {
888         if (!x->curlft.use_time)
889                 x->curlft.use_time = (unsigned long)xtime.tv_sec;
890
891         if (x->km.state != XFRM_STATE_VALID)
892                 return -EINVAL;
893
894         if (x->curlft.bytes >= x->lft.hard_byte_limit ||
895             x->curlft.packets >= x->lft.hard_packet_limit) {
896                 x->km.state = XFRM_STATE_EXPIRED;
897                 mod_timer(&x->timer, jiffies);
898                 return -EINVAL;
899         }
900
901         if (!x->km.dying &&
902             (x->curlft.bytes >= x->lft.soft_byte_limit ||
903              x->curlft.packets >= x->lft.soft_packet_limit)) {
904                 x->km.dying = 1;
905                 km_state_expired(x, 0, 0);
906         }
907         return 0;
908 }
909 EXPORT_SYMBOL(xfrm_state_check_expire);
910
911 static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
912 {
913         int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev)
914                 - skb_headroom(skb);
915
916         if (nhead > 0)
917                 return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
918
919         /* Check tail too... */
920         return 0;
921 }
922
/*
 * Run the expiry check on @x and, if that passes, the headroom check
 * for @skb.  Returns the first negative result, or the result of the
 * headroom check.
 */
int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb)
{
        int rc = xfrm_state_check_expire(x);

        if (rc < 0)
                return rc;

        return xfrm_state_check_space(x, skb);
}
EXPORT_SYMBOL(xfrm_state_check);
933
934 struct xfrm_state *
935 xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto,
936                   unsigned short family)
937 {
938         struct xfrm_state *x;
939
940         spin_lock_bh(&xfrm_state_lock);
941         x = __xfrm_state_lookup(daddr, spi, proto, family);
942         spin_unlock_bh(&xfrm_state_lock);
943         return x;
944 }
945 EXPORT_SYMBOL(xfrm_state_lookup);
946
947 struct xfrm_state *
948 xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr,
949                          u8 proto, unsigned short family)
950 {
951         struct xfrm_state *x;
952
953         spin_lock_bh(&xfrm_state_lock);
954         x = __xfrm_state_lookup_byaddr(daddr, saddr, proto, family);
955         spin_unlock_bh(&xfrm_state_lock);
956         return x;
957 }
958 EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
959
960 struct xfrm_state *
961 xfrm_find_acq(u8 mode, u32 reqid, u8 proto, 
962               xfrm_address_t *daddr, xfrm_address_t *saddr, 
963               int create, unsigned short family)
964 {
965         struct xfrm_state *x;
966
967         spin_lock_bh(&xfrm_state_lock);
968         x = __find_acq_core(family, mode, reqid, proto, daddr, saddr, create);
969         spin_unlock_bh(&xfrm_state_lock);
970
971         return x;
972 }
973 EXPORT_SYMBOL(xfrm_find_acq);
974
975 #ifdef CONFIG_XFRM_SUB_POLICY
976 int
977 xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
978                unsigned short family)
979 {
980         int err = 0;
981         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
982         if (!afinfo)
983                 return -EAFNOSUPPORT;
984
985         spin_lock_bh(&xfrm_state_lock);
986         if (afinfo->tmpl_sort)
987                 err = afinfo->tmpl_sort(dst, src, n);
988         spin_unlock_bh(&xfrm_state_lock);
989         xfrm_state_put_afinfo(afinfo);
990         return err;
991 }
992 EXPORT_SYMBOL(xfrm_tmpl_sort);
993
994 int
995 xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
996                 unsigned short family)
997 {
998         int err = 0;
999         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
1000         if (!afinfo)
1001                 return -EAFNOSUPPORT;
1002
1003         spin_lock_bh(&xfrm_state_lock);
1004         if (afinfo->state_sort)
1005                 err = afinfo->state_sort(dst, src, n);
1006         spin_unlock_bh(&xfrm_state_lock);
1007         xfrm_state_put_afinfo(afinfo);
1008         return err;
1009 }
1010 EXPORT_SYMBOL(xfrm_state_sort);
1011 #endif
1012
1013 /* Silly enough, but I'm lazy to build resolution list */
1014
1015 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
1016 {
1017         int i;
1018
1019         for (i = 0; i <= xfrm_state_hmask; i++) {
1020                 struct hlist_node *entry;
1021                 struct xfrm_state *x;
1022
1023                 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
1024                         if (x->km.seq == seq &&
1025                             x->km.state == XFRM_STATE_ACQ) {
1026                                 xfrm_state_hold(x);
1027                                 return x;
1028                         }
1029                 }
1030         }
1031         return NULL;
1032 }
1033
1034 struct xfrm_state *xfrm_find_acq_byseq(u32 seq)
1035 {
1036         struct xfrm_state *x;
1037
1038         spin_lock_bh(&xfrm_state_lock);
1039         x = __xfrm_find_acq_byseq(seq);
1040         spin_unlock_bh(&xfrm_state_lock);
1041         return x;
1042 }
1043 EXPORT_SYMBOL(xfrm_find_acq_byseq);
1044
1045 u32 xfrm_get_acqseq(void)
1046 {
1047         u32 res;
1048         static u32 acqseq;
1049         static DEFINE_SPINLOCK(acqseq_lock);
1050
1051         spin_lock_bh(&acqseq_lock);
1052         res = (++acqseq ? : ++acqseq);
1053         spin_unlock_bh(&acqseq_lock);
1054         return res;
1055 }
1056 EXPORT_SYMBOL(xfrm_get_acqseq);
1057
1058 void
1059 xfrm_alloc_spi(struct xfrm_state *x, __be32 minspi, __be32 maxspi)
1060 {
1061         unsigned int h;
1062         struct xfrm_state *x0;
1063
1064         if (x->id.spi)
1065                 return;
1066
1067         if (minspi == maxspi) {
1068                 x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
1069                 if (x0) {
1070                         xfrm_state_put(x0);
1071                         return;
1072                 }
1073                 x->id.spi = minspi;
1074         } else {
1075                 u32 spi = 0;
1076                 u32 low = ntohl(minspi);
1077                 u32 high = ntohl(maxspi);
1078                 for (h=0; h<high-low+1; h++) {
1079                         spi = low + net_random()%(high-low+1);
1080                         x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
1081                         if (x0 == NULL) {
1082                                 x->id.spi = htonl(spi);
1083                                 break;
1084                         }
1085                         xfrm_state_put(x0);
1086                 }
1087         }
1088         if (x->id.spi) {
1089                 spin_lock_bh(&xfrm_state_lock);
1090                 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
1091                 hlist_add_head(&x->byspi, xfrm_state_byspi+h);
1092                 spin_unlock_bh(&xfrm_state_lock);
1093                 wake_up(&km_waitq);
1094         }
1095 }
1096 EXPORT_SYMBOL(xfrm_alloc_spi);
1097
1098 int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
1099                     void *data)
1100 {
1101         int i;
1102         struct xfrm_state *x;
1103         struct hlist_node *entry;
1104         int count = 0;
1105         int err = 0;
1106
1107         spin_lock_bh(&xfrm_state_lock);
1108         for (i = 0; i <= xfrm_state_hmask; i++) {
1109                 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
1110                         if (xfrm_id_proto_match(x->id.proto, proto))
1111                                 count++;
1112                 }
1113         }
1114         if (count == 0) {
1115                 err = -ENOENT;
1116                 goto out;
1117         }
1118
1119         for (i = 0; i <= xfrm_state_hmask; i++) {
1120                 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
1121                         if (!xfrm_id_proto_match(x->id.proto, proto))
1122                                 continue;
1123                         err = func(x, --count, data);
1124                         if (err)
1125                                 goto out;
1126                 }
1127         }
1128 out:
1129         spin_unlock_bh(&xfrm_state_lock);
1130         return err;
1131 }
1132 EXPORT_SYMBOL(xfrm_state_walk);
1133
1134
1135 void xfrm_replay_notify(struct xfrm_state *x, int event)
1136 {
1137         struct km_event c;
1138         /* we send notify messages in case
1139          *  1. we updated on of the sequence numbers, and the seqno difference
1140          *     is at least x->replay_maxdiff, in this case we also update the
1141          *     timeout of our timer function
1142          *  2. if x->replay_maxage has elapsed since last update,
1143          *     and there were changes
1144          *
1145          *  The state structure must be locked!
1146          */
1147
1148         switch (event) {
1149         case XFRM_REPLAY_UPDATE:
1150                 if (x->replay_maxdiff &&
1151                     (x->replay.seq - x->preplay.seq < x->replay_maxdiff) &&
1152                     (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff)) {
1153                         if (x->xflags & XFRM_TIME_DEFER)
1154                                 event = XFRM_REPLAY_TIMEOUT;
1155                         else
1156                                 return;
1157                 }
1158
1159                 break;
1160
1161         case XFRM_REPLAY_TIMEOUT:
1162                 if ((x->replay.seq == x->preplay.seq) &&
1163                     (x->replay.bitmap == x->preplay.bitmap) &&
1164                     (x->replay.oseq == x->preplay.oseq)) {
1165                         x->xflags |= XFRM_TIME_DEFER;
1166                         return;
1167                 }
1168
1169                 break;
1170         }
1171
1172         memcpy(&x->preplay, &x->replay, sizeof(struct xfrm_replay_state));
1173         c.event = XFRM_MSG_NEWAE;
1174         c.data.aevent = event;
1175         km_state_notify(x, &c);
1176
1177         if (x->replay_maxage &&
1178             !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
1179                 x->xflags &= ~XFRM_TIME_DEFER;
1180 }
1181 EXPORT_SYMBOL(xfrm_replay_notify);
1182
1183 static void xfrm_replay_timer_handler(unsigned long data)
1184 {
1185         struct xfrm_state *x = (struct xfrm_state*)data;
1186
1187         spin_lock(&x->lock);
1188
1189         if (x->km.state == XFRM_STATE_VALID) {
1190                 if (xfrm_aevent_is_on())
1191                         xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT);
1192                 else
1193                         x->xflags |= XFRM_TIME_DEFER;
1194         }
1195
1196         spin_unlock(&x->lock);
1197 }
1198
1199 int xfrm_replay_check(struct xfrm_state *x, __be32 net_seq)
1200 {
1201         u32 diff;
1202         u32 seq = ntohl(net_seq);
1203
1204         if (unlikely(seq == 0))
1205                 return -EINVAL;
1206
1207         if (likely(seq > x->replay.seq))
1208                 return 0;
1209
1210         diff = x->replay.seq - seq;
1211         if (diff >= x->props.replay_window) {
1212                 x->stats.replay_window++;
1213                 return -EINVAL;
1214         }
1215
1216         if (x->replay.bitmap & (1U << diff)) {
1217                 x->stats.replay++;
1218                 return -EINVAL;
1219         }
1220         return 0;
1221 }
1222 EXPORT_SYMBOL(xfrm_replay_check);
1223
1224 void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq)
1225 {
1226         u32 diff;
1227         u32 seq = ntohl(net_seq);
1228
1229         if (seq > x->replay.seq) {
1230                 diff = seq - x->replay.seq;
1231                 if (diff < x->props.replay_window)
1232                         x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
1233                 else
1234                         x->replay.bitmap = 1;
1235                 x->replay.seq = seq;
1236         } else {
1237                 diff = x->replay.seq - seq;
1238                 x->replay.bitmap |= (1U << diff);
1239         }
1240
1241         if (xfrm_aevent_is_on())
1242                 xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
1243 }
1244 EXPORT_SYMBOL(xfrm_replay_advance);
1245
/*
 * List of registered key managers (struct xfrm_mgr) and the rwlock
 * guarding it: the km_*() helpers walk the list under the read lock,
 * xfrm_register_km()/xfrm_unregister_km() modify it under the write lock.
 */
static LIST_HEAD(xfrm_km_list);
static DEFINE_RWLOCK(xfrm_km_lock);
1248
1249 void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
1250 {
1251         struct xfrm_mgr *km;
1252
1253         read_lock(&xfrm_km_lock);
1254         list_for_each_entry(km, &xfrm_km_list, list)
1255                 if (km->notify_policy)
1256                         km->notify_policy(xp, dir, c);
1257         read_unlock(&xfrm_km_lock);
1258 }
1259
1260 void km_state_notify(struct xfrm_state *x, struct km_event *c)
1261 {
1262         struct xfrm_mgr *km;
1263         read_lock(&xfrm_km_lock);
1264         list_for_each_entry(km, &xfrm_km_list, list)
1265                 if (km->notify)
1266                         km->notify(x, c);
1267         read_unlock(&xfrm_km_lock);
1268 }
1269
1270 EXPORT_SYMBOL(km_policy_notify);
1271 EXPORT_SYMBOL(km_state_notify);
1272
1273 void km_state_expired(struct xfrm_state *x, int hard, u32 pid)
1274 {
1275         struct km_event c;
1276
1277         c.data.hard = hard;
1278         c.pid = pid;
1279         c.event = XFRM_MSG_EXPIRE;
1280         km_state_notify(x, &c);
1281
1282         if (hard)
1283                 wake_up(&km_waitq);
1284 }
1285
1286 EXPORT_SYMBOL(km_state_expired);
1287 /*
1288  * We send to all registered managers regardless of failure
1289  * We are happy with one success
1290 */
1291 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
1292 {
1293         int err = -EINVAL, acqret;
1294         struct xfrm_mgr *km;
1295
1296         read_lock(&xfrm_km_lock);
1297         list_for_each_entry(km, &xfrm_km_list, list) {
1298                 acqret = km->acquire(x, t, pol, XFRM_POLICY_OUT);
1299                 if (!acqret)
1300                         err = acqret;
1301         }
1302         read_unlock(&xfrm_km_lock);
1303         return err;
1304 }
1305 EXPORT_SYMBOL(km_query);
1306
1307 int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport)
1308 {
1309         int err = -EINVAL;
1310         struct xfrm_mgr *km;
1311
1312         read_lock(&xfrm_km_lock);
1313         list_for_each_entry(km, &xfrm_km_list, list) {
1314                 if (km->new_mapping)
1315                         err = km->new_mapping(x, ipaddr, sport);
1316                 if (!err)
1317                         break;
1318         }
1319         read_unlock(&xfrm_km_lock);
1320         return err;
1321 }
1322 EXPORT_SYMBOL(km_new_mapping);
1323
1324 void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid)
1325 {
1326         struct km_event c;
1327
1328         c.data.hard = hard;
1329         c.pid = pid;
1330         c.event = XFRM_MSG_POLEXPIRE;
1331         km_policy_notify(pol, dir, &c);
1332
1333         if (hard)
1334                 wake_up(&km_waitq);
1335 }
1336 EXPORT_SYMBOL(km_policy_expired);
1337
1338 int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr)
1339 {
1340         int err = -EINVAL;
1341         int ret;
1342         struct xfrm_mgr *km;
1343
1344         read_lock(&xfrm_km_lock);
1345         list_for_each_entry(km, &xfrm_km_list, list) {
1346                 if (km->report) {
1347                         ret = km->report(proto, sel, addr);
1348                         if (!ret)
1349                                 err = ret;
1350                 }
1351         }
1352         read_unlock(&xfrm_km_lock);
1353         return err;
1354 }
1355 EXPORT_SYMBOL(km_report);
1356
1357 int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
1358 {
1359         int err;
1360         u8 *data;
1361         struct xfrm_mgr *km;
1362         struct xfrm_policy *pol = NULL;
1363
1364         if (optlen <= 0 || optlen > PAGE_SIZE)
1365                 return -EMSGSIZE;
1366
1367         data = kmalloc(optlen, GFP_KERNEL);
1368         if (!data)
1369                 return -ENOMEM;
1370
1371         err = -EFAULT;
1372         if (copy_from_user(data, optval, optlen))
1373                 goto out;
1374
1375         err = -EINVAL;
1376         read_lock(&xfrm_km_lock);
1377         list_for_each_entry(km, &xfrm_km_list, list) {
1378                 pol = km->compile_policy(sk, optname, data,
1379                                          optlen, &err);
1380                 if (err >= 0)
1381                         break;
1382         }
1383         read_unlock(&xfrm_km_lock);
1384
1385         if (err >= 0) {
1386                 xfrm_sk_policy_insert(sk, err, pol);
1387                 xfrm_pol_put(pol);
1388                 err = 0;
1389         }
1390
1391 out:
1392         kfree(data);
1393         return err;
1394 }
1395 EXPORT_SYMBOL(xfrm_user_policy);
1396
1397 int xfrm_register_km(struct xfrm_mgr *km)
1398 {
1399         write_lock_bh(&xfrm_km_lock);
1400         list_add_tail(&km->list, &xfrm_km_list);
1401         write_unlock_bh(&xfrm_km_lock);
1402         return 0;
1403 }
1404 EXPORT_SYMBOL(xfrm_register_km);
1405
1406 int xfrm_unregister_km(struct xfrm_mgr *km)
1407 {
1408         write_lock_bh(&xfrm_km_lock);
1409         list_del(&km->list);
1410         write_unlock_bh(&xfrm_km_lock);
1411         return 0;
1412 }
1413 EXPORT_SYMBOL(xfrm_unregister_km);
1414
1415 int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
1416 {
1417         int err = 0;
1418         if (unlikely(afinfo == NULL))
1419                 return -EINVAL;
1420         if (unlikely(afinfo->family >= NPROTO))
1421                 return -EAFNOSUPPORT;
1422         write_lock_bh(&xfrm_state_afinfo_lock);
1423         if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
1424                 err = -ENOBUFS;
1425         else
1426                 xfrm_state_afinfo[afinfo->family] = afinfo;
1427         write_unlock_bh(&xfrm_state_afinfo_lock);
1428         return err;
1429 }
1430 EXPORT_SYMBOL(xfrm_state_register_afinfo);
1431
1432 int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
1433 {
1434         int err = 0;
1435         if (unlikely(afinfo == NULL))
1436                 return -EINVAL;
1437         if (unlikely(afinfo->family >= NPROTO))
1438                 return -EAFNOSUPPORT;
1439         write_lock_bh(&xfrm_state_afinfo_lock);
1440         if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
1441                 if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
1442                         err = -EINVAL;
1443                 else
1444                         xfrm_state_afinfo[afinfo->family] = NULL;
1445         }
1446         write_unlock_bh(&xfrm_state_afinfo_lock);
1447         return err;
1448 }
1449 EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
1450
1451 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
1452 {
1453         struct xfrm_state_afinfo *afinfo;
1454         if (unlikely(family >= NPROTO))
1455                 return NULL;
1456         read_lock(&xfrm_state_afinfo_lock);
1457         afinfo = xfrm_state_afinfo[family];
1458         if (unlikely(!afinfo))
1459                 read_unlock(&xfrm_state_afinfo_lock);
1460         return afinfo;
1461 }
1462
1463 static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
1464 {
1465         read_unlock(&xfrm_state_afinfo_lock);
1466 }
1467
1468 /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
1469 void xfrm_state_delete_tunnel(struct xfrm_state *x)
1470 {
1471         if (x->tunnel) {
1472                 struct xfrm_state *t = x->tunnel;
1473
1474                 if (atomic_read(&t->tunnel_users) == 2)
1475                         xfrm_state_delete(t);
1476                 atomic_dec(&t->tunnel_users);
1477                 xfrm_state_put(t);
1478                 x->tunnel = NULL;
1479         }
1480 }
1481 EXPORT_SYMBOL(xfrm_state_delete_tunnel);
1482
1483 /*
1484  * This function is NOT optimal.  For example, with ESP it will give an
1485  * MTU that's usually two bytes short of being optimal.  However, it will
1486  * usually give an answer that's a multiple of 4 provided the input is
1487  * also a multiple of 4.
1488  */
1489 int xfrm_state_mtu(struct xfrm_state *x, int mtu)
1490 {
1491         int res = mtu;
1492
1493         res -= x->props.header_len;
1494
1495         for (;;) {
1496                 int m = res;
1497
1498                 if (m < 68)
1499                         return 68;
1500
1501                 spin_lock_bh(&x->lock);
1502                 if (x->km.state == XFRM_STATE_VALID &&
1503                     x->type && x->type->get_max_size)
1504                         m = x->type->get_max_size(x, m);
1505                 else
1506                         m += x->props.header_len;
1507                 spin_unlock_bh(&x->lock);
1508
1509                 if (m <= mtu)
1510                         break;
1511                 res -= (m - mtu);
1512         }
1513
1514         return res;
1515 }
1516
1517 int xfrm_init_state(struct xfrm_state *x)
1518 {
1519         struct xfrm_state_afinfo *afinfo;
1520         int family = x->props.family;
1521         int err;
1522
1523         err = -EAFNOSUPPORT;
1524         afinfo = xfrm_state_get_afinfo(family);
1525         if (!afinfo)
1526                 goto error;
1527
1528         err = 0;
1529         if (afinfo->init_flags)
1530                 err = afinfo->init_flags(x);
1531
1532         xfrm_state_put_afinfo(afinfo);
1533
1534         if (err)
1535                 goto error;
1536
1537         err = -EPROTONOSUPPORT;
1538         x->type = xfrm_get_type(x->id.proto, family);
1539         if (x->type == NULL)
1540                 goto error;
1541
1542         err = x->type->init_state(x);
1543         if (err)
1544                 goto error;
1545
1546         x->mode = xfrm_get_mode(x->props.mode, family);
1547         if (x->mode == NULL)
1548                 goto error;
1549
1550         x->km.state = XFRM_STATE_VALID;
1551
1552 error:
1553         return err;
1554 }
1555
1556 EXPORT_SYMBOL(xfrm_init_state);
1557  
1558 void __init xfrm_state_init(void)
1559 {
1560         unsigned int sz;
1561
1562         sz = sizeof(struct hlist_head) * 8;
1563
1564         xfrm_state_bydst = xfrm_hash_alloc(sz);
1565         xfrm_state_bysrc = xfrm_hash_alloc(sz);
1566         xfrm_state_byspi = xfrm_hash_alloc(sz);
1567         if (!xfrm_state_bydst || !xfrm_state_bysrc || !xfrm_state_byspi)
1568                 panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes.");
1569         xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
1570
1571         INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task);
1572 }
1573