Merge branch 'master' of master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
[linux-drm-fsl-dcu.git] / net / netfilter / nf_conntrack_proto_tcp.c
1 /* (C) 1999-2001 Paul `Rusty' Russell
2  * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License version 2 as
6  * published by the Free Software Foundation.
7  */
8
9 #include <linux/types.h>
10 #include <linux/timer.h>
11 #include <linux/netfilter.h>
12 #include <linux/module.h>
13 #include <linux/in.h>
14 #include <linux/tcp.h>
15 #include <linux/spinlock.h>
16 #include <linux/skbuff.h>
17 #include <linux/ipv6.h>
18 #include <net/ip6_checksum.h>
19
20 #include <net/tcp.h>
21
22 #include <linux/netfilter.h>
23 #include <linux/netfilter_ipv4.h>
24 #include <linux/netfilter_ipv6.h>
25 #include <net/netfilter/nf_conntrack.h>
26 #include <net/netfilter/nf_conntrack_l4proto.h>
27 #include <net/netfilter/nf_conntrack_ecache.h>
28
/*
 * Debug tracing switch.  Change the condition below to 1 to map DEBUGP()
 * onto printk() and to define DEBUGP_VARS (which enables debug-only
 * locals); as shipped, DEBUGP() expands to nothing and its arguments are
 * never evaluated.
 */
#if 0
#define DEBUGP		printk
#define DEBUGP_VARS
#else
#define DEBUGP(fmt, args...)
#endif
35
36 /* Protects conntrack->proto.tcp */
37 static DEFINE_RWLOCK(tcp_lock);
38
39 /* "Be conservative in what you do,
40     be liberal in what you accept from others."
41     If it's non-zero, we mark only out of window RST segments as INVALID. */
42 static int nf_ct_tcp_be_liberal __read_mostly = 0;
43
44 /* If it is set to zero, we disable picking up already established
45    connections. */
46 static int nf_ct_tcp_loose __read_mostly = 1;
47
48 /* Max number of the retransmitted packets without receiving an (acceptable)
49    ACK from the destination. If this number is reached, a shorter timer
50    will be started. */
51 static int nf_ct_tcp_max_retrans __read_mostly = 3;
52
53   /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
54      closely.  They're more complex. --RR */
55
/*
 * Printable names of the conntrack TCP states.  tcp_print_conntrack()
 * indexes this table with the connection's state value.
 *
 * The table is never modified, so the pointers themselves are const as
 * well as the strings they point to.
 */
static const char *const tcp_conntrack_names[] = {
	"NONE",
	"SYN_SENT",
	"SYN_RECV",
	"ESTABLISHED",
	"FIN_WAIT",
	"CLOSE_WAIT",
	"LAST_ACK",
	"TIME_WAIT",
	"CLOSE",
	"LISTEN"
};
68
69 #define SECS * HZ
70 #define MINS * 60 SECS
71 #define HOURS * 60 MINS
72 #define DAYS * 24 HOURS
73
74 static unsigned int nf_ct_tcp_timeout_syn_sent __read_mostly =      2 MINS;
75 static unsigned int nf_ct_tcp_timeout_syn_recv __read_mostly =     60 SECS;
76 static unsigned int nf_ct_tcp_timeout_established __read_mostly =   5 DAYS;
77 static unsigned int nf_ct_tcp_timeout_fin_wait __read_mostly =      2 MINS;
78 static unsigned int nf_ct_tcp_timeout_close_wait __read_mostly =   60 SECS;
79 static unsigned int nf_ct_tcp_timeout_last_ack __read_mostly =     30 SECS;
80 static unsigned int nf_ct_tcp_timeout_time_wait __read_mostly =     2 MINS;
81 static unsigned int nf_ct_tcp_timeout_close __read_mostly =        10 SECS;
82
83 /* RFC1122 says the R2 limit should be at least 100 seconds.
84    Linux uses 15 packets as limit, which corresponds
85    to ~13-30min depending on RTO. */
86 static unsigned int nf_ct_tcp_timeout_max_retrans __read_mostly =   5 MINS;
87
88 static unsigned int * tcp_timeouts[] = {
89     NULL,                              /* TCP_CONNTRACK_NONE */
90     &nf_ct_tcp_timeout_syn_sent,       /* TCP_CONNTRACK_SYN_SENT, */
91     &nf_ct_tcp_timeout_syn_recv,       /* TCP_CONNTRACK_SYN_RECV, */
92     &nf_ct_tcp_timeout_established,    /* TCP_CONNTRACK_ESTABLISHED, */
93     &nf_ct_tcp_timeout_fin_wait,       /* TCP_CONNTRACK_FIN_WAIT, */
94     &nf_ct_tcp_timeout_close_wait,     /* TCP_CONNTRACK_CLOSE_WAIT, */
95     &nf_ct_tcp_timeout_last_ack,       /* TCP_CONNTRACK_LAST_ACK, */
96     &nf_ct_tcp_timeout_time_wait,      /* TCP_CONNTRACK_TIME_WAIT, */
97     &nf_ct_tcp_timeout_close,          /* TCP_CONNTRACK_CLOSE, */
98     NULL,                              /* TCP_CONNTRACK_LISTEN */
99  };
100
/*
 * Two-letter shorthands for the conntrack states, used to keep the
 * tcp_conntracks[] transition table readable.  sIV (TCP_CONNTRACK_MAX)
 * marks a packet as INVALID, sIG (TCP_CONNTRACK_IGNORE) as IGNORED.
 */
#define sNO	TCP_CONNTRACK_NONE
#define sSS	TCP_CONNTRACK_SYN_SENT
#define sSR	TCP_CONNTRACK_SYN_RECV
#define sES	TCP_CONNTRACK_ESTABLISHED
#define sFW	TCP_CONNTRACK_FIN_WAIT
#define sCW	TCP_CONNTRACK_CLOSE_WAIT
#define sLA	TCP_CONNTRACK_LAST_ACK
#define sTW	TCP_CONNTRACK_TIME_WAIT
#define sCL	TCP_CONNTRACK_CLOSE
#define sLI	TCP_CONNTRACK_LISTEN
#define sIV	TCP_CONNTRACK_MAX
#define sIG	TCP_CONNTRACK_IGNORE
113
114 /* What TCP flags are set from RST/SYN/FIN/ACK. */
/*
 * Classification of a segment by which of RST/SYN/FIN/ACK it carries;
 * these are the values get_conntrack_index() returns.
 */
enum tcp_bit_set {
	TCP_SYN_SET	= 0,
	TCP_SYNACK_SET	= 1,
	TCP_FIN_SET	= 2,
	TCP_ACK_SET	= 3,
	TCP_RST_SET	= 4,
	TCP_NONE_SET	= 5,
};
123
124 /*
125  * The TCP state transition table needs a few words...
126  *
127  * We are the man in the middle. All the packets go through us
128  * but might get lost in transit to the destination.
129  * It is assumed that the destinations can't receive segments
130  * we haven't seen.
131  *
132  * The checked segment is in window, but our windows are *not*
133  * equivalent with the ones of the sender/receiver. We always
134  * try to guess the state of the current sender.
135  *
136  * The meanings of the states are:
137  *
138  * NONE:        initial state
139  * SYN_SENT:    SYN-only packet seen
140  * SYN_RECV:    SYN-ACK packet seen
141  * ESTABLISHED: ACK packet seen
142  * FIN_WAIT:    FIN packet seen
143  * CLOSE_WAIT:  ACK seen (after FIN)
144  * LAST_ACK:    FIN seen (after FIN)
145  * TIME_WAIT:   last ACK seen
146  * CLOSE:       closed connection
147  *
148  * LISTEN state is not used.
149  *
150  * Packets marked as IGNORED (sIG):
151  *      if they may be either invalid or valid
152  *      and the receiver may send back a connection
153  *      closing RST or a SYN/ACK.
154  *
155  * Packets marked as INVALID (sIV):
156  *      if they are invalid
157  *      or we do not support the request (simultaneous open)
158  */
159 static enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
160         {
161 /* ORIGINAL */
162 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
163 /*syn*/    { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sIV },
164 /*
165  *      sNO -> sSS      Initialize a new connection
166  *      sSS -> sSS      Retransmitted SYN
167  *      sSR -> sIG      Late retransmitted SYN?
168  *      sES -> sIG      Error: SYNs in window outside the SYN_SENT state
169  *                      are errors. Receiver will reply with RST
170  *                      and close the connection.
171  *                      Or we are not in sync and hold a dead connection.
172  *      sFW -> sIG
173  *      sCW -> sIG
174  *      sLA -> sIG
175  *      sTW -> sSS      Reopened connection (RFC 1122).
176  *      sCL -> sSS
177  */
178 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
179 /*synack*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV },
180 /*
181  * A SYN/ACK from the client is always invalid:
182  *      - either it tries to set up a simultaneous open, which is
183  *        not supported;
184  *      - or the firewall has just been inserted between the two hosts
185  *        during the session set-up. The SYN will be retransmitted
186  *        by the true client (or it'll time out).
187  */
188 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
189 /*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
190 /*
191  *      sNO -> sIV      Too late and no reason to do anything...
192  *      sSS -> sIV      Client migth not send FIN in this state:
193  *                      we enforce waiting for a SYN/ACK reply first.
194  *      sSR -> sFW      Close started.
195  *      sES -> sFW
196  *      sFW -> sLA      FIN seen in both directions, waiting for
197  *                      the last ACK.
198  *                      Migth be a retransmitted FIN as well...
199  *      sCW -> sLA
200  *      sLA -> sLA      Retransmitted FIN. Remain in the same state.
201  *      sTW -> sTW
202  *      sCL -> sCL
203  */
204 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
205 /*ack*/    { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV },
206 /*
207  *      sNO -> sES      Assumed.
208  *      sSS -> sIV      ACK is invalid: we haven't seen a SYN/ACK yet.
209  *      sSR -> sES      Established state is reached.
210  *      sES -> sES      :-)
211  *      sFW -> sCW      Normal close request answered by ACK.
212  *      sCW -> sCW
213  *      sLA -> sTW      Last ACK detected.
214  *      sTW -> sTW      Retransmitted last ACK. Remain in the same state.
215  *      sCL -> sCL
216  */
217 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
218 /*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV },
219 /*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
220         },
221         {
222 /* REPLY */
223 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
224 /*syn*/    { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV },
225 /*
226  *      sNO -> sIV      Never reached.
227  *      sSS -> sIV      Simultaneous open, not supported
228  *      sSR -> sIV      Simultaneous open, not supported.
229  *      sES -> sIV      Server may not initiate a connection.
230  *      sFW -> sIV
231  *      sCW -> sIV
232  *      sLA -> sIV
233  *      sTW -> sIV      Reopened connection, but server may not do it.
234  *      sCL -> sIV
235  */
236 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
237 /*synack*/ { sIV, sSR, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIV },
238 /*
239  *      sSS -> sSR      Standard open.
240  *      sSR -> sSR      Retransmitted SYN/ACK.
241  *      sES -> sIG      Late retransmitted SYN/ACK?
242  *      sFW -> sIG      Might be SYN/ACK answering ignored SYN
243  *      sCW -> sIG
244  *      sLA -> sIG
245  *      sTW -> sIG
246  *      sCL -> sIG
247  */
248 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
249 /*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
250 /*
251  *      sSS -> sIV      Server might not send FIN in this state.
252  *      sSR -> sFW      Close started.
253  *      sES -> sFW
254  *      sFW -> sLA      FIN seen in both directions.
255  *      sCW -> sLA
256  *      sLA -> sLA      Retransmitted FIN.
257  *      sTW -> sTW
258  *      sCL -> sCL
259  */
260 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
261 /*ack*/    { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIV },
262 /*
263  *      sSS -> sIG      Might be a half-open connection.
264  *      sSR -> sSR      Might answer late resent SYN.
265  *      sES -> sES      :-)
266  *      sFW -> sCW      Normal close request answered by ACK.
267  *      sCW -> sCW
268  *      sLA -> sTW      Last ACK detected.
269  *      sTW -> sTW      Retransmitted last ACK.
270  *      sCL -> sCL
271  */
272 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI   */
273 /*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV },
274 /*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
275         }
276 };
277
278 static int tcp_pkt_to_tuple(const struct sk_buff *skb,
279                             unsigned int dataoff,
280                             struct nf_conntrack_tuple *tuple)
281 {
282         struct tcphdr _hdr, *hp;
283
284         /* Actually only need first 8 bytes. */
285         hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
286         if (hp == NULL)
287                 return 0;
288
289         tuple->src.u.tcp.port = hp->source;
290         tuple->dst.u.tcp.port = hp->dest;
291
292         return 1;
293 }
294
295 static int tcp_invert_tuple(struct nf_conntrack_tuple *tuple,
296                             const struct nf_conntrack_tuple *orig)
297 {
298         tuple->src.u.tcp.port = orig->dst.u.tcp.port;
299         tuple->dst.u.tcp.port = orig->src.u.tcp.port;
300         return 1;
301 }
302
303 /* Print out the per-protocol part of the tuple. */
304 static int tcp_print_tuple(struct seq_file *s,
305                            const struct nf_conntrack_tuple *tuple)
306 {
307         return seq_printf(s, "sport=%hu dport=%hu ",
308                           ntohs(tuple->src.u.tcp.port),
309                           ntohs(tuple->dst.u.tcp.port));
310 }
311
312 /* Print out the private part of the conntrack. */
313 static int tcp_print_conntrack(struct seq_file *s,
314                                const struct nf_conn *conntrack)
315 {
316         enum tcp_conntrack state;
317
318         read_lock_bh(&tcp_lock);
319         state = conntrack->proto.tcp.state;
320         read_unlock_bh(&tcp_lock);
321
322         return seq_printf(s, "%s ", tcp_conntrack_names[state]);
323 }
324
325 static unsigned int get_conntrack_index(const struct tcphdr *tcph)
326 {
327         if (tcph->rst) return TCP_RST_SET;
328         else if (tcph->syn) return (tcph->ack ? TCP_SYNACK_SET : TCP_SYN_SET);
329         else if (tcph->fin) return TCP_FIN_SET;
330         else if (tcph->ack) return TCP_ACK_SET;
331         else return TCP_NONE_SET;
332 }
333
334 /* TCP connection tracking based on 'Real Stateful TCP Packet Filtering
335    in IP Filter' by Guido van Rooij.
336
337    http://www.nluug.nl/events/sane2000/papers.html
338    http://www.iae.nl/users/guido/papers/tcp_filtering.ps.gz
339
340    The boundaries and the conditions are changed according to RFC793:
341    the packet must intersect the window (i.e. segments may be
342    after the right or before the left edge) and thus receivers may ACK
343    segments after the right edge of the window.
344
345         td_maxend = max(sack + max(win,1)) seen in reply packets
346         td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets
347         td_maxwin += seq + len - sender.td_maxend
348                         if seq + len > sender.td_maxend
349         td_end    = max(seq + len) seen in sent packets
350
351    I.   Upper bound for valid data:     seq <= sender.td_maxend
352    II.  Lower bound for valid data:     seq + len >= sender.td_end - receiver.td_maxwin
353    III. Upper bound for valid ack:      sack <= receiver.td_end
354    IV.  Lower bound for valid ack:      ack >= receiver.td_end - MAXACKWINDOW
355
356    where sack is the highest right edge of sack block found in the packet.
357
358    The upper bound limit for a valid ack is not ignored -
359    we don't have to deal with fragments.
360 */
361
362 static inline __u32 segment_seq_plus_len(__u32 seq,
363                                          size_t len,
364                                          unsigned int dataoff,
365                                          struct tcphdr *tcph)
366 {
367         /* XXX Should I use payload length field in IP/IPv6 header ?
368          * - YK */
369         return (seq + len - dataoff - tcph->doff*4
370                 + (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0));
371 }
372
373 /* Fixme: what about big packets? */
/*
 * Width of the window used for the lower bound of a valid ack: the
 * sender's td_maxwin, but never less than MAXACKWINCONST.
 * Fixme: what about big packets?
 */
#define MAXACKWINCONST			66000
#define MAXACKWINDOW(sender)						\
	((sender)->td_maxwin <= MAXACKWINCONST ? MAXACKWINCONST		\
					       : (sender)->td_maxwin)
378
379 /*
380  * Simplified tcp_parse_options routine from tcp_input.c
381  */
382 static void tcp_options(const struct sk_buff *skb,
383                         unsigned int dataoff,
384                         struct tcphdr *tcph,
385                         struct ip_ct_tcp_state *state)
386 {
387         unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
388         unsigned char *ptr;
389         int length = (tcph->doff*4) - sizeof(struct tcphdr);
390
391         if (!length)
392                 return;
393
394         ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
395                                  length, buff);
396         BUG_ON(ptr == NULL);
397
398         state->td_scale =
399         state->flags = 0;
400
401         while (length > 0) {
402                 int opcode=*ptr++;
403                 int opsize;
404
405                 switch (opcode) {
406                 case TCPOPT_EOL:
407                         return;
408                 case TCPOPT_NOP:        /* Ref: RFC 793 section 3.1 */
409                         length--;
410                         continue;
411                 default:
412                         opsize=*ptr++;
413                         if (opsize < 2) /* "silly options" */
414                                 return;
415                         if (opsize > length)
416                                 break;  /* don't parse partial options */
417
418                         if (opcode == TCPOPT_SACK_PERM
419                             && opsize == TCPOLEN_SACK_PERM)
420                                 state->flags |= IP_CT_TCP_FLAG_SACK_PERM;
421                         else if (opcode == TCPOPT_WINDOW
422                                  && opsize == TCPOLEN_WINDOW) {
423                                 state->td_scale = *(u_int8_t *)ptr;
424
425                                 if (state->td_scale > 14) {
426                                         /* See RFC1323 */
427                                         state->td_scale = 14;
428                                 }
429                                 state->flags |=
430                                         IP_CT_TCP_FLAG_WINDOW_SCALE;
431                         }
432                         ptr += opsize - 2;
433                         length -= opsize;
434                 }
435         }
436 }
437
438 static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
439                      struct tcphdr *tcph, __u32 *sack)
440 {
441         unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
442         unsigned char *ptr;
443         int length = (tcph->doff*4) - sizeof(struct tcphdr);
444         __u32 tmp;
445
446         if (!length)
447                 return;
448
449         ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
450                                  length, buff);
451         BUG_ON(ptr == NULL);
452
453         /* Fast path for timestamp-only option */
454         if (length == TCPOLEN_TSTAMP_ALIGNED*4
455             && *(__be32 *)ptr == htonl((TCPOPT_NOP << 24)
456                                        | (TCPOPT_NOP << 16)
457                                        | (TCPOPT_TIMESTAMP << 8)
458                                        | TCPOLEN_TIMESTAMP))
459                 return;
460
461         while (length > 0) {
462                 int opcode = *ptr++;
463                 int opsize, i;
464
465                 switch (opcode) {
466                 case TCPOPT_EOL:
467                         return;
468                 case TCPOPT_NOP:        /* Ref: RFC 793 section 3.1 */
469                         length--;
470                         continue;
471                 default:
472                         opsize = *ptr++;
473                         if (opsize < 2) /* "silly options" */
474                                 return;
475                         if (opsize > length)
476                                 break;  /* don't parse partial options */
477
478                         if (opcode == TCPOPT_SACK
479                             && opsize >= (TCPOLEN_SACK_BASE
480                                           + TCPOLEN_SACK_PERBLOCK)
481                             && !((opsize - TCPOLEN_SACK_BASE)
482                                  % TCPOLEN_SACK_PERBLOCK)) {
483                                 for (i = 0;
484                                      i < (opsize - TCPOLEN_SACK_BASE);
485                                      i += TCPOLEN_SACK_PERBLOCK) {
486                                         tmp = ntohl(*((__be32 *)(ptr+i)+1));
487
488                                         if (after(tmp, *sack))
489                                                 *sack = tmp;
490                                 }
491                                 return;
492                         }
493                         ptr += opsize - 2;
494                         length -= opsize;
495                 }
496         }
497 }
498
499 static int tcp_in_window(struct ip_ct_tcp *state,
500                          enum ip_conntrack_dir dir,
501                          unsigned int index,
502                          const struct sk_buff *skb,
503                          unsigned int dataoff,
504                          struct tcphdr *tcph,
505                          int pf)
506 {
507         struct ip_ct_tcp_state *sender = &state->seen[dir];
508         struct ip_ct_tcp_state *receiver = &state->seen[!dir];
509         __u32 seq, ack, sack, end, win, swin;
510         int res;
511
512         /*
513          * Get the required data from the packet.
514          */
515         seq = ntohl(tcph->seq);
516         ack = sack = ntohl(tcph->ack_seq);
517         win = ntohs(tcph->window);
518         end = segment_seq_plus_len(seq, skb->len, dataoff, tcph);
519
520         if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
521                 tcp_sack(skb, dataoff, tcph, &sack);
522
523         DEBUGP("tcp_in_window: START\n");
524         DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
525                "seq=%u ack=%u sack=%u win=%u end=%u\n",
526                 NIPQUAD(iph->saddr), ntohs(tcph->source),
527                 NIPQUAD(iph->daddr), ntohs(tcph->dest),
528                 seq, ack, sack, win, end);
529         DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
530                "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
531                 sender->td_end, sender->td_maxend, sender->td_maxwin,
532                 sender->td_scale,
533                 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
534                 receiver->td_scale);
535
536         if (sender->td_end == 0) {
537                 /*
538                  * Initialize sender data.
539                  */
540                 if (tcph->syn && tcph->ack) {
541                         /*
542                          * Outgoing SYN-ACK in reply to a SYN.
543                          */
544                         sender->td_end =
545                         sender->td_maxend = end;
546                         sender->td_maxwin = (win == 0 ? 1 : win);
547
548                         tcp_options(skb, dataoff, tcph, sender);
549                         /*
550                          * RFC 1323:
551                          * Both sides must send the Window Scale option
552                          * to enable window scaling in either direction.
553                          */
554                         if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE
555                               && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE))
556                                 sender->td_scale =
557                                 receiver->td_scale = 0;
558                 } else {
559                         /*
560                          * We are in the middle of a connection,
561                          * its history is lost for us.
562                          * Let's try to use the data from the packet.
563                          */
564                         sender->td_end = end;
565                         sender->td_maxwin = (win == 0 ? 1 : win);
566                         sender->td_maxend = end + sender->td_maxwin;
567                 }
568         } else if (((state->state == TCP_CONNTRACK_SYN_SENT
569                      && dir == IP_CT_DIR_ORIGINAL)
570                    || (state->state == TCP_CONNTRACK_SYN_RECV
571                      && dir == IP_CT_DIR_REPLY))
572                    && after(end, sender->td_end)) {
573                 /*
574                  * RFC 793: "if a TCP is reinitialized ... then it need
575                  * not wait at all; it must only be sure to use sequence
576                  * numbers larger than those recently used."
577                  */
578                 sender->td_end =
579                 sender->td_maxend = end;
580                 sender->td_maxwin = (win == 0 ? 1 : win);
581
582                 tcp_options(skb, dataoff, tcph, sender);
583         }
584
585         if (!(tcph->ack)) {
586                 /*
587                  * If there is no ACK, just pretend it was set and OK.
588                  */
589                 ack = sack = receiver->td_end;
590         } else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) ==
591                     (TCP_FLAG_ACK|TCP_FLAG_RST))
592                    && (ack == 0)) {
593                 /*
594                  * Broken TCP stacks, that set ACK in RST packets as well
595                  * with zero ack value.
596                  */
597                 ack = sack = receiver->td_end;
598         }
599
600         if (seq == end
601             && (!tcph->rst
602                 || (seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)))
603                 /*
604                  * Packets contains no data: we assume it is valid
605                  * and check the ack value only.
606                  * However RST segments are always validated by their
607                  * SEQ number, except when seq == 0 (reset sent answering
608                  * SYN.
609                  */
610                 seq = end = sender->td_end;
611
612         DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
613                "seq=%u ack=%u sack =%u win=%u end=%u\n",
614                 NIPQUAD(iph->saddr), ntohs(tcph->source),
615                 NIPQUAD(iph->daddr), ntohs(tcph->dest),
616                 seq, ack, sack, win, end);
617         DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
618                "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
619                 sender->td_end, sender->td_maxend, sender->td_maxwin,
620                 sender->td_scale,
621                 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
622                 receiver->td_scale);
623
624         DEBUGP("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
625                 before(seq, sender->td_maxend + 1),
626                 after(end, sender->td_end - receiver->td_maxwin - 1),
627                 before(sack, receiver->td_end + 1),
628                 after(ack, receiver->td_end - MAXACKWINDOW(sender)));
629
630         if (before(seq, sender->td_maxend + 1) &&
631             after(end, sender->td_end - receiver->td_maxwin - 1) &&
632             before(sack, receiver->td_end + 1) &&
633             after(ack, receiver->td_end - MAXACKWINDOW(sender))) {
634                 /*
635                  * Take into account window scaling (RFC 1323).
636                  */
637                 if (!tcph->syn)
638                         win <<= sender->td_scale;
639
640                 /*
641                  * Update sender data.
642                  */
643                 swin = win + (sack - ack);
644                 if (sender->td_maxwin < swin)
645                         sender->td_maxwin = swin;
646                 if (after(end, sender->td_end))
647                         sender->td_end = end;
648                 /*
649                  * Update receiver data.
650                  */
651                 if (after(end, sender->td_maxend))
652                         receiver->td_maxwin += end - sender->td_maxend;
653                 if (after(sack + win, receiver->td_maxend - 1)) {
654                         receiver->td_maxend = sack + win;
655                         if (win == 0)
656                                 receiver->td_maxend++;
657                 }
658
659                 /*
660                  * Check retransmissions.
661                  */
662                 if (index == TCP_ACK_SET) {
663                         if (state->last_dir == dir
664                             && state->last_seq == seq
665                             && state->last_ack == ack
666                             && state->last_end == end
667                             && state->last_win == win)
668                                 state->retrans++;
669                         else {
670                                 state->last_dir = dir;
671                                 state->last_seq = seq;
672                                 state->last_ack = ack;
673                                 state->last_end = end;
674                                 state->last_win = win;
675                                 state->retrans = 0;
676                         }
677                 }
678                 res = 1;
679         } else {
680                 res = 0;
681                 if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL ||
682                     nf_ct_tcp_be_liberal)
683                         res = 1;
684                 if (!res && LOG_INVALID(IPPROTO_TCP))
685                         nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
686                         "nf_ct_tcp: %s ",
687                         before(seq, sender->td_maxend + 1) ?
688                         after(end, sender->td_end - receiver->td_maxwin - 1) ?
689                         before(sack, receiver->td_end + 1) ?
690                         after(ack, receiver->td_end - MAXACKWINDOW(sender)) ? "BUG"
691                         : "ACK is under the lower bound (possible overly delayed ACK)"
692                         : "ACK is over the upper bound (ACKed data not seen yet)"
693                         : "SEQ is under the lower bound (already ACKed data retransmitted)"
694                         : "SEQ is over the upper bound (over the window of the receiver)");
695         }
696
697         DEBUGP("tcp_in_window: res=%i sender end=%u maxend=%u maxwin=%u "
698                "receiver end=%u maxend=%u maxwin=%u\n",
699                 res, sender->td_end, sender->td_maxend, sender->td_maxwin,
700                 receiver->td_end, receiver->td_maxend, receiver->td_maxwin);
701
702         return res;
703 }
704
#ifdef CONFIG_NF_NAT_NEEDED
/*
 * Update sender->td_end after NAT successfully mangled the packet.
 *
 * Recomputes the end sequence number of the segment at @dataoff in @skb
 * and, under tcp_lock, advances the td_end of direction @dir if the new
 * end is after it; last_end is always set to the new end.
 *
 * Caller must linearize skb at tcp header.
 */
void nf_conntrack_tcp_update(struct sk_buff *skb,
			     unsigned int dataoff,
			     struct nf_conn *conntrack,
			     int dir)
{
	struct tcphdr *tcph = (void *)skb->data + dataoff;
	struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[dir];
#ifdef DEBUGP_VARS
	struct ip_ct_tcp_state *receiver = &conntrack->proto.tcp.seen[!dir];
#endif
	__u32 end;

	end = segment_seq_plus_len(ntohl(tcph->seq), skb->len, dataoff, tcph);

	write_lock_bh(&tcp_lock);
	/*
	 * We have to worry for the ack in the reply packet only...
	 */
	if (after(end, sender->td_end))
		sender->td_end = end;
	conntrack->proto.tcp.last_end = end;
	write_unlock_bh(&tcp_lock);

	DEBUGP("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i "
	       "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
		sender->td_end, sender->td_maxend, sender->td_maxwin,
		sender->td_scale,
		receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
		receiver->td_scale);
}
EXPORT_SYMBOL_GPL(nf_conntrack_tcp_update);
#endif
739
/* Bit masks for the individual TCP flags; they index tcp_valid_flags[]. */
#define TH_FIN	0x01
#define TH_SYN	0x02
#define TH_RST	0x04
#define TH_PUSH	0x08
#define TH_ACK	0x10
#define TH_URG	0x20
#define TH_ECE	0x40
#define TH_CWR	0x80
748
749 /* table of valid flag combinations - PUSH, ECE and CWR are always valid */
750 static u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_ACK|TH_URG) + 1] =
751 {
752         [TH_SYN]                        = 1,
753         [TH_SYN|TH_URG]                 = 1,
754         [TH_SYN|TH_ACK]                 = 1,
755         [TH_RST]                        = 1,
756         [TH_RST|TH_ACK]                 = 1,
757         [TH_FIN|TH_ACK]                 = 1,
758         [TH_FIN|TH_ACK|TH_URG]          = 1,
759         [TH_ACK]                        = 1,
760         [TH_ACK|TH_URG]                 = 1,
761 };
762
763 /* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c.  */
764 static int tcp_error(struct sk_buff *skb,
765                      unsigned int dataoff,
766                      enum ip_conntrack_info *ctinfo,
767                      int pf,
768                      unsigned int hooknum)
769 {
770         struct tcphdr _tcph, *th;
771         unsigned int tcplen = skb->len - dataoff;
772         u_int8_t tcpflags;
773
774         /* Smaller that minimal TCP header? */
775         th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
776         if (th == NULL) {
777                 if (LOG_INVALID(IPPROTO_TCP))
778                         nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
779                                 "nf_ct_tcp: short packet ");
780                 return -NF_ACCEPT;
781         }
782
783         /* Not whole TCP header or malformed packet */
784         if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
785                 if (LOG_INVALID(IPPROTO_TCP))
786                         nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
787                                 "nf_ct_tcp: truncated/malformed packet ");
788                 return -NF_ACCEPT;
789         }
790
791         /* Checksum invalid? Ignore.
792          * We skip checking packets on the outgoing path
793          * because the checksum is assumed to be correct.
794          */
795         /* FIXME: Source route IP option packets --RR */
796         if (nf_conntrack_checksum &&
797             ((pf == PF_INET && hooknum == NF_IP_PRE_ROUTING) ||
798              (pf == PF_INET6 && hooknum == NF_IP6_PRE_ROUTING)) &&
799             nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) {
800                 if (LOG_INVALID(IPPROTO_TCP))
801                         nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
802                                   "nf_ct_tcp: bad TCP checksum ");
803                 return -NF_ACCEPT;
804         }
805
806         /* Check TCP flags. */
807         tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR|TH_PUSH));
808         if (!tcp_valid_flags[tcpflags]) {
809                 if (LOG_INVALID(IPPROTO_TCP))
810                         nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
811                                   "nf_ct_tcp: invalid TCP flag combination ");
812                 return -NF_ACCEPT;
813         }
814
815         return NF_ACCEPT;
816 }
817
818 /* Returns verdict for packet, or -1 for invalid. */
819 static int tcp_packet(struct nf_conn *conntrack,
820                       const struct sk_buff *skb,
821                       unsigned int dataoff,
822                       enum ip_conntrack_info ctinfo,
823                       int pf,
824                       unsigned int hooknum)
825 {
826         enum tcp_conntrack new_state, old_state;
827         enum ip_conntrack_dir dir;
828         struct tcphdr *th, _tcph;
829         unsigned long timeout;
830         unsigned int index;
831
832         th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
833         BUG_ON(th == NULL);
834
835         write_lock_bh(&tcp_lock);
836         old_state = conntrack->proto.tcp.state;
837         dir = CTINFO2DIR(ctinfo);
838         index = get_conntrack_index(th);
839         new_state = tcp_conntracks[dir][index][old_state];
840
841         switch (new_state) {
842         case TCP_CONNTRACK_IGNORE:
843                 /* Ignored packets:
844                  *
845                  * a) SYN in ORIGINAL
846                  * b) SYN/ACK in REPLY
847                  * c) ACK in reply direction after initial SYN in original.
848                  */
849                 if (index == TCP_SYNACK_SET
850                     && conntrack->proto.tcp.last_index == TCP_SYN_SET
851                     && conntrack->proto.tcp.last_dir != dir
852                     && ntohl(th->ack_seq) ==
853                              conntrack->proto.tcp.last_end) {
854                         /* This SYN/ACK acknowledges a SYN that we earlier
855                          * ignored as invalid. This means that the client and
856                          * the server are both in sync, while the firewall is
857                          * not. We kill this session and block the SYN/ACK so
858                          * that the client cannot but retransmit its SYN and
859                          * thus initiate a clean new session.
860                          */
861                         write_unlock_bh(&tcp_lock);
862                         if (LOG_INVALID(IPPROTO_TCP))
863                                 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
864                                           "nf_ct_tcp: killing out of sync session ");
865                         if (del_timer(&conntrack->timeout))
866                                 conntrack->timeout.function((unsigned long)
867                                                             conntrack);
868                         return -NF_DROP;
869                 }
870                 conntrack->proto.tcp.last_index = index;
871                 conntrack->proto.tcp.last_dir = dir;
872                 conntrack->proto.tcp.last_seq = ntohl(th->seq);
873                 conntrack->proto.tcp.last_end =
874                     segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th);
875
876                 write_unlock_bh(&tcp_lock);
877                 if (LOG_INVALID(IPPROTO_TCP))
878                         nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
879                                   "nf_ct_tcp: invalid packed ignored ");
880                 return NF_ACCEPT;
881         case TCP_CONNTRACK_MAX:
882                 /* Invalid packet */
883                 DEBUGP("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
884                        dir, get_conntrack_index(th),
885                        old_state);
886                 write_unlock_bh(&tcp_lock);
887                 if (LOG_INVALID(IPPROTO_TCP))
888                         nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
889                                   "nf_ct_tcp: invalid state ");
890                 return -NF_ACCEPT;
891         case TCP_CONNTRACK_SYN_SENT:
892                 if (old_state < TCP_CONNTRACK_TIME_WAIT)
893                         break;
894                 if ((conntrack->proto.tcp.seen[dir].flags &
895                         IP_CT_TCP_FLAG_CLOSE_INIT)
896                     || after(ntohl(th->seq),
897                              conntrack->proto.tcp.seen[dir].td_end)) {
898                         /* Attempt to reopen a closed connection.
899                         * Delete this connection and look up again. */
900                         write_unlock_bh(&tcp_lock);
901                         if (del_timer(&conntrack->timeout))
902                                 conntrack->timeout.function((unsigned long)
903                                                             conntrack);
904                         return -NF_REPEAT;
905                 } else {
906                         write_unlock_bh(&tcp_lock);
907                         if (LOG_INVALID(IPPROTO_TCP))
908                                 nf_log_packet(pf, 0, skb, NULL, NULL,
909                                               NULL, "nf_ct_tcp: invalid SYN");
910                         return -NF_ACCEPT;
911                 }
912         case TCP_CONNTRACK_CLOSE:
913                 if (index == TCP_RST_SET
914                     && ((test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)
915                          && conntrack->proto.tcp.last_index == TCP_SYN_SET)
916                         || (!test_bit(IPS_ASSURED_BIT, &conntrack->status)
917                             && conntrack->proto.tcp.last_index == TCP_ACK_SET))
918                     && ntohl(th->ack_seq) == conntrack->proto.tcp.last_end) {
919                         /* RST sent to invalid SYN or ACK we had let through
920                          * at a) and c) above:
921                          *
922                          * a) SYN was in window then
923                          * c) we hold a half-open connection.
924                          *
925                          * Delete our connection entry.
926                          * We skip window checking, because packet might ACK
927                          * segments we ignored. */
928                         goto in_window;
929                 }
930                 /* Just fall through */
931         default:
932                 /* Keep compilers happy. */
933                 break;
934         }
935
936         if (!tcp_in_window(&conntrack->proto.tcp, dir, index,
937                            skb, dataoff, th, pf)) {
938                 write_unlock_bh(&tcp_lock);
939                 return -NF_ACCEPT;
940         }
941      in_window:
942         /* From now on we have got in-window packets */
943         conntrack->proto.tcp.last_index = index;
944
945         DEBUGP("tcp_conntracks: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
946                "syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n",
947                 NIPQUAD(iph->saddr), ntohs(th->source),
948                 NIPQUAD(iph->daddr), ntohs(th->dest),
949                 (th->syn ? 1 : 0), (th->ack ? 1 : 0),
950                 (th->fin ? 1 : 0), (th->rst ? 1 : 0),
951                 old_state, new_state);
952
953         conntrack->proto.tcp.state = new_state;
954         if (old_state != new_state
955             && (new_state == TCP_CONNTRACK_FIN_WAIT
956                 || new_state == TCP_CONNTRACK_CLOSE))
957                 conntrack->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
958         timeout = conntrack->proto.tcp.retrans >= nf_ct_tcp_max_retrans
959                   && *tcp_timeouts[new_state] > nf_ct_tcp_timeout_max_retrans
960                   ? nf_ct_tcp_timeout_max_retrans : *tcp_timeouts[new_state];
961         write_unlock_bh(&tcp_lock);
962
963         nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
964         if (new_state != old_state)
965                 nf_conntrack_event_cache(IPCT_PROTOINFO, skb);
966
967         if (!test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) {
968                 /* If only reply is a RST, we can consider ourselves not to
969                    have an established connection: this is a fairly common
970                    problem case, so we can delete the conntrack
971                    immediately.  --RR */
972                 if (th->rst) {
973                         if (del_timer(&conntrack->timeout))
974                                 conntrack->timeout.function((unsigned long)
975                                                             conntrack);
976                         return NF_ACCEPT;
977                 }
978         } else if (!test_bit(IPS_ASSURED_BIT, &conntrack->status)
979                    && (old_state == TCP_CONNTRACK_SYN_RECV
980                        || old_state == TCP_CONNTRACK_ESTABLISHED)
981                    && new_state == TCP_CONNTRACK_ESTABLISHED) {
982                 /* Set ASSURED if we see see valid ack in ESTABLISHED
983                    after SYN_RECV or a valid answer for a picked up
984                    connection. */
985                 set_bit(IPS_ASSURED_BIT, &conntrack->status);
986                 nf_conntrack_event_cache(IPCT_STATUS, skb);
987         }
988         nf_ct_refresh_acct(conntrack, ctinfo, skb, timeout);
989
990         return NF_ACCEPT;
991 }
992
993 /* Called when a new connection for this protocol found. */
994 static int tcp_new(struct nf_conn *conntrack,
995                    const struct sk_buff *skb,
996                    unsigned int dataoff)
997 {
998         enum tcp_conntrack new_state;
999         struct tcphdr *th, _tcph;
1000 #ifdef DEBUGP_VARS
1001         struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[0];
1002         struct ip_ct_tcp_state *receiver = &conntrack->proto.tcp.seen[1];
1003 #endif
1004
1005         th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
1006         BUG_ON(th == NULL);
1007
1008         /* Don't need lock here: this conntrack not in circulation yet */
1009         new_state
1010                 = tcp_conntracks[0][get_conntrack_index(th)]
1011                 [TCP_CONNTRACK_NONE];
1012
1013         /* Invalid: delete conntrack */
1014         if (new_state >= TCP_CONNTRACK_MAX) {
1015                 DEBUGP("nf_ct_tcp: invalid new deleting.\n");
1016                 return 0;
1017         }
1018
1019         if (new_state == TCP_CONNTRACK_SYN_SENT) {
1020                 /* SYN packet */
1021                 conntrack->proto.tcp.seen[0].td_end =
1022                         segment_seq_plus_len(ntohl(th->seq), skb->len,
1023                                              dataoff, th);
1024                 conntrack->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1025                 if (conntrack->proto.tcp.seen[0].td_maxwin == 0)
1026                         conntrack->proto.tcp.seen[0].td_maxwin = 1;
1027                 conntrack->proto.tcp.seen[0].td_maxend =
1028                         conntrack->proto.tcp.seen[0].td_end;
1029
1030                 tcp_options(skb, dataoff, th, &conntrack->proto.tcp.seen[0]);
1031                 conntrack->proto.tcp.seen[1].flags = 0;
1032         } else if (nf_ct_tcp_loose == 0) {
1033                 /* Don't try to pick up connections. */
1034                 return 0;
1035         } else {
1036                 /*
1037                  * We are in the middle of a connection,
1038                  * its history is lost for us.
1039                  * Let's try to use the data from the packet.
1040                  */
1041                 conntrack->proto.tcp.seen[0].td_end =
1042                         segment_seq_plus_len(ntohl(th->seq), skb->len,
1043                                              dataoff, th);
1044                 conntrack->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1045                 if (conntrack->proto.tcp.seen[0].td_maxwin == 0)
1046                         conntrack->proto.tcp.seen[0].td_maxwin = 1;
1047                 conntrack->proto.tcp.seen[0].td_maxend =
1048                         conntrack->proto.tcp.seen[0].td_end +
1049                         conntrack->proto.tcp.seen[0].td_maxwin;
1050                 conntrack->proto.tcp.seen[0].td_scale = 0;
1051
1052                 /* We assume SACK and liberal window checking to handle
1053                  * window scaling */
1054                 conntrack->proto.tcp.seen[0].flags =
1055                 conntrack->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM |
1056                                                      IP_CT_TCP_FLAG_BE_LIBERAL;
1057         }
1058
1059         conntrack->proto.tcp.seen[1].td_end = 0;
1060         conntrack->proto.tcp.seen[1].td_maxend = 0;
1061         conntrack->proto.tcp.seen[1].td_maxwin = 1;
1062         conntrack->proto.tcp.seen[1].td_scale = 0;
1063
1064         /* tcp_packet will set them */
1065         conntrack->proto.tcp.state = TCP_CONNTRACK_NONE;
1066         conntrack->proto.tcp.last_index = TCP_NONE_SET;
1067
1068         DEBUGP("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
1069                "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
1070                 sender->td_end, sender->td_maxend, sender->td_maxwin,
1071                 sender->td_scale,
1072                 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
1073                 receiver->td_scale);
1074         return 1;
1075 }
1076
1077 #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
1078
1079 #include <linux/netfilter/nfnetlink.h>
1080 #include <linux/netfilter/nfnetlink_conntrack.h>
1081
1082 static int tcp_to_nfattr(struct sk_buff *skb, struct nfattr *nfa,
1083                          const struct nf_conn *ct)
1084 {
1085         struct nfattr *nest_parms;
1086         struct nf_ct_tcp_flags tmp = {};
1087
1088         read_lock_bh(&tcp_lock);
1089         nest_parms = NFA_NEST(skb, CTA_PROTOINFO_TCP);
1090         NFA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t),
1091                 &ct->proto.tcp.state);
1092
1093         NFA_PUT(skb, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL, sizeof(u_int8_t),
1094                 &ct->proto.tcp.seen[0].td_scale);
1095
1096         NFA_PUT(skb, CTA_PROTOINFO_TCP_WSCALE_REPLY, sizeof(u_int8_t),
1097                 &ct->proto.tcp.seen[1].td_scale);
1098
1099         tmp.flags = ct->proto.tcp.seen[0].flags;
1100         NFA_PUT(skb, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL,
1101                 sizeof(struct nf_ct_tcp_flags), &tmp);
1102
1103         tmp.flags = ct->proto.tcp.seen[1].flags;
1104         NFA_PUT(skb, CTA_PROTOINFO_TCP_FLAGS_REPLY,
1105                 sizeof(struct nf_ct_tcp_flags), &tmp);
1106         read_unlock_bh(&tcp_lock);
1107
1108         NFA_NEST_END(skb, nest_parms);
1109
1110         return 0;
1111
1112 nfattr_failure:
1113         read_unlock_bh(&tcp_lock);
1114         return -1;
1115 }
1116
1117 static const size_t cta_min_tcp[CTA_PROTOINFO_TCP_MAX] = {
1118         [CTA_PROTOINFO_TCP_STATE-1]           = sizeof(u_int8_t),
1119         [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL-1] = sizeof(u_int8_t),
1120         [CTA_PROTOINFO_TCP_WSCALE_REPLY-1]    = sizeof(u_int8_t),
1121         [CTA_PROTOINFO_TCP_FLAGS_ORIGINAL-1]  = sizeof(struct nf_ct_tcp_flags),
1122         [CTA_PROTOINFO_TCP_FLAGS_REPLY-1]     = sizeof(struct nf_ct_tcp_flags)
1123 };
1124
1125 static int nfattr_to_tcp(struct nfattr *cda[], struct nf_conn *ct)
1126 {
1127         struct nfattr *attr = cda[CTA_PROTOINFO_TCP-1];
1128         struct nfattr *tb[CTA_PROTOINFO_TCP_MAX];
1129
1130         /* updates could not contain anything about the private
1131          * protocol info, in that case skip the parsing */
1132         if (!attr)
1133                 return 0;
1134
1135         nfattr_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, attr);
1136
1137         if (nfattr_bad_size(tb, CTA_PROTOINFO_TCP_MAX, cta_min_tcp))
1138                 return -EINVAL;
1139
1140         if (!tb[CTA_PROTOINFO_TCP_STATE-1])
1141                 return -EINVAL;
1142
1143         write_lock_bh(&tcp_lock);
1144         ct->proto.tcp.state =
1145                 *(u_int8_t *)NFA_DATA(tb[CTA_PROTOINFO_TCP_STATE-1]);
1146
1147         if (tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL-1]) {
1148                 struct nf_ct_tcp_flags *attr =
1149                         NFA_DATA(tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL-1]);
1150                 ct->proto.tcp.seen[0].flags &= ~attr->mask;
1151                 ct->proto.tcp.seen[0].flags |= attr->flags & attr->mask;
1152         }
1153
1154         if (tb[CTA_PROTOINFO_TCP_FLAGS_REPLY-1]) {
1155                 struct nf_ct_tcp_flags *attr =
1156                         NFA_DATA(tb[CTA_PROTOINFO_TCP_FLAGS_REPLY-1]);
1157                 ct->proto.tcp.seen[1].flags &= ~attr->mask;
1158                 ct->proto.tcp.seen[1].flags |= attr->flags & attr->mask;
1159         }
1160
1161         if (tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL-1] &&
1162             tb[CTA_PROTOINFO_TCP_WSCALE_REPLY-1] &&
1163             ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_WINDOW_SCALE &&
1164             ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
1165                 ct->proto.tcp.seen[0].td_scale = *(u_int8_t *)
1166                         NFA_DATA(tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL-1]);
1167                 ct->proto.tcp.seen[1].td_scale = *(u_int8_t *)
1168                         NFA_DATA(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY-1]);
1169         }
1170         write_unlock_bh(&tcp_lock);
1171
1172         return 0;
1173 }
1174 #endif
1175
1176 #ifdef CONFIG_SYSCTL
1177 static unsigned int tcp_sysctl_table_users;
1178 static struct ctl_table_header *tcp_sysctl_header;
1179 static struct ctl_table tcp_sysctl_table[] = {
1180         {
1181                 .ctl_name       = NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT,
1182                 .procname       = "nf_conntrack_tcp_timeout_syn_sent",
1183                 .data           = &nf_ct_tcp_timeout_syn_sent,
1184                 .maxlen         = sizeof(unsigned int),
1185                 .mode           = 0644,
1186                 .proc_handler   = &proc_dointvec_jiffies,
1187         },
1188         {
1189                 .ctl_name       = NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV,
1190                 .procname       = "nf_conntrack_tcp_timeout_syn_recv",
1191                 .data           = &nf_ct_tcp_timeout_syn_recv,
1192                 .maxlen         = sizeof(unsigned int),
1193                 .mode           = 0644,
1194                 .proc_handler   = &proc_dointvec_jiffies,
1195         },
1196         {
1197                 .ctl_name       = NET_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED,
1198                 .procname       = "nf_conntrack_tcp_timeout_established",
1199                 .data           = &nf_ct_tcp_timeout_established,
1200                 .maxlen         = sizeof(unsigned int),
1201                 .mode           = 0644,
1202                 .proc_handler   = &proc_dointvec_jiffies,
1203         },
1204         {
1205                 .ctl_name       = NET_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT,
1206                 .procname       = "nf_conntrack_tcp_timeout_fin_wait",
1207                 .data           = &nf_ct_tcp_timeout_fin_wait,
1208                 .maxlen         = sizeof(unsigned int),
1209                 .mode           = 0644,
1210                 .proc_handler   = &proc_dointvec_jiffies,
1211         },
1212         {
1213                 .ctl_name       = NET_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT,
1214                 .procname       = "nf_conntrack_tcp_timeout_close_wait",
1215                 .data           = &nf_ct_tcp_timeout_close_wait,
1216                 .maxlen         = sizeof(unsigned int),
1217                 .mode           = 0644,
1218                 .proc_handler   = &proc_dointvec_jiffies,
1219         },
1220         {
1221                 .ctl_name       = NET_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK,
1222                 .procname       = "nf_conntrack_tcp_timeout_last_ack",
1223                 .data           = &nf_ct_tcp_timeout_last_ack,
1224                 .maxlen         = sizeof(unsigned int),
1225                 .mode           = 0644,
1226                 .proc_handler   = &proc_dointvec_jiffies,
1227         },
1228         {
1229                 .ctl_name       = NET_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT,
1230                 .procname       = "nf_conntrack_tcp_timeout_time_wait",
1231                 .data           = &nf_ct_tcp_timeout_time_wait,
1232                 .maxlen         = sizeof(unsigned int),
1233                 .mode           = 0644,
1234                 .proc_handler   = &proc_dointvec_jiffies,
1235         },
1236         {
1237                 .ctl_name       = NET_NF_CONNTRACK_TCP_TIMEOUT_CLOSE,
1238                 .procname       = "nf_conntrack_tcp_timeout_close",
1239                 .data           = &nf_ct_tcp_timeout_close,
1240                 .maxlen         = sizeof(unsigned int),
1241                 .mode           = 0644,
1242                 .proc_handler   = &proc_dointvec_jiffies,
1243         },
1244         {
1245                 .ctl_name       = NET_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS,
1246                 .procname       = "nf_conntrack_tcp_timeout_max_retrans",
1247                 .data           = &nf_ct_tcp_timeout_max_retrans,
1248                 .maxlen         = sizeof(unsigned int),
1249                 .mode           = 0644,
1250                 .proc_handler   = &proc_dointvec_jiffies,
1251         },
1252         {
1253                 .ctl_name       = NET_NF_CONNTRACK_TCP_LOOSE,
1254                 .procname       = "nf_conntrack_tcp_loose",
1255                 .data           = &nf_ct_tcp_loose,
1256                 .maxlen         = sizeof(unsigned int),
1257                 .mode           = 0644,
1258                 .proc_handler   = &proc_dointvec,
1259         },
1260         {
1261                 .ctl_name       = NET_NF_CONNTRACK_TCP_BE_LIBERAL,
1262                 .procname       = "nf_conntrack_tcp_be_liberal",
1263                 .data           = &nf_ct_tcp_be_liberal,
1264                 .maxlen         = sizeof(unsigned int),
1265                 .mode           = 0644,
1266                 .proc_handler   = &proc_dointvec,
1267         },
1268         {
1269                 .ctl_name       = NET_NF_CONNTRACK_TCP_MAX_RETRANS,
1270                 .procname       = "nf_conntrack_tcp_max_retrans",
1271                 .data           = &nf_ct_tcp_max_retrans,
1272                 .maxlen         = sizeof(unsigned int),
1273                 .mode           = 0644,
1274                 .proc_handler   = &proc_dointvec,
1275         },
1276         {
1277                 .ctl_name       = 0
1278         }
1279 };
1280
1281 #ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
1282 static struct ctl_table tcp_compat_sysctl_table[] = {
1283         {
1284                 .ctl_name       = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT,
1285                 .procname       = "ip_conntrack_tcp_timeout_syn_sent",
1286                 .data           = &nf_ct_tcp_timeout_syn_sent,
1287                 .maxlen         = sizeof(unsigned int),
1288                 .mode           = 0644,
1289                 .proc_handler   = &proc_dointvec_jiffies,
1290         },
1291         {
1292                 .ctl_name       = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV,
1293                 .procname       = "ip_conntrack_tcp_timeout_syn_recv",
1294                 .data           = &nf_ct_tcp_timeout_syn_recv,
1295                 .maxlen         = sizeof(unsigned int),
1296                 .mode           = 0644,
1297                 .proc_handler   = &proc_dointvec_jiffies,
1298         },
1299         {
1300                 .ctl_name       = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED,
1301                 .procname       = "ip_conntrack_tcp_timeout_established",
1302                 .data           = &nf_ct_tcp_timeout_established,
1303                 .maxlen         = sizeof(unsigned int),
1304                 .mode           = 0644,
1305                 .proc_handler   = &proc_dointvec_jiffies,
1306         },
1307         {
1308                 .ctl_name       = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT,
1309                 .procname       = "ip_conntrack_tcp_timeout_fin_wait",
1310                 .data           = &nf_ct_tcp_timeout_fin_wait,
1311                 .maxlen         = sizeof(unsigned int),
1312                 .mode           = 0644,
1313                 .proc_handler   = &proc_dointvec_jiffies,
1314         },
1315         {
1316                 .ctl_name       = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT,
1317                 .procname       = "ip_conntrack_tcp_timeout_close_wait",
1318                 .data           = &nf_ct_tcp_timeout_close_wait,
1319                 .maxlen         = sizeof(unsigned int),
1320                 .mode           = 0644,
1321                 .proc_handler   = &proc_dointvec_jiffies,
1322         },
1323         {
1324                 .ctl_name       = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK,
1325                 .procname       = "ip_conntrack_tcp_timeout_last_ack",
1326                 .data           = &nf_ct_tcp_timeout_last_ack,
1327                 .maxlen         = sizeof(unsigned int),
1328                 .mode           = 0644,
1329                 .proc_handler   = &proc_dointvec_jiffies,
1330         },
1331         {
1332                 .ctl_name       = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT,
1333                 .procname       = "ip_conntrack_tcp_timeout_time_wait",
1334                 .data           = &nf_ct_tcp_timeout_time_wait,
1335                 .maxlen         = sizeof(unsigned int),
1336                 .mode           = 0644,
1337                 .proc_handler   = &proc_dointvec_jiffies,
1338         },
1339         {
1340                 .ctl_name       = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE,
1341                 .procname       = "ip_conntrack_tcp_timeout_close",
1342                 .data           = &nf_ct_tcp_timeout_close,
1343                 .maxlen         = sizeof(unsigned int),
1344                 .mode           = 0644,
1345                 .proc_handler   = &proc_dointvec_jiffies,
1346         },
1347         {
1348                 .ctl_name       = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS,
1349                 .procname       = "ip_conntrack_tcp_timeout_max_retrans",
1350                 .data           = &nf_ct_tcp_timeout_max_retrans,
1351                 .maxlen         = sizeof(unsigned int),
1352                 .mode           = 0644,
1353                 .proc_handler   = &proc_dointvec_jiffies,
1354         },
1355         {
1356                 .ctl_name       = NET_IPV4_NF_CONNTRACK_TCP_LOOSE,
1357                 .procname       = "ip_conntrack_tcp_loose",
1358                 .data           = &nf_ct_tcp_loose,
1359                 .maxlen         = sizeof(unsigned int),
1360                 .mode           = 0644,
1361                 .proc_handler   = &proc_dointvec,
1362         },
1363         {
1364                 .ctl_name       = NET_IPV4_NF_CONNTRACK_TCP_BE_LIBERAL,
1365                 .procname       = "ip_conntrack_tcp_be_liberal",
1366                 .data           = &nf_ct_tcp_be_liberal,
1367                 .maxlen         = sizeof(unsigned int),
1368                 .mode           = 0644,
1369                 .proc_handler   = &proc_dointvec,
1370         },
1371         {
1372                 .ctl_name       = NET_IPV4_NF_CONNTRACK_TCP_MAX_RETRANS,
1373                 .procname       = "ip_conntrack_tcp_max_retrans",
1374                 .data           = &nf_ct_tcp_max_retrans,
1375                 .maxlen         = sizeof(unsigned int),
1376                 .mode           = 0644,
1377                 .proc_handler   = &proc_dointvec,
1378         },
1379         {
1380                 .ctl_name       = 0
1381         }
1382 };
1383 #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
1384 #endif /* CONFIG_SYSCTL */
1385
1386 struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 =
1387 {
1388         .l3proto                = PF_INET,
1389         .l4proto                = IPPROTO_TCP,
1390         .name                   = "tcp",
1391         .pkt_to_tuple           = tcp_pkt_to_tuple,
1392         .invert_tuple           = tcp_invert_tuple,
1393         .print_tuple            = tcp_print_tuple,
1394         .print_conntrack        = tcp_print_conntrack,
1395         .packet                 = tcp_packet,
1396         .new                    = tcp_new,
1397         .error                  = tcp_error,
1398 #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
1399         .to_nfattr              = tcp_to_nfattr,
1400         .from_nfattr            = nfattr_to_tcp,
1401         .tuple_to_nfattr        = nf_ct_port_tuple_to_nfattr,
1402         .nfattr_to_tuple        = nf_ct_port_nfattr_to_tuple,
1403 #endif
1404 #ifdef CONFIG_SYSCTL
1405         .ctl_table_users        = &tcp_sysctl_table_users,
1406         .ctl_table_header       = &tcp_sysctl_header,
1407         .ctl_table              = tcp_sysctl_table,
1408 #ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
1409         .ctl_compat_table       = tcp_compat_sysctl_table,
1410 #endif
1411 #endif
1412 };
1413 EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp4);
1414
1415 struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 =
1416 {
1417         .l3proto                = PF_INET6,
1418         .l4proto                = IPPROTO_TCP,
1419         .name                   = "tcp",
1420         .pkt_to_tuple           = tcp_pkt_to_tuple,
1421         .invert_tuple           = tcp_invert_tuple,
1422         .print_tuple            = tcp_print_tuple,
1423         .print_conntrack        = tcp_print_conntrack,
1424         .packet                 = tcp_packet,
1425         .new                    = tcp_new,
1426         .error                  = tcp_error,
1427 #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
1428         .to_nfattr              = tcp_to_nfattr,
1429         .from_nfattr            = nfattr_to_tcp,
1430         .tuple_to_nfattr        = nf_ct_port_tuple_to_nfattr,
1431         .nfattr_to_tuple        = nf_ct_port_nfattr_to_tuple,
1432 #endif
1433 #ifdef CONFIG_SYSCTL
1434         .ctl_table_users        = &tcp_sysctl_table_users,
1435         .ctl_table_header       = &tcp_sysctl_header,
1436         .ctl_table              = tcp_sysctl_table,
1437 #endif
1438 };
1439 EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp6);