From: David S. Miller Date: Tue, 5 Nov 2013 00:46:58 +0000 (-0500) Subject: Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next X-Git-Tag: drm-fsl-dcu-for-next~6436^2~64 X-Git-Url: http://git.agner.ch/gitweb/?p=linux-drm-fsl-dcu.git;a=commitdiff_plain;h=72c39a0ade6229a938736fe1aa1d5e471fc7face;hp=-c Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next Pablo Neira Ayuso says: ==================== This is another batch containing Netfilter/IPVS updates for your net-next tree, they are: * Six patches to make the ipt_CLUSTERIP target support network namespaces, from Gao feng. * Two cleanups for the nf_conntrack_acct infrastructure, introducing a new structure to encapsulate conntrack counters, from Holger Eitzenberger. * Fix missing verdict in SCTP support for IPVS, from Daniel Borkmann. * Skip checksum recalculation in SCTP support for IPVS, also from Daniel Borkmann. * Fix behavioural change in xt_socket after IP early demux, from Florian Westphal. * Fix bogus large memory allocation in the bitmap port set type in ipset, from Jozsef Kadlecsik. * Fix possible compilation issues in the hash netnet set type in ipset, also from Jozsef Kadlecsik. * Define constants to identify netlink callback data in ipset dumps, again from Jozsef Kadlecsik. * Use sock_gen_put() in xt_socket to replace xt_socket_put_sk, from Eric Dumazet. * Improvements for the SH scheduler in IPVS, from Alexander Frolkin. * Remove extra delay due to unneeded rcu barrier in IPVS net namespace cleanup path, from Julian Anastasov. * Save some cycles in ip6t_REJECT by skipping checksum validation in packets leaving from our stack, from Stanislav Fomichev. * Fix IPVS_CMD_ATTR_MAX definition in IPVS, larger than required, from Julian Anastasov. ==================== Signed-off-by: David S. 
Miller --- 72c39a0ade6229a938736fe1aa1d5e471fc7face diff --combined net/bridge/br_netfilter.c index 878f008afefa,3d553120ff04..80cad2cf02a7 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@@ -559,6 -559,8 +559,8 @@@ static struct net_device *setup_pre_rou else if (skb->protocol == htons(ETH_P_PPP_SES)) nf_bridge->mask |= BRNF_PPPoE; + /* Must drop socket now because of tproxy. */ + skb_orphan(skb); return skb->dev; } @@@ -619,7 -621,7 +621,7 @@@ bad /* Replicate the checks that IPv6 does on packet reception and pass the packet * to ip6tables, which doesn't support NAT, so things are fairly simple. */ -static unsigned int br_nf_pre_routing_ipv6(unsigned int hook, +static unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@@ -669,8 -671,7 +671,8 @@@ * receiving device) to make netfilter happy, the REDIRECT * target in particular. Save the original destination IP * address to be able to detect DNAT afterwards. */ -static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb, +static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@@ -692,7 -693,7 +694,7 @@@ return NF_ACCEPT; nf_bridge_pull_encap_header_rcsum(skb); - return br_nf_pre_routing_ipv6(hook, skb, in, out, okfn); + return br_nf_pre_routing_ipv6(ops, skb, in, out, okfn); } if (!brnf_call_iptables && !br->nf_call_iptables) @@@ -728,8 -729,7 +730,8 @@@ * took place when the packet entered the bridge), but we * register an IPv4 PRE_ROUTING 'sabotage' hook that will * prevent this from happening. 
*/ -static unsigned int br_nf_local_in(unsigned int hook, struct sk_buff *skb, +static unsigned int br_nf_local_in(const struct nf_hook_ops *ops, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@@ -767,8 -767,7 +769,8 @@@ static int br_nf_forward_finish(struct * but we are still able to filter on the 'real' indev/outdev * because of the physdev module. For ARP, indev and outdev are the * bridge ports. */ -static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb, +static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@@ -821,8 -820,7 +823,8 @@@ return NF_STOLEN; } -static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff *skb, +static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@@ -882,8 -880,7 +884,8 @@@ static int br_nf_dev_queue_xmit(struct #endif /* PF_BRIDGE/POST_ROUTING ********************************************/ -static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb, +static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@@ -928,8 -925,7 +930,8 @@@ /* IP/SABOTAGE *****************************************************/ /* Don't hand locally destined packets to PF_INET(6)/PRE_ROUTING * for the second time. 
*/ -static unsigned int ip_sabotage_in(unsigned int hook, struct sk_buff *skb, +static unsigned int ip_sabotage_in(const struct nf_hook_ops *ops, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) diff --combined net/ipv4/netfilter/ipt_CLUSTERIP.c index a2e2b61cd7da,ecd808a93b63..2510c02c2d21 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@@ -28,6 -28,7 +28,7 @@@ #include #include #include + #include #include #include @@@ -57,15 -58,21 +58,21 @@@ struct clusterip_config struct rcu_head rcu; }; - static LIST_HEAD(clusterip_configs); + #ifdef CONFIG_PROC_FS + static const struct file_operations clusterip_proc_fops; + #endif - /* clusterip_lock protects the clusterip_configs list */ - static DEFINE_SPINLOCK(clusterip_lock); + static int clusterip_net_id __read_mostly; + + struct clusterip_net { + struct list_head configs; + /* lock protects the configs list */ + spinlock_t lock; #ifdef CONFIG_PROC_FS - static const struct file_operations clusterip_proc_fops; - static struct proc_dir_entry *clusterip_procdir; + struct proc_dir_entry *procdir; #endif + }; static inline void clusterip_config_get(struct clusterip_config *c) @@@ -92,10 -99,13 +99,13 @@@ clusterip_config_put(struct clusterip_c static inline void clusterip_config_entry_put(struct clusterip_config *c) { + struct net *net = dev_net(c->dev); + struct clusterip_net *cn = net_generic(net, clusterip_net_id); + local_bh_disable(); - if (atomic_dec_and_lock(&c->entries, &clusterip_lock)) { + if (atomic_dec_and_lock(&c->entries, &cn->lock)) { list_del_rcu(&c->list); - spin_unlock(&clusterip_lock); + spin_unlock(&cn->lock); local_bh_enable(); dev_mc_del(c->dev, c->clustermac); @@@ -113,11 -123,12 +123,12 @@@ } static struct clusterip_config * - __clusterip_config_find(__be32 clusterip) + __clusterip_config_find(struct net *net, __be32 clusterip) { struct clusterip_config *c; + struct clusterip_net *cn = net_generic(net, 
clusterip_net_id); - list_for_each_entry_rcu(c, &clusterip_configs, list) { + list_for_each_entry_rcu(c, &cn->configs, list) { if (c->clusterip == clusterip) return c; } @@@ -126,12 -137,12 +137,12 @@@ } static inline struct clusterip_config * - clusterip_config_find_get(__be32 clusterip, int entry) + clusterip_config_find_get(struct net *net, __be32 clusterip, int entry) { struct clusterip_config *c; rcu_read_lock_bh(); - c = __clusterip_config_find(clusterip); + c = __clusterip_config_find(net, clusterip); if (c) { if (unlikely(!atomic_inc_not_zero(&c->refcount))) c = NULL; @@@ -158,6 -169,7 +169,7 @@@ clusterip_config_init(const struct ipt_ struct net_device *dev) { struct clusterip_config *c; + struct clusterip_net *cn = net_generic(dev_net(dev), clusterip_net_id); c = kzalloc(sizeof(*c), GFP_ATOMIC); if (!c) @@@ -180,7 -192,7 +192,7 @@@ /* create proc dir entry */ sprintf(buffer, "%pI4", &ip); c->pde = proc_create_data(buffer, S_IWUSR|S_IRUSR, - clusterip_procdir, + cn->procdir, &clusterip_proc_fops, c); if (!c->pde) { kfree(c); @@@ -189,9 -201,9 +201,9 @@@ } #endif - spin_lock_bh(&clusterip_lock); - list_add_rcu(&c->list, &clusterip_configs); - spin_unlock_bh(&clusterip_lock); + spin_lock_bh(&cn->lock); + list_add_rcu(&c->list, &cn->configs); + spin_unlock_bh(&cn->lock); return c; } @@@ -370,7 -382,7 +382,7 @@@ static int clusterip_tg_check(const str /* FIXME: further sanity checks */ - config = clusterip_config_find_get(e->ip.dst.s_addr, 1); + config = clusterip_config_find_get(par->net, e->ip.dst.s_addr, 1); if (!config) { if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) { pr_info("no config found for %pI4, need 'new'\n", @@@ -384,7 -396,7 +396,7 @@@ return -EINVAL; } - dev = dev_get_by_name(&init_net, e->ip.iniface); + dev = dev_get_by_name(par->net, e->ip.iniface); if (!dev) { pr_info("no such interface %s\n", e->ip.iniface); @@@ -483,7 -495,7 +495,7 @@@ static void arp_print(struct arp_payloa #endif static unsigned int -arp_mangle(unsigned int hook, 
+arp_mangle(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@@ -492,6 -504,7 +504,7 @@@ struct arphdr *arp = arp_hdr(skb); struct arp_payload *payload; struct clusterip_config *c; + struct net *net = dev_net(in ? in : out); /* we don't care about non-ethernet and non-ipv4 ARP */ if (arp->ar_hrd != htons(ARPHRD_ETHER) || @@@ -508,7 -521,7 +521,7 @@@ /* if there is no clusterip configuration for the arp reply's * source ip, we don't want to mangle it */ - c = clusterip_config_find_get(payload->src_ip, 0); + c = clusterip_config_find_get(net, payload->src_ip, 0); if (!c) return NF_ACCEPT; @@@ -698,48 -711,75 +711,75 @@@ static const struct file_operations clu #endif /* CONFIG_PROC_FS */ + static int clusterip_net_init(struct net *net) + { + struct clusterip_net *cn = net_generic(net, clusterip_net_id); + + INIT_LIST_HEAD(&cn->configs); + + spin_lock_init(&cn->lock); + + #ifdef CONFIG_PROC_FS + cn->procdir = proc_mkdir("ipt_CLUSTERIP", net->proc_net); + if (!cn->procdir) { + pr_err("Unable to proc dir entry\n"); + return -ENOMEM; + } + #endif /* CONFIG_PROC_FS */ + + return 0; + } + + static void clusterip_net_exit(struct net *net) + { + #ifdef CONFIG_PROC_FS + struct clusterip_net *cn = net_generic(net, clusterip_net_id); + proc_remove(cn->procdir); + #endif + } + + static struct pernet_operations clusterip_net_ops = { + .init = clusterip_net_init, + .exit = clusterip_net_exit, + .id = &clusterip_net_id, + .size = sizeof(struct clusterip_net), + }; + static int __init clusterip_tg_init(void) { int ret; - ret = xt_register_target(&clusterip_tg_reg); + ret = register_pernet_subsys(&clusterip_net_ops); if (ret < 0) return ret; + ret = xt_register_target(&clusterip_tg_reg); + if (ret < 0) + goto cleanup_subsys; + ret = nf_register_hook(&cip_arp_ops); if (ret < 0) goto cleanup_target; - #ifdef CONFIG_PROC_FS - clusterip_procdir = proc_mkdir("ipt_CLUSTERIP", init_net.proc_net); - if (!clusterip_procdir) { - 
pr_err("Unable to proc dir entry\n"); - ret = -ENOMEM; - goto cleanup_hook; - } - #endif /* CONFIG_PROC_FS */ - pr_info("ClusterIP Version %s loaded successfully\n", CLUSTERIP_VERSION); + return 0; - #ifdef CONFIG_PROC_FS - cleanup_hook: - nf_unregister_hook(&cip_arp_ops); - #endif /* CONFIG_PROC_FS */ cleanup_target: xt_unregister_target(&clusterip_tg_reg); + cleanup_subsys: + unregister_pernet_subsys(&clusterip_net_ops); return ret; } static void __exit clusterip_tg_exit(void) { pr_info("ClusterIP Version %s unloading\n", CLUSTERIP_VERSION); - #ifdef CONFIG_PROC_FS - proc_remove(clusterip_procdir); - #endif + nf_unregister_hook(&cip_arp_ops); xt_unregister_target(&clusterip_tg_reg); + unregister_pernet_subsys(&clusterip_net_ops); /* Wait for completion of call_rcu_bh()'s (clusterip_config_rcu_free) */ rcu_barrier_bh();